Optimize Global Surface Atlas objects culling to cache up to 32 objects locally per-thread
This commit is contained in:
@@ -34,7 +34,7 @@
|
||||
#define GLOBAL_SURFACE_ATLAS_TILE_PROJ_PLANE_OFFSET 0.1f // Small offset to prevent clipping with the closest triangles (shifts near and far planes)
|
||||
#define GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES 0 // Forces to redraw all object tiles every frame
|
||||
#define GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_OBJECTS 0 // Debug draws object bounds on redraw (and tile draw projection locations)
|
||||
#define GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_CHUNKS 0 // Debug draws culled chunks bounds (non-empty
|
||||
#define GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_CHUNKS 0 // Debug draws culled chunks bounds (non-empty)
|
||||
|
||||
#if GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_OBJECTS || GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_CHUNKS
|
||||
#include "Engine/Debug/DebugDraw.h"
|
||||
@@ -704,11 +704,11 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
{
|
||||
for (int32 x = 0; x < GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION; x++)
|
||||
{
|
||||
Float3 chunkCoord(x, y, z);
|
||||
Float3 chunkMin = result.GlobalSurfaceAtlas.ViewPos + (chunkCoord - (GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * 0.5f)) * result.GlobalSurfaceAtlas.ChunkSize;
|
||||
Float3 chunkMax = chunkMin + result.GlobalSurfaceAtlas.ChunkSize;
|
||||
Float3 chunkCoord((float)x, (float)y, (float)z);
|
||||
Float3 chunkMin = result.Constants.ViewPos + (chunkCoord - (GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * 0.5f)) * result.Constants.ChunkSize;
|
||||
Float3 chunkMax = chunkMin + result.Constants.ChunkSize;
|
||||
BoundingBox chunkBounds(chunkMin, chunkMax);
|
||||
if (Float3::Distance(chunkBounds.GetCenter(), result.GlobalSurfaceAtlas.ViewPos) >= 2000.0f)
|
||||
if (Float3::Distance(chunkBounds.GetCenter(), result.Constants.ViewPos) >= 2000.0f)
|
||||
continue;
|
||||
|
||||
int32 count = 0;
|
||||
|
||||
@@ -190,6 +190,8 @@ RWByteAddressBuffer RWGlobalSurfaceAtlasChunks : register(u0);
|
||||
RWByteAddressBuffer RWGlobalSurfaceAtlasCulledObjects : register(u1);
|
||||
Buffer<float4> GlobalSurfaceAtlasObjects : register(t0);
|
||||
|
||||
#define GLOBAL_SURFACE_ATLAS_CULL_LOCAL_SIZE 32 // Amount of objects to cache locally per-thread for culling
|
||||
|
||||
// Compute shader for culling objects into chunks
|
||||
META_CS(true, FEATURE_LEVEL_SM5)
|
||||
[numthreads(GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE, GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE, GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE)]
|
||||
@@ -203,7 +205,7 @@ void CS_CullObjects(uint3 DispatchThreadId : SV_DispatchThreadID)
|
||||
// Count objects in this chunk
|
||||
uint objectAddress = 0, objectsCount = 0;
|
||||
// TODO: pre-cull objects within a thread group
|
||||
// TODO: maybe cache 20-30 culled object indices in thread memory to skip culling them again when copying data (maybe reuse chunk size to get smaller objects count per chunk)?
|
||||
uint localCulledObjects[GLOBAL_SURFACE_ATLAS_CULL_LOCAL_SIZE];
|
||||
LOOP
|
||||
for (uint objectIndex = 0; objectIndex < GlobalSurfaceAtlas.ObjectsCount; objectIndex++)
|
||||
{
|
||||
@@ -211,6 +213,7 @@ void CS_CullObjects(uint3 DispatchThreadId : SV_DispatchThreadID)
|
||||
uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(GlobalSurfaceAtlasObjects, objectAddress);
|
||||
if (BoxIntersectsSphere(chunkMin, chunkMax, objectBounds.xyz, objectBounds.w))
|
||||
{
|
||||
localCulledObjects[objectsCount % GLOBAL_SURFACE_ATLAS_CULL_LOCAL_SIZE] = objectAddress;
|
||||
objectsCount++;
|
||||
}
|
||||
objectAddress += objectSize;
|
||||
@@ -240,19 +243,34 @@ void CS_CullObjects(uint3 DispatchThreadId : SV_DispatchThreadID)
|
||||
RWGlobalSurfaceAtlasCulledObjects.Store(objectsStart * 4, objectsCount);
|
||||
|
||||
// Copy objects data in this chunk
|
||||
objectAddress = 0;
|
||||
LOOP
|
||||
for (uint objectIndex = 0; objectIndex < GlobalSurfaceAtlas.ObjectsCount; objectIndex++)
|
||||
if (objectsCount <= GLOBAL_SURFACE_ATLAS_CULL_LOCAL_SIZE)
|
||||
{
|
||||
float4 objectBounds = LoadGlobalSurfaceAtlasObjectBounds(GlobalSurfaceAtlasObjects, objectAddress);
|
||||
uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(GlobalSurfaceAtlasObjects, objectAddress);
|
||||
if (BoxIntersectsSphere(chunkMin, chunkMax, objectBounds.xyz, objectBounds.w))
|
||||
{
|
||||
objectsStart++;
|
||||
RWGlobalSurfaceAtlasCulledObjects.Store(objectsStart * 4, objectAddress);
|
||||
}
|
||||
objectAddress += objectSize;
|
||||
// Reuse locally cached objects
|
||||
LOOP
|
||||
for (uint objectIndex = 0; objectIndex < objectsCount; objectIndex++)
|
||||
{
|
||||
objectAddress = localCulledObjects[objectIndex];
|
||||
objectsStart++;
|
||||
RWGlobalSurfaceAtlasCulledObjects.Store(objectsStart * 4, objectAddress);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Brute-force culling
|
||||
objectAddress = 0;
|
||||
LOOP
|
||||
for (uint objectIndex = 0; objectIndex < GlobalSurfaceAtlas.ObjectsCount; objectIndex++)
|
||||
{
|
||||
float4 objectBounds = LoadGlobalSurfaceAtlasObjectBounds(GlobalSurfaceAtlasObjects, objectAddress);
|
||||
uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(GlobalSurfaceAtlasObjects, objectAddress);
|
||||
if (BoxIntersectsSphere(chunkMin, chunkMax, objectBounds.xyz, objectBounds.w))
|
||||
{
|
||||
objectsStart++;
|
||||
RWGlobalSurfaceAtlasCulledObjects.Store(objectsStart * 4, objectAddress);
|
||||
}
|
||||
objectAddress += objectSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user