diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index 38fa9c481..c8814c7a6 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -34,7 +34,7 @@ #define GLOBAL_SURFACE_ATLAS_TILE_PROJ_PLANE_OFFSET 0.1f // Small offset to prevent clipping with the closest triangles (shifts near and far planes) #define GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES 0 // Forces to redraw all object tiles every frame #define GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_OBJECTS 0 // Debug draws object bounds on redraw (and tile draw projection locations) -#define GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_CHUNKS 0 // Debug draws culled chunks bounds (non-empty +#define GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_CHUNKS 0 // Debug draws culled chunks bounds (non-empty) #if GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_OBJECTS || GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_CHUNKS #include "Engine/Debug/DebugDraw.h" @@ -704,11 +704,11 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co { for (int32 x = 0; x < GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION; x++) { - Float3 chunkCoord(x, y, z); - Float3 chunkMin = result.GlobalSurfaceAtlas.ViewPos + (chunkCoord - (GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * 0.5f)) * result.GlobalSurfaceAtlas.ChunkSize; - Float3 chunkMax = chunkMin + result.GlobalSurfaceAtlas.ChunkSize; + Float3 chunkCoord((float)x, (float)y, (float)z); + Float3 chunkMin = result.Constants.ViewPos + (chunkCoord - (GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * 0.5f)) * result.Constants.ChunkSize; + Float3 chunkMax = chunkMin + result.Constants.ChunkSize; BoundingBox chunkBounds(chunkMin, chunkMax); - if (Float3::Distance(chunkBounds.GetCenter(), result.GlobalSurfaceAtlas.ViewPos) >= 2000.0f) + if (Float3::Distance(chunkBounds.GetCenter(), result.Constants.ViewPos) >= 2000.0f) continue; int32 count = 0; diff --git a/Source/Shaders/GI/GlobalSurfaceAtlas.shader 
b/Source/Shaders/GI/GlobalSurfaceAtlas.shader index 41f16e819..07f48d2ae 100644 --- a/Source/Shaders/GI/GlobalSurfaceAtlas.shader +++ b/Source/Shaders/GI/GlobalSurfaceAtlas.shader @@ -190,6 +190,8 @@ RWByteAddressBuffer RWGlobalSurfaceAtlasChunks : register(u0); RWByteAddressBuffer RWGlobalSurfaceAtlasCulledObjects : register(u1); Buffer GlobalSurfaceAtlasObjects : register(t0); +#define GLOBAL_SURFACE_ATLAS_CULL_LOCAL_SIZE 32 // Number of objects to cache locally per-thread for culling + // Compute shader for culling objects into chunks META_CS(true, FEATURE_LEVEL_SM5) [numthreads(GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE, GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE, GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE)] @@ -203,7 +205,7 @@ void CS_CullObjects(uint3 DispatchThreadId : SV_DispatchThreadID) // Count objects in this chunk uint objectAddress = 0, objectsCount = 0; // TODO: pre-cull objects within a thread group - // TODO: maybe cache 20-30 culled object indices in thread memory to skip culling them again when copying data (maybe reuse chunk size to get smaller objects count per chunk)? 
+ uint localCulledObjects[GLOBAL_SURFACE_ATLAS_CULL_LOCAL_SIZE]; LOOP for (uint objectIndex = 0; objectIndex < GlobalSurfaceAtlas.ObjectsCount; objectIndex++) { @@ -211,6 +213,7 @@ void CS_CullObjects(uint3 DispatchThreadId : SV_DispatchThreadID) uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(GlobalSurfaceAtlasObjects, objectAddress); if (BoxIntersectsSphere(chunkMin, chunkMax, objectBounds.xyz, objectBounds.w)) { + localCulledObjects[objectsCount % GLOBAL_SURFACE_ATLAS_CULL_LOCAL_SIZE] = objectAddress; objectsCount++; } objectAddress += objectSize; @@ -240,19 +243,34 @@ void CS_CullObjects(uint3 DispatchThreadId : SV_DispatchThreadID) RWGlobalSurfaceAtlasCulledObjects.Store(objectsStart * 4, objectsCount); // Copy objects data in this chunk - objectAddress = 0; - LOOP - for (uint objectIndex = 0; objectIndex < GlobalSurfaceAtlas.ObjectsCount; objectIndex++) + if (objectsCount <= GLOBAL_SURFACE_ATLAS_CULL_LOCAL_SIZE) { - float4 objectBounds = LoadGlobalSurfaceAtlasObjectBounds(GlobalSurfaceAtlasObjects, objectAddress); - uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(GlobalSurfaceAtlasObjects, objectAddress); - if (BoxIntersectsSphere(chunkMin, chunkMax, objectBounds.xyz, objectBounds.w)) - { - objectsStart++; - RWGlobalSurfaceAtlasCulledObjects.Store(objectsStart * 4, objectAddress); - } - objectAddress += objectSize; + // Reuse locally cached objects + LOOP + for (uint objectIndex = 0; objectIndex < objectsCount; objectIndex++) + { + objectAddress = localCulledObjects[objectIndex]; + objectsStart++; + RWGlobalSurfaceAtlasCulledObjects.Store(objectsStart * 4, objectAddress); + } } + else + { + // Brute-force culling + objectAddress = 0; + LOOP + for (uint objectIndex = 0; objectIndex < GlobalSurfaceAtlas.ObjectsCount; objectIndex++) + { + float4 objectBounds = LoadGlobalSurfaceAtlasObjectBounds(GlobalSurfaceAtlasObjects, objectAddress); + uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(GlobalSurfaceAtlasObjects, objectAddress); + if 
(BoxIntersectsSphere(chunkMin, chunkMax, objectBounds.xyz, objectBounds.w)) + { + objectsStart++; + RWGlobalSurfaceAtlasCulledObjects.Store(objectsStart * 4, objectAddress); + } + objectAddress += objectSize; + } + } } #endif