From d8e79101e7a5ce6c77b4a7f10e316992961d6311 Mon Sep 17 00:00:00 2001 From: Wojciech Figat Date: Mon, 27 Jun 2022 16:07:54 +0200 Subject: [PATCH] Refactor Global Surface Atlas to not rewrite objects data and use indirection for faster culling --- .../GI/DynamicDiffuseGlobalIllumination.cpp | 11 ++-- .../Renderer/GI/GlobalSurfaceAtlasPass.cpp | 56 ++++++++++--------- .../Renderer/GI/GlobalSurfaceAtlasPass.h | 2 +- Source/Shaders/GI/DDGI.shader | 15 ++--- Source/Shaders/GI/GlobalSurfaceAtlas.hlsl | 34 ++++++----- Source/Shaders/GI/GlobalSurfaceAtlas.shader | 41 ++++++-------- 6 files changed, 80 insertions(+), 79 deletions(-) diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index c8975dba8..f428aa28a 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -564,11 +564,12 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext, context->BindSR(1, bindingDataSDF.TextureMip ? bindingDataSDF.TextureMip->ViewVolume() : nullptr); context->BindSR(2, bindingDataSurfaceAtlas.Chunks ? bindingDataSurfaceAtlas.Chunks->View() : nullptr); context->BindSR(3, bindingDataSurfaceAtlas.CulledObjects ? bindingDataSurfaceAtlas.CulledObjects->View() : nullptr); - context->BindSR(4, bindingDataSurfaceAtlas.AtlasDepth->View()); - context->BindSR(5, bindingDataSurfaceAtlas.AtlasLighting->View()); - context->BindSR(6, ddgiData.Result.ProbesState); - context->BindSR(7, skybox); - context->BindSR(8, ddgiData.ActiveProbes->View()); + context->BindSR(4, bindingDataSurfaceAtlas.Objects ? 
bindingDataSurfaceAtlas.Objects->View() : nullptr); + context->BindSR(5, bindingDataSurfaceAtlas.AtlasDepth->View()); + context->BindSR(6, bindingDataSurfaceAtlas.AtlasLighting->View()); + context->BindSR(7, ddgiData.Result.ProbesState); + context->BindSR(8, skybox); + context->BindSR(9, ddgiData.ActiveProbes->View()); context->BindUA(0, ddgiData.ProbesTrace->View()); context->DispatchIndirect(_csTraceRays[(int32)Graphics::GIQuality], ddgiData.UpdateProbesInitArgs, arg); context->ResetUA(); diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index cb34dba2f..38fa9c481 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -134,6 +134,7 @@ public: GPUTexture* AtlasLighting = nullptr; GPUBuffer* ChunksBuffer = nullptr; GPUBuffer* CulledObjectsBuffer = nullptr; + DynamicTypedBuffer ObjectsBuffer; int32 CulledObjectsCounterIndex = -1; GlobalSurfaceAtlasPass::BindingData Result; GlobalSurfaceAtlasTile* AtlasTiles = nullptr; // TODO: optimize with a single allocation for atlas tiles @@ -148,6 +149,11 @@ public: float DistanceScalingEnd; float DistanceScaling; + GlobalSurfaceAtlasCustomBuffer() + : ObjectsBuffer(256 * (GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE + GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE * 3 / 4), PixelFormat::R32G32B32A32_Float, false, TEXT("GlobalSurfaceAtlas.ObjectsBuffer")) + { + } + FORCE_INLINE void ClearObjects() { CulledObjectsCounterIndex = -1; @@ -309,7 +315,6 @@ void GlobalSurfaceAtlasPass::Dispose() // Cleanup SAFE_DELETE(_vertexBuffer); - SAFE_DELETE(_objectsBuffer); SAFE_DELETE_GPU_RESOURCE(_culledObjectsSizeBuffer); SAFE_DELETE_GPU_RESOURCE(_psClear); SAFE_DELETE_GPU_RESOURCE(_psDirectLighting0); @@ -395,8 +400,6 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co surfaceAtlasData.AtlasTiles = New(0, 0, resolution, resolution); if (!_vertexBuffer) _vertexBuffer = New(0u, 
(uint32)sizeof(AtlasTileVertex), TEXT("GlobalSurfaceAtlas.VertexBuffer")); - if (!_objectsBuffer) - _objectsBuffer = New(256 * (GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE + GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE * 3 / 4), PixelFormat::R32G32B32A32_Float, false, TEXT("GlobalSurfaceAtlas.ObjectsBuffer")); // Utility for writing into tiles vertex buffer const Float2 posToClipMul(2.0f * resolutionInv, -2.0f * resolutionInv); @@ -431,7 +434,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co // Add objects into the atlas { PROFILE_CPU_NAMED("Draw"); - _objectsBuffer->Clear(); + surfaceAtlasData.ObjectsBuffer.Clear(); _dirtyObjectsBuffer.Clear(); _surfaceAtlasData = &surfaceAtlasData; renderContext.View.Pass = DrawPass::GlobalSurfaceAtlas; @@ -590,7 +593,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co // Send objects data to the GPU { PROFILE_GPU_CPU("Update Objects"); - _objectsBuffer->Flush(context); + surfaceAtlasData.ObjectsBuffer.Flush(context); } // Init constants @@ -608,10 +611,10 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co // Each chunk (ChunksBuffer) contains uint with address of the culled objects data start in CulledObjectsBuffer. // If chunk has address=0 then it's unused/empty. // Chunk [0,0,0] is unused and it's address=0 is used for atomic counter for writing into CulledObjectsBuffer. - // Each chunk data contains objects count + all objects with tiles copied into buffer. - // This allows to quickly convert world-space position into chunk, then read chunk data start and loop over culled objects (less objects and data already in place). + // Each chunk data contains objects count + all objects addresses. + // This allows to quickly convert world-space position into chunk, then read chunk data start and loop over culled objects. 
PROFILE_GPU_CPU("Cull Objects"); - uint32 objectsBufferCapacity = (uint32)((float)_objectsBuffer->Data.Count() * 1.3f); + uint32 objectsBufferCapacity = (uint32)((float)surfaceAtlasData.Objects.Count() * 1.3f); // Copy counter from ChunksBuffer into staging buffer to access current chunks memory usage to adapt dynamically to the scene complexity if (surfaceAtlasData.ChunksBuffer) @@ -635,7 +638,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co _culledObjectsSizeBuffer->Unmap(); if (counter > 0) { - objectsBufferCapacity = counter * sizeof(Float4); + objectsBufferCapacity = counter; notReady = false; } } @@ -653,28 +656,28 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co } } } - if (surfaceAtlasData.CulledObjectsCounterIndex != -1) + if (surfaceAtlasData.CulledObjectsCounterIndex != -1 && surfaceAtlasData.CulledObjectsBuffer) { // Copy current counter value _culledObjectsSizeFrames[surfaceAtlasData.CulledObjectsCounterIndex] = currentFrame; - context->CopyBuffer(_culledObjectsSizeBuffer, surfaceAtlasData.ChunksBuffer, sizeof(uint32), surfaceAtlasData.CulledObjectsCounterIndex * sizeof(uint32), 0); + context->CopyBuffer(_culledObjectsSizeBuffer, surfaceAtlasData.CulledObjectsBuffer, sizeof(uint32), surfaceAtlasData.CulledObjectsCounterIndex * sizeof(uint32), 0); } } // Allocate buffer for culled objects (estimated size) - objectsBufferCapacity = Math::Min(Math::AlignUp(objectsBufferCapacity, 4096u), (uint32)MAX_int32); + objectsBufferCapacity = Math::Min(Math::AlignUp(objectsBufferCapacity * sizeof(uint32), 4096u), (uint32)MAX_int32); if (!surfaceAtlasData.CulledObjectsBuffer) surfaceAtlasData.CulledObjectsBuffer = GPUDevice::Instance->CreateBuffer(TEXT("GlobalSurfaceAtlas.CulledObjectsBuffer")); if (surfaceAtlasData.CulledObjectsBuffer->GetSize() < objectsBufferCapacity) { - const GPUBufferDescription desc = GPUBufferDescription::Buffer(objectsBufferCapacity, GPUBufferFlags::UnorderedAccess | 
GPUBufferFlags::ShaderResource, PixelFormat::R32G32B32A32_Float, nullptr, sizeof(Float4)); + const auto desc = GPUBufferDescription::Raw(objectsBufferCapacity, GPUBufferFlags::UnorderedAccess | GPUBufferFlags::ShaderResource); if (surfaceAtlasData.CulledObjectsBuffer->Init(desc)) return true; } - // Clear chunks counter (chunk at 0 is used for a counter so chunks buffer is aligned) - uint32 counter = 1; // Indicate that 1st float4 is used so value 0 can be used as invalid chunk address - context->UpdateBuffer(surfaceAtlasData.ChunksBuffer, &counter, sizeof(counter), 0); + // Clear culled objects counter (uint at 0 is used for a counter) + uint32 counter = 1; // Move write location for culled objects after counter + context->UpdateBuffer(surfaceAtlasData.CulledObjectsBuffer, &counter, sizeof(counter), 0); // Cull objects into chunks (1 thread per chunk) Data0 data; @@ -687,7 +690,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co context->BindCB(0, _cb0); static_assert(GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION % GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE == 0, "Invalid chunks resolution/groups setting."); const int32 chunkDispatchGroups = GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION / GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE; - context->BindSR(0, _objectsBuffer->GetBuffer()->View()); + context->BindSR(0, surfaceAtlasData.ObjectsBuffer.GetBuffer()->View()); context->BindUA(0, surfaceAtlasData.ChunksBuffer->View()); context->BindUA(1, surfaceAtlasData.CulledObjectsBuffer->View()); context->Dispatch(_csCullObjects, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups); @@ -734,6 +737,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co result.Atlas[4] = surfaceAtlasData.AtlasLighting; result.Chunks = surfaceAtlasData.ChunksBuffer; result.CulledObjects = surfaceAtlasData.CulledObjectsBuffer; + result.Objects = surfaceAtlasData.ObjectsBuffer.GetBuffer(); surfaceAtlasData.Result = result; // Render direct lighting into
atlas @@ -754,7 +758,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co context->BindSR(1, surfaceAtlasData.AtlasGBuffer1->View()); context->BindSR(2, surfaceAtlasData.AtlasGBuffer2->View()); context->BindSR(3, surfaceAtlasData.AtlasDepth->View()); - context->BindSR(4, _objectsBuffer->GetBuffer()->View()); + context->BindSR(4, surfaceAtlasData.ObjectsBuffer.GetBuffer()->View()); context->BindSR(5, bindingDataSDF.Texture ? bindingDataSDF.Texture->ViewVolume() : nullptr); context->BindSR(6, bindingDataSDF.TextureMip ? bindingDataSDF.TextureMip->ViewVolume() : nullptr); context->BindCB(0, _cb0); @@ -932,8 +936,9 @@ void GlobalSurfaceAtlasPass::RenderDebug(RenderContext& renderContext, GPUContex context->BindSR(1, bindingDataSDF.TextureMip ? bindingDataSDF.TextureMip->ViewVolume() : nullptr); context->BindSR(2, bindingData.Chunks ? bindingData.Chunks->View() : nullptr); context->BindSR(3, bindingData.CulledObjects ? bindingData.CulledObjects->View() : nullptr); - context->BindSR(4, bindingData.AtlasDepth->View()); - context->BindSR(6, skybox); + context->BindSR(4, bindingData.Objects ? bindingData.Objects->View() : nullptr); + context->BindSR(6, bindingData.AtlasDepth->View()); + context->BindSR(7, skybox); context->SetState(_psDebug); { Float2 outputSizeThird = outputSize * 0.333f; @@ -962,8 +967,9 @@ void GlobalSurfaceAtlasPass::RenderDebug(RenderContext& renderContext, GPUContex context->BindSR(1, bindingDataSDF.TextureMip ? bindingDataSDF.TextureMip->ViewVolume() : nullptr); context->BindSR(2, bindingData.Chunks ? bindingData.Chunks->View() : nullptr); context->BindSR(3, bindingData.CulledObjects ? bindingData.CulledObjects->View() : nullptr); - context->BindSR(4, bindingData.AtlasDepth->View()); - context->BindSR(6, skybox); + context->BindSR(4, bindingData.Objects ? 
bindingData.Objects->View() : nullptr); + context->BindSR(6, bindingData.AtlasDepth->View()); + context->BindSR(7, skybox); context->BindCB(0, _cb0); context->SetState(_psDebug); context->SetRenderTarget(output->View()); @@ -1079,8 +1085,8 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con object->Bounds.Transformation.GetWorld(localToWorldBounds); Matrix worldToLocalBounds; Matrix::Invert(localToWorldBounds, worldToLocalBounds); - uint32 objectAddress = _objectsBuffer->Data.Count() / sizeof(Float4); - auto* objectData = _objectsBuffer->WriteReserve(GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE); + uint32 objectAddress = surfaceAtlasData.ObjectsBuffer.Data.Count() / sizeof(Float4); + auto* objectData = surfaceAtlasData.ObjectsBuffer.WriteReserve(GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE); objectData[0] = *(Float4*)&actorObjectBounds; objectData[1] = Float4::Zero; // w unused objectData[2] = Float4(worldToLocalBounds.M11, worldToLocalBounds.M12, worldToLocalBounds.M13, worldToLocalBounds.M41); @@ -1131,7 +1137,7 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con // Per-tile data const float tileWidth = (float)tile->Width - GLOBAL_SURFACE_ATLAS_TILE_PADDING; const float tileHeight = (float)tile->Height - GLOBAL_SURFACE_ATLAS_TILE_PADDING; - auto* tileData = _objectsBuffer->WriteReserve(GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE); + auto* tileData = surfaceAtlasData.ObjectsBuffer.WriteReserve(GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE); tileData[0] = Float4(tile->X, tile->Y, tileWidth, tileHeight) * surfaceAtlasData.ResolutionInv; tileData[1] = Float4(tile->ViewMatrix.M11, tile->ViewMatrix.M12, tile->ViewMatrix.M13, tile->ViewMatrix.M41); tileData[2] = Float4(tile->ViewMatrix.M21, tile->ViewMatrix.M22, tile->ViewMatrix.M23, tile->ViewMatrix.M42); diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h index 394f3bb7a..0e971af64 100644 --- 
a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.h @@ -38,6 +38,7 @@ public: }; GPUBuffer* Chunks; GPUBuffer* CulledObjects; + GPUBuffer* Objects; ConstantsData Constants; }; @@ -54,7 +55,6 @@ private: // Cache class GPUBuffer* _culledObjectsSizeBuffer = nullptr; - class DynamicTypedBuffer* _objectsBuffer = nullptr; class DynamicVertexBuffer* _vertexBuffer = nullptr; class GlobalSurfaceAtlasCustomBuffer* _surfaceAtlasData; Array _dirtyObjectsBuffer; diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index 6ae11515d..75a2f68a8 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -202,12 +202,13 @@ RWTexture2D RWProbesTrace : register(u0); Texture3D GlobalSDFTex : register(t0); Texture3D GlobalSDFMip : register(t1); ByteAddressBuffer GlobalSurfaceAtlasChunks : register(t2); -Buffer GlobalSurfaceAtlasCulledObjects : register(t3); -Texture2D GlobalSurfaceAtlasDepth : register(t4); -Texture2D GlobalSurfaceAtlasTex : register(t5); -Texture2D ProbesState : register(t6); -TextureCube Skybox : register(t7); -ByteAddressBuffer ActiveProbes : register(t8); +ByteAddressBuffer RWGlobalSurfaceAtlasCulledObjects : register(t3); +Buffer GlobalSurfaceAtlasObjects : register(t4); +Texture2D GlobalSurfaceAtlasDepth : register(t5); +Texture2D GlobalSurfaceAtlasTex : register(t6); +Texture2D ProbesState : register(t7); +TextureCube Skybox : register(t8); +ByteAddressBuffer ActiveProbes : register(t9); // Compute shader for tracing rays for probes using Global SDF and Global Surface Atlas. 
META_CS(true, FEATURE_LEVEL_SM5) @@ -248,7 +249,7 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID) // Sample Global Surface Atlas to get the lighting at the hit location float3 hitPosition = hit.GetHitPosition(trace); float surfaceThreshold = GetGlobalSurfaceAtlasThreshold(GlobalSDF, hit); - float4 surfaceColor = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, GlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hitPosition, -probeRayDirection, surfaceThreshold); + float4 surfaceColor = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, RWGlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hitPosition, -probeRayDirection, surfaceThreshold); radiance = float4(surfaceColor.rgb, hit.HitTime); // Add some bias to prevent self occlusion artifacts in Chebyshev due to Global SDF being very incorrect in small scale diff --git a/Source/Shaders/GI/GlobalSurfaceAtlas.hlsl b/Source/Shaders/GI/GlobalSurfaceAtlas.hlsl index 5df1b5475..012756128 100644 --- a/Source/Shaders/GI/GlobalSurfaceAtlas.hlsl +++ b/Source/Shaders/GI/GlobalSurfaceAtlas.hlsl @@ -163,7 +163,7 @@ float4 SampleGlobalSurfaceAtlasTile(const GlobalSurfaceAtlasData data, GlobalSur // Samples the Global Surface Atlas and returns the lighting (with opacity) at the given world location (and direction). 
// surfaceThreshold - Additional threshold (in world-units) between object or tile size compared with input data (error due to SDF or LOD incorrect appearance) -float4 SampleGlobalSurfaceAtlas(const GlobalSurfaceAtlasData data, ByteAddressBuffer chunks, Buffer culledObjects, Texture2D depth, Texture2D atlas, float3 worldPosition, float3 worldNormal, float surfaceThreshold = 20.0f) +float4 SampleGlobalSurfaceAtlas(const GlobalSurfaceAtlasData data, ByteAddressBuffer chunks, ByteAddressBuffer culledObjects, Buffer objects, Texture2D depth, Texture2D atlas, float3 worldPosition, float3 worldNormal, float surfaceThreshold = 20.0f) { float4 result = float4(0, 0, 0, 0); @@ -178,24 +178,22 @@ float4 SampleGlobalSurfaceAtlas(const GlobalSurfaceAtlasData data, ByteAddressBu } // Read objects counter - float4 chunkHeader = culledObjects[objectsStart]; - objectsStart++; - uint objectsCount = asuint(chunkHeader.x); + uint objectsCount = culledObjects.Load(objectsStart * 4); if (objectsCount > data.ObjectsCount) // Prevents crashing - don't know why the data is invalid here (rare issue when moving fast though scene with terrain) return result; + objectsStart++; // Loop over culled objects inside the chunk LOOP for (uint objectIndex = 0; objectIndex < objectsCount; objectIndex++) { // Cull point vs sphere - uint objectAddress = objectsStart; - float4 objectBounds = LoadGlobalSurfaceAtlasObjectBounds(culledObjects, objectAddress); - uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(culledObjects, objectAddress); - objectsStart += objectSize; + uint objectAddress = culledObjects.Load(objectsStart * 4); + objectsStart++; + float4 objectBounds = LoadGlobalSurfaceAtlasObjectBounds(objects, objectAddress); if (distance(objectBounds.xyz, worldPosition) > objectBounds.w) continue; - GlobalSurfaceObject object = LoadGlobalSurfaceAtlasObject(culledObjects, objectAddress); + GlobalSurfaceObject object = LoadGlobalSurfaceAtlasObject(objects, objectAddress); float3 localPosition = 
mul(float4(worldPosition, 1), object.WorldToLocal).xyz; float3 localExtent = object.Extent + surfaceThreshold; if (any(localPosition > localExtent) || any(localPosition < -localExtent)) @@ -221,56 +219,56 @@ float4 SampleGlobalSurfaceAtlas(const GlobalSurfaceAtlasData data, ByteAddressBu uint tileOffset = object.TileOffsets[localNormal.x > 0.0f ? 0 : 1]; if (localNormalSq.x > GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD * GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD && tileOffset != 0) { - GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset); + GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset); result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold); } tileOffset = object.TileOffsets[localNormal.y > 0.0f ? 2 : 3]; if (localNormalSq.y > GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD * GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD && tileOffset != 0) { - GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset); + GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset); result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold); } tileOffset = object.TileOffsets[localNormal.z > 0.0f ? 
4 : 5]; if (localNormalSq.z > GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD * GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD && tileOffset != 0) { - GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset); + GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset); result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold); } #else uint tileOffset = object.TileOffsets[0]; if (tileOffset != 0) { - GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset); + GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset); result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold); } tileOffset = object.TileOffsets[1]; if (tileOffset != 0) { - GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset); + GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset); result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold); } tileOffset = object.TileOffsets[2]; if (tileOffset != 0) { - GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset); + GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset); result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold); } tileOffset = object.TileOffsets[3]; if (tileOffset != 0) { - GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset); + GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset); result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold); } tileOffset = object.TileOffsets[4]; if (tileOffset != 0) { - GlobalSurfaceTile tile = 
LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset); + GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset); result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold); } tileOffset = object.TileOffsets[5]; if (tileOffset != 0) { - GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset); + GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset); result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold); } #endif diff --git a/Source/Shaders/GI/GlobalSurfaceAtlas.shader b/Source/Shaders/GI/GlobalSurfaceAtlas.shader index 1c6c22571..41f16e819 100644 --- a/Source/Shaders/GI/GlobalSurfaceAtlas.shader +++ b/Source/Shaders/GI/GlobalSurfaceAtlas.shader @@ -187,24 +187,23 @@ float4 PS_Lighting(AtlasVertexOutput input) : SV_Target #include "./Flax/Collisions.hlsl" RWByteAddressBuffer RWGlobalSurfaceAtlasChunks : register(u0); -RWBuffer RWGlobalSurfaceAtlasCulledObjects : register(u1); +RWByteAddressBuffer RWGlobalSurfaceAtlasCulledObjects : register(u1); Buffer GlobalSurfaceAtlasObjects : register(t0); // Compute shader for culling objects into chunks META_CS(true, FEATURE_LEVEL_SM5) [numthreads(GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE, GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE, GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE)] -void CS_CullObjects(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_DispatchThreadID, uint3 GroupThreadId : SV_GroupThreadID) +void CS_CullObjects(uint3 DispatchThreadId : SV_DispatchThreadID) { uint3 chunkCoord = DispatchThreadId; uint chunkAddress = (chunkCoord.z * (GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION) + chunkCoord.y * GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION + chunkCoord.x) * 4; - if (chunkAddress == 0) - return; // Skip chunk at 0,0,0 (used for counter) float3 chunkMin = 
GlobalSurfaceAtlas.ViewPos + (chunkCoord - (GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * 0.5f)) * GlobalSurfaceAtlas.ChunkSize; float3 chunkMax = chunkMin + GlobalSurfaceAtlas.ChunkSize; - // Count objects data size in this chunk (amount of float4s) - uint objectsSize = 0, objectAddress = 0, objectsCount = 0; - // TODO: maybe cache 20-30 culled object indices in thread memory to skip culling them again when copying data (maybe reude chunk size to get smaller objects count per chunk)? + // Count objects in this chunk + uint objectAddress = 0, objectsCount = 0; + // TODO: pre-cull objects within a thread group + // TODO: maybe cache 20-30 culled object indices in thread memory to skip culling them again when copying data (maybe reduce chunk size to get smaller objects count per chunk)? LOOP for (uint objectIndex = 0; objectIndex < GlobalSurfaceAtlas.ObjectsCount; objectIndex++) { @@ -212,22 +211,21 @@ void CS_CullObjects(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_Disp uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(GlobalSurfaceAtlasObjects, objectAddress); if (BoxIntersectsSphere(chunkMin, chunkMax, objectBounds.xyz, objectBounds.w)) { - objectsSize += objectSize; objectsCount++; } objectAddress += objectSize; } - if (objectsSize == 0) + if (objectsCount == 0) { // Empty chunk RWGlobalSurfaceAtlasChunks.Store(chunkAddress, 0); return; } - objectsSize++; // Include objects count before actual objects data // Allocate object data size in the buffer uint objectsStart; - RWGlobalSurfaceAtlasChunks.InterlockedAdd(0, objectsSize, objectsStart); + uint objectsSize = objectsCount + 1; // Include objects count before actual objects data + RWGlobalSurfaceAtlasCulledObjects.InterlockedAdd(0, objectsSize, objectsStart); // Counter at 0 if (objectsStart + objectsSize > CulledObjectsCapacity) { // Not enough space in the buffer @@ -238,9 +236,8 @@ void CS_CullObjects(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_Disp // Write object data start
RWGlobalSurfaceAtlasChunks.Store(chunkAddress, objectsStart); - // Write objects count before actual objects data - RWGlobalSurfaceAtlasCulledObjects[objectsStart] = float4(asfloat(objectsCount), 0, 0, 0); - objectsStart++; + // Write objects count before actual objects indices + RWGlobalSurfaceAtlasCulledObjects.Store(objectsStart * 4, objectsCount); // Copy objects data in this chunk objectAddress = 0; @@ -251,11 +248,8 @@ void CS_CullObjects(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_Disp uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(GlobalSurfaceAtlasObjects, objectAddress); if (BoxIntersectsSphere(chunkMin, chunkMax, objectBounds.xyz, objectBounds.w)) { - for (uint i = 0; i < objectSize; i++) - { - RWGlobalSurfaceAtlasCulledObjects[objectsStart + i] = GlobalSurfaceAtlasObjects[objectAddress + i]; - } - objectsStart += objectSize; + objectsStart++; + RWGlobalSurfaceAtlasCulledObjects.Store(objectsStart * 4, objectAddress); } objectAddress += objectSize; } @@ -268,10 +262,11 @@ void CS_CullObjects(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_Disp Texture3D GlobalSDFTex : register(t0); Texture3D GlobalSDFMip : register(t1); ByteAddressBuffer GlobalSurfaceAtlasChunks : register(t2); -Buffer GlobalSurfaceAtlasCulledObjects : register(t3); -Texture2D GlobalSurfaceAtlasDepth : register(t4); +ByteAddressBuffer GlobalSurfaceAtlasCulledObjects : register(t3); +Buffer GlobalSurfaceAtlasObjects : register(t4); Texture2D GlobalSurfaceAtlasTex : register(t5); -TextureCube Skybox : register(t6); +Texture2D GlobalSurfaceAtlasDepth : register(t6); +TextureCube Skybox : register(t7); // Pixel shader for Global Surface Atlas debug drawing META_PS(true, FEATURE_LEVEL_SM5) @@ -295,7 +290,7 @@ float4 PS_Debug(Quad_VS2PS input) : SV_Target { // Sample Global Surface Atlas at the hit location float surfaceThreshold = GetGlobalSurfaceAtlasThreshold(GlobalSDF, hit); - color = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, 
GlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hit.GetHitPosition(trace), -viewRay, surfaceThreshold).rgb; + color = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, GlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hit.GetHitPosition(trace), -viewRay, surfaceThreshold).rgb; //color = hit.HitNormal * 0.5f + 0.5f; } else