Refactor Global Surface Atlas to not rewrite objects data and use indirection for faster culling
This commit is contained in:
@@ -564,11 +564,12 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
|
||||
context->BindSR(1, bindingDataSDF.TextureMip ? bindingDataSDF.TextureMip->ViewVolume() : nullptr);
|
||||
context->BindSR(2, bindingDataSurfaceAtlas.Chunks ? bindingDataSurfaceAtlas.Chunks->View() : nullptr);
|
||||
context->BindSR(3, bindingDataSurfaceAtlas.CulledObjects ? bindingDataSurfaceAtlas.CulledObjects->View() : nullptr);
|
||||
context->BindSR(4, bindingDataSurfaceAtlas.AtlasDepth->View());
|
||||
context->BindSR(5, bindingDataSurfaceAtlas.AtlasLighting->View());
|
||||
context->BindSR(6, ddgiData.Result.ProbesState);
|
||||
context->BindSR(7, skybox);
|
||||
context->BindSR(8, ddgiData.ActiveProbes->View());
|
||||
context->BindSR(4, bindingDataSurfaceAtlas.Objects ? bindingDataSurfaceAtlas.Objects->View() : nullptr);
|
||||
context->BindSR(5, bindingDataSurfaceAtlas.AtlasDepth->View());
|
||||
context->BindSR(6, bindingDataSurfaceAtlas.AtlasLighting->View());
|
||||
context->BindSR(7, ddgiData.Result.ProbesState);
|
||||
context->BindSR(8, skybox);
|
||||
context->BindSR(9, ddgiData.ActiveProbes->View());
|
||||
context->BindUA(0, ddgiData.ProbesTrace->View());
|
||||
context->DispatchIndirect(_csTraceRays[(int32)Graphics::GIQuality], ddgiData.UpdateProbesInitArgs, arg);
|
||||
context->ResetUA();
|
||||
|
||||
@@ -134,6 +134,7 @@ public:
|
||||
GPUTexture* AtlasLighting = nullptr;
|
||||
GPUBuffer* ChunksBuffer = nullptr;
|
||||
GPUBuffer* CulledObjectsBuffer = nullptr;
|
||||
DynamicTypedBuffer ObjectsBuffer;
|
||||
int32 CulledObjectsCounterIndex = -1;
|
||||
GlobalSurfaceAtlasPass::BindingData Result;
|
||||
GlobalSurfaceAtlasTile* AtlasTiles = nullptr; // TODO: optimize with a single allocation for atlas tiles
|
||||
@@ -148,6 +149,11 @@ public:
|
||||
float DistanceScalingEnd;
|
||||
float DistanceScaling;
|
||||
|
||||
GlobalSurfaceAtlasCustomBuffer()
|
||||
: ObjectsBuffer(256 * (GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE + GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE * 3 / 4), PixelFormat::R32G32B32A32_Float, false, TEXT("GlobalSurfaceAtlas.ObjectsBuffer"))
|
||||
{
|
||||
}
|
||||
|
||||
FORCE_INLINE void ClearObjects()
|
||||
{
|
||||
CulledObjectsCounterIndex = -1;
|
||||
@@ -309,7 +315,6 @@ void GlobalSurfaceAtlasPass::Dispose()
|
||||
|
||||
// Cleanup
|
||||
SAFE_DELETE(_vertexBuffer);
|
||||
SAFE_DELETE(_objectsBuffer);
|
||||
SAFE_DELETE_GPU_RESOURCE(_culledObjectsSizeBuffer);
|
||||
SAFE_DELETE_GPU_RESOURCE(_psClear);
|
||||
SAFE_DELETE_GPU_RESOURCE(_psDirectLighting0);
|
||||
@@ -395,8 +400,6 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
surfaceAtlasData.AtlasTiles = New<GlobalSurfaceAtlasTile>(0, 0, resolution, resolution);
|
||||
if (!_vertexBuffer)
|
||||
_vertexBuffer = New<DynamicVertexBuffer>(0u, (uint32)sizeof(AtlasTileVertex), TEXT("GlobalSurfaceAtlas.VertexBuffer"));
|
||||
if (!_objectsBuffer)
|
||||
_objectsBuffer = New<DynamicTypedBuffer>(256 * (GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE + GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE * 3 / 4), PixelFormat::R32G32B32A32_Float, false, TEXT("GlobalSurfaceAtlas.ObjectsBuffer"));
|
||||
|
||||
// Utility for writing into tiles vertex buffer
|
||||
const Float2 posToClipMul(2.0f * resolutionInv, -2.0f * resolutionInv);
|
||||
@@ -431,7 +434,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
// Add objects into the atlas
|
||||
{
|
||||
PROFILE_CPU_NAMED("Draw");
|
||||
_objectsBuffer->Clear();
|
||||
surfaceAtlasData.ObjectsBuffer.Clear();
|
||||
_dirtyObjectsBuffer.Clear();
|
||||
_surfaceAtlasData = &surfaceAtlasData;
|
||||
renderContext.View.Pass = DrawPass::GlobalSurfaceAtlas;
|
||||
@@ -590,7 +593,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
// Send objects data to the GPU
|
||||
{
|
||||
PROFILE_GPU_CPU("Update Objects");
|
||||
_objectsBuffer->Flush(context);
|
||||
surfaceAtlasData.ObjectsBuffer.Flush(context);
|
||||
}
|
||||
|
||||
// Init constants
|
||||
@@ -608,10 +611,10 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
// Each chunk (ChunksBuffer) contains uint with address of the culled objects data start in CulledObjectsBuffer.
|
||||
// If chunk has address=0 then it's unused/empty.
|
||||
// Chunk [0,0,0] is unused and its address=0 is used for atomic counter for writing into CulledObjectsBuffer.
|
||||
// Each chunk data contains objects count + all objects with tiles copied into buffer.
|
||||
// This allows to quickly convert world-space position into chunk, then read chunk data start and loop over culled objects (less objects and data already in place).
|
||||
// Each chunk data contains objects count + all objects addresses.
|
||||
// This allows quickly converting a world-space position into a chunk, then reading the chunk data start and looping over the culled objects.
|
||||
PROFILE_GPU_CPU("Cull Objects");
|
||||
uint32 objectsBufferCapacity = (uint32)((float)_objectsBuffer->Data.Count() * 1.3f);
|
||||
uint32 objectsBufferCapacity = (uint32)((float)surfaceAtlasData.Objects.Count() * 1.3f);
|
||||
|
||||
// Copy counter from ChunksBuffer into staging buffer to access current chunks memory usage to adapt dynamically to the scene complexity
|
||||
if (surfaceAtlasData.ChunksBuffer)
|
||||
@@ -635,7 +638,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
_culledObjectsSizeBuffer->Unmap();
|
||||
if (counter > 0)
|
||||
{
|
||||
objectsBufferCapacity = counter * sizeof(Float4);
|
||||
objectsBufferCapacity = counter;
|
||||
notReady = false;
|
||||
}
|
||||
}
|
||||
@@ -653,28 +656,28 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
}
|
||||
}
|
||||
}
|
||||
if (surfaceAtlasData.CulledObjectsCounterIndex != -1)
|
||||
if (surfaceAtlasData.CulledObjectsCounterIndex != -1 && surfaceAtlasData.CulledObjectsBuffer)
|
||||
{
|
||||
// Copy current counter value
|
||||
_culledObjectsSizeFrames[surfaceAtlasData.CulledObjectsCounterIndex] = currentFrame;
|
||||
context->CopyBuffer(_culledObjectsSizeBuffer, surfaceAtlasData.ChunksBuffer, sizeof(uint32), surfaceAtlasData.CulledObjectsCounterIndex * sizeof(uint32), 0);
|
||||
context->CopyBuffer(_culledObjectsSizeBuffer, surfaceAtlasData.CulledObjectsBuffer, sizeof(uint32), surfaceAtlasData.CulledObjectsCounterIndex * sizeof(uint32), 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate buffer for culled objects (estimated size)
|
||||
objectsBufferCapacity = Math::Min(Math::AlignUp(objectsBufferCapacity, 4096u), (uint32)MAX_int32);
|
||||
objectsBufferCapacity = Math::Min(Math::AlignUp<uint32>(objectsBufferCapacity * sizeof(uint32), 4096u), (uint32)MAX_int32);
|
||||
if (!surfaceAtlasData.CulledObjectsBuffer)
|
||||
surfaceAtlasData.CulledObjectsBuffer = GPUDevice::Instance->CreateBuffer(TEXT("GlobalSurfaceAtlas.CulledObjectsBuffer"));
|
||||
if (surfaceAtlasData.CulledObjectsBuffer->GetSize() < objectsBufferCapacity)
|
||||
{
|
||||
const GPUBufferDescription desc = GPUBufferDescription::Buffer(objectsBufferCapacity, GPUBufferFlags::UnorderedAccess | GPUBufferFlags::ShaderResource, PixelFormat::R32G32B32A32_Float, nullptr, sizeof(Float4));
|
||||
const auto desc = GPUBufferDescription::Raw(objectsBufferCapacity, GPUBufferFlags::UnorderedAccess | GPUBufferFlags::ShaderResource);
|
||||
if (surfaceAtlasData.CulledObjectsBuffer->Init(desc))
|
||||
return true;
|
||||
}
|
||||
|
||||
// Clear chunks counter (chunk at 0 is used for a counter so chunks buffer is aligned)
|
||||
uint32 counter = 1; // Indicate that 1st float4 is used so value 0 can be used as invalid chunk address
|
||||
context->UpdateBuffer(surfaceAtlasData.ChunksBuffer, &counter, sizeof(counter), 0);
|
||||
// Clear chunks counter (uint at 0 is used for a counter)
|
||||
uint32 counter = 1; // Move write location for culled objects after counter
|
||||
context->UpdateBuffer(surfaceAtlasData.CulledObjectsBuffer, &counter, sizeof(counter), 0);
|
||||
|
||||
// Cull objects into chunks (1 thread per chunk)
|
||||
Data0 data;
|
||||
@@ -687,7 +690,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
context->BindCB(0, _cb0);
|
||||
static_assert(GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION % GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE == 0, "Invalid chunks resolution/groups setting.");
|
||||
const int32 chunkDispatchGroups = GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION / GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE;
|
||||
context->BindSR(0, _objectsBuffer->GetBuffer()->View());
|
||||
context->BindSR(0, surfaceAtlasData.ObjectsBuffer.GetBuffer()->View());
|
||||
context->BindUA(0, surfaceAtlasData.ChunksBuffer->View());
|
||||
context->BindUA(1, surfaceAtlasData.CulledObjectsBuffer->View());
|
||||
context->Dispatch(_csCullObjects, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups);
|
||||
@@ -734,6 +737,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
result.Atlas[4] = surfaceAtlasData.AtlasLighting;
|
||||
result.Chunks = surfaceAtlasData.ChunksBuffer;
|
||||
result.CulledObjects = surfaceAtlasData.CulledObjectsBuffer;
|
||||
result.Objects = surfaceAtlasData.ObjectsBuffer.GetBuffer();
|
||||
surfaceAtlasData.Result = result;
|
||||
|
||||
// Render direct lighting into atlas
|
||||
@@ -754,7 +758,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
context->BindSR(1, surfaceAtlasData.AtlasGBuffer1->View());
|
||||
context->BindSR(2, surfaceAtlasData.AtlasGBuffer2->View());
|
||||
context->BindSR(3, surfaceAtlasData.AtlasDepth->View());
|
||||
context->BindSR(4, _objectsBuffer->GetBuffer()->View());
|
||||
context->BindSR(4, surfaceAtlasData.ObjectsBuffer.GetBuffer()->View());
|
||||
context->BindSR(5, bindingDataSDF.Texture ? bindingDataSDF.Texture->ViewVolume() : nullptr);
|
||||
context->BindSR(6, bindingDataSDF.TextureMip ? bindingDataSDF.TextureMip->ViewVolume() : nullptr);
|
||||
context->BindCB(0, _cb0);
|
||||
@@ -932,8 +936,9 @@ void GlobalSurfaceAtlasPass::RenderDebug(RenderContext& renderContext, GPUContex
|
||||
context->BindSR(1, bindingDataSDF.TextureMip ? bindingDataSDF.TextureMip->ViewVolume() : nullptr);
|
||||
context->BindSR(2, bindingData.Chunks ? bindingData.Chunks->View() : nullptr);
|
||||
context->BindSR(3, bindingData.CulledObjects ? bindingData.CulledObjects->View() : nullptr);
|
||||
context->BindSR(4, bindingData.AtlasDepth->View());
|
||||
context->BindSR(6, skybox);
|
||||
context->BindSR(4, bindingData.Objects ? bindingData.Objects->View() : nullptr);
|
||||
context->BindSR(6, bindingData.AtlasDepth->View());
|
||||
context->BindSR(7, skybox);
|
||||
context->SetState(_psDebug);
|
||||
{
|
||||
Float2 outputSizeThird = outputSize * 0.333f;
|
||||
@@ -962,8 +967,9 @@ void GlobalSurfaceAtlasPass::RenderDebug(RenderContext& renderContext, GPUContex
|
||||
context->BindSR(1, bindingDataSDF.TextureMip ? bindingDataSDF.TextureMip->ViewVolume() : nullptr);
|
||||
context->BindSR(2, bindingData.Chunks ? bindingData.Chunks->View() : nullptr);
|
||||
context->BindSR(3, bindingData.CulledObjects ? bindingData.CulledObjects->View() : nullptr);
|
||||
context->BindSR(4, bindingData.AtlasDepth->View());
|
||||
context->BindSR(6, skybox);
|
||||
context->BindSR(4, bindingData.Objects ? bindingData.Objects->View() : nullptr);
|
||||
context->BindSR(6, bindingData.AtlasDepth->View());
|
||||
context->BindSR(7, skybox);
|
||||
context->BindCB(0, _cb0);
|
||||
context->SetState(_psDebug);
|
||||
context->SetRenderTarget(output->View());
|
||||
@@ -1079,8 +1085,8 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con
|
||||
object->Bounds.Transformation.GetWorld(localToWorldBounds);
|
||||
Matrix worldToLocalBounds;
|
||||
Matrix::Invert(localToWorldBounds, worldToLocalBounds);
|
||||
uint32 objectAddress = _objectsBuffer->Data.Count() / sizeof(Float4);
|
||||
auto* objectData = _objectsBuffer->WriteReserve<Float4>(GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE);
|
||||
uint32 objectAddress = surfaceAtlasData.ObjectsBuffer.Data.Count() / sizeof(Float4);
|
||||
auto* objectData = surfaceAtlasData.ObjectsBuffer.WriteReserve<Float4>(GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE);
|
||||
objectData[0] = *(Float4*)&actorObjectBounds;
|
||||
objectData[1] = Float4::Zero; // w unused
|
||||
objectData[2] = Float4(worldToLocalBounds.M11, worldToLocalBounds.M12, worldToLocalBounds.M13, worldToLocalBounds.M41);
|
||||
@@ -1131,7 +1137,7 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con
|
||||
// Per-tile data
|
||||
const float tileWidth = (float)tile->Width - GLOBAL_SURFACE_ATLAS_TILE_PADDING;
|
||||
const float tileHeight = (float)tile->Height - GLOBAL_SURFACE_ATLAS_TILE_PADDING;
|
||||
auto* tileData = _objectsBuffer->WriteReserve<Float4>(GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE);
|
||||
auto* tileData = surfaceAtlasData.ObjectsBuffer.WriteReserve<Float4>(GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE);
|
||||
tileData[0] = Float4(tile->X, tile->Y, tileWidth, tileHeight) * surfaceAtlasData.ResolutionInv;
|
||||
tileData[1] = Float4(tile->ViewMatrix.M11, tile->ViewMatrix.M12, tile->ViewMatrix.M13, tile->ViewMatrix.M41);
|
||||
tileData[2] = Float4(tile->ViewMatrix.M21, tile->ViewMatrix.M22, tile->ViewMatrix.M23, tile->ViewMatrix.M42);
|
||||
|
||||
@@ -38,6 +38,7 @@ public:
|
||||
};
|
||||
GPUBuffer* Chunks;
|
||||
GPUBuffer* CulledObjects;
|
||||
GPUBuffer* Objects;
|
||||
ConstantsData Constants;
|
||||
};
|
||||
|
||||
@@ -54,7 +55,6 @@ private:
|
||||
|
||||
// Cache
|
||||
class GPUBuffer* _culledObjectsSizeBuffer = nullptr;
|
||||
class DynamicTypedBuffer* _objectsBuffer = nullptr;
|
||||
class DynamicVertexBuffer* _vertexBuffer = nullptr;
|
||||
class GlobalSurfaceAtlasCustomBuffer* _surfaceAtlasData;
|
||||
Array<void*> _dirtyObjectsBuffer;
|
||||
|
||||
@@ -202,12 +202,13 @@ RWTexture2D<float4> RWProbesTrace : register(u0);
|
||||
Texture3D<float> GlobalSDFTex : register(t0);
|
||||
Texture3D<float> GlobalSDFMip : register(t1);
|
||||
ByteAddressBuffer GlobalSurfaceAtlasChunks : register(t2);
|
||||
Buffer<float4> GlobalSurfaceAtlasCulledObjects : register(t3);
|
||||
Texture2D GlobalSurfaceAtlasDepth : register(t4);
|
||||
Texture2D GlobalSurfaceAtlasTex : register(t5);
|
||||
Texture2D<snorm float4> ProbesState : register(t6);
|
||||
TextureCube Skybox : register(t7);
|
||||
ByteAddressBuffer ActiveProbes : register(t8);
|
||||
ByteAddressBuffer RWGlobalSurfaceAtlasCulledObjects : register(t3);
|
||||
Buffer<float4> GlobalSurfaceAtlasObjects : register(t4);
|
||||
Texture2D GlobalSurfaceAtlasDepth : register(t5);
|
||||
Texture2D GlobalSurfaceAtlasTex : register(t6);
|
||||
Texture2D<snorm float4> ProbesState : register(t7);
|
||||
TextureCube Skybox : register(t8);
|
||||
ByteAddressBuffer ActiveProbes : register(t9);
|
||||
|
||||
// Compute shader for tracing rays for probes using Global SDF and Global Surface Atlas.
|
||||
META_CS(true, FEATURE_LEVEL_SM5)
|
||||
@@ -248,7 +249,7 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID)
|
||||
// Sample Global Surface Atlas to get the lighting at the hit location
|
||||
float3 hitPosition = hit.GetHitPosition(trace);
|
||||
float surfaceThreshold = GetGlobalSurfaceAtlasThreshold(GlobalSDF, hit);
|
||||
float4 surfaceColor = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, GlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hitPosition, -probeRayDirection, surfaceThreshold);
|
||||
float4 surfaceColor = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, RWGlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hitPosition, -probeRayDirection, surfaceThreshold);
|
||||
radiance = float4(surfaceColor.rgb, hit.HitTime);
|
||||
|
||||
// Add some bias to prevent self occlusion artifacts in Chebyshev due to Global SDF being very incorrect in small scale
|
||||
|
||||
@@ -163,7 +163,7 @@ float4 SampleGlobalSurfaceAtlasTile(const GlobalSurfaceAtlasData data, GlobalSur
|
||||
|
||||
// Samples the Global Surface Atlas and returns the lighting (with opacity) at the given world location (and direction).
|
||||
// surfaceThreshold - Additional threshold (in world-units) between object or tile size compared with input data (error due to SDF or LOD incorrect appearance)
|
||||
float4 SampleGlobalSurfaceAtlas(const GlobalSurfaceAtlasData data, ByteAddressBuffer chunks, Buffer<float4> culledObjects, Texture2D depth, Texture2D atlas, float3 worldPosition, float3 worldNormal, float surfaceThreshold = 20.0f)
|
||||
float4 SampleGlobalSurfaceAtlas(const GlobalSurfaceAtlasData data, ByteAddressBuffer chunks, ByteAddressBuffer culledObjects, Buffer<float4> objects, Texture2D depth, Texture2D atlas, float3 worldPosition, float3 worldNormal, float surfaceThreshold = 20.0f)
|
||||
{
|
||||
float4 result = float4(0, 0, 0, 0);
|
||||
|
||||
@@ -178,24 +178,22 @@ float4 SampleGlobalSurfaceAtlas(const GlobalSurfaceAtlasData data, ByteAddressBu
|
||||
}
|
||||
|
||||
// Read objects counter
|
||||
float4 chunkHeader = culledObjects[objectsStart];
|
||||
objectsStart++;
|
||||
uint objectsCount = asuint(chunkHeader.x);
|
||||
uint objectsCount = culledObjects.Load(objectsStart * 4);
|
||||
if (objectsCount > data.ObjectsCount) // Prevents crashing - don't know why the data is invalid here (rare issue when moving fast through scene with terrain)
|
||||
return result;
|
||||
objectsStart++;
|
||||
|
||||
// Loop over culled objects inside the chunk
|
||||
LOOP
|
||||
for (uint objectIndex = 0; objectIndex < objectsCount; objectIndex++)
|
||||
{
|
||||
// Cull point vs sphere
|
||||
uint objectAddress = objectsStart;
|
||||
float4 objectBounds = LoadGlobalSurfaceAtlasObjectBounds(culledObjects, objectAddress);
|
||||
uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(culledObjects, objectAddress);
|
||||
objectsStart += objectSize;
|
||||
uint objectAddress = culledObjects.Load(objectsStart * 4);
|
||||
objectsStart++;
|
||||
float4 objectBounds = LoadGlobalSurfaceAtlasObjectBounds(objects, objectAddress);
|
||||
if (distance(objectBounds.xyz, worldPosition) > objectBounds.w)
|
||||
continue;
|
||||
GlobalSurfaceObject object = LoadGlobalSurfaceAtlasObject(culledObjects, objectAddress);
|
||||
GlobalSurfaceObject object = LoadGlobalSurfaceAtlasObject(objects, objectAddress);
|
||||
float3 localPosition = mul(float4(worldPosition, 1), object.WorldToLocal).xyz;
|
||||
float3 localExtent = object.Extent + surfaceThreshold;
|
||||
if (any(localPosition > localExtent) || any(localPosition < -localExtent))
|
||||
@@ -221,56 +219,56 @@ float4 SampleGlobalSurfaceAtlas(const GlobalSurfaceAtlasData data, ByteAddressBu
|
||||
uint tileOffset = object.TileOffsets[localNormal.x > 0.0f ? 0 : 1];
|
||||
if (localNormalSq.x > GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD * GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD && tileOffset != 0)
|
||||
{
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
|
||||
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
|
||||
}
|
||||
tileOffset = object.TileOffsets[localNormal.y > 0.0f ? 2 : 3];
|
||||
if (localNormalSq.y > GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD * GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD && tileOffset != 0)
|
||||
{
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
|
||||
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
|
||||
}
|
||||
tileOffset = object.TileOffsets[localNormal.z > 0.0f ? 4 : 5];
|
||||
if (localNormalSq.z > GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD * GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD && tileOffset != 0)
|
||||
{
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
|
||||
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
|
||||
}
|
||||
#else
|
||||
uint tileOffset = object.TileOffsets[0];
|
||||
if (tileOffset != 0)
|
||||
{
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
|
||||
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
|
||||
}
|
||||
tileOffset = object.TileOffsets[1];
|
||||
if (tileOffset != 0)
|
||||
{
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
|
||||
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
|
||||
}
|
||||
tileOffset = object.TileOffsets[2];
|
||||
if (tileOffset != 0)
|
||||
{
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
|
||||
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
|
||||
}
|
||||
tileOffset = object.TileOffsets[3];
|
||||
if (tileOffset != 0)
|
||||
{
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
|
||||
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
|
||||
}
|
||||
tileOffset = object.TileOffsets[4];
|
||||
if (tileOffset != 0)
|
||||
{
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
|
||||
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
|
||||
}
|
||||
tileOffset = object.TileOffsets[5];
|
||||
if (tileOffset != 0)
|
||||
{
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
|
||||
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -187,24 +187,23 @@ float4 PS_Lighting(AtlasVertexOutput input) : SV_Target
|
||||
#include "./Flax/Collisions.hlsl"
|
||||
|
||||
RWByteAddressBuffer RWGlobalSurfaceAtlasChunks : register(u0);
|
||||
RWBuffer<float4> RWGlobalSurfaceAtlasCulledObjects : register(u1);
|
||||
RWByteAddressBuffer RWGlobalSurfaceAtlasCulledObjects : register(u1);
|
||||
Buffer<float4> GlobalSurfaceAtlasObjects : register(t0);
|
||||
|
||||
// Compute shader for culling objects into chunks
|
||||
META_CS(true, FEATURE_LEVEL_SM5)
|
||||
[numthreads(GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE, GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE, GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE)]
|
||||
void CS_CullObjects(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_DispatchThreadID, uint3 GroupThreadId : SV_GroupThreadID)
|
||||
void CS_CullObjects(uint3 DispatchThreadId : SV_DispatchThreadID)
|
||||
{
|
||||
uint3 chunkCoord = DispatchThreadId;
|
||||
uint chunkAddress = (chunkCoord.z * (GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION) + chunkCoord.y * GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION + chunkCoord.x) * 4;
|
||||
if (chunkAddress == 0)
|
||||
return; // Skip chunk at 0,0,0 (used for counter)
|
||||
float3 chunkMin = GlobalSurfaceAtlas.ViewPos + (chunkCoord - (GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * 0.5f)) * GlobalSurfaceAtlas.ChunkSize;
|
||||
float3 chunkMax = chunkMin + GlobalSurfaceAtlas.ChunkSize;
|
||||
|
||||
// Count objects data size in this chunk (amount of float4s)
|
||||
uint objectsSize = 0, objectAddress = 0, objectsCount = 0;
|
||||
// TODO: maybe cache 20-30 culled object indices in thread memory to skip culling them again when copying data (maybe reduce chunk size to get smaller objects count per chunk)?
|
||||
// Count objects in this chunk
|
||||
uint objectAddress = 0, objectsCount = 0;
|
||||
// TODO: pre-cull objects within a thread group
|
||||
// TODO: maybe cache 20-30 culled object indices in thread memory to skip culling them again when copying data (maybe reduce chunk size to get smaller objects count per chunk)?
|
||||
LOOP
|
||||
for (uint objectIndex = 0; objectIndex < GlobalSurfaceAtlas.ObjectsCount; objectIndex++)
|
||||
{
|
||||
@@ -212,22 +211,21 @@ void CS_CullObjects(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_Disp
|
||||
uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(GlobalSurfaceAtlasObjects, objectAddress);
|
||||
if (BoxIntersectsSphere(chunkMin, chunkMax, objectBounds.xyz, objectBounds.w))
|
||||
{
|
||||
objectsSize += objectSize;
|
||||
objectsCount++;
|
||||
}
|
||||
objectAddress += objectSize;
|
||||
}
|
||||
if (objectsSize == 0)
|
||||
if (objectsCount == 0)
|
||||
{
|
||||
// Empty chunk
|
||||
RWGlobalSurfaceAtlasChunks.Store(chunkAddress, 0);
|
||||
return;
|
||||
}
|
||||
objectsSize++; // Include objects count before actual objects data
|
||||
|
||||
// Allocate object data size in the buffer
|
||||
uint objectsStart;
|
||||
RWGlobalSurfaceAtlasChunks.InterlockedAdd(0, objectsSize, objectsStart);
|
||||
uint objectsSize = objectsCount + 1; // Include objects count before actual objects data
|
||||
RWGlobalSurfaceAtlasCulledObjects.InterlockedAdd(0, objectsSize, objectsStart); // Counter at 0
|
||||
if (objectsStart + objectsSize > CulledObjectsCapacity)
|
||||
{
|
||||
// Not enough space in the buffer
|
||||
@@ -238,9 +236,8 @@ void CS_CullObjects(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_Disp
|
||||
// Write object data start
|
||||
RWGlobalSurfaceAtlasChunks.Store(chunkAddress, objectsStart);
|
||||
|
||||
// Write objects count before actual objects data
|
||||
RWGlobalSurfaceAtlasCulledObjects[objectsStart] = float4(asfloat(objectsCount), 0, 0, 0);
|
||||
objectsStart++;
|
||||
// Write objects count before actual objects indices
|
||||
RWGlobalSurfaceAtlasCulledObjects.Store(objectsStart * 4, objectsCount);
|
||||
|
||||
// Copy objects data in this chunk
|
||||
objectAddress = 0;
|
||||
@@ -251,11 +248,8 @@ void CS_CullObjects(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_Disp
|
||||
uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(GlobalSurfaceAtlasObjects, objectAddress);
|
||||
if (BoxIntersectsSphere(chunkMin, chunkMax, objectBounds.xyz, objectBounds.w))
|
||||
{
|
||||
for (uint i = 0; i < objectSize; i++)
|
||||
{
|
||||
RWGlobalSurfaceAtlasCulledObjects[objectsStart + i] = GlobalSurfaceAtlasObjects[objectAddress + i];
|
||||
}
|
||||
objectsStart += objectSize;
|
||||
objectsStart++;
|
||||
RWGlobalSurfaceAtlasCulledObjects.Store(objectsStart * 4, objectAddress);
|
||||
}
|
||||
objectAddress += objectSize;
|
||||
}
|
||||
@@ -268,10 +262,11 @@ void CS_CullObjects(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_Disp
|
||||
Texture3D<float> GlobalSDFTex : register(t0);
|
||||
Texture3D<float> GlobalSDFMip : register(t1);
|
||||
ByteAddressBuffer GlobalSurfaceAtlasChunks : register(t2);
|
||||
Buffer<float4> GlobalSurfaceAtlasCulledObjects : register(t3);
|
||||
Texture2D GlobalSurfaceAtlasDepth : register(t4);
|
||||
ByteAddressBuffer GlobalSurfaceAtlasCulledObjects : register(t3);
|
||||
Buffer<float4> GlobalSurfaceAtlasObjects : register(t4);
|
||||
Texture2D GlobalSurfaceAtlasTex : register(t5);
|
||||
TextureCube Skybox : register(t6);
|
||||
Texture2D GlobalSurfaceAtlasDepth : register(t6);
|
||||
TextureCube Skybox : register(t7);
|
||||
|
||||
// Pixel shader for Global Surface Atlas debug drawing
|
||||
META_PS(true, FEATURE_LEVEL_SM5)
|
||||
@@ -295,7 +290,7 @@ float4 PS_Debug(Quad_VS2PS input) : SV_Target
|
||||
{
|
||||
// Sample Global Surface Atlas at the hit location
|
||||
float surfaceThreshold = GetGlobalSurfaceAtlasThreshold(GlobalSDF, hit);
|
||||
color = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, GlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hit.GetHitPosition(trace), -viewRay, surfaceThreshold).rgb;
|
||||
color = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, GlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hit.GetHitPosition(trace), -viewRay, surfaceThreshold).rgb;
|
||||
//color = hit.HitNormal * 0.5f + 0.5f;
|
||||
}
|
||||
else
|
||||
|
||||
Reference in New Issue
Block a user