Optimize Global SDF dispatches with UAV write overlaps

2-3x faster rasterization
This commit is contained in:
Wojtek Figat
2025-09-03 23:12:55 +02:00
parent f1ecbf828e
commit 80de56f469

View File

@@ -10,6 +10,7 @@
#include "Engine/Content/Content.h"
#include "Engine/Graphics/GPUContext.h"
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Graphics/GPUPass.h"
#include "Engine/Graphics/Graphics.h"
#include "Engine/Graphics/RenderTask.h"
#include "Engine/Graphics/RenderBuffers.h"
@@ -841,9 +842,10 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
context->BindCB(1, _cb1);
constexpr int32 chunkDispatchGroups = GLOBAL_SDF_RASTERIZE_CHUNK_SIZE / GLOBAL_SDF_RASTERIZE_GROUP_SIZE;
int32 chunkDispatches = 0;
if (!reset)
if (!reset && cascade.NonEmptyChunks.HasItems())
{
PROFILE_GPU_CPU_NAMED("Clear Chunks");
GPUComputePass pass(context);
for (auto it = cascade.NonEmptyChunks.Begin(); it.IsNotEnd(); ++it)
{
auto& key = it->Item;
@@ -856,7 +858,6 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
context->UpdateCB(_cb1, &data);
context->Dispatch(_csClearChunk, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups);
chunkDispatches++;
// TODO: don't stall with UAV barrier on D3D12/Vulkan if UAVs don't change between dispatches
}
ZoneValue(chunkDispatches);
}
@@ -877,106 +878,102 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
context->BindSR(0, _objectsBuffer->GetBuffer() ? _objectsBuffer->GetBuffer()->View() : nullptr);
// Rasterize non-empty chunks (first layer so can override existing chunk data)
for (const auto& e : cascade.Chunks)
uint32 maxLayer = 0;
{
if (e.Key.Layer != 0)
continue;
auto& chunk = e.Value;
cascade.NonEmptyChunks.Add(e.Key);
for (int32 i = 0; i < chunk.ModelsCount; i++)
GPUComputePass pass(context);
for (const auto& e : cascade.Chunks)
{
auto objectIndex = objectIndexToDataIndex.At(chunk.Models[i]);
data.Objects[i] = objectIndex;
context->BindSR(i + 1, objectsTextures[objectIndex]);
}
for (int32 i = chunk.ModelsCount; i < GLOBAL_SDF_RASTERIZE_HEIGHTFIELD_MAX_COUNT; i++)
context->UnBindSR(i + 1);
data.ChunkCoord = e.Key.Coord * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE;
data.ObjectsCount = chunk.ModelsCount;
context->UpdateCB(_cb1, &data);
auto cs = data.ObjectsCount != 0 ? _csRasterizeModel0 : _csClearChunk; // Terrain-only chunk can be quickly cleared
context->Dispatch(cs, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups);
chunkDispatches++;
// TODO: don't stall with UAV barrier on D3D12/Vulkan if UAVs don't change between dispatches (maybe cache per-shader write/read flags for all UAVs?)
if (e.Key.Layer != 0)
continue;
auto& chunk = e.Value;
data.ChunkCoord = e.Key.Coord * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE;
cascade.NonEmptyChunks.Add(e.Key);
if (chunk.HeightfieldsCount != 0)
{
// Inject heightfield (additive)
for (int32 i = 0; i < chunk.HeightfieldsCount; i++)
{
auto objectIndex = objectIndexToDataIndex.At(chunk.Heightfields[i]);
data.Objects[i] = objectIndex;
context->BindSR(i + 1, objectsTextures[objectIndex]);
}
for (int32 i = chunk.HeightfieldsCount; i < GLOBAL_SDF_RASTERIZE_HEIGHTFIELD_MAX_COUNT; i++)
context->UnBindSR(i + 1);
data.ObjectsCount = chunk.HeightfieldsCount;
context->UpdateCB(_cb1, &data);
context->Dispatch(_csRasterizeHeightfield, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups);
chunkDispatches++;
}
#if GLOBAL_SDF_DEBUG_CHUNKS
// Debug draw chunk bounds in world space with number of models in it
if (cascadeIndex + 1 == GLOBAL_SDF_DEBUG_CHUNKS)
{
int32 count = chunk.ModelsCount + chunk.HeightfieldsCount;
RasterizeChunkKey tmp = e.Key;
tmp.NextLayer();
while (cascade.Chunks.ContainsKey(tmp))
{
count += cascade.Chunks[tmp].ModelsCount + cascade.Chunks[tmp].HeightfieldsCount;
tmp.NextLayer();
}
Float3 chunkMin = cascade.Bounds.Minimum + Float3(e.Key.Coord) * cascade.ChunkSize;
BoundingBox chunkBounds(chunkMin, chunkMin + cascade.ChunkSize);
DebugDraw::DrawWireBox(chunkBounds, Color::Red, 0, false);
DebugDraw::DrawText(StringUtils::ToString(count), chunkBounds.GetCenter(), Color::Red);
}
#endif
}
// Rasterize non-empty chunks (additive layers so need combine with existing chunk data)
for (const auto& e : cascade.Chunks)
{
if (e.Key.Layer == 0)
continue;
auto& chunk = e.Value;
data.ChunkCoord = e.Key.Coord * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE;
if (chunk.ModelsCount != 0)
{
// Inject models (additive)
for (int32 i = 0; i < chunk.ModelsCount; i++)
{
auto objectIndex = objectIndexToDataIndex.At(chunk.Models[i]);
data.Objects[i] = objectIndex;
context->BindSR(i + 1, objectsTextures[objectIndex]);
}
for (int32 i = chunk.ModelsCount; i < GLOBAL_SDF_RASTERIZE_HEIGHTFIELD_MAX_COUNT; i++)
for (int32 i = chunk.ModelsCount; i < GLOBAL_SDF_RASTERIZE_MODEL_MAX_COUNT; i++)
context->UnBindSR(i + 1);
data.ObjectsCount = chunk.ModelsCount;
context->UpdateCB(_cb1, &data);
context->Dispatch(_csRasterizeModel1, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups);
auto cs = data.ObjectsCount != 0 ? _csRasterizeModel0 : _csClearChunk; // Terrain-only chunk can be quickly cleared
context->Dispatch(cs, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups);
chunkDispatches++;
}
if (chunk.HeightfieldsCount != 0)
{
// Inject heightfields (additive)
for (int32 i = 0; i < chunk.HeightfieldsCount; i++)
#if GLOBAL_SDF_DEBUG_CHUNKS
// Debug draw chunk bounds in world space with number of models in it
if (cascadeIndex + 1 == GLOBAL_SDF_DEBUG_CHUNKS)
{
auto objectIndex = objectIndexToDataIndex.At(chunk.Heightfields[i]);
data.Objects[i] = objectIndex;
context->BindSR(i + 1, objectsTextures[objectIndex]);
int32 count = chunk.ModelsCount + chunk.HeightfieldsCount;
RasterizeChunkKey tmp = e.Key;
tmp.NextLayer();
while (cascade.Chunks.ContainsKey(tmp))
{
count += cascade.Chunks[tmp].ModelsCount + cascade.Chunks[tmp].HeightfieldsCount;
tmp.NextLayer();
}
Float3 chunkMin = cascade.Bounds.Minimum + Float3(e.Key.Coord) * cascade.ChunkSize;
BoundingBox chunkBounds(chunkMin, chunkMin + cascade.ChunkSize);
DebugDraw::DrawWireBox(chunkBounds, Color::Red, 0, false);
DebugDraw::DrawText(StringUtils::ToString(count), chunkBounds.GetCenter(), Color::Red);
}
#endif
}
}
#if PLATFORM_WINDOWS
// Hack to fix D3D11 bug that doesn't insert UAV barrier after overlap region ends (between two GPUComputePass)
if (context->GetDevice()->GetRendererType() == RendererType::DirectX11)
context->Dispatch(_csRasterizeModel0, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups);
#endif
// Rasterize non-empty chunks (additive layers so need combine with existing chunk data)
for (uint32 layer = 0; layer <= maxLayer; layer++)
{
GPUComputePass pass(context);
for (const auto& e : cascade.Chunks)
{
if (e.Key.Layer != layer)
continue;
auto& chunk = e.Value;
data.ChunkCoord = e.Key.Coord * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE;
if (chunk.ModelsCount != 0 && layer != 0) // Models from layer 0 has been already written
{
// Inject models (additive)
for (int32 i = 0; i < chunk.ModelsCount; i++)
{
auto objectIndex = objectIndexToDataIndex.At(chunk.Models[i]);
data.Objects[i] = objectIndex;
context->BindSR(i + 1, objectsTextures[objectIndex]);
}
for (int32 i = chunk.ModelsCount; i < GLOBAL_SDF_RASTERIZE_MODEL_MAX_COUNT; i++)
context->UnBindSR(i + 1);
data.ObjectsCount = chunk.ModelsCount;
context->UpdateCB(_cb1, &data);
context->Dispatch(_csRasterizeModel1, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups);
chunkDispatches++;
}
if (chunk.HeightfieldsCount != 0)
{
// Inject heightfields (additive)
for (int32 i = 0; i < chunk.HeightfieldsCount; i++)
{
auto objectIndex = objectIndexToDataIndex.At(chunk.Heightfields[i]);
data.Objects[i] = objectIndex;
context->BindSR(i + 1, objectsTextures[objectIndex]);
}
for (int32 i = chunk.HeightfieldsCount; i < GLOBAL_SDF_RASTERIZE_HEIGHTFIELD_MAX_COUNT; i++)
context->UnBindSR(i + 1);
data.ObjectsCount = chunk.HeightfieldsCount;
context->UpdateCB(_cb1, &data);
context->Dispatch(_csRasterizeHeightfield, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups);
chunkDispatches++;
}
for (int32 i = chunk.HeightfieldsCount; i < GLOBAL_SDF_RASTERIZE_HEIGHTFIELD_MAX_COUNT; i++)
context->UnBindSR(i + 1);
data.ObjectsCount = chunk.HeightfieldsCount;
context->UpdateCB(_cb1, &data);
context->Dispatch(_csRasterizeHeightfield, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups);
chunkDispatches++;
}
}