Refactor Global Surface Atlas to not rewrite objects data and use indirection for faster culling

This commit is contained in:
Wojciech Figat
2022-06-27 16:07:54 +02:00
parent 282b9066b8
commit d8e79101e7
6 changed files with 80 additions and 79 deletions

View File

@@ -202,12 +202,13 @@ RWTexture2D<float4> RWProbesTrace : register(u0);
Texture3D<float> GlobalSDFTex : register(t0);
Texture3D<float> GlobalSDFMip : register(t1);
ByteAddressBuffer GlobalSurfaceAtlasChunks : register(t2);
Buffer<float4> GlobalSurfaceAtlasCulledObjects : register(t3);
Texture2D GlobalSurfaceAtlasDepth : register(t4);
Texture2D GlobalSurfaceAtlasTex : register(t5);
Texture2D<snorm float4> ProbesState : register(t6);
TextureCube Skybox : register(t7);
ByteAddressBuffer ActiveProbes : register(t8);
ByteAddressBuffer RWGlobalSurfaceAtlasCulledObjects : register(t3);
Buffer<float4> GlobalSurfaceAtlasObjects : register(t4);
Texture2D GlobalSurfaceAtlasDepth : register(t5);
Texture2D GlobalSurfaceAtlasTex : register(t6);
Texture2D<snorm float4> ProbesState : register(t7);
TextureCube Skybox : register(t8);
ByteAddressBuffer ActiveProbes : register(t9);
// Compute shader for tracing rays for probes using Global SDF and Global Surface Atlas.
META_CS(true, FEATURE_LEVEL_SM5)
@@ -248,7 +249,7 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID)
// Sample Global Surface Atlas to get the lighting at the hit location
float3 hitPosition = hit.GetHitPosition(trace);
float surfaceThreshold = GetGlobalSurfaceAtlasThreshold(GlobalSDF, hit);
float4 surfaceColor = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, GlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hitPosition, -probeRayDirection, surfaceThreshold);
float4 surfaceColor = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, RWGlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hitPosition, -probeRayDirection, surfaceThreshold);
radiance = float4(surfaceColor.rgb, hit.HitTime);
// Add some bias to prevent self occlusion artifacts in Chebyshev due to Global SDF being very incorrect in small scale

View File

@@ -163,7 +163,7 @@ float4 SampleGlobalSurfaceAtlasTile(const GlobalSurfaceAtlasData data, GlobalSur
// Samples the Global Surface Atlas and returns the lighting (with opacity) at the given world location (and direction).
// surfaceThreshold - Additional threshold (in world-units) between object or tile size compared with input data (error due to SDF or LOD incorrect appearance)
float4 SampleGlobalSurfaceAtlas(const GlobalSurfaceAtlasData data, ByteAddressBuffer chunks, Buffer<float4> culledObjects, Texture2D depth, Texture2D atlas, float3 worldPosition, float3 worldNormal, float surfaceThreshold = 20.0f)
float4 SampleGlobalSurfaceAtlas(const GlobalSurfaceAtlasData data, ByteAddressBuffer chunks, ByteAddressBuffer culledObjects, Buffer<float4> objects, Texture2D depth, Texture2D atlas, float3 worldPosition, float3 worldNormal, float surfaceThreshold = 20.0f)
{
float4 result = float4(0, 0, 0, 0);
@@ -178,24 +178,22 @@ float4 SampleGlobalSurfaceAtlas(const GlobalSurfaceAtlasData data, ByteAddressBu
}
// Read objects counter
float4 chunkHeader = culledObjects[objectsStart];
objectsStart++;
uint objectsCount = asuint(chunkHeader.x);
uint objectsCount = culledObjects.Load(objectsStart * 4);
if (objectsCount > data.ObjectsCount) // Prevents crashing - don't know why the data is invalid here (rare issue when moving fast through scene with terrain)
return result;
objectsStart++;
// Loop over culled objects inside the chunk
LOOP
for (uint objectIndex = 0; objectIndex < objectsCount; objectIndex++)
{
// Cull point vs sphere
uint objectAddress = objectsStart;
float4 objectBounds = LoadGlobalSurfaceAtlasObjectBounds(culledObjects, objectAddress);
uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(culledObjects, objectAddress);
objectsStart += objectSize;
uint objectAddress = culledObjects.Load(objectsStart * 4);
objectsStart++;
float4 objectBounds = LoadGlobalSurfaceAtlasObjectBounds(objects, objectAddress);
if (distance(objectBounds.xyz, worldPosition) > objectBounds.w)
continue;
GlobalSurfaceObject object = LoadGlobalSurfaceAtlasObject(culledObjects, objectAddress);
GlobalSurfaceObject object = LoadGlobalSurfaceAtlasObject(objects, objectAddress);
float3 localPosition = mul(float4(worldPosition, 1), object.WorldToLocal).xyz;
float3 localExtent = object.Extent + surfaceThreshold;
if (any(localPosition > localExtent) || any(localPosition < -localExtent))
@@ -221,56 +219,56 @@ float4 SampleGlobalSurfaceAtlas(const GlobalSurfaceAtlasData data, ByteAddressBu
uint tileOffset = object.TileOffsets[localNormal.x > 0.0f ? 0 : 1];
if (localNormalSq.x > GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD * GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD && tileOffset != 0)
{
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
}
tileOffset = object.TileOffsets[localNormal.y > 0.0f ? 2 : 3];
if (localNormalSq.y > GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD * GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD && tileOffset != 0)
{
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
}
tileOffset = object.TileOffsets[localNormal.z > 0.0f ? 4 : 5];
if (localNormalSq.z > GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD * GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD && tileOffset != 0)
{
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
}
#else
uint tileOffset = object.TileOffsets[0];
if (tileOffset != 0)
{
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
}
tileOffset = object.TileOffsets[1];
if (tileOffset != 0)
{
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
}
tileOffset = object.TileOffsets[2];
if (tileOffset != 0)
{
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
}
tileOffset = object.TileOffsets[3];
if (tileOffset != 0)
{
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
}
tileOffset = object.TileOffsets[4];
if (tileOffset != 0)
{
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
}
tileOffset = object.TileOffsets[5];
if (tileOffset != 0)
{
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
}
#endif

View File

@@ -187,24 +187,23 @@ float4 PS_Lighting(AtlasVertexOutput input) : SV_Target
#include "./Flax/Collisions.hlsl"
RWByteAddressBuffer RWGlobalSurfaceAtlasChunks : register(u0);
RWBuffer<float4> RWGlobalSurfaceAtlasCulledObjects : register(u1);
RWByteAddressBuffer RWGlobalSurfaceAtlasCulledObjects : register(u1);
Buffer<float4> GlobalSurfaceAtlasObjects : register(t0);
// Compute shader for culling objects into chunks
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE, GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE, GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE)]
void CS_CullObjects(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_DispatchThreadID, uint3 GroupThreadId : SV_GroupThreadID)
void CS_CullObjects(uint3 DispatchThreadId : SV_DispatchThreadID)
{
uint3 chunkCoord = DispatchThreadId;
uint chunkAddress = (chunkCoord.z * (GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION) + chunkCoord.y * GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION + chunkCoord.x) * 4;
if (chunkAddress == 0)
return; // Skip chunk at 0,0,0 (used for counter)
float3 chunkMin = GlobalSurfaceAtlas.ViewPos + (chunkCoord - (GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * 0.5f)) * GlobalSurfaceAtlas.ChunkSize;
float3 chunkMax = chunkMin + GlobalSurfaceAtlas.ChunkSize;
// Count objects data size in this chunk (amount of float4s)
uint objectsSize = 0, objectAddress = 0, objectsCount = 0;
// TODO: maybe cache 20-30 culled object indices in thread memory to skip culling them again when copying data (maybe reduce chunk size to get smaller objects count per chunk)?
// Count objects in this chunk
uint objectAddress = 0, objectsCount = 0;
// TODO: pre-cull objects within a thread group
// TODO: maybe cache 20-30 culled object indices in thread memory to skip culling them again when copying data (maybe reduce chunk size to get smaller objects count per chunk)?
LOOP
for (uint objectIndex = 0; objectIndex < GlobalSurfaceAtlas.ObjectsCount; objectIndex++)
{
@@ -212,22 +211,21 @@ void CS_CullObjects(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_Disp
uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(GlobalSurfaceAtlasObjects, objectAddress);
if (BoxIntersectsSphere(chunkMin, chunkMax, objectBounds.xyz, objectBounds.w))
{
objectsSize += objectSize;
objectsCount++;
}
objectAddress += objectSize;
}
if (objectsSize == 0)
if (objectsCount == 0)
{
// Empty chunk
RWGlobalSurfaceAtlasChunks.Store(chunkAddress, 0);
return;
}
objectsSize++; // Include objects count before actual objects data
// Allocate object data size in the buffer
uint objectsStart;
RWGlobalSurfaceAtlasChunks.InterlockedAdd(0, objectsSize, objectsStart);
uint objectsSize = objectsCount + 1; // Include objects count before actual objects data
RWGlobalSurfaceAtlasCulledObjects.InterlockedAdd(0, objectsSize, objectsStart); // Counter at 0
if (objectsStart + objectsSize > CulledObjectsCapacity)
{
// Not enough space in the buffer
@@ -238,9 +236,8 @@ void CS_CullObjects(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_Disp
// Write object data start
RWGlobalSurfaceAtlasChunks.Store(chunkAddress, objectsStart);
// Write objects count before actual objects data
RWGlobalSurfaceAtlasCulledObjects[objectsStart] = float4(asfloat(objectsCount), 0, 0, 0);
objectsStart++;
// Write objects count before actual objects indices
RWGlobalSurfaceAtlasCulledObjects.Store(objectsStart * 4, objectsCount);
// Copy objects data in this chunk
objectAddress = 0;
@@ -251,11 +248,8 @@ void CS_CullObjects(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_Disp
uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(GlobalSurfaceAtlasObjects, objectAddress);
if (BoxIntersectsSphere(chunkMin, chunkMax, objectBounds.xyz, objectBounds.w))
{
for (uint i = 0; i < objectSize; i++)
{
RWGlobalSurfaceAtlasCulledObjects[objectsStart + i] = GlobalSurfaceAtlasObjects[objectAddress + i];
}
objectsStart += objectSize;
objectsStart++;
RWGlobalSurfaceAtlasCulledObjects.Store(objectsStart * 4, objectAddress);
}
objectAddress += objectSize;
}
@@ -268,10 +262,11 @@ void CS_CullObjects(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_Disp
Texture3D<float> GlobalSDFTex : register(t0);
Texture3D<float> GlobalSDFMip : register(t1);
ByteAddressBuffer GlobalSurfaceAtlasChunks : register(t2);
Buffer<float4> GlobalSurfaceAtlasCulledObjects : register(t3);
Texture2D GlobalSurfaceAtlasDepth : register(t4);
ByteAddressBuffer GlobalSurfaceAtlasCulledObjects : register(t3);
Buffer<float4> GlobalSurfaceAtlasObjects : register(t4);
Texture2D GlobalSurfaceAtlasTex : register(t5);
TextureCube Skybox : register(t6);
Texture2D GlobalSurfaceAtlasDepth : register(t6);
TextureCube Skybox : register(t7);
// Pixel shader for Global Surface Atlas debug drawing
META_PS(true, FEATURE_LEVEL_SM5)
@@ -295,7 +290,7 @@ float4 PS_Debug(Quad_VS2PS input) : SV_Target
{
// Sample Global Surface Atlas at the hit location
float surfaceThreshold = GetGlobalSurfaceAtlasThreshold(GlobalSDF, hit);
color = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, GlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hit.GetHitPosition(trace), -viewRay, surfaceThreshold).rgb;
color = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, GlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hit.GetHitPosition(trace), -viewRay, surfaceThreshold).rgb;
//color = hit.HitNormal * 0.5f + 0.5f;
}
else