Optimize inactive probes update in DDGI with faster Jump Flood instead of Flood Fill
This commit is contained in:
@@ -42,7 +42,7 @@
|
||||
#define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side)
|
||||
#define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8
|
||||
#define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32
|
||||
#define DDGI_PROBE_EMPTY_AREA_DENSITY 10 // Spacing (in probe grid) between fallback probes placed into empty areas to provide valid GI for nearby dynamic objects or transparency
|
||||
#define DDGI_PROBE_EMPTY_AREA_DENSITY 8 // Spacing (in probe grid) between fallback probes placed into empty areas to provide valid GI for nearby dynamic objects or transparency
|
||||
#define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count
|
||||
#define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging
|
||||
|
||||
@@ -76,7 +76,8 @@ GPU_CB_STRUCT(Data0 {
|
||||
|
||||
GPU_CB_STRUCT(Data1 {
|
||||
// TODO: use push constants on Vulkan or root signature data on DX12 to reduce overhead of changing single DWORD
|
||||
Float2 Padding2;
|
||||
float Padding2;
|
||||
int32 StepSize;
|
||||
uint32 CascadeIndex;
|
||||
uint32 ProbeIndexOffset;
|
||||
});
|
||||
@@ -594,10 +595,17 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
|
||||
// For inactive probes, search nearby ones to find the closest valid for quick fallback when sampling irradiance
|
||||
{
|
||||
PROFILE_GPU_CPU_NAMED("Update Inactive Probes");
|
||||
// TODO: this could run within GPUComputePass during Trace Rays or Update Probes to overlap compute works
|
||||
context->BindUA(0, ddgiData.Result.ProbesData);
|
||||
int32 iterations = Math::Min(probesCounts.MaxValue() - 1, DDGI_PROBE_EMPTY_AREA_DENSITY);
|
||||
for (int32 i = 0; i < iterations; i++)
|
||||
Data1 data;
|
||||
data.CascadeIndex = cascadeIndex;
|
||||
int32 iterations = Math::CeilToInt(Math::Log2((float)Math::Min(probesCounts.MaxValue(), DDGI_PROBE_EMPTY_AREA_DENSITY) + 1.0f));
|
||||
for (int32 i = iterations - 1; i >= 0; i--)
|
||||
{
|
||||
data.StepSize = Math::FloorToInt(Math::Pow(2, (float)i) + 0.5f); // Jump Flood step size
|
||||
context->UpdateCB(_cb1, &data);
|
||||
context->Dispatch(_csUpdateInactiveProbes, threadGroupsX, 1, 1);
|
||||
}
|
||||
context->ResetUA();
|
||||
}
|
||||
|
||||
|
||||
@@ -26,8 +26,9 @@
|
||||
#endif
|
||||
#define DDGI_SRGB_BLENDING 1 // Enables blending in sRGB color space, otherwise irradiance blending is done in linear space
|
||||
#define DDGI_DEFAULT_BIAS 0.2f // Default value for DDGI sampling bias
|
||||
#define DDGI_FALLBACK_COORDS_ENCODE(coord) ((float3)coord / 128.0f)
|
||||
#define DDGI_FALLBACK_COORDS_DECODE(data) (uint3)(data.xyz * 128.0f)
|
||||
#define DDGI_FALLBACK_COORDS_ENCODE(coord) ((float3)(coord + 1) / 128.0f)
|
||||
#define DDGI_FALLBACK_COORDS_DECODE(data) (uint3)(data.xyz * 128.0f - 1)
|
||||
#define DDGI_FALLBACK_COORDS_VALID(data) (length(data.xyz) > 0)
|
||||
//#define DDGI_DEBUG_CASCADE 0 // Forces a specific cascade to be only in use (for debugging)
|
||||
|
||||
// DDGI data for a constant buffer
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
#define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32
|
||||
#define DDGI_PROBE_RELOCATE_ITERATIVE 1 // If true, probes relocation algorithm tries to move them in additive way, otherwise all nearby locations are checked to find the best position
|
||||
#define DDGI_PROBE_RELOCATE_FIND_BEST 1 // If true, probes relocation algorithm tries to move to the best matching location within nearby area
|
||||
#define DDGI_PROBE_EMPTY_AREA_DENSITY 10 // Spacing (in probe grid) between fallback probes placed into empty areas to provide valid GI for nearby dynamic objects or transparency
|
||||
#define DDGI_PROBE_EMPTY_AREA_DENSITY 8 // Spacing (in probe grid) between fallback probes placed into empty areas to provide valid GI for nearby dynamic objects or transparency
|
||||
#define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count
|
||||
#define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging
|
||||
|
||||
@@ -49,7 +49,8 @@ uint FrameIndexMod8;
|
||||
META_CB_END
|
||||
|
||||
META_CB_BEGIN(1, Data1)
|
||||
float2 Padding2;
|
||||
float Padding2;
|
||||
int StepSize;
|
||||
uint CascadeIndex;
|
||||
uint ProbeIndexOffset;
|
||||
META_CB_END
|
||||
@@ -364,38 +365,35 @@ void CS_UpdateProbesInitArgs()
|
||||
|
||||
#ifdef _CS_UpdateInactiveProbes
|
||||
|
||||
globallycoherent RWTexture2D<snorm float4> RWProbesData : register(u0);
|
||||
RWTexture2D<snorm float4> RWProbesData : register(u0);
|
||||
|
||||
void CheckNearbyProbe(inout uint3 fallbackCoords, inout uint probeState, uint3 probeCoords, int3 probeCoordsEnd, int3 offset)
|
||||
void CheckNearbyProbe(inout uint3 fallbackCoords, inout uint probeState, inout float minDistance, uint3 probeCoords, int3 probeCoordsEnd, int3 offset)
|
||||
{
|
||||
uint3 nearbyCoords = (uint3)clamp(((int3)probeCoords + offset), int3(0, 0, 0), probeCoordsEnd);
|
||||
uint nearbyIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, nearbyCoords);
|
||||
float4 nearbyData = RWProbesData[GetDDGIProbeTexelCoords(DDGI, CascadeIndex, nearbyIndex)];
|
||||
uint nearbyState = DecodeDDGIProbeState(nearbyData);
|
||||
uint3 nearbyFallbackCoords = DDGI_FALLBACK_COORDS_DECODE(nearbyData);
|
||||
if (nearbyState != DDGI_PROBE_STATE_INACTIVE)
|
||||
float nearbyDist = distance((float3)nearbyCoords, (float3)probeCoords);
|
||||
if (DecodeDDGIProbeState(nearbyData) != DDGI_PROBE_STATE_INACTIVE && nearbyDist < minDistance)
|
||||
{
|
||||
// Use nearby probe
|
||||
fallbackCoords = nearbyCoords;
|
||||
probeState = nearbyState;
|
||||
probeState = DDGI_PROBE_STATE_ACTIVE;
|
||||
minDistance = nearbyDist;
|
||||
return;
|
||||
}
|
||||
// TODO: optimize distance check with squared dst comparision
|
||||
else if (distance((float3)nearbyFallbackCoords, (float3)probeCoords) < distance((float3)fallbackCoords, (float3)probeCoords))
|
||||
nearbyCoords = DDGI_FALLBACK_COORDS_DECODE(nearbyData);
|
||||
nearbyDist = distance((float3)nearbyCoords, (float3)probeCoords);
|
||||
if (DDGI_FALLBACK_COORDS_VALID(nearbyData) && nearbyDist < minDistance)
|
||||
{
|
||||
// Check if fallback probe is actually active (not some leftover memory)
|
||||
nearbyIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, nearbyFallbackCoords);
|
||||
nearbyData = RWProbesData[GetDDGIProbeTexelCoords(DDGI, CascadeIndex, nearbyIndex)];
|
||||
nearbyState = DecodeDDGIProbeState(nearbyData);
|
||||
if (nearbyState != DDGI_PROBE_STATE_INACTIVE)
|
||||
{
|
||||
// Use fallback of the nearby probe
|
||||
fallbackCoords = nearbyFallbackCoords;
|
||||
probeState = DDGI_PROBE_STATE_ACTIVE;
|
||||
}
|
||||
// Use fallback probe
|
||||
fallbackCoords = nearbyCoords;
|
||||
probeState = DDGI_PROBE_STATE_ACTIVE;
|
||||
minDistance = nearbyDist;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute shader to store closest valid probe coords inside inactive probes data for quick fallback lookup when sampling irradiance.
|
||||
// Uses Jump Flood algorithm.
|
||||
META_CS(true, FEATURE_LEVEL_SM5)
|
||||
[numthreads(DDGI_PROBE_CLASSIFY_GROUP_SIZE, 1, 1)]
|
||||
void CS_UpdateInactiveProbes(uint3 DispatchThreadId : SV_DispatchThreadID)
|
||||
@@ -409,32 +407,26 @@ void CS_UpdateInactiveProbes(uint3 DispatchThreadId : SV_DispatchThreadID)
|
||||
int2 probeDataCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex);
|
||||
float4 probeData = RWProbesData[probeDataCoords];
|
||||
uint probeState = DecodeDDGIProbeState(probeData);
|
||||
BRANCH
|
||||
if (probeState == DDGI_PROBE_STATE_INACTIVE)
|
||||
{
|
||||
// Find the closest active probe (flood fill)
|
||||
// Find the closest active probe (Jump Flood)
|
||||
int3 probeCoordsEnd = (int3)DDGI.ProbesCounts - int3(1, 1, 1);
|
||||
// Corners
|
||||
CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(1, 1, 1));
|
||||
CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(-1, 1, 1));
|
||||
CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(1, -1, 1));
|
||||
CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(-1, -1, 1));
|
||||
CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(1, 1, -1));
|
||||
CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(-1, 1, -1));
|
||||
CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(1, -1, -1));
|
||||
CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(-1, -1, -1));
|
||||
// Sides
|
||||
CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(1, 0, 0));
|
||||
CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(0, 1, 0));
|
||||
CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(0, 0, 1));
|
||||
CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(-1, 0, 0));
|
||||
CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(0, -1, 0));
|
||||
CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(0, 0, -1));
|
||||
float minDistance = 1e27f;
|
||||
UNROLL for (int z = -1; z <= 1; z++)
|
||||
UNROLL for (int y = -1; y <= 1; y++)
|
||||
UNROLL for (int x = -1; x <= 1; x++)
|
||||
{
|
||||
int3 offset = int3(x, y, z) * StepSize;
|
||||
CheckNearbyProbe(fallbackCoords, probeState, minDistance, probeCoords, probeCoordsEnd, offset);
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure all threads (within dispatch) got proepr data before writing back to the same memory
|
||||
DeviceMemoryBarrierWithGroupSync();
|
||||
// Ensure all threads (within dispatch) got proper data before writing back to the same memory
|
||||
AllMemoryBarrierWithGroupSync();
|
||||
|
||||
// Write modified probe data back (remain inactive)
|
||||
BRANCH
|
||||
if (probeState != DDGI_PROBE_STATE_INACTIVE && DispatchThreadId.x < ProbesCount && fallbackCoords.x != 1000)
|
||||
{
|
||||
RWProbesData[probeDataCoords] = EncodeDDGIProbeData(DDGI_FALLBACK_COORDS_ENCODE(fallbackCoords), DDGI_PROBE_STATE_INACTIVE, 0.0f);
|
||||
|
||||
Reference in New Issue
Block a user