Refactor DDGI irradiance sampling when nearby probe is missing to use precomputed fallback probes

Also, insert fallback/dummy probes when there is no SDF nearby to have lighting in all cases.
This both improves sampling performance and fixes issues when sampling in areas far away from valid GI.
This commit is contained in:
Wojtek Figat
2026-01-05 12:04:02 +01:00
parent 6c79a17c7a
commit c5a28a5734
4 changed files with 141 additions and 34 deletions

View File

@@ -42,6 +42,7 @@
#define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side)
#define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8
#define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32
#define DDGI_PROBE_EMPTY_AREA_DENSITY 10 // Spacing (in probe grid) between fallback probes placed into empty areas to provide valid GI for nearby dynamic objects or transparency
#define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count
#define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging
@@ -217,6 +218,7 @@ bool DynamicDiffuseGlobalIlluminationPass::setupResources()
return true;
_csClassify = shader->GetCS("CS_Classify");
_csUpdateProbesInitArgs = shader->GetCS("CS_UpdateProbesInitArgs");
_csUpdateInactiveProbes = shader->GetCS("CS_UpdateInactiveProbes");
_csTraceRays[0] = shader->GetCS("CS_TraceRays", 0);
_csTraceRays[1] = shader->GetCS("CS_TraceRays", 1);
_csTraceRays[2] = shader->GetCS("CS_TraceRays", 2);
@@ -248,6 +250,7 @@ void DynamicDiffuseGlobalIlluminationPass::OnShaderReloading(Asset* obj)
LastFrameShaderReload = Engine::FrameCount;
_csClassify = nullptr;
_csUpdateProbesInitArgs = nullptr;
_csUpdateInactiveProbes = nullptr;
_csTraceRays[0] = nullptr;
_csTraceRays[1] = nullptr;
_csTraceRays[2] = nullptr;
@@ -590,6 +593,16 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
context->ResetUA();
}
// For inactive probes, search nearby ones to find the closest valid for quick fallback when sampling irradiance
{
PROFILE_GPU_CPU_NAMED("Update Inactive Probes");
context->BindUA(0, ddgiData.Result.ProbesData);
int32 iterations = Math::Min(probesCounts.MaxValue() - 1, DDGI_PROBE_EMPTY_AREA_DENSITY) * 10;
for (int32 i = 0; i < iterations; i++)
context->Dispatch(_csUpdateInactiveProbes, threadGroupsX, 1, 1);
context->ResetUA();
}
// Update probes in batches so ProbesTrace texture can be smaller
uint32 arg = 0;
// TODO: use rays allocator to dispatch raytracing in packets (eg. 8 threads in a group instead of hardcoded limit)

View File

@@ -45,6 +45,7 @@ private:
GPUConstantBuffer* _cb1 = nullptr;
GPUShaderProgramCS* _csClassify;
GPUShaderProgramCS* _csUpdateProbesInitArgs;
GPUShaderProgramCS* _csUpdateInactiveProbes;
GPUShaderProgramCS* _csTraceRays[4];
GPUShaderProgramCS* _csUpdateProbesIrradiance;
GPUShaderProgramCS* _csUpdateProbesDistance;

View File

@@ -26,6 +26,9 @@
#endif
#define DDGI_SRGB_BLENDING 1 // Enables blending in sRGB color space, otherwise irradiance blending is done in linear space
#define DDGI_DEFAULT_BIAS 0.2f // Default value for DDGI sampling bias
#define DDGI_FALLBACK_COORDS_ENCODE(coord) ((float3)coord / 128.0f)
#define DDGI_FALLBACK_COORDS_DECODE(data) (uint3)(data.xyz * 128.0f)
//#define DDGI_DEBUG_CASCADE 0 // Forces a specific cascade to be only in use (for debugging)
// DDGI data for a constant buffer
struct DDGIData
@@ -170,7 +173,6 @@ float3 SampleDDGIIrradianceCascade(DDGIData data, Texture2D<snorm float4> probes
// Loop over the closest probes to accumulate their contributions
float4 irradiance = float4(0, 0, 0, 0);
const int3 SearchAxes[3] = { int3(1, 0, 0), int3(0, 1, 0), int3(0, 0, 1) };
for (uint i = 0; i < 8; i++)
{
uint3 probeCoordsOffset = uint3(i, i >> 1, i >> 2) & 1;
@@ -180,33 +182,19 @@ float3 SampleDDGIIrradianceCascade(DDGIData data, Texture2D<snorm float4> probes
// Load probe position and state
float4 probeData = LoadDDGIProbeData(data, probesData, cascadeIndex, probeIndex);
uint probeState = DecodeDDGIProbeState(probeData);
uint useVisibility = true;
float minWight = 0.000001f;
if (probeState == DDGI_PROBE_STATE_INACTIVE)
{
// Search nearby probes to find any nearby GI sample
LOOP
for (int searchDistance = 1; searchDistance < 3 && probeState == DDGI_PROBE_STATE_INACTIVE; searchDistance++)
{
for (uint searchAxis = 0; searchAxis < 3; searchAxis++)
{
int searchAxisSign = probeCoordsOffset[searchAxis] ? 1 : -1;
int3 searchCoordsOffset = SearchAxes[searchAxis] * (searchAxisSign * searchDistance);
uint3 searchCoords = clamp((uint3)((int3)probeCoords + searchCoordsOffset), uint3(0, 0, 0), probeCoordsEnd);
uint searchIndex = GetDDGIScrollingProbeIndex(data, cascadeIndex, searchCoords);
float4 searchData = LoadDDGIProbeData(data, probesData, cascadeIndex, searchIndex);
uint searchState = DecodeDDGIProbeState(searchData);
if (searchState != DDGI_PROBE_STATE_INACTIVE)
{
// Use nearby probe as a fallback (visibility test might ignore it but with smooth gradient)
probeCoords = searchCoords;
probeIndex = searchIndex;
probeData = searchData;
probeState = searchState;
break;
}
}
}
if (probeState == DDGI_PROBE_STATE_INACTIVE)
continue;
// Use fallback probe that is closest to this one
uint3 fallbackCoords = DDGI_FALLBACK_COORDS_DECODE(probeData);
float fallbackToProbeDist = length((float3)probeCoords - (float3)fallbackCoords);
useVisibility = fallbackToProbeDist <= 1.0f; // Skip visibility test that blocks too far probes due to limiting max distance to 1.5 of probe spacing
if (fallbackToProbeDist > 2.0f)
minWight = 1.0f;
probeCoords = fallbackCoords;
probeIndex = GetDDGIScrollingProbeIndex(data, cascadeIndex, fallbackCoords);
probeData = LoadDDGIProbeData(data, probesData, cascadeIndex, probeIndex);
}
// Calculate probe position
@@ -227,7 +215,7 @@ float3 SampleDDGIIrradianceCascade(DDGIData data, Texture2D<snorm float4> probes
float2 probeDistance = probesDistance.SampleLevel(SamplerLinearClamp, uv, 0).rg * 2.0f;
// Visibility weight (Chebyshev)
if (biasedPosToProbeDist > probeDistance.x)
if (biasedPosToProbeDist > probeDistance.x && useVisibility)
{
float variance = abs(Square(probeDistance.x) - probeDistance.y);
float visibilityWeight = variance / (variance + Square(biasedPosToProbeDist - probeDistance.x));
@@ -235,7 +223,7 @@ float3 SampleDDGIIrradianceCascade(DDGIData data, Texture2D<snorm float4> probes
}
// Avoid a weight of zero
weight = max(weight, 0.000001f);
weight = max(weight, minWight);
// Adjust weight curve to inject a small portion of light
const float minWeightThreshold = 0.2f;
@@ -304,13 +292,16 @@ float sdRoundBox(float3 p, float3 b, float r)
float3 SampleDDGIIrradiance(DDGIData data, Texture2D<snorm float4> probesData, Texture2D<float4> probesDistance, Texture2D<float4> probesIrradiance, float3 worldPosition, float3 worldNormal, float bias = DDGI_DEFAULT_BIAS, float dither = 0.0f)
{
// Select the highest cascade that contains the sample location
uint cascadeIndex = 0;
float probesSpacing = 0, cascadeWeight = 0;
float3 probesOrigin = (float3)0, probesExtent = (float3)0, biasedWorldPosition = (float3)0;
float3 viewDir = normalize(data.ViewPos - worldPosition);
#if DDGI_CASCADE_BLEND_SMOOTH
dither = 0.0f;
#endif
#ifdef DDGI_DEBUG_CASCADE
uint cascadeIndex = DDGI_DEBUG_CASCADE;
#else
uint cascadeIndex = 0;
for (; cascadeIndex < data.CascadesCount; cascadeIndex++)
{
// Get cascade data
@@ -327,6 +318,7 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D<snorm float4> probesData, T
if (cascadeWeight > dither)
break;
}
#endif
if (cascadeIndex == data.CascadesCount)
return data.FallbackIrradiance;
@@ -335,7 +327,7 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D<snorm float4> probesData, T
// Blend with the next cascade (or fallback irradiance outside the volume)
cascadeIndex++;
#if DDGI_CASCADE_BLEND_SMOOTH
#if DDGI_CASCADE_BLEND_SMOOTH && !defined(DDGI_DEBUG_CASCADE)
result *= cascadeWeight;
if (cascadeIndex < data.CascadesCount && cascadeWeight < 0.99f)
{

View File

@@ -27,6 +27,7 @@
#define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32
#define DDGI_PROBE_RELOCATE_ITERATIVE 1 // If true, probes relocation algorithm tries to move them in additive way, otherwise all nearby locations are checked to find the best position
#define DDGI_PROBE_RELOCATE_FIND_BEST 1 // If true, probes relocation algorithm tries to move to the best matching location within nearby area
#define DDGI_PROBE_EMPTY_AREA_DENSITY 10 // Spacing (in probe grid) between fallback probes placed into empty areas to provide valid GI for nearby dynamic objects or transparency
#define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count
#define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging
@@ -115,6 +116,14 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
float probesSpacing = DDGI.ProbesOriginAndSpacing[CascadeIndex].w;
float3 probeBasePosition = GetDDGIProbeWorldPosition(DDGI, CascadeIndex, probeCoords);
#ifdef DDGI_DEBUG_CASCADE
// Single cascade-only debugging
if (CascadeIndex != DDGI_DEBUG_CASCADE)
{
RWProbesData[probeDataCoords] = EncodeDDGIProbeData(float3(0, 0, 0), DDGI_PROBE_STATE_INACTIVE, 0.0f);
return;
}
#else
// Disable probes that are in the range of a higher-quality cascade
if (CascadeIndex > 0)
{
@@ -126,11 +135,11 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
float prevCascadeWeight = Min3(prevProbesExtent - abs(probeBasePosition - prevProbesOrigin));
if (prevCascadeWeight > 0.1f)
{
// Disable probe
RWProbesData[probeDataCoords] = EncodeDDGIProbeData(float3(0, 0, 0), DDGI_PROBE_STATE_INACTIVE, 0.0f);
return;
}
}
#endif
// Check if probe was scrolled
int3 probeScrollClears = ProbeScrollClears[CascadeIndex].xyz;
@@ -174,9 +183,18 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
float voxelLimit = GlobalSDF.CascadeVoxelSize[CascadeIndex] * 0.8f;
float distanceLimit = probesSpacing * ProbesDistanceLimits[CascadeIndex];
float relocateLimit = probesSpacing * ProbesRelocateLimits[CascadeIndex];
if (sdfDst > distanceLimit + length(probeOffset)) // Probe is too far from geometry (or deep inside)
uint3 probeCoordsStable = GetDDGIProbeCoords(DDGI, probeIndex);
if (sdf > probesSpacing * DDGI.ProbesCounts.x * 0.3f &&
probeCoordsStable.x % DDGI_PROBE_EMPTY_AREA_DENSITY == 0 && probeCoordsStable.y % DDGI_PROBE_EMPTY_AREA_DENSITY == 0 && probeCoordsStable.z % DDGI_PROBE_EMPTY_AREA_DENSITY == 0)
{
// Disable it
// Add some fallback probes in empty areas to provide valid GI for nearby dynamic objects or transparency
probeOffset = float3(0, 0, 0);
probeState = wasScrolled || probeStateOld == DDGI_PROBE_STATE_INACTIVE ? DDGI_PROBE_STATE_ACTIVATED : DDGI_PROBE_STATE_ACTIVE;
probeAttention = DDGI_PROBE_ATTENTION_MIN;
}
else if (sdfDst > distanceLimit + length(probeOffset))
{
// Probe is too far from geometry (or deep inside) so disable it
probeOffset = float3(0, 0, 0);
probeState = DDGI_PROBE_STATE_INACTIVE;
probeAttention = 0.0f;
@@ -197,6 +215,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
probeAttention = clamp(probeAttention, DDGI_PROBE_ATTENTION_MIN, DDGI_PROBE_ATTENTION_MAX);
// Relocate only if probe location is not good enough
BRANCH
if (sdf <= voxelLimit)
{
#if DDGI_PROBE_RELOCATE_ITERATIVE
@@ -268,6 +287,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
bool wasActivated = probeStateOld == DDGI_PROBE_STATE_INACTIVE;
bool wasRelocated = distance(probeOffset, probeOffsetOld) > 2.0f;
#if DDGI_PROBE_RELOCATE_FIND_BEST || DDGI_PROBE_RELOCATE_ITERATIVE
BRANCH
if (wasRelocated && !wasActivated)
{
// If probe was relocated but the previous location is visible from the new one, then don't re-activate it for smoother blend
@@ -326,6 +346,87 @@ void CS_UpdateProbesInitArgs()
#endif
#ifdef _CS_UpdateInactiveProbes
globallycoherent RWTexture2D<snorm float4> RWProbesData : register(u0);
// Tests a single neighbor of an inactive probe as a fallback candidate.
// The neighbor is located at probeCoords + offset (clamped to the probe grid of the current cascade).
// - fallbackCoords: [in/out] grid coords of the best fallback found so far
// - probeState: [in/out] set to a non-inactive state once any fallback gets accepted
// - probeCoords: grid coords of the inactive probe that is being processed
// - probeCoordsEnd: last valid grid coords (inclusive), used to clamp the neighbor location
// - offset: neighbor offset (in probe grid cells)
void CheckNearbyProbe(inout uint3 fallbackCoords, inout uint probeState, uint3 probeCoords, int3 probeCoordsEnd, int3 offset)
{
    uint3 nearbyCoords = (uint3)clamp(((int3)probeCoords + offset), int3(0, 0, 0), probeCoordsEnd);
    uint nearbyIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, nearbyCoords);
    float4 nearbyData = RWProbesData[GetDDGIProbeTexelCoords(DDGI, CascadeIndex, nearbyIndex)];
    uint nearbyState = DecodeDDGIProbeState(nearbyData);
    if (nearbyState != DDGI_PROBE_STATE_INACTIVE)
    {
        // Use nearby probe
        fallbackCoords = nearbyCoords;
        probeState = nearbyState;
    }
    else
    {
        // Nearby probe is inactive too so its data holds coords of its own fallback probe (if any was found so far)
        uint3 nearbyFallbackCoords = DDGI_FALLBACK_COORDS_DECODE(nearbyData);

        // Compare squared distances (same ordering as comparing distances but without square roots)
        float3 nearbyFallbackDelta = (float3)nearbyFallbackCoords - (float3)probeCoords;
        float3 currentFallbackDelta = (float3)fallbackCoords - (float3)probeCoords;
        if (dot(nearbyFallbackDelta, nearbyFallbackDelta) < dot(currentFallbackDelta, currentFallbackDelta))
        {
            // Check if fallback probe is actually active (not some leftover memory)
            nearbyIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, nearbyFallbackCoords);
            nearbyData = RWProbesData[GetDDGIProbeTexelCoords(DDGI, CascadeIndex, nearbyIndex)];
            nearbyState = DecodeDDGIProbeState(nearbyData);
            if (nearbyState != DDGI_PROBE_STATE_INACTIVE)
            {
                // Use fallback of the nearby probe
                fallbackCoords = nearbyFallbackCoords;
                probeState = DDGI_PROBE_STATE_ACTIVE;
            }
        }
    }
}
// Compute shader that writes, into the data of every inactive probe, the grid coords of a nearby valid probe.
// Irradiance sampling can then jump straight to that fallback probe instead of searching for one.
// Each dispatch looks only at the direct neighbors (and at the fallbacks those neighbors already store).
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(DDGI_PROBE_CLASSIFY_GROUP_SIZE, 1, 1)]
void CS_UpdateInactiveProbes(uint3 DispatchThreadId : SV_DispatchThreadID)
{
    // Marker value meaning 'no fallback found'
    const uint noFallback = 1000;
    uint3 fallbackCoords = uint3(noFallback, noFallback, noFallback);

    // Fetch the probe handled by this thread (threads past the end are clamped to the last probe and never write)
    uint threadProbeIndex = min(DispatchThreadId.x, ProbesCount - 1);
    uint3 probeCoords = GetDDGIProbeCoords(DDGI, threadProbeIndex);
    uint probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords);
    int2 probeDataCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex);
    uint probeState = DecodeDDGIProbeState(RWProbesData[probeDataCoords]);

    if (probeState == DDGI_PROBE_STATE_INACTIVE)
    {
        // Neighbors to visit: 8 corners first, then 6 sides (flood fill step)
        const int3 neighborOffsets[14] =
        {
            int3(1, 1, 1), int3(-1, 1, 1), int3(1, -1, 1), int3(-1, -1, 1),
            int3(1, 1, -1), int3(-1, 1, -1), int3(1, -1, -1), int3(-1, -1, -1),
            int3(1, 0, 0), int3(0, 1, 0), int3(0, 0, 1),
            int3(-1, 0, 0), int3(0, -1, 0), int3(0, 0, -1),
        };
        int3 probeCoordsEnd = (int3)DDGI.ProbesCounts - int3(1, 1, 1);
        for (uint neighbor = 0; neighbor < 14; neighbor++)
            CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, neighborOffsets[neighbor]);
    }

    // Wait until the threads of this group have finished reading probes data before any of them overwrites it
    DeviceMemoryBarrierWithGroupSync();

    // Store the fallback coords in the probe data (the probe itself stays inactive)
    bool isValidThread = DispatchThreadId.x < ProbesCount;
    bool hasFallback = probeState != DDGI_PROBE_STATE_INACTIVE && fallbackCoords.x != noFallback;
    if (isValidThread && hasFallback)
    {
        RWProbesData[probeDataCoords] = EncodeDDGIProbeData(DDGI_FALLBACK_COORDS_ENCODE(fallbackCoords), DDGI_PROBE_STATE_INACTIVE, 0.0f);
    }
}
#endif
#ifdef _CS_TraceRays
RWTexture2D<float4> RWProbesTrace : register(u0);
@@ -644,7 +745,7 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_
// Add distance (R), distance^2 (G) and weight (A)
float rayDistance = CachedProbesTraceDistance[rayIndex];
result += float4(rayDistance * rayWeight, (rayDistance * rayDistance) * rayWeight, 0.0f, rayWeight);
result += float4(rayDistance, rayDistance * rayDistance, 0.0f, 1.0f) * rayWeight;
#endif
}