From c5a28a5734ff9a132b278efd23a08a8cdd56e3da Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 5 Jan 2026 12:04:02 +0100 Subject: [PATCH] Refactor DDGI irradiance sampling when nearby probe is missing to use precomputed fallback probes Also, insert fallback/dummy probes when there is no SDF nearby to have lighting in all cases. This both improves sampling performance and fixes issues when sampling in areas far away from valid GI. --- .../GI/DynamicDiffuseGlobalIllumination.cpp | 13 +++ .../GI/DynamicDiffuseGlobalIllumination.h | 1 + Source/Shaders/GI/DDGI.hlsl | 52 ++++----- Source/Shaders/GI/DDGI.shader | 109 +++++++++++++++++- 4 files changed, 141 insertions(+), 34 deletions(-) diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index 0ccb9aa02..a85ba6342 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -42,6 +42,7 @@ #define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side) #define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8 #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 +#define DDGI_PROBE_EMPTY_AREA_DENSITY 10 // Spacing (in probe grid) between fallback probes placed into empty areas to provide valid GI for nearby dynamic objects or transparency #define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count #define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging @@ -217,6 +218,7 @@ bool DynamicDiffuseGlobalIlluminationPass::setupResources() return true; _csClassify = shader->GetCS("CS_Classify"); _csUpdateProbesInitArgs = shader->GetCS("CS_UpdateProbesInitArgs"); + _csUpdateInactiveProbes = shader->GetCS("CS_UpdateInactiveProbes"); _csTraceRays[0] = shader->GetCS("CS_TraceRays", 0); _csTraceRays[1] = shader->GetCS("CS_TraceRays", 1); _csTraceRays[2] 
= shader->GetCS("CS_TraceRays", 2); @@ -248,6 +250,7 @@ void DynamicDiffuseGlobalIlluminationPass::OnShaderReloading(Asset* obj) LastFrameShaderReload = Engine::FrameCount; _csClassify = nullptr; _csUpdateProbesInitArgs = nullptr; + _csUpdateInactiveProbes = nullptr; _csTraceRays[0] = nullptr; _csTraceRays[1] = nullptr; _csTraceRays[2] = nullptr; @@ -590,6 +593,16 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont context->ResetUA(); } + // For inactive probes, search nearby ones to find the closest valid for quick fallback when sampling irradiance + { + PROFILE_GPU_CPU_NAMED("Update Inactive Probes"); + context->BindUA(0, ddgiData.Result.ProbesData); + int32 iterations = Math::Min(probesCounts.MaxValue() - 1, DDGI_PROBE_EMPTY_AREA_DENSITY) * 10; + for (int32 i = 0; i < iterations; i++) + context->Dispatch(_csUpdateInactiveProbes, threadGroupsX, 1, 1); + context->ResetUA(); + } + // Update probes in batches so ProbesTrace texture can be smaller uint32 arg = 0; // TODO: use rays allocator to dispatch raytracing in packets (eg. 
8 threads in a group instead of hardcoded limit) diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h index 123a47c6d..85478261f 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h @@ -45,6 +45,7 @@ private: GPUConstantBuffer* _cb1 = nullptr; GPUShaderProgramCS* _csClassify; GPUShaderProgramCS* _csUpdateProbesInitArgs; + GPUShaderProgramCS* _csUpdateInactiveProbes; GPUShaderProgramCS* _csTraceRays[4]; GPUShaderProgramCS* _csUpdateProbesIrradiance; GPUShaderProgramCS* _csUpdateProbesDistance; diff --git a/Source/Shaders/GI/DDGI.hlsl b/Source/Shaders/GI/DDGI.hlsl index e70454c2b..a7de92577 100644 --- a/Source/Shaders/GI/DDGI.hlsl +++ b/Source/Shaders/GI/DDGI.hlsl @@ -26,6 +26,9 @@ #endif #define DDGI_SRGB_BLENDING 1 // Enables blending in sRGB color space, otherwise irradiance blending is done in linear space #define DDGI_DEFAULT_BIAS 0.2f // Default value for DDGI sampling bias +#define DDGI_FALLBACK_COORDS_ENCODE(coord) ((float3)coord / 128.0f) +#define DDGI_FALLBACK_COORDS_DECODE(data) (uint3)(data.xyz * 128.0f) +//#define DDGI_DEBUG_CASCADE 0 // Forces a specific cascade to be only in use (for debugging) // DDGI data for a constant buffer struct DDGIData @@ -170,7 +173,6 @@ float3 SampleDDGIIrradianceCascade(DDGIData data, Texture2D probes // Loop over the closest probes to accumulate their contributions float4 irradiance = float4(0, 0, 0, 0); - const int3 SearchAxes[3] = { int3(1, 0, 0), int3(0, 1, 0), int3(0, 0, 1) }; for (uint i = 0; i < 8; i++) { uint3 probeCoordsOffset = uint3(i, i >> 1, i >> 2) & 1; @@ -180,33 +182,19 @@ float3 SampleDDGIIrradianceCascade(DDGIData data, Texture2D probes // Load probe position and state float4 probeData = LoadDDGIProbeData(data, probesData, cascadeIndex, probeIndex); uint probeState = DecodeDDGIProbeState(probeData); + uint useVisibility = true; + float 
minWight = 0.000001f; if (probeState == DDGI_PROBE_STATE_INACTIVE) { - // Search nearby probes to find any nearby GI sample - LOOP - for (int searchDistance = 1; searchDistance < 3 && probeState == DDGI_PROBE_STATE_INACTIVE; searchDistance++) - { - for (uint searchAxis = 0; searchAxis < 3; searchAxis++) - { - int searchAxisSign = probeCoordsOffset[searchAxis] ? 1 : -1; - int3 searchCoordsOffset = SearchAxes[searchAxis] * (searchAxisSign * searchDistance); - uint3 searchCoords = clamp((uint3)((int3)probeCoords + searchCoordsOffset), uint3(0, 0, 0), probeCoordsEnd); - uint searchIndex = GetDDGIScrollingProbeIndex(data, cascadeIndex, searchCoords); - float4 searchData = LoadDDGIProbeData(data, probesData, cascadeIndex, searchIndex); - uint searchState = DecodeDDGIProbeState(searchData); - if (searchState != DDGI_PROBE_STATE_INACTIVE) - { - // Use nearby probe as a fallback (visibility test might ignore it but with smooth gradient) - probeCoords = searchCoords; - probeIndex = searchIndex; - probeData = searchData; - probeState = searchState; - break; - } - } - } - if (probeState == DDGI_PROBE_STATE_INACTIVE) - continue; + // Use fallback probe that is closest to this one + uint3 fallbackCoords = DDGI_FALLBACK_COORDS_DECODE(probeData); + float fallbackToProbeDist = length((float3)probeCoords - (float3)fallbackCoords); + useVisibility = fallbackToProbeDist <= 1.0f; // Skip visibility test that blocks too far probes due to limiting max distance to 1.5 of probe spacing + if (fallbackToProbeDist > 2.0f) + minWight = 1.0f; + probeCoords = fallbackCoords; + probeIndex = GetDDGIScrollingProbeIndex(data, cascadeIndex, fallbackCoords); + probeData = LoadDDGIProbeData(data, probesData, cascadeIndex, probeIndex); } // Calculate probe position @@ -227,7 +215,7 @@ float3 SampleDDGIIrradianceCascade(DDGIData data, Texture2D probes float2 probeDistance = probesDistance.SampleLevel(SamplerLinearClamp, uv, 0).rg * 2.0f; // Visibility weight (Chebyshev) - if (biasedPosToProbeDist > 
probeDistance.x) + if (biasedPosToProbeDist > probeDistance.x && useVisibility) { float variance = abs(Square(probeDistance.x) - probeDistance.y); float visibilityWeight = variance / (variance + Square(biasedPosToProbeDist - probeDistance.x)); @@ -235,7 +223,7 @@ float3 SampleDDGIIrradianceCascade(DDGIData data, Texture2D probes } // Avoid a weight of zero - weight = max(weight, 0.000001f); + weight = max(weight, minWight); // Adjust weight curve to inject a small portion of light const float minWeightThreshold = 0.2f; @@ -304,13 +292,16 @@ float sdRoundBox(float3 p, float3 b, float r) float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, Texture2D probesDistance, Texture2D probesIrradiance, float3 worldPosition, float3 worldNormal, float bias = DDGI_DEFAULT_BIAS, float dither = 0.0f) { // Select the highest cascade that contains the sample location - uint cascadeIndex = 0; float probesSpacing = 0, cascadeWeight = 0; float3 probesOrigin = (float3)0, probesExtent = (float3)0, biasedWorldPosition = (float3)0; float3 viewDir = normalize(data.ViewPos - worldPosition); #if DDGI_CASCADE_BLEND_SMOOTH dither = 0.0f; #endif +#ifdef DDGI_DEBUG_CASCADE + uint cascadeIndex = DDGI_DEBUG_CASCADE; +#else + uint cascadeIndex = 0; for (; cascadeIndex < data.CascadesCount; cascadeIndex++) { // Get cascade data @@ -327,6 +318,7 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T if (cascadeWeight > dither) break; } +#endif if (cascadeIndex == data.CascadesCount) return data.FallbackIrradiance; @@ -335,7 +327,7 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T // Blend with the next cascade (or fallback irradiance outside the volume) cascadeIndex++; -#if DDGI_CASCADE_BLEND_SMOOTH +#if DDGI_CASCADE_BLEND_SMOOTH && !defined(DDGI_DEBUG_CASCADE) result *= cascadeWeight; if (cascadeIndex < data.CascadesCount && cascadeWeight < 0.99f) { diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index 6f68cef34..d12803430 
100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -27,6 +27,7 @@ #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 #define DDGI_PROBE_RELOCATE_ITERATIVE 1 // If true, probes relocation algorithm tries to move them in additive way, otherwise all nearby locations are checked to find the best position #define DDGI_PROBE_RELOCATE_FIND_BEST 1 // If true, probes relocation algorithm tries to move to the best matching location within nearby area +#define DDGI_PROBE_EMPTY_AREA_DENSITY 10 // Spacing (in probe grid) between fallback probes placed into empty areas to provide valid GI for nearby dynamic objects or transparency #define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count #define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging @@ -115,6 +116,14 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) float probesSpacing = DDGI.ProbesOriginAndSpacing[CascadeIndex].w; float3 probeBasePosition = GetDDGIProbeWorldPosition(DDGI, CascadeIndex, probeCoords); +#ifdef DDGI_DEBUG_CASCADE + // Single cascade-only debugging + if (CascadeIndex != DDGI_DEBUG_CASCADE) + { + RWProbesData[probeDataCoords] = EncodeDDGIProbeData(float3(0, 0, 0), DDGI_PROBE_STATE_INACTIVE, 0.0f); + return; + } +#else // Disable probes that are is in the range of higher-quality cascade if (CascadeIndex > 0) { @@ -126,11 +135,11 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) float prevCascadeWeight = Min3(prevProbesExtent - abs(probeBasePosition - prevProbesOrigin)); if (prevCascadeWeight > 0.1f) { - // Disable probe RWProbesData[probeDataCoords] = EncodeDDGIProbeData(float3(0, 0, 0), DDGI_PROBE_STATE_INACTIVE, 0.0f); return; } } +#endif // Check if probe was scrolled int3 probeScrollClears = ProbeScrollClears[CascadeIndex].xyz; @@ -174,9 +183,18 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) float voxelLimit = GlobalSDF.CascadeVoxelSize[CascadeIndex] * 0.8f; float 
distanceLimit = probesSpacing * ProbesDistanceLimits[CascadeIndex]; float relocateLimit = probesSpacing * ProbesRelocateLimits[CascadeIndex]; - if (sdfDst > distanceLimit + length(probeOffset)) // Probe is too far from geometry (or deep inside) + uint3 probeCoordsStable = GetDDGIProbeCoords(DDGI, probeIndex); + if (sdf > probesSpacing * DDGI.ProbesCounts.x * 0.3f && + probeCoordsStable.x % DDGI_PROBE_EMPTY_AREA_DENSITY == 0 && probeCoordsStable.y % DDGI_PROBE_EMPTY_AREA_DENSITY == 0 && probeCoordsStable.z % DDGI_PROBE_EMPTY_AREA_DENSITY == 0) { - // Disable it + // Add some fallback probes in empty areas to provide valid GI for nearby dynamic objects or transparency + probeOffset = float3(0, 0, 0); + probeState = wasScrolled || probeStateOld == DDGI_PROBE_STATE_INACTIVE ? DDGI_PROBE_STATE_ACTIVATED : DDGI_PROBE_STATE_ACTIVE; + probeAttention = DDGI_PROBE_ATTENTION_MIN; + } + else if (sdfDst > distanceLimit + length(probeOffset)) + { + // Probe is too far from geometry (or deep inside) so disable it probeOffset = float3(0, 0, 0); probeState = DDGI_PROBE_STATE_INACTIVE; probeAttention = 0.0f; @@ -197,6 +215,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) probeAttention = clamp(probeAttention, DDGI_PROBE_ATTENTION_MIN, DDGI_PROBE_ATTENTION_MAX); // Relocate only if probe location is not good enough + BRANCH if (sdf <= voxelLimit) { #if DDGI_PROBE_RELOCATE_ITERATIVE @@ -268,6 +287,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) bool wasActivated = probeStateOld == DDGI_PROBE_STATE_INACTIVE; bool wasRelocated = distance(probeOffset, probeOffsetOld) > 2.0f; #if DDGI_PROBE_RELOCATE_FIND_BEST || DDGI_PROBE_RELOCATE_ITERATIVE + BRANCH if (wasRelocated && !wasActivated) { // If probe was relocated but the previous location is visible from the new one, then don't re-activate it for smoother blend @@ -326,6 +346,87 @@ void CS_UpdateProbesInitArgs() #endif +#ifdef _CS_UpdateInactiveProbes + +globallycoherent RWTexture2D RWProbesData :
register(u0); + +void CheckNearbyProbe(inout uint3 fallbackCoords, inout uint probeState, uint3 probeCoords, int3 probeCoordsEnd, int3 offset) +{ + uint3 nearbyCoords = (uint3)clamp(((int3)probeCoords + offset), int3(0, 0, 0), probeCoordsEnd); + uint nearbyIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, nearbyCoords); + float4 nearbyData = RWProbesData[GetDDGIProbeTexelCoords(DDGI, CascadeIndex, nearbyIndex)]; + uint nearbyState = DecodeDDGIProbeState(nearbyData); + uint3 nearbyFallbackCoords = DDGI_FALLBACK_COORDS_DECODE(nearbyData); + if (nearbyState != DDGI_PROBE_STATE_INACTIVE) + { + // Use nearby probe + fallbackCoords = nearbyCoords; + probeState = nearbyState; + } + // TODO: optimize distance check with squared dst comparison + else if (distance((float3)nearbyFallbackCoords, (float3)probeCoords) < distance((float3)fallbackCoords, (float3)probeCoords)) + { + // Check if fallback probe is actually active (not some leftover memory) + nearbyIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, nearbyFallbackCoords); + nearbyData = RWProbesData[GetDDGIProbeTexelCoords(DDGI, CascadeIndex, nearbyIndex)]; + nearbyState = DecodeDDGIProbeState(nearbyData); + if (nearbyState != DDGI_PROBE_STATE_INACTIVE) + { + // Use fallback of the nearby probe + fallbackCoords = nearbyFallbackCoords; + probeState = DDGI_PROBE_STATE_ACTIVE; + } + } +} + +// Compute shader to store closest valid probe coords inside inactive probes data for quick fallback lookup when sampling irradiance.
+META_CS(true, FEATURE_LEVEL_SM5) +[numthreads(DDGI_PROBE_CLASSIFY_GROUP_SIZE, 1, 1)] +void CS_UpdateInactiveProbes(uint3 DispatchThreadId : SV_DispatchThreadID) +{ + uint probeIndex = min(DispatchThreadId.x, ProbesCount - 1); + uint3 fallbackCoords = uint3(1000, 1000, 1000); + + // Load probe data for the current thread + uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex); + probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords); + int2 probeDataCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex); + float4 probeData = RWProbesData[probeDataCoords]; + uint probeState = DecodeDDGIProbeState(probeData); + if (probeState == DDGI_PROBE_STATE_INACTIVE) + { + // Find the closest active probe (flood fill) + int3 probeCoordsEnd = (int3)DDGI.ProbesCounts - int3(1, 1, 1); + // Corners + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(1, 1, 1)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(-1, 1, 1)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(1, -1, 1)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(-1, -1, 1)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(1, 1, -1)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(-1, 1, -1)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(1, -1, -1)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(-1, -1, -1)); + // Sides + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(1, 0, 0)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(0, 1, 0)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(0, 0, 1)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(-1, 0, 0)); + CheckNearbyProbe(fallbackCoords, probeState, 
probeCoords, probeCoordsEnd, int3(0, -1, 0)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(0, 0, -1)); + } + + // Ensure all threads (within dispatch) got proper data before writing back to the same memory + DeviceMemoryBarrierWithGroupSync(); + + // Write modified probe data back (remain inactive) + if (probeState != DDGI_PROBE_STATE_INACTIVE && DispatchThreadId.x < ProbesCount && fallbackCoords.x != 1000) + { + RWProbesData[probeDataCoords] = EncodeDDGIProbeData(DDGI_FALLBACK_COORDS_ENCODE(fallbackCoords), DDGI_PROBE_STATE_INACTIVE, 0.0f); + } +} + +#endif + #ifdef _CS_TraceRays RWTexture2D RWProbesTrace : register(u0); @@ -644,7 +745,7 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ // Add distance (R), distance^2 (G) and weight (A) float rayDistance = CachedProbesTraceDistance[rayIndex]; - result += float4(rayDistance * rayWeight, (rayDistance * rayDistance) * rayWeight, 0.0f, rayWeight); + result += float4(rayDistance, rayDistance * rayDistance, 0.0f, 1.0f) * rayWeight; #endif }