diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index 0ccb9aa02..a85ba6342 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -42,6 +42,7 @@ #define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side) #define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8 #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 +#define DDGI_PROBE_EMPTY_AREA_DENSITY 10 // Spacing (in probe grid) between fallback probes placed into empty areas to provide valid GI for nearby dynamic objects or transparency #define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count #define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging @@ -217,6 +218,7 @@ bool DynamicDiffuseGlobalIlluminationPass::setupResources() return true; _csClassify = shader->GetCS("CS_Classify"); _csUpdateProbesInitArgs = shader->GetCS("CS_UpdateProbesInitArgs"); + _csUpdateInactiveProbes = shader->GetCS("CS_UpdateInactiveProbes"); _csTraceRays[0] = shader->GetCS("CS_TraceRays", 0); _csTraceRays[1] = shader->GetCS("CS_TraceRays", 1); _csTraceRays[2] = shader->GetCS("CS_TraceRays", 2); @@ -248,6 +250,7 @@ void DynamicDiffuseGlobalIlluminationPass::OnShaderReloading(Asset* obj) LastFrameShaderReload = Engine::FrameCount; _csClassify = nullptr; _csUpdateProbesInitArgs = nullptr; + _csUpdateInactiveProbes = nullptr; _csTraceRays[0] = nullptr; _csTraceRays[1] = nullptr; _csTraceRays[2] = nullptr; @@ -590,6 +593,16 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont context->ResetUA(); } + // For inactive probes, search nearby ones to find the closest valid for quick fallback when sampling irradiance + { + PROFILE_GPU_CPU_NAMED("Update Inactive Probes"); + context->BindUA(0, ddgiData.Result.ProbesData); + 
int32 iterations = Math::Min(probesCounts.MaxValue() - 1, DDGI_PROBE_EMPTY_AREA_DENSITY) * 10; + for (int32 i = 0; i < iterations; i++) + context->Dispatch(_csUpdateInactiveProbes, threadGroupsX, 1, 1); + context->ResetUA(); + } + // Update probes in batches so ProbesTrace texture can be smaller uint32 arg = 0; // TODO: use rays allocator to dispatch raytracing in packets (eg. 8 threads in a group instead of hardcoded limit) diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h index 123a47c6d..85478261f 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h @@ -45,6 +45,7 @@ private: GPUConstantBuffer* _cb1 = nullptr; GPUShaderProgramCS* _csClassify; GPUShaderProgramCS* _csUpdateProbesInitArgs; + GPUShaderProgramCS* _csUpdateInactiveProbes; GPUShaderProgramCS* _csTraceRays[4]; GPUShaderProgramCS* _csUpdateProbesIrradiance; GPUShaderProgramCS* _csUpdateProbesDistance; diff --git a/Source/Shaders/GI/DDGI.hlsl b/Source/Shaders/GI/DDGI.hlsl index e70454c2b..a7de92577 100644 --- a/Source/Shaders/GI/DDGI.hlsl +++ b/Source/Shaders/GI/DDGI.hlsl @@ -26,6 +26,9 @@ #endif #define DDGI_SRGB_BLENDING 1 // Enables blending in sRGB color space, otherwise irradiance blending is done in linear space #define DDGI_DEFAULT_BIAS 0.2f // Default value for DDGI sampling bias +#define DDGI_FALLBACK_COORDS_ENCODE(coord) ((float3)coord / 128.0f) +#define DDGI_FALLBACK_COORDS_DECODE(data) (uint3)(data.xyz * 128.0f) +//#define DDGI_DEBUG_CASCADE 0 // Forces a specific cascade to be only in use (for debugging) // DDGI data for a constant buffer struct DDGIData @@ -170,7 +173,6 @@ float3 SampleDDGIIrradianceCascade(DDGIData data, Texture2D probes // Loop over the closest probes to accumulate their contributions float4 irradiance = float4(0, 0, 0, 0); - const int3 SearchAxes[3] = { int3(1, 0, 0), int3(0, 1, 0), int3(0, 0, 1) }; 
for (uint i = 0; i < 8; i++) { uint3 probeCoordsOffset = uint3(i, i >> 1, i >> 2) & 1; @@ -180,33 +182,19 @@ float3 SampleDDGIIrradianceCascade(DDGIData data, Texture2D probes // Load probe position and state float4 probeData = LoadDDGIProbeData(data, probesData, cascadeIndex, probeIndex); uint probeState = DecodeDDGIProbeState(probeData); + uint useVisibility = true; + float minWight = 0.000001f; if (probeState == DDGI_PROBE_STATE_INACTIVE) { - // Search nearby probes to find any nearby GI sample - LOOP - for (int searchDistance = 1; searchDistance < 3 && probeState == DDGI_PROBE_STATE_INACTIVE; searchDistance++) - { - for (uint searchAxis = 0; searchAxis < 3; searchAxis++) - { - int searchAxisSign = probeCoordsOffset[searchAxis] ? 1 : -1; - int3 searchCoordsOffset = SearchAxes[searchAxis] * (searchAxisSign * searchDistance); - uint3 searchCoords = clamp((uint3)((int3)probeCoords + searchCoordsOffset), uint3(0, 0, 0), probeCoordsEnd); - uint searchIndex = GetDDGIScrollingProbeIndex(data, cascadeIndex, searchCoords); - float4 searchData = LoadDDGIProbeData(data, probesData, cascadeIndex, searchIndex); - uint searchState = DecodeDDGIProbeState(searchData); - if (searchState != DDGI_PROBE_STATE_INACTIVE) - { - // Use nearby probe as a fallback (visibility test might ignore it but with smooth gradient) - probeCoords = searchCoords; - probeIndex = searchIndex; - probeData = searchData; - probeState = searchState; - break; - } - } - } - if (probeState == DDGI_PROBE_STATE_INACTIVE) - continue; + // Use fallback probe that is closest to this one + uint3 fallbackCoords = DDGI_FALLBACK_COORDS_DECODE(probeData); + float fallbackToProbeDist = length((float3)probeCoords - (float3)fallbackCoords); + useVisibility = fallbackToProbeDist <= 1.0f; // Skip visibility test that blocks too far probes due to limiting max distance to 1.5 of probe spacing + if (fallbackToProbeDist > 2.0f) + minWight = 1.0f; + probeCoords = fallbackCoords; + probeIndex = GetDDGIScrollingProbeIndex(data, 
cascadeIndex, fallbackCoords); + probeData = LoadDDGIProbeData(data, probesData, cascadeIndex, probeIndex); } // Calculate probe position @@ -227,7 +215,7 @@ float3 SampleDDGIIrradianceCascade(DDGIData data, Texture2D probes float2 probeDistance = probesDistance.SampleLevel(SamplerLinearClamp, uv, 0).rg * 2.0f; // Visibility weight (Chebyshev) - if (biasedPosToProbeDist > probeDistance.x) + if (biasedPosToProbeDist > probeDistance.x && useVisibility) { float variance = abs(Square(probeDistance.x) - probeDistance.y); float visibilityWeight = variance / (variance + Square(biasedPosToProbeDist - probeDistance.x)); @@ -235,7 +223,7 @@ float3 SampleDDGIIrradianceCascade(DDGIData data, Texture2D probes } // Avoid a weight of zero - weight = max(weight, 0.000001f); + weight = max(weight, minWight); // Adjust weight curve to inject a small portion of light const float minWeightThreshold = 0.2f; @@ -304,13 +292,16 @@ float sdRoundBox(float3 p, float3 b, float r) float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, Texture2D probesDistance, Texture2D probesIrradiance, float3 worldPosition, float3 worldNormal, float bias = DDGI_DEFAULT_BIAS, float dither = 0.0f) { // Select the highest cascade that contains the sample location - uint cascadeIndex = 0; float probesSpacing = 0, cascadeWeight = 0; float3 probesOrigin = (float3)0, probesExtent = (float3)0, biasedWorldPosition = (float3)0; float3 viewDir = normalize(data.ViewPos - worldPosition); #if DDGI_CASCADE_BLEND_SMOOTH dither = 0.0f; #endif +#ifdef DDGI_DEBUG_CASCADE + uint cascadeIndex = DDGI_DEBUG_CASCADE; +#else + uint cascadeIndex = 0; for (; cascadeIndex < data.CascadesCount; cascadeIndex++) { // Get cascade data @@ -327,6 +318,7 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T if (cascadeWeight > dither) break; } +#endif if (cascadeIndex == data.CascadesCount) return data.FallbackIrradiance; @@ -335,7 +327,7 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T // Blend 
with the next cascade (or fallback irradiance outside the volume) cascadeIndex++; -#if DDGI_CASCADE_BLEND_SMOOTH +#if DDGI_CASCADE_BLEND_SMOOTH && !defined(DDGI_DEBUG_CASCADE) result *= cascadeWeight; if (cascadeIndex < data.CascadesCount && cascadeWeight < 0.99f) { diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index 6f68cef34..d12803430 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -27,6 +27,7 @@ #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 #define DDGI_PROBE_RELOCATE_ITERATIVE 1 // If true, probes relocation algorithm tries to move them in additive way, otherwise all nearby locations are checked to find the best position #define DDGI_PROBE_RELOCATE_FIND_BEST 1 // If true, probes relocation algorithm tries to move to the best matching location within nearby area +#define DDGI_PROBE_EMPTY_AREA_DENSITY 10 // Spacing (in probe grid) between fallback probes placed into empty areas to provide valid GI for nearby dynamic objects or transparency #define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count #define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging @@ -115,6 +116,14 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) float probesSpacing = DDGI.ProbesOriginAndSpacing[CascadeIndex].w; float3 probeBasePosition = GetDDGIProbeWorldPosition(DDGI, CascadeIndex, probeCoords); +#ifdef DDGI_DEBUG_CASCADE + // Single cascade-only debugging + if (CascadeIndex != DDGI_DEBUG_CASCADE) + { + RWProbesData[probeDataCoords] = EncodeDDGIProbeData(float3(0, 0, 0), DDGI_PROBE_STATE_INACTIVE, 0.0f); + return; + } +#else // Disable probes that are is in the range of higher-quality cascade if (CascadeIndex > 0) { @@ -126,11 +135,11 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) float prevCascadeWeight = Min3(prevProbesExtent - abs(probeBasePosition - prevProbesOrigin)); if (prevCascadeWeight > 0.1f) { - // Disable probe 
RWProbesData[probeDataCoords] = EncodeDDGIProbeData(float3(0, 0, 0), DDGI_PROBE_STATE_INACTIVE, 0.0f); return; } } +#endif // Check if probe was scrolled int3 probeScrollClears = ProbeScrollClears[CascadeIndex].xyz; @@ -174,9 +183,18 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) float voxelLimit = GlobalSDF.CascadeVoxelSize[CascadeIndex] * 0.8f; float distanceLimit = probesSpacing * ProbesDistanceLimits[CascadeIndex]; float relocateLimit = probesSpacing * ProbesRelocateLimits[CascadeIndex]; - if (sdfDst > distanceLimit + length(probeOffset)) // Probe is too far from geometry (or deep inside) + uint3 probeCoordsStable = GetDDGIProbeCoords(DDGI, probeIndex); + if (sdf > probesSpacing * DDGI.ProbesCounts.x * 0.3f && + probeCoordsStable.x % DDGI_PROBE_EMPTY_AREA_DENSITY == 0 && probeCoordsStable.y % DDGI_PROBE_EMPTY_AREA_DENSITY == 0 && probeCoordsStable.z % DDGI_PROBE_EMPTY_AREA_DENSITY == 0) { - // Disable it + // Add some fallback probes in empty areas to provide valid GI for nearby dynamic objects or transparency + probeOffset = float3(0, 0, 0); + probeState = wasScrolled || probeStateOld == DDGI_PROBE_STATE_INACTIVE ? 
DDGI_PROBE_STATE_ACTIVATED : DDGI_PROBE_STATE_ACTIVE; + probeAttention = DDGI_PROBE_ATTENTION_MIN; + } + else if (sdfDst > distanceLimit + length(probeOffset)) + { + // Probe is too far from geometry (or deep inside) so disable it probeOffset = float3(0, 0, 0); probeState = DDGI_PROBE_STATE_INACTIVE; probeAttention = 0.0f; @@ -197,6 +215,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) probeAttention = clamp(probeAttention, DDGI_PROBE_ATTENTION_MIN, DDGI_PROBE_ATTENTION_MAX); // Relocate only if probe location is not good enough + BRANCH if (sdf <= voxelLimit) { #if DDGI_PROBE_RELOCATE_ITERATIVE @@ -268,6 +287,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) bool wasActivated = probeStateOld == DDGI_PROBE_STATE_INACTIVE; bool wasRelocated = distance(probeOffset, probeOffsetOld) > 2.0f; #if DDGI_PROBE_RELOCATE_FIND_BEST || DDGI_PROBE_RELOCATE_ITERATIVE + BRANCH if (wasRelocated && !wasActivated) { // If probe was relocated but the previous location is visible from the new one, then don't re-activate it for smoother blend @@ -326,6 +346,87 @@ void CS_UpdateProbesInitArgs() #endif +#ifdef _CS_UpdateInactiveProbes + +globallycoherent RWTexture2D RWProbesData : register(u0); + +void CheckNearbyProbe(inout uint3 fallbackCoords, inout uint probeState, uint3 probeCoords, int3 probeCoordsEnd, int3 offset) +{ + uint3 nearbyCoords = (uint3)clamp(((int3)probeCoords + offset), int3(0, 0, 0), probeCoordsEnd); + uint nearbyIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, nearbyCoords); + float4 nearbyData = RWProbesData[GetDDGIProbeTexelCoords(DDGI, CascadeIndex, nearbyIndex)]; + uint nearbyState = DecodeDDGIProbeState(nearbyData); + uint3 nearbyFallbackCoords = DDGI_FALLBACK_COORDS_DECODE(nearbyData); + if (nearbyState != DDGI_PROBE_STATE_INACTIVE) + { + // Use nearby probe + fallbackCoords = nearbyCoords; + probeState = nearbyState; + } + // TODO: optimize distance check with squared dst comparison + else if 
(distance((float3)nearbyFallbackCoords, (float3)probeCoords) < distance((float3)fallbackCoords, (float3)probeCoords)) + { + // Check if fallback probe is actually active (not some leftover memory) + nearbyIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, nearbyFallbackCoords); + nearbyData = RWProbesData[GetDDGIProbeTexelCoords(DDGI, CascadeIndex, nearbyIndex)]; + nearbyState = DecodeDDGIProbeState(nearbyData); + if (nearbyState != DDGI_PROBE_STATE_INACTIVE) + { + // Use fallback of the nearby probe + fallbackCoords = nearbyFallbackCoords; + probeState = DDGI_PROBE_STATE_ACTIVE; + } + } +} + +// Compute shader to store closest valid probe coords inside inactive probes data for quick fallback lookup when sampling irradiance. +META_CS(true, FEATURE_LEVEL_SM5) +[numthreads(DDGI_PROBE_CLASSIFY_GROUP_SIZE, 1, 1)] +void CS_UpdateInactiveProbes(uint3 DispatchThreadId : SV_DispatchThreadID) +{ + uint probeIndex = min(DispatchThreadId.x, ProbesCount - 1); + uint3 fallbackCoords = uint3(1000, 1000, 1000); + + // Load probe data for the current thread + uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex); + probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords); + int2 probeDataCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex); + float4 probeData = RWProbesData[probeDataCoords]; + uint probeState = DecodeDDGIProbeState(probeData); + if (probeState == DDGI_PROBE_STATE_INACTIVE) + { + // Find the closest active probe (flood fill) + int3 probeCoordsEnd = (int3)DDGI.ProbesCounts - int3(1, 1, 1); + // Corners + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(1, 1, 1)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(-1, 1, 1)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(1, -1, 1)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(-1, -1, 1)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, 
probeCoordsEnd, int3(1, 1, -1)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(-1, 1, -1)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(1, -1, -1)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(-1, -1, -1)); + // Sides + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(1, 0, 0)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(0, 1, 0)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(0, 0, 1)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(-1, 0, 0)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(0, -1, 0)); + CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(0, 0, -1)); + } + + // Ensure all threads (within the thread group) got proper data before writing back to the same memory + DeviceMemoryBarrierWithGroupSync(); + + // Write modified probe data back (remain inactive) + if (probeState != DDGI_PROBE_STATE_INACTIVE && DispatchThreadId.x < ProbesCount && fallbackCoords.x != 1000) + { + RWProbesData[probeDataCoords] = EncodeDDGIProbeData(DDGI_FALLBACK_COORDS_ENCODE(fallbackCoords), DDGI_PROBE_STATE_INACTIVE, 0.0f); + } +} + +#endif + #ifdef _CS_TraceRays RWTexture2D RWProbesTrace : register(u0); @@ -644,7 +745,7 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ // Add distance (R), distance^2 (G) and weight (A) float rayDistance = CachedProbesTraceDistance[rayIndex]; - result += float4(rayDistance * rayWeight, (rayDistance * rayDistance) * rayWeight, 0.0f, rayWeight); + result += float4(rayDistance, rayDistance * rayDistance, 0.0f, 1.0f) * rayWeight; #endif }