diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index 973f91b3d..25550ecd8 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -42,7 +42,7 @@ #define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side) #define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8 #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 -#define DDGI_PROBE_EMPTY_AREA_DENSITY 10 // Spacing (in probe grid) between fallback probes placed into empty areas to provide valid GI for nearby dynamic objects or transparency +#define DDGI_PROBE_EMPTY_AREA_DENSITY 8 // Spacing (in probe grid) between fallback probes placed into empty areas to provide valid GI for nearby dynamic objects or transparency #define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count #define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging @@ -76,7 +76,8 @@ GPU_CB_STRUCT(Data0 { GPU_CB_STRUCT(Data1 { // TODO: use push constants on Vulkan or root signature data on DX12 to reduce overhead of changing single DWORD - Float2 Padding2; + float Padding2; + int32 StepSize; uint32 CascadeIndex; uint32 ProbeIndexOffset; }); @@ -594,10 +595,17 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont // For inactive probes, search nearby ones to find the closest valid for quick fallback when sampling irradiance { PROFILE_GPU_CPU_NAMED("Update Inactive Probes"); + // TODO: this could run within GPUComputePass during Trace Rays or Update Probes to overlap compute works context->BindUA(0, ddgiData.Result.ProbesData); - int32 iterations = Math::Min(probesCounts.MaxValue() - 1, DDGI_PROBE_EMPTY_AREA_DENSITY); - for (int32 i = 0; i < iterations; i++) + Data1 data; + data.CascadeIndex = cascadeIndex; + int32 iterations = 
Math::CeilToInt(Math::Log2((float)Math::Min(probesCounts.MaxValue(), DDGI_PROBE_EMPTY_AREA_DENSITY) + 1.0f)); // ceil(log2(N + 1)) passes: steps 2^(k-1)..1 together reach at least N probes away + for (int32 i = iterations - 1; i >= 0; i--) + { + data.StepSize = 1 << i; // Jump Flood step size (exact integer power of two, halves every pass) + context->UpdateCB(_cb1, &data); context->Dispatch(_csUpdateInactiveProbes, threadGroupsX, 1, 1); + } context->ResetUA(); } diff --git a/Source/Shaders/GI/DDGI.hlsl b/Source/Shaders/GI/DDGI.hlsl index 662a634a0..3e31c2e53 100644 --- a/Source/Shaders/GI/DDGI.hlsl +++ b/Source/Shaders/GI/DDGI.hlsl @@ -26,8 +26,9 @@ #endif #define DDGI_SRGB_BLENDING 1 // Enables blending in sRGB color space, otherwise irradiance blending is done in linear space #define DDGI_DEFAULT_BIAS 0.2f // Default value for DDGI sampling bias -#define DDGI_FALLBACK_COORDS_ENCODE(coord) ((float3)coord / 128.0f) -#define DDGI_FALLBACK_COORDS_DECODE(data) (uint3)(data.xyz * 128.0f) +#define DDGI_FALLBACK_COORDS_ENCODE(coord) ((float3)((coord) + 1) / 128.0f) // Coords are stored biased by +1 so that all-zero data means 'no fallback' +#define DDGI_FALLBACK_COORDS_DECODE(data) ((uint3)((data).xyz * 128.0f - 1)) // Inverse of ENCODE (removes the +1 bias) +#define DDGI_FALLBACK_COORDS_VALID(data) (length((data).xyz) > 0) // True if any fallback coords were encoded //#define DDGI_DEBUG_CASCADE 0 // Forces a specific cascade to be only in use (for debugging) // DDGI data for a constant buffer diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index f6f2d05a9..b080efc0b 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -27,7 +27,7 @@ #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 #define DDGI_PROBE_RELOCATE_ITERATIVE 1 // If true, probes relocation algorithm tries to move them in additive way, otherwise all nearby locations are checked to find the best position #define DDGI_PROBE_RELOCATE_FIND_BEST 1 // If true, probes relocation algorithm tries to move to the best matching location within nearby area -#define DDGI_PROBE_EMPTY_AREA_DENSITY 10 // Spacing (in probe grid) between fallback probes placed into empty areas to provide valid GI for nearby dynamic objects or transparency 
+#define DDGI_PROBE_EMPTY_AREA_DENSITY 8 // Spacing (in probe grid) between fallback probes placed into empty areas to provide valid GI for nearby dynamic objects or transparency #define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count #define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging @@ -49,7 +49,8 @@ uint FrameIndexMod8; META_CB_END META_CB_BEGIN(1, Data1) -float2 Padding2; +float Padding2; +int StepSize; uint CascadeIndex; uint ProbeIndexOffset; META_CB_END @@ -364,38 +365,35 @@ void CS_UpdateProbesInitArgs() #ifdef _CS_UpdateInactiveProbes -globallycoherent RWTexture2D RWProbesData : register(u0); +RWTexture2D RWProbesData : register(u0); -void CheckNearbyProbe(inout uint3 fallbackCoords, inout uint probeState, uint3 probeCoords, int3 probeCoordsEnd, int3 offset) +void CheckNearbyProbe(inout uint3 fallbackCoords, inout uint probeState, inout float minDistance, uint3 probeCoords, int3 probeCoordsEnd, int3 offset) { uint3 nearbyCoords = (uint3)clamp(((int3)probeCoords + offset), int3(0, 0, 0), probeCoordsEnd); uint nearbyIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, nearbyCoords); float4 nearbyData = RWProbesData[GetDDGIProbeTexelCoords(DDGI, CascadeIndex, nearbyIndex)]; - uint nearbyState = DecodeDDGIProbeState(nearbyData); - uint3 nearbyFallbackCoords = DDGI_FALLBACK_COORDS_DECODE(nearbyData); - if (nearbyState != DDGI_PROBE_STATE_INACTIVE) + float nearbyDist = distance((float3)nearbyCoords, (float3)probeCoords); + if (DecodeDDGIProbeState(nearbyData) != DDGI_PROBE_STATE_INACTIVE && nearbyDist < minDistance) { // Use nearby probe fallbackCoords = nearbyCoords; - probeState = nearbyState; + probeState = DDGI_PROBE_STATE_ACTIVE; + minDistance = nearbyDist; + return; } - // TODO: optimize distance check with squared dst comparision - else if (distance((float3)nearbyFallbackCoords, (float3)probeCoords) < distance((float3)fallbackCoords, (float3)probeCoords)) + nearbyCoords = 
DDGI_FALLBACK_COORDS_DECODE(nearbyData); + nearbyDist = distance((float3)nearbyCoords, (float3)probeCoords); + if (DecodeDDGIProbeState(nearbyData) == DDGI_PROBE_STATE_INACTIVE && DDGI_FALLBACK_COORDS_VALID(nearbyData) && nearbyDist < minDistance) { - // Check if fallback probe is actually active (not some leftover memory) - nearbyIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, nearbyFallbackCoords); - nearbyData = RWProbesData[GetDDGIProbeTexelCoords(DDGI, CascadeIndex, nearbyIndex)]; - nearbyState = DecodeDDGIProbeState(nearbyData); - if (nearbyState != DDGI_PROBE_STATE_INACTIVE) - { - // Use fallback of the nearby probe - fallbackCoords = nearbyFallbackCoords; - probeState = DDGI_PROBE_STATE_ACTIVE; - } + // Use fallback probe stored in the nearby inactive probe (fallback coords are only written together with the inactive state, so data of other probes is not decoded as coords) + fallbackCoords = nearbyCoords; + probeState = DDGI_PROBE_STATE_ACTIVE; + minDistance = nearbyDist; } } // Compute shader to store closest valid probe coords inside inactive probes data for quick fallback lookup when sampling irradiance. +// Uses Jump Flood algorithm: each dispatch checks the 3x3x3 neighborhood with offsets scaled by StepSize. META_CS(true, FEATURE_LEVEL_SM5) [numthreads(DDGI_PROBE_CLASSIFY_GROUP_SIZE, 1, 1)] void CS_UpdateInactiveProbes(uint3 DispatchThreadId : SV_DispatchThreadID) @@ -409,32 +407,26 @@ void CS_UpdateInactiveProbes(uint3 DispatchThreadId : SV_DispatchThreadID) int2 probeDataCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex); float4 probeData = RWProbesData[probeDataCoords]; uint probeState = DecodeDDGIProbeState(probeData); + BRANCH if (probeState == DDGI_PROBE_STATE_INACTIVE) { - // Find the closest active probe (flood fill) + // Find the closest active probe (Jump Flood) int3 probeCoordsEnd = (int3)DDGI.ProbesCounts - int3(1, 1, 1); - // Corners - CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(1, 1, 1)); - CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(-1, 1, 1)); - CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(1, -1, 1)); - CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(-1, -1, 1)); - 
CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(1, 1, -1)); - CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(-1, 1, -1)); - CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(1, -1, -1)); - CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(-1, -1, -1)); - // Sides - CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(1, 0, 0)); - CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(0, 1, 0)); - CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(0, 0, 1)); - CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(-1, 0, 0)); - CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(0, -1, 0)); - CheckNearbyProbe(fallbackCoords, probeState, probeCoords, probeCoordsEnd, int3(0, 0, -1)); + float minDistance = 1e27f; + UNROLL for (int z = -1; z <= 1; z++) + UNROLL for (int y = -1; y <= 1; y++) + UNROLL for (int x = -1; x <= 1; x++) + { + int3 offset = int3(x, y, z) * StepSize; + CheckNearbyProbe(fallbackCoords, probeState, minDistance, probeCoords, probeCoordsEnd, offset); + } } - // Ensure all threads (within dispatch) got proepr data before writing back to the same memory - DeviceMemoryBarrierWithGroupSync(); + // Ensure all threads (within the thread group - this barrier does not synchronize other groups of the dispatch) got proper data before writing back to the same memory + AllMemoryBarrierWithGroupSync(); // Write modified probe data back (remain inactive) + BRANCH if (probeState != DDGI_PROBE_STATE_INACTIVE && DispatchThreadId.x < ProbesCount && fallbackCoords.x != 1000) { RWProbesData[probeDataCoords] = EncodeDDGIProbeData(DDGI_FALLBACK_COORDS_ENCODE(fallbackCoords), DDGI_PROBE_STATE_INACTIVE, 0.0f);