diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index 27ec7146d..2844b6482 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -40,6 +40,19 @@ #define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side) #define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8 #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 +#define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count +#define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging + +#if DDGI_DEBUG_STATS +#include "Engine/Core/Collections/SamplesBuffer.h" +#define DDGI_DEBUG_STATS_FRAMES 60 + +struct StatsData +{ + uint32 RaysCount; + uint32 ProbesCount; +}; +#endif GPU_CB_STRUCT(Data0 { DynamicDiffuseGlobalIlluminationPass::ConstantsData DDGI; @@ -52,11 +65,13 @@ GPU_CB_STRUCT(Data0 { float ResetBlend; float TemporalTime; Int4 ProbeScrollClears[4]; + Float3 ViewDir; + float Padding1; }); GPU_CB_STRUCT(Data1 { // TODO: use push constants on Vulkan or root signature data on DX12 to reduce overhead of changing single DWORD - Float2 Padding1; + Float2 Padding2; uint32 CascadeIndex; uint32 ProbeIndexOffset; }); @@ -84,11 +99,21 @@ public: int32 ProbesCountTotal = 0; Int3 ProbeCounts = Int3::Zero; GPUTexture* ProbesTrace = nullptr; // Probes ray tracing: (RGB: hit radiance, A: hit distance) - GPUTexture* ProbesData = nullptr; // Probes data: (RGB: world-space offset, A: state/data) + GPUTexture* ProbesData = nullptr; // Probes data: (RGB: probe-space offset, A: state/data) GPUTexture* ProbesIrradiance = nullptr; // Probes irradiance (RGB: sRGB color) GPUTexture* ProbesDistance = nullptr; // Probes distance (R: mean distance, G: mean distance^2) GPUBuffer* ActiveProbes = nullptr; // List with indices of the active probes (built during probes classification to use indirect dispatches for probes updating), counter at 0 GPUBuffer* UpdateProbesInitArgs = nullptr; // Indirect dispatch buffer for active-only probes updating (trace+blend) +#if DDGI_DEBUG_STATS + GPUBuffer* StatsWrite = nullptr; + GPUBuffer* StatsRead = nullptr; + SamplesBuffer StatsProbes; + SamplesBuffer StatsRays; + uint32 StatsFrames = 0; +#endif +#if DDGI_DEBUG_INSTABILITY + GPUTexture* ProbesInstability = nullptr; +#endif DynamicDiffuseGlobalIlluminationPass::BindingData Result; FORCE_INLINE void Release() @@ -99,6 +124,16 @@ public: RenderTargetPool::Release(ProbesDistance); SAFE_DELETE_GPU_RESOURCE(ActiveProbes); SAFE_DELETE_GPU_RESOURCE(UpdateProbesInitArgs); +#if DDGI_DEBUG_STATS + SAFE_DELETE_GPU_RESOURCE(StatsWrite); + SAFE_DELETE_GPU_RESOURCE(StatsRead); + StatsProbes.Clear(); + StatsRays.Clear(); + StatsFrames = 0; +#endif +#if DDGI_DEBUG_INSTABILITY + RenderTargetPool::Release(ProbesInstability); +#endif } ~DDGICustomBuffer() @@ -373,12 +408,21 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont INIT_TEXTURE(ProbesData, PixelFormat::R8G8B8A8_SNorm, probesCountTotalX, probesCountTotalY); INIT_TEXTURE(ProbesIrradiance, PixelFormat::R11G11B10_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2)); INIT_TEXTURE(ProbesDistance, PixelFormat::R16G16_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2)); +#if DDGI_DEBUG_INSTABILITY + INIT_TEXTURE(ProbesInstability, PixelFormat::R16_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2)); +#endif #undef INIT_TEXTURE #define INIT_BUFFER(buffer, name) ddgiData.buffer = GPUDevice::Instance->CreateBuffer(TEXT(name)); if (!ddgiData.buffer || ddgiData.buffer->Init(desc2)) return true; memUsage += ddgiData.buffer->GetMemoryUsage(); GPUBufferDescription desc2 = GPUBufferDescription::Raw((probesCountCascade + 1) * sizeof(uint32), GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess); INIT_BUFFER(ActiveProbes, "DDGI.ActiveProbes"); desc2 = GPUBufferDescription::Buffer(sizeof(GPUDispatchIndirectArgs) * Math::DivideAndRoundUp(probesCountCascade, DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT), GPUBufferFlags::Argument | GPUBufferFlags::UnorderedAccess, PixelFormat::R32_UInt, nullptr, sizeof(uint32)); INIT_BUFFER(UpdateProbesInitArgs, "DDGI.UpdateProbesInitArgs"); +#if DDGI_DEBUG_STATS + desc2 = GPUBufferDescription::Raw(sizeof(StatsData), GPUBufferFlags::UnorderedAccess); + INIT_BUFFER(StatsWrite, "DDGI.StatsWrite"); + desc2 = desc2.ToStagingReadback(); + INIT_BUFFER(StatsRead, "DDGI.StatsRead"); +#endif #undef INIT_BUFFER LOG(Info, "Dynamic Diffuse Global Illumination probes: {0}, memory usage: {1} MB", probesCountTotal, memUsage / (1024 * 1024)); clear = true; @@ -393,6 +437,9 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont context->ClearUA(ddgiData.ProbesData, Float4::Zero); context->ClearUA(ddgiData.ProbesIrradiance, Float4::Zero); context->ClearUA(ddgiData.ProbesDistance, Float4::Zero); +#if DDGI_DEBUG_INSTABILITY + context->ClearUA(ddgiData.ProbesInstability, Float4::Zero); +#endif } ddgiData.LastFrameUsed = Engine::FrameCount; @@ -486,6 +533,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont data.ProbeScrollClears[cascadeIndex] = Int4(cascade.ProbeScrollClears, 0); } data.TemporalTime = renderContext.List->Setup.UseTemporalAAJitter ? RenderTools::ComputeTemporalTime() : 0.0f; + data.ViewDir = renderContext.View.Direction; GBufferPass::SetInputs(renderContext.View, data.GBuffer); context->UpdateCB(_cb0, &data); context->BindCB(0, _cb0); @@ -496,6 +544,10 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont PROFILE_GPU_CPU_NAMED("Probes Update"); bool anyDirty = false; uint32 threadGroupsX, threadGroupsY; +#if DDGI_DEBUG_STATS + uint32 zero[4] = {}; + context->ClearUA(ddgiData.StatsWrite, zero); +#endif for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) { if (cascadeSkipUpdate[cascadeIndex]) @@ -556,6 +608,9 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont context->BindSR(8, skybox); context->BindSR(9, ddgiData.ActiveProbes->View()); context->BindUA(0, ddgiData.ProbesTrace->View()); +#if DDGI_DEBUG_STATS + context->BindUA(1, ddgiData.StatsWrite->View()); +#endif context->DispatchIndirect(_csTraceRays[(int32)Graphics::GIQuality], ddgiData.UpdateProbesInitArgs, arg); context->ResetUA(); context->ResetSR(); @@ -564,21 +619,55 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont // Update probes irradiance and distance textures (one thread-group per probe) { PROFILE_GPU_CPU_NAMED("Update Probes"); + + // Distance context->BindSR(0, ddgiData.Result.ProbesData); context->BindSR(1, ddgiData.ProbesTrace->View()); context->BindSR(2, ddgiData.ActiveProbes->View()); - context->BindUA(0, ddgiData.Result.ProbesIrradiance); - context->DispatchIndirect(_csUpdateProbesIrradiance, ddgiData.UpdateProbesInitArgs, arg); context->BindUA(0, ddgiData.Result.ProbesDistance); context->DispatchIndirect(_csUpdateProbesDistance, ddgiData.UpdateProbesInitArgs, arg); context->ResetUA(); context->ResetSR(); + + // Irradiance + context->BindSR(1, ddgiData.ProbesTrace->View()); + context->BindSR(2, ddgiData.ActiveProbes->View()); + context->BindUA(0, ddgiData.Result.ProbesIrradiance); + context->BindUA(1, ddgiData.Result.ProbesData); +#if DDGI_DEBUG_INSTABILITY + context->BindUA(2, ddgiData.ProbesInstability->View()); +#endif + context->DispatchIndirect(_csUpdateProbesIrradiance, ddgiData.UpdateProbesInitArgs, arg); + context->ResetUA(); + context->ResetSR(); } arg += sizeof(GPUDispatchIndirectArgs); } } +#if DDGI_DEBUG_STATS + // Update stats + { + StatsData stats; + if (void* mapped = ddgiData.StatsRead->Map(GPUResourceMapMode::Read)) + { + Platform::MemoryCopy(&stats, mapped, sizeof(stats)); + ddgiData.StatsRead->Unmap(); + ddgiData.StatsProbes.Add(stats.ProbesCount); + ddgiData.StatsRays.Add(stats.RaysCount); + } + context->CopyBuffer(ddgiData.StatsRead, ddgiData.StatsWrite, sizeof(stats)); + if (++ddgiData.StatsFrames >= DDGI_DEBUG_STATS_FRAMES) + { + ddgiData.StatsFrames = 0; + stats.ProbesCount = ddgiData.StatsProbes.Average(); + stats.RaysCount = ddgiData.StatsRays.Average(); + LOG(Info, "DDGI active probes: {}, traced rays: {} per frame, rays per probe: {}", stats.ProbesCount, stats.RaysCount, stats.ProbesCount > 0 ? stats.RaysCount / stats.ProbesCount : 0); + } + } +#endif + // Update probes border pixels if (anyDirty) { @@ -718,7 +807,11 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext, { // Pass DDGI data to the material _debugMaterial->SetParameterValue(TEXT("ProbesData"), Variant(ddgiData.ProbesData)); +#if DDGI_DEBUG_INSTABILITY + _debugMaterial->SetParameterValue(TEXT("ProbesIrradiance"), Variant(ddgiData.ProbesInstability)); +#else _debugMaterial->SetParameterValue(TEXT("ProbesIrradiance"), Variant(ddgiData.ProbesIrradiance)); +#endif _debugMaterial->SetParameterValue(TEXT("ProbesDistance"), Variant(ddgiData.ProbesDistance)); auto cb = _debugMaterial->GetShader()->GetCB(3); if (cb) diff --git a/Source/Shaders/GI/DDGI.hlsl b/Source/Shaders/GI/DDGI.hlsl index fff009022..330a20420 100644 --- a/Source/Shaders/GI/DDGI.hlsl +++ b/Source/Shaders/GI/DDGI.hlsl @@ -16,6 +16,8 @@ #define DDGI_PROBE_STATE_INACTIVE 0 #define DDGI_PROBE_STATE_ACTIVATED 1 #define DDGI_PROBE_STATE_ACTIVE 2 +#define DDGI_PROBE_ATTENTION_MIN 0.02f // Minimum probe attention value that still makes it active. +#define DDGI_PROBE_ATTENTION_MAX 0.98f // Maximum probe attention value that still makes it active (but not activated which is 1.0f). #define DDGI_PROBE_RESOLUTION_IRRADIANCE 6 // Resolution (in texels) for probe irradiance data (excluding 1px padding on each side) #define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side) #define DDGI_CASCADE_BLEND_SIZE 2.5f // Distance in probes over which cascades blending happens @@ -99,15 +101,36 @@ float4 LoadDDGIProbeData(DDGIData data, Texture2D probesData, uint } // Encodes probe probe data -float4 EncodeDDGIProbeData(float3 probeOffset, uint probeState) +float4 EncodeDDGIProbeData(float3 offset, uint state, float attention) { - return float4(probeOffset, (float)probeState * (1.0f / 8.0f)); + // [0;1] -> [-1;1] + attention = saturate(attention) * 2.0f - 1.0f; + if (state == DDGI_PROBE_STATE_INACTIVE) + attention = -1.0f; + else if (state == DDGI_PROBE_STATE_ACTIVATED) + attention = 1.0f; + return float4(offset, attention); +} + +// Decodes probe attention value from the encoded state +float DecodeDDGIProbeAttention(float4 probeData) +{ + // [-1;1] -> [0;1] + if (probeData.w <= -1.0f) + return 0.0f; + if (probeData.w >= 1.0f) + return 1.0f; + return probeData.w * 0.5f + 0.5f; } // Decodes probe state from the encoded state uint DecodeDDGIProbeState(float4 probeData) { - return (uint)(probeData.w * 8.0f); + if (probeData.w <= -1.0f) + return DDGI_PROBE_STATE_INACTIVE; + if (probeData.w >= 1.0f) + return DDGI_PROBE_STATE_ACTIVATED; + return DDGI_PROBE_STATE_ACTIVE; } // Decodes probe world-space position (XYZ) from the encoded state diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index c80407d54..59e74bbb9 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -20,11 +20,14 @@ // This must match C++ #define DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT 4096 // Maximum amount of probes to update at once during rays tracing and blending #define DDGI_TRACE_RAYS_LIMIT 256 // Limit of rays per-probe (runtime value can be smaller) +#define DDGI_TRACE_RAYS_MIN 16 // Minimum amount of rays to shoot for sleepy probes #define DDGI_TRACE_NEGATIVE 0 // If true, rays that start inside geometry will use negative distance to indicate backface hit #define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8 #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 #define DDGI_PROBE_RELOCATE_ITERATIVE 1 // If true, probes relocation algorithm tries to move them in additive way, otherwise all nearby locations are checked to find the best position #define DDGI_PROBE_RELOCATE_FIND_BEST 1 // If true, probes relocation algorithm tries to move to the best matching location within nearby area +#define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count +#define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging META_CB_BEGIN(0, Data0) DDGIData DDGI; @@ -37,10 +40,12 @@ uint ProbesCount; float ResetBlend; float TemporalTime; int4 ProbeScrollClears[4]; +float3 ViewDir; +float Padding1; META_CB_END META_CB_BEGIN(1, Data1) -float2 Padding1; +float2 Padding2; uint CascadeIndex; uint ProbeIndexOffset; META_CB_END @@ -73,10 +78,11 @@ float3 GetProbeRayDirection(DDGIData data, uint rayIndex, uint raysCount, uint p } // Calculates amount of rays to allocate for a probe -uint GetProbeRaysCount(DDGIData data, uint probeState) +uint GetProbeRaysCount(DDGIData data, float probeAttention) { - // TODO: implement variable ray count based on probe location relative to the view frustum (use probe state for storage) - return data.RaysCount; + //return data.RaysCount; + probeAttention = saturate((probeAttention - DDGI_PROBE_ATTENTION_MIN) / (DDGI_PROBE_ATTENTION_MAX - DDGI_PROBE_ATTENTION_MIN)); + return DDGI_TRACE_RAYS_MIN + (uint)max(probeAttention * (float)(data.RaysCount - DDGI_TRACE_RAYS_MIN), 0.0f); } #ifdef _CS_Classify @@ -118,7 +124,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) if (prevCascadeWeight > 0.1f) { // Disable probe - RWProbesData[probeDataCoords] = EncodeDDGIProbeData(float3(0, 0, 0), DDGI_PROBE_STATE_INACTIVE); + RWProbesData[probeDataCoords] = EncodeDDGIProbeData(float3(0, 0, 0), DDGI_PROBE_STATE_INACTIVE, 0.0f); return; } } @@ -140,11 +146,15 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) // Load probe state and position float4 probeData = RWProbesData[probeDataCoords]; + float probeAttention = DecodeDDGIProbeAttention(probeData); uint probeState = DecodeDDGIProbeState(probeData); uint probeStateOld = probeState; float3 probeOffset = probeData.xyz * probesSpacing; // Probe offset is [-1;1] within probes spacing if (wasScrolled || probeState == DDGI_PROBE_STATE_INACTIVE) + { probeOffset = float3(0, 0, 0); // Clear offset for a new probe + probeAttention = 1.0f; // Wake-up + } float3 probeOffsetOld = probeOffset; float3 probePosition = probeBasePosition + probeOffset; @@ -166,11 +176,24 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) // Disable it probeOffset = float3(0, 0, 0); probeState = DDGI_PROBE_STATE_INACTIVE; + probeAttention = 0.0f; } else { - // Relocate only if probe location is not good enough + // Apply distance/view heuristics to probe attention probeState = DDGI_PROBE_STATE_ACTIVE; + float3 viewToProbe = probePosition - GBuffer.ViewPos; + float distanceToProbe = length(viewToProbe); + viewToProbe /= distanceToProbe; + float probeViewDot = dot(viewToProbe, ViewDir); + probeAttention *= lerp(0.1f, 1.0f, saturate(probeViewDot)); // Reduce quality for probes behind the camera (or away from view dir) + probeAttention *= lerp(1.0f, 0.5f, saturate(sdfDst / voxelLimit)); // Reduce quality for probes far away from geometry + probeAttention += (1.0f - saturate(distanceToProbe / 1000.0f)) * 1.2f; // Boost quality for probes nearby view + //probeAttention = 0.0f; // Debug test lowest ray count + //probeAttention = 1.0f; // Debug test highest ray count + probeAttention = clamp(probeAttention, DDGI_PROBE_ATTENTION_MIN, DDGI_PROBE_ATTENTION_MAX); + + // Relocate only if probe location is not good enough if (sdf <= voxelLimit) { #if DDGI_PROBE_RELOCATE_ITERATIVE @@ -222,6 +245,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) // Disable probe that is too close to the geometry probeOffset = float3(0, 0, 0); probeState = DDGI_PROBE_STATE_INACTIVE; + probeAttention = 0.0f; } else { @@ -232,6 +256,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) // Disable probe probeOffset = float3(0, 0, 0); probeState = DDGI_PROBE_STATE_INACTIVE; + probeAttention = 0.0f; #endif } } @@ -254,12 +279,15 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) } #endif if ((wasActivated || wasScrolled || wasRelocated) && probeState == DDGI_PROBE_STATE_ACTIVE) + { probeState = DDGI_PROBE_STATE_ACTIVATED; + probeAttention = 1.0f; + } } // Save probe state probeOffset /= probesSpacing; // Move offset back to [-1;1] space - RWProbesData[probeDataCoords] = EncodeDDGIProbeData(probeOffset, probeState); + RWProbesData[probeDataCoords] = EncodeDDGIProbeData(probeOffset, probeState, probeAttention); // Collect active probes if (probeState != DDGI_PROBE_STATE_INACTIVE) @@ -282,7 +310,7 @@ META_CS(true, FEATURE_LEVEL_SM5) [numthreads(1, 1, 1)] void CS_UpdateProbesInitArgs() { - uint activeProbesCount = ActiveProbes.Load(0); + uint activeProbesCount = ActiveProbes.Load(0); // Counter at 0 uint arg = 0; for (uint probesOffset = 0; probesOffset < activeProbesCount; probesOffset += DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT) { @@ -298,6 +326,9 @@ void CS_UpdateProbesInitArgs() #ifdef _CS_TraceRays RWTexture2D RWProbesTrace : register(u0); +#if DDGI_DEBUG_STATS +RWByteAddressBuffer RWStats : register(u1); +#endif Texture3D GlobalSDFTex : register(t0); Texture3D GlobalSDFMip : register(t1); @@ -326,12 +357,14 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID) // Load current probe state and position float4 probeData = LoadDDGIProbeData(DDGI, ProbesData, CascadeIndex, probeIndex); + float probeAttention = DecodeDDGIProbeAttention(probeData); uint probeState = DecodeDDGIProbeState(probeData); - uint probeRaysCount = GetProbeRaysCount(DDGI, probeState); + uint probeRaysCount = GetProbeRaysCount(DDGI, probeAttention); if (probeState == DDGI_PROBE_STATE_INACTIVE || rayIndex >= probeRaysCount) return; // Skip disabled probes or if current thread's ray is unused float3 probePosition = DecodeDDGIProbePosition(DDGI, probeData, CascadeIndex, probeIndex, probeCoords); float3 probeRayDirection = GetProbeRayDirection(DDGI, rayIndex, probeRaysCount, probeIndex, probeCoords); + // TODO: implement ray-guiding based on the probe irradiance (prioritize directions with high luminance) // Trace ray with Global SDF GlobalSDFTrace trace; @@ -370,6 +403,14 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID) // Write into probes trace results RWProbesTrace[uint2(rayIndex, DispatchThreadId.x)] = radiance; + +#if DDGI_DEBUG_STATS + // Update stats + uint tmp; + RWStats.InterlockedAdd(0, 1, tmp); + if (rayIndex == 0) + RWStats.InterlockedAdd(4, 1, tmp); +#endif } #endif @@ -380,6 +421,44 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID) // Update irradiance #define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_IRRADIANCE groupshared float4 CachedProbesTraceRadiance[DDGI_TRACE_RAYS_LIMIT]; +groupshared float OutputInstability[DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION]; + +// Source: https://github.com/turanszkij/WickedEngine +#define BorderOffsetsSize (4 * DDGI_PROBE_RESOLUTION + 4) +static const uint4 BorderOffsets[BorderOffsetsSize] = { + uint4(6, 1, 1, 0), + uint4(5, 1, 2, 0), + uint4(4, 1, 3, 0), + uint4(3, 1, 4, 0), + uint4(2, 1, 5, 0), + uint4(1, 1, 6, 0), + + uint4(6, 6, 1, 7), + uint4(5, 6, 2, 7), + uint4(4, 6, 3, 7), + uint4(3, 6, 4, 7), + uint4(2, 6, 5, 7), + uint4(1, 6, 6, 7), + + uint4(1, 1, 0, 6), + uint4(1, 2, 0, 5), + uint4(1, 3, 0, 4), + uint4(1, 4, 0, 3), + uint4(1, 5, 0, 2), + uint4(1, 6, 0, 1), + + uint4(6, 1, 7, 6), + uint4(6, 2, 7, 5), + uint4(6, 3, 7, 4), + uint4(6, 4, 7, 3), + uint4(6, 5, 7, 2), + uint4(6, 6, 7, 1), + + uint4(1, 1, 7, 7), + uint4(6, 1, 0, 7), + uint4(1, 6, 7, 0), + uint4(6, 6, 0, 0), +}; #else // Update distance #define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_DISTANCE @@ -389,7 +468,14 @@ groupshared float CachedProbesTraceDistance[DDGI_TRACE_RAYS_LIMIT]; groupshared float3 CachedProbesTraceDirection[DDGI_TRACE_RAYS_LIMIT]; RWTexture2D RWOutput : register(u0); +#if DDGI_PROBE_UPDATE_MODE == 0 +RWTexture2D RWProbesData : register(u1); +#if DDGI_DEBUG_INSTABILITY +RWTexture2D RWOutputInstability : register(u2); +#endif +#else Texture2D ProbesData : register(t0); +#endif Texture2D ProbesTrace : register(t1); ByteAddressBuffer ActiveProbes : register(t2); @@ -407,13 +493,16 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex); probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords); - // Skip disabled probes - bool skip = false; + // Load probe data +#if DDGI_PROBE_UPDATE_MODE == 0 + int2 probeDataCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex); + float4 probeData = RWProbesData[probeDataCoords]; +#else float4 probeData = LoadDDGIProbeData(DDGI, ProbesData, CascadeIndex, probeIndex); +#endif + float probeAttention = DecodeDDGIProbeAttention(probeData); uint probeState = DecodeDDGIProbeState(probeData); - uint probeRaysCount = GetProbeRaysCount(DDGI, probeState); - if (probeState == DDGI_PROBE_STATE_INACTIVE) - skip = true; + uint probeRaysCount = GetProbeRaysCount(DDGI, probeAttention); #if DDGI_PROBE_UPDATE_MODE == 0 uint backfacesCount = 0; @@ -423,30 +512,23 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ float distanceLimit = probesSpacing * 1.5f; #endif - BRANCH - if (!skip) + // Load trace rays results into shared memory to reuse across whole thread group (raysCount per thread) + uint raysCount = (uint)(ceil((float)probeRaysCount / (float)(DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION))); + uint raysStart = GroupIndex * raysCount; + raysCount = max(min(raysStart + raysCount, probeRaysCount), raysStart) - raysStart; + for (uint i = 0; i < raysCount; i++) { - // Load trace rays results into shared memory to reuse across whole thread group (raysCount per thread) - uint raysCount = (uint)(ceil((float)probeRaysCount / (float)(DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION))); - uint raysStart = GroupIndex * raysCount; - raysCount = max(min(raysStart + raysCount, probeRaysCount), raysStart) - raysStart; - for (uint i = 0; i < raysCount; i++) - { - uint rayIndex = raysStart + i; + uint rayIndex = raysStart + i; #if DDGI_PROBE_UPDATE_MODE == 0 - CachedProbesTraceRadiance[rayIndex] = ProbesTrace[uint2(rayIndex, GroupId.x)]; + CachedProbesTraceRadiance[rayIndex] = ProbesTrace[uint2(rayIndex, GroupId.x)]; #else - float rayDistance = ProbesTrace[uint2(rayIndex, GroupId.x)].w; - CachedProbesTraceDistance[rayIndex] = min(abs(rayDistance), distanceLimit); + float rayDistance = ProbesTrace[uint2(rayIndex, GroupId.x)].w; + CachedProbesTraceDistance[rayIndex] = min(abs(rayDistance), distanceLimit); #endif - CachedProbesTraceDirection[rayIndex] = GetProbeRayDirection(DDGI, rayIndex, probeRaysCount, probeIndex, probeCoords); - } + CachedProbesTraceDirection[rayIndex] = GetProbeRayDirection(DDGI, rayIndex, probeRaysCount, probeIndex, probeCoords); } GroupMemoryBarrierWithGroupSync(); - if (skip) - return; probeCoords = GetDDGIProbeCoords(DDGI, probeIndex); - uint2 outputCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2) + 1 + GroupThreadId.xy; // Calculate octahedral projection for probe (unwraps spherical projection into a square) float2 octahedralCoords = GetOctahedralCoords(GroupThreadId.xy, DDGI_PROBE_RESOLUTION); @@ -495,30 +577,52 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ result.rgb *= 1.0f / (2.0f * max(result.a, epsilon)); // Load current probe value + uint2 outputCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2) + 1 + GroupThreadId.xy; float3 previous = RWOutput[outputCoords].rgb; - bool wasActivated = probeState == DDGI_PROBE_STATE_ACTIVATED; - if (ResetBlend || wasActivated) - previous = float3(0, 0, 0); + bool wasActivated = probeState == DDGI_PROBE_STATE_ACTIVATED || ResetBlend; + if (wasActivated) + previous = result.rgb; + +#if DDGI_PROBE_UPDATE_MODE == 0 + // Calculate instability of the irradiance + float previousLuma = Luminance(previous.rgb); + float resultLuma = Luminance(result.rgb); + float instability = abs(previousLuma - resultLuma) / previousLuma; // Percentage change in luminance of irradiance + instability = max(instability, Max3(abs(result.rgb - previous) / previous)); // Percentage of color delta change of irradiance + //instability *= saturate(result.a); // Reduce instability in areas with a small ray-coverage + //instability = pow(instability, 1.2f); // Increase contrast + instability *= 2.0f; // Make it stronger on scene changes + //instability = saturate(instability); + OutputInstability[GroupIndex] = instability; +#if DDGI_DEBUG_INSTABILITY + RWOutputInstability[outputCoords] = instability; + //RWOutputInstability[outputCoords] = probeAttention; // Debug test probe attention visualization +#endif +#endif // Blend current value with the previous probe data - float historyWeight = DDGI.ProbeHistoryWeight; - //historyWeight = 1.0f; - //historyWeight = 0.0f; - if (ResetBlend || wasActivated) - historyWeight = 0.0f; + float historyWeightFast = DDGI.ProbeHistoryWeight; + float historyWeightSlow = 0.97f; #if DDGI_PROBE_UPDATE_MODE == 0 - result *= DDGI.IndirectLightingIntensity; -#if DDGI_SRGB_BLENDING - result.rgb = pow(result.rgb, 1.0f / DDGI.IrradianceGamma); -#endif float3 irradianceDelta = result.rgb - previous; float irradianceDeltaMax = Max3(abs(irradianceDelta)); float irradianceDeltaLen = length(irradianceDelta); if (irradianceDeltaMax > 0.5f) { // Reduce history weight after significant lighting change - historyWeight = historyWeight * 0.5f; + historyWeightFast *= 0.5f; } +#endif + float historyWeight = lerp(historyWeightSlow, historyWeightFast, probeAttention * probeAttention * probeAttention); + //historyWeight = 1.0f; // Debug full-blend + //historyWeight = 0.0f; // Debug no-blend + if (wasActivated) + historyWeight = 0.0f; +#if DDGI_PROBE_UPDATE_MODE == 0 + result *= DDGI.IndirectLightingIntensity; +#if DDGI_SRGB_BLENDING + result.rgb = pow(max(result.rgb, 0), 1.0f / DDGI.IrradianceGamma); +#endif if (irradianceDeltaLen > 2.0f) { // Reduce flickering during rapid brightness changes @@ -530,6 +634,45 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ #endif RWOutput[outputCoords] = result; + +#if DDGI_PROBE_UPDATE_MODE == 0 + // The first thread updates the probe attention based on the instability of all texels + GroupMemoryBarrierWithGroupSync(); + BRANCH + if (GroupIndex == 0 && probeState != DDGI_PROBE_STATE_INACTIVE) + { + // Calculate instability statistics for a whole probe + float instabilityAvg = 0; + for (uint i = 0; i < DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION; i++) + instabilityAvg += OutputInstability[i]; + instabilityAvg *= 1.0f / float(DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION); + instabilityAvg = saturate(instabilityAvg); + instability = instabilityAvg; + + // Calculate probe attention + float taregAttention = lerp(0.5f, DDGI_PROBE_ATTENTION_MAX, instability); // Use some base level + if (taregAttention >= probeAttention) + probeAttention = taregAttention; // Quick jump up + else + probeAttention = lerp(probeAttention, taregAttention, 0.2f); // Slow blend down + if (probeState == DDGI_PROBE_STATE_ACTIVATED) + probeAttention = DDGI_PROBE_ATTENTION_MAX; + + // Update probe data for the next frame + probeState = DDGI_PROBE_STATE_ACTIVE; + RWProbesData[probeDataCoords] = EncodeDDGIProbeData(probeData.xyz, probeState, probeAttention); + } + +#if DDGI_DEBUG_INSTABILITY + // Copy border pixels + uint2 baseCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2); + for (uint borderIndex = GroupIndex; borderIndex < BorderOffsetsSize; borderIndex += DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION) + { + uint4 borderOffsets = BorderOffsets[borderIndex]; + RWOutputInstability[baseCoords + borderOffsets.zw] = RWOutputInstability[baseCoords + borderOffsets.xy]; + } +#endif +#endif } // Compute shader for updating probes irradiance or distance texture borders (fills gaps between probes to support bilinear filtering)