From aeff147b6ded0860b80b7e80696ae4d1f650611c Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 18 Jul 2024 08:38:23 +0200 Subject: [PATCH] Optimize and refactor DDGI to use linear `attention` per-probe to dynamically control ray count and blend speed Probes will use lower ray count when behind the camera or not correlated with the view direction or far from geometry. Probes nearby camera or with high instability in irradiance will maintain higher ray count. Probes that use less rays will have slower blending to reduce artifacts. Added probe instability, attention and stats debugging for devs. --- .../GI/DynamicDiffuseGlobalIllumination.cpp | 101 +++++++- Source/Shaders/GI/DDGI.hlsl | 29 ++- Source/Shaders/GI/DDGI.shader | 231 ++++++++++++++---- 3 files changed, 310 insertions(+), 51 deletions(-) diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index 27ec7146d..2844b6482 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -40,6 +40,19 @@ #define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side) #define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8 #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 +#define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count +#define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging + +#if DDGI_DEBUG_STATS +#include "Engine/Core/Collections/SamplesBuffer.h" +#define DDGI_DEBUG_STATS_FRAMES 60 + +struct StatsData +{ + uint32 RaysCount; + uint32 ProbesCount; +}; +#endif GPU_CB_STRUCT(Data0 { DynamicDiffuseGlobalIlluminationPass::ConstantsData DDGI; @@ -52,11 +65,13 @@ GPU_CB_STRUCT(Data0 { float ResetBlend; float TemporalTime; Int4 ProbeScrollClears[4]; + Float3 ViewDir; + float Padding1; }); GPU_CB_STRUCT(Data1 { // TODO: use 
push constants on Vulkan or root signature data on DX12 to reduce overhead of changing single DWORD - Float2 Padding1; + Float2 Padding2; uint32 CascadeIndex; uint32 ProbeIndexOffset; }); @@ -84,11 +99,21 @@ public: int32 ProbesCountTotal = 0; Int3 ProbeCounts = Int3::Zero; GPUTexture* ProbesTrace = nullptr; // Probes ray tracing: (RGB: hit radiance, A: hit distance) - GPUTexture* ProbesData = nullptr; // Probes data: (RGB: world-space offset, A: state/data) + GPUTexture* ProbesData = nullptr; // Probes data: (RGB: probe-space offset, A: state/data) GPUTexture* ProbesIrradiance = nullptr; // Probes irradiance (RGB: sRGB color) GPUTexture* ProbesDistance = nullptr; // Probes distance (R: mean distance, G: mean distance^2) GPUBuffer* ActiveProbes = nullptr; // List with indices of the active probes (built during probes classification to use indirect dispatches for probes updating), counter at 0 GPUBuffer* UpdateProbesInitArgs = nullptr; // Indirect dispatch buffer for active-only probes updating (trace+blend) +#if DDGI_DEBUG_STATS + GPUBuffer* StatsWrite = nullptr; + GPUBuffer* StatsRead = nullptr; + SamplesBuffer StatsProbes; + SamplesBuffer StatsRays; + uint32 StatsFrames = 0; +#endif +#if DDGI_DEBUG_INSTABILITY + GPUTexture* ProbesInstability = nullptr; +#endif DynamicDiffuseGlobalIlluminationPass::BindingData Result; FORCE_INLINE void Release() @@ -99,6 +124,16 @@ public: RenderTargetPool::Release(ProbesDistance); SAFE_DELETE_GPU_RESOURCE(ActiveProbes); SAFE_DELETE_GPU_RESOURCE(UpdateProbesInitArgs); +#if DDGI_DEBUG_STATS + SAFE_DELETE_GPU_RESOURCE(StatsWrite); + SAFE_DELETE_GPU_RESOURCE(StatsRead); + StatsProbes.Clear(); + StatsRays.Clear(); + StatsFrames = 0; +#endif +#if DDGI_DEBUG_INSTABILITY + RenderTargetPool::Release(ProbesInstability); +#endif } ~DDGICustomBuffer() @@ -373,12 +408,21 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont INIT_TEXTURE(ProbesData, PixelFormat::R8G8B8A8_SNorm, probesCountTotalX, 
probesCountTotalY); INIT_TEXTURE(ProbesIrradiance, PixelFormat::R11G11B10_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2)); INIT_TEXTURE(ProbesDistance, PixelFormat::R16G16_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2)); +#if DDGI_DEBUG_INSTABILITY + INIT_TEXTURE(ProbesInstability, PixelFormat::R16_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2)); +#endif #undef INIT_TEXTURE #define INIT_BUFFER(buffer, name) ddgiData.buffer = GPUDevice::Instance->CreateBuffer(TEXT(name)); if (!ddgiData.buffer || ddgiData.buffer->Init(desc2)) return true; memUsage += ddgiData.buffer->GetMemoryUsage(); GPUBufferDescription desc2 = GPUBufferDescription::Raw((probesCountCascade + 1) * sizeof(uint32), GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess); INIT_BUFFER(ActiveProbes, "DDGI.ActiveProbes"); desc2 = GPUBufferDescription::Buffer(sizeof(GPUDispatchIndirectArgs) * Math::DivideAndRoundUp(probesCountCascade, DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT), GPUBufferFlags::Argument | GPUBufferFlags::UnorderedAccess, PixelFormat::R32_UInt, nullptr, sizeof(uint32)); INIT_BUFFER(UpdateProbesInitArgs, "DDGI.UpdateProbesInitArgs"); +#if DDGI_DEBUG_STATS + desc2 = GPUBufferDescription::Raw(sizeof(StatsData), GPUBufferFlags::UnorderedAccess); + INIT_BUFFER(StatsWrite, "DDGI.StatsWrite"); + desc2 = desc2.ToStagingReadback(); + INIT_BUFFER(StatsRead, "DDGI.StatsRead"); +#endif #undef INIT_BUFFER LOG(Info, "Dynamic Diffuse Global Illumination probes: {0}, memory usage: {1} MB", probesCountTotal, memUsage / (1024 * 1024)); clear = true; @@ -393,6 +437,9 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont context->ClearUA(ddgiData.ProbesData, Float4::Zero); context->ClearUA(ddgiData.ProbesIrradiance, Float4::Zero); 
context->ClearUA(ddgiData.ProbesDistance, Float4::Zero); +#if DDGI_DEBUG_INSTABILITY + context->ClearUA(ddgiData.ProbesInstability, Float4::Zero); +#endif } ddgiData.LastFrameUsed = Engine::FrameCount; @@ -486,6 +533,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont data.ProbeScrollClears[cascadeIndex] = Int4(cascade.ProbeScrollClears, 0); } data.TemporalTime = renderContext.List->Setup.UseTemporalAAJitter ? RenderTools::ComputeTemporalTime() : 0.0f; + data.ViewDir = renderContext.View.Direction; GBufferPass::SetInputs(renderContext.View, data.GBuffer); context->UpdateCB(_cb0, &data); context->BindCB(0, _cb0); @@ -496,6 +544,10 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont PROFILE_GPU_CPU_NAMED("Probes Update"); bool anyDirty = false; uint32 threadGroupsX, threadGroupsY; +#if DDGI_DEBUG_STATS + uint32 zero[4] = {}; + context->ClearUA(ddgiData.StatsWrite, zero); +#endif for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) { if (cascadeSkipUpdate[cascadeIndex]) @@ -556,6 +608,9 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont context->BindSR(8, skybox); context->BindSR(9, ddgiData.ActiveProbes->View()); context->BindUA(0, ddgiData.ProbesTrace->View()); +#if DDGI_DEBUG_STATS + context->BindUA(1, ddgiData.StatsWrite->View()); +#endif context->DispatchIndirect(_csTraceRays[(int32)Graphics::GIQuality], ddgiData.UpdateProbesInitArgs, arg); context->ResetUA(); context->ResetSR(); @@ -564,21 +619,55 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont // Update probes irradiance and distance textures (one thread-group per probe) { PROFILE_GPU_CPU_NAMED("Update Probes"); + + // Distance context->BindSR(0, ddgiData.Result.ProbesData); context->BindSR(1, ddgiData.ProbesTrace->View()); context->BindSR(2, ddgiData.ActiveProbes->View()); - context->BindUA(0, ddgiData.Result.ProbesIrradiance); - 
context->DispatchIndirect(_csUpdateProbesIrradiance, ddgiData.UpdateProbesInitArgs, arg); context->BindUA(0, ddgiData.Result.ProbesDistance); context->DispatchIndirect(_csUpdateProbesDistance, ddgiData.UpdateProbesInitArgs, arg); context->ResetUA(); context->ResetSR(); + + // Irradiance + context->BindSR(1, ddgiData.ProbesTrace->View()); + context->BindSR(2, ddgiData.ActiveProbes->View()); + context->BindUA(0, ddgiData.Result.ProbesIrradiance); + context->BindUA(1, ddgiData.Result.ProbesData); +#if DDGI_DEBUG_INSTABILITY + context->BindUA(2, ddgiData.ProbesInstability->View()); +#endif + context->DispatchIndirect(_csUpdateProbesIrradiance, ddgiData.UpdateProbesInitArgs, arg); + context->ResetUA(); + context->ResetSR(); } arg += sizeof(GPUDispatchIndirectArgs); } } +#if DDGI_DEBUG_STATS + // Update stats + { + StatsData stats; + if (void* mapped = ddgiData.StatsRead->Map(GPUResourceMapMode::Read)) + { + Platform::MemoryCopy(&stats, mapped, sizeof(stats)); + ddgiData.StatsRead->Unmap(); + ddgiData.StatsProbes.Add(stats.ProbesCount); + ddgiData.StatsRays.Add(stats.RaysCount); + } + context->CopyBuffer(ddgiData.StatsRead, ddgiData.StatsWrite, sizeof(stats)); + if (++ddgiData.StatsFrames >= DDGI_DEBUG_STATS_FRAMES) + { + ddgiData.StatsFrames = 0; + stats.ProbesCount = ddgiData.StatsProbes.Average(); + stats.RaysCount = ddgiData.StatsRays.Average(); + LOG(Info, "DDGI active probes: {}, traced rays: {} per frame, rays per probe: {}", stats.ProbesCount, stats.RaysCount, stats.ProbesCount > 0 ? 
stats.RaysCount / stats.ProbesCount : 0); + } + } +#endif + // Update probes border pixels if (anyDirty) { @@ -718,7 +807,11 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext, { // Pass DDGI data to the material _debugMaterial->SetParameterValue(TEXT("ProbesData"), Variant(ddgiData.ProbesData)); +#if DDGI_DEBUG_INSTABILITY + _debugMaterial->SetParameterValue(TEXT("ProbesIrradiance"), Variant(ddgiData.ProbesInstability)); +#else _debugMaterial->SetParameterValue(TEXT("ProbesIrradiance"), Variant(ddgiData.ProbesIrradiance)); +#endif _debugMaterial->SetParameterValue(TEXT("ProbesDistance"), Variant(ddgiData.ProbesDistance)); auto cb = _debugMaterial->GetShader()->GetCB(3); if (cb) diff --git a/Source/Shaders/GI/DDGI.hlsl b/Source/Shaders/GI/DDGI.hlsl index fff009022..330a20420 100644 --- a/Source/Shaders/GI/DDGI.hlsl +++ b/Source/Shaders/GI/DDGI.hlsl @@ -16,6 +16,8 @@ #define DDGI_PROBE_STATE_INACTIVE 0 #define DDGI_PROBE_STATE_ACTIVATED 1 #define DDGI_PROBE_STATE_ACTIVE 2 +#define DDGI_PROBE_ATTENTION_MIN 0.02f // Minimum probe attention value that still makes it active. +#define DDGI_PROBE_ATTENTION_MAX 0.98f // Maximum probe attention value that still makes it active (but not activated which is 1.0f). 
#define DDGI_PROBE_RESOLUTION_IRRADIANCE 6 // Resolution (in texels) for probe irradiance data (excluding 1px padding on each side) #define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side) #define DDGI_CASCADE_BLEND_SIZE 2.5f // Distance in probes over which cascades blending happens @@ -99,15 +101,36 @@ float4 LoadDDGIProbeData(DDGIData data, Texture2D probesData, uint } // Encodes probe probe data -float4 EncodeDDGIProbeData(float3 probeOffset, uint probeState) +float4 EncodeDDGIProbeData(float3 offset, uint state, float attention) { - return float4(probeOffset, (float)probeState * (1.0f / 8.0f)); + // [0;1] -> [-1;1] + attention = saturate(attention) * 2.0f - 1.0f; + if (state == DDGI_PROBE_STATE_INACTIVE) + attention = -1.0f; + else if (state == DDGI_PROBE_STATE_ACTIVATED) + attention = 1.0f; + return float4(offset, attention); +} + +// Decodes probe attention value from the encoded state +float DecodeDDGIProbeAttention(float4 probeData) +{ + // [-1;1] -> [0;1] + if (probeData.w <= -1.0f) + return 0.0f; + if (probeData.w >= 1.0f) + return 1.0f; + return probeData.w * 0.5f + 0.5f; } // Decodes probe state from the encoded state uint DecodeDDGIProbeState(float4 probeData) { - return (uint)(probeData.w * 8.0f); + if (probeData.w <= -1.0f) + return DDGI_PROBE_STATE_INACTIVE; + if (probeData.w >= 1.0f) + return DDGI_PROBE_STATE_ACTIVATED; + return DDGI_PROBE_STATE_ACTIVE; } // Decodes probe world-space position (XYZ) from the encoded state diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index c80407d54..59e74bbb9 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -20,11 +20,14 @@ // This must match C++ #define DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT 4096 // Maximum amount of probes to update at once during rays tracing and blending #define DDGI_TRACE_RAYS_LIMIT 256 // Limit of rays per-probe (runtime value can be smaller) +#define 
DDGI_TRACE_RAYS_MIN 16 // Minimum amount of rays to shoot for sleepy probes #define DDGI_TRACE_NEGATIVE 0 // If true, rays that start inside geometry will use negative distance to indicate backface hit #define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8 #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 #define DDGI_PROBE_RELOCATE_ITERATIVE 1 // If true, probes relocation algorithm tries to move them in additive way, otherwise all nearby locations are checked to find the best position #define DDGI_PROBE_RELOCATE_FIND_BEST 1 // If true, probes relocation algorithm tries to move to the best matching location within nearby area +#define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count +#define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging META_CB_BEGIN(0, Data0) DDGIData DDGI; @@ -37,10 +40,12 @@ uint ProbesCount; float ResetBlend; float TemporalTime; int4 ProbeScrollClears[4]; +float3 ViewDir; +float Padding1; META_CB_END META_CB_BEGIN(1, Data1) -float2 Padding1; +float2 Padding2; uint CascadeIndex; uint ProbeIndexOffset; META_CB_END @@ -73,10 +78,11 @@ float3 GetProbeRayDirection(DDGIData data, uint rayIndex, uint raysCount, uint p } // Calculates amount of rays to allocate for a probe -uint GetProbeRaysCount(DDGIData data, uint probeState) +uint GetProbeRaysCount(DDGIData data, float probeAttention) { - // TODO: implement variable ray count based on probe location relative to the view frustum (use probe state for storage) - return data.RaysCount; + //return data.RaysCount; + probeAttention = saturate((probeAttention - DDGI_PROBE_ATTENTION_MIN) / (DDGI_PROBE_ATTENTION_MAX - DDGI_PROBE_ATTENTION_MIN)); + return min((uint)DDGI_TRACE_RAYS_MIN, data.RaysCount) + (uint)max(probeAttention * ((float)data.RaysCount - (float)DDGI_TRACE_RAYS_MIN), 0.0f); // Subtract in float so a RaysCount below the minimum cannot wrap around as unsigned (assumes RaysCount is uint - confirm in DDGIData) } #ifdef _CS_Classify @@ -118,7 +124,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) if (prevCascadeWeight > 0.1f) { // Disable probe - RWProbesData[probeDataCoords] = 
EncodeDDGIProbeData(float3(0, 0, 0), DDGI_PROBE_STATE_INACTIVE); + RWProbesData[probeDataCoords] = EncodeDDGIProbeData(float3(0, 0, 0), DDGI_PROBE_STATE_INACTIVE, 0.0f); return; } } @@ -140,11 +146,15 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) // Load probe state and position float4 probeData = RWProbesData[probeDataCoords]; + float probeAttention = DecodeDDGIProbeAttention(probeData); uint probeState = DecodeDDGIProbeState(probeData); uint probeStateOld = probeState; float3 probeOffset = probeData.xyz * probesSpacing; // Probe offset is [-1;1] within probes spacing if (wasScrolled || probeState == DDGI_PROBE_STATE_INACTIVE) + { probeOffset = float3(0, 0, 0); // Clear offset for a new probe + probeAttention = 1.0f; // Wake-up + } float3 probeOffsetOld = probeOffset; float3 probePosition = probeBasePosition + probeOffset; @@ -166,11 +176,24 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) // Disable it probeOffset = float3(0, 0, 0); probeState = DDGI_PROBE_STATE_INACTIVE; + probeAttention = 0.0f; } else { - // Relocate only if probe location is not good enough + // Apply distance/view heuristics to probe attention probeState = DDGI_PROBE_STATE_ACTIVE; + float3 viewToProbe = probePosition - GBuffer.ViewPos; + float distanceToProbe = length(viewToProbe); + viewToProbe /= distanceToProbe; + float probeViewDot = dot(viewToProbe, ViewDir); + probeAttention *= lerp(0.1f, 1.0f, saturate(probeViewDot)); // Reduce quality for probes behind the camera (or away from view dir) + probeAttention *= lerp(1.0f, 0.5f, saturate(sdfDst / voxelLimit)); // Reduce quality for probes far away from geometry + probeAttention += (1.0f - saturate(distanceToProbe / 1000.0f)) * 1.2f; // Boost quality for probes nearby view + //probeAttention = 0.0f; // Debug test lowest ray count + //probeAttention = 1.0f; // Debug test highest ray count + probeAttention = clamp(probeAttention, DDGI_PROBE_ATTENTION_MIN, DDGI_PROBE_ATTENTION_MAX); + + // Relocate only if 
probe location is not good enough if (sdf <= voxelLimit) { #if DDGI_PROBE_RELOCATE_ITERATIVE @@ -222,6 +245,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) // Disable probe that is too close to the geometry probeOffset = float3(0, 0, 0); probeState = DDGI_PROBE_STATE_INACTIVE; + probeAttention = 0.0f; } else { @@ -232,6 +256,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) // Disable probe probeOffset = float3(0, 0, 0); probeState = DDGI_PROBE_STATE_INACTIVE; + probeAttention = 0.0f; #endif } } @@ -254,12 +279,15 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) } #endif if ((wasActivated || wasScrolled || wasRelocated) && probeState == DDGI_PROBE_STATE_ACTIVE) + { probeState = DDGI_PROBE_STATE_ACTIVATED; + probeAttention = 1.0f; + } } // Save probe state probeOffset /= probesSpacing; // Move offset back to [-1;1] space - RWProbesData[probeDataCoords] = EncodeDDGIProbeData(probeOffset, probeState); + RWProbesData[probeDataCoords] = EncodeDDGIProbeData(probeOffset, probeState, probeAttention); // Collect active probes if (probeState != DDGI_PROBE_STATE_INACTIVE) @@ -282,7 +310,7 @@ META_CS(true, FEATURE_LEVEL_SM5) [numthreads(1, 1, 1)] void CS_UpdateProbesInitArgs() { - uint activeProbesCount = ActiveProbes.Load(0); + uint activeProbesCount = ActiveProbes.Load(0); // Counter at 0 uint arg = 0; for (uint probesOffset = 0; probesOffset < activeProbesCount; probesOffset += DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT) { @@ -298,6 +326,9 @@ void CS_UpdateProbesInitArgs() #ifdef _CS_TraceRays RWTexture2D RWProbesTrace : register(u0); +#if DDGI_DEBUG_STATS +RWByteAddressBuffer RWStats : register(u1); +#endif Texture3D GlobalSDFTex : register(t0); Texture3D GlobalSDFMip : register(t1); @@ -326,12 +357,14 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID) // Load current probe state and position float4 probeData = LoadDDGIProbeData(DDGI, ProbesData, CascadeIndex, probeIndex); + float probeAttention = 
DecodeDDGIProbeAttention(probeData); uint probeState = DecodeDDGIProbeState(probeData); - uint probeRaysCount = GetProbeRaysCount(DDGI, probeState); + uint probeRaysCount = GetProbeRaysCount(DDGI, probeAttention); if (probeState == DDGI_PROBE_STATE_INACTIVE || rayIndex >= probeRaysCount) return; // Skip disabled probes or if current thread's ray is unused float3 probePosition = DecodeDDGIProbePosition(DDGI, probeData, CascadeIndex, probeIndex, probeCoords); float3 probeRayDirection = GetProbeRayDirection(DDGI, rayIndex, probeRaysCount, probeIndex, probeCoords); + // TODO: implement ray-guiding based on the probe irradiance (prioritize directions with high luminance) // Trace ray with Global SDF GlobalSDFTrace trace; @@ -370,6 +403,14 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID) // Write into probes trace results RWProbesTrace[uint2(rayIndex, DispatchThreadId.x)] = radiance; + +#if DDGI_DEBUG_STATS + // Update stats + uint tmp; + RWStats.InterlockedAdd(0, 1, tmp); + if (rayIndex == 0) + RWStats.InterlockedAdd(4, 1, tmp); +#endif } #endif @@ -380,6 +421,44 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID) // Update irradiance #define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_IRRADIANCE groupshared float4 CachedProbesTraceRadiance[DDGI_TRACE_RAYS_LIMIT]; +groupshared float OutputInstability[DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION]; + +// Source: https://github.com/turanszkij/WickedEngine +#define BorderOffsetsSize (4 * DDGI_PROBE_RESOLUTION + 4) +static const uint4 BorderOffsets[BorderOffsetsSize] = { + uint4(6, 1, 1, 0), + uint4(5, 1, 2, 0), + uint4(4, 1, 3, 0), + uint4(3, 1, 4, 0), + uint4(2, 1, 5, 0), + uint4(1, 1, 6, 0), + + uint4(6, 6, 1, 7), + uint4(5, 6, 2, 7), + uint4(4, 6, 3, 7), + uint4(3, 6, 4, 7), + uint4(2, 6, 5, 7), + uint4(1, 6, 6, 7), + + uint4(1, 1, 0, 6), + uint4(1, 2, 0, 5), + uint4(1, 3, 0, 4), + uint4(1, 4, 0, 3), + uint4(1, 5, 0, 2), + uint4(1, 6, 0, 1), + + uint4(6, 1, 7, 6), + uint4(6, 2, 7, 5), + 
uint4(6, 3, 7, 4), + uint4(6, 4, 7, 3), + uint4(6, 5, 7, 2), + uint4(6, 6, 7, 1), + + uint4(1, 1, 7, 7), + uint4(6, 1, 0, 7), + uint4(1, 6, 7, 0), + uint4(6, 6, 0, 0), +}; #else // Update distance #define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_DISTANCE @@ -389,7 +468,14 @@ groupshared float CachedProbesTraceDistance[DDGI_TRACE_RAYS_LIMIT]; groupshared float3 CachedProbesTraceDirection[DDGI_TRACE_RAYS_LIMIT]; RWTexture2D RWOutput : register(u0); +#if DDGI_PROBE_UPDATE_MODE == 0 +RWTexture2D RWProbesData : register(u1); +#if DDGI_DEBUG_INSTABILITY +RWTexture2D RWOutputInstability : register(u2); +#endif +#else Texture2D ProbesData : register(t0); +#endif Texture2D ProbesTrace : register(t1); ByteAddressBuffer ActiveProbes : register(t2); @@ -407,13 +493,16 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex); probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords); - // Skip disabled probes - bool skip = false; + // Load probe data +#if DDGI_PROBE_UPDATE_MODE == 0 + int2 probeDataCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex); + float4 probeData = RWProbesData[probeDataCoords]; +#else float4 probeData = LoadDDGIProbeData(DDGI, ProbesData, CascadeIndex, probeIndex); +#endif + float probeAttention = DecodeDDGIProbeAttention(probeData); uint probeState = DecodeDDGIProbeState(probeData); - uint probeRaysCount = GetProbeRaysCount(DDGI, probeState); - if (probeState == DDGI_PROBE_STATE_INACTIVE) - skip = true; + uint probeRaysCount = GetProbeRaysCount(DDGI, probeAttention); #if DDGI_PROBE_UPDATE_MODE == 0 uint backfacesCount = 0; @@ -423,30 +512,23 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ float distanceLimit = probesSpacing * 1.5f; #endif - BRANCH - if (!skip) + // Load trace rays results into shared memory to reuse across whole thread group (raysCount per thread) + uint raysCount = 
(uint)(ceil((float)probeRaysCount / (float)(DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION))); + uint raysStart = GroupIndex * raysCount; + raysCount = max(min(raysStart + raysCount, probeRaysCount), raysStart) - raysStart; + for (uint i = 0; i < raysCount; i++) { - // Load trace rays results into shared memory to reuse across whole thread group (raysCount per thread) - uint raysCount = (uint)(ceil((float)probeRaysCount / (float)(DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION))); - uint raysStart = GroupIndex * raysCount; - raysCount = max(min(raysStart + raysCount, probeRaysCount), raysStart) - raysStart; - for (uint i = 0; i < raysCount; i++) - { - uint rayIndex = raysStart + i; + uint rayIndex = raysStart + i; #if DDGI_PROBE_UPDATE_MODE == 0 - CachedProbesTraceRadiance[rayIndex] = ProbesTrace[uint2(rayIndex, GroupId.x)]; + CachedProbesTraceRadiance[rayIndex] = ProbesTrace[uint2(rayIndex, GroupId.x)]; #else - float rayDistance = ProbesTrace[uint2(rayIndex, GroupId.x)].w; - CachedProbesTraceDistance[rayIndex] = min(abs(rayDistance), distanceLimit); + float rayDistance = ProbesTrace[uint2(rayIndex, GroupId.x)].w; + CachedProbesTraceDistance[rayIndex] = min(abs(rayDistance), distanceLimit); #endif - CachedProbesTraceDirection[rayIndex] = GetProbeRayDirection(DDGI, rayIndex, probeRaysCount, probeIndex, probeCoords); - } + CachedProbesTraceDirection[rayIndex] = GetProbeRayDirection(DDGI, rayIndex, probeRaysCount, probeIndex, probeCoords); } GroupMemoryBarrierWithGroupSync(); - if (skip) - return; probeCoords = GetDDGIProbeCoords(DDGI, probeIndex); - uint2 outputCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2) + 1 + GroupThreadId.xy; // Calculate octahedral projection for probe (unwraps spherical projection into a square) float2 octahedralCoords = GetOctahedralCoords(GroupThreadId.xy, DDGI_PROBE_RESOLUTION); @@ -495,30 +577,52 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ result.rgb *= 
1.0f / (2.0f * max(result.a, epsilon)); // Load current probe value + uint2 outputCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2) + 1 + GroupThreadId.xy; float3 previous = RWOutput[outputCoords].rgb; - bool wasActivated = probeState == DDGI_PROBE_STATE_ACTIVATED; - if (ResetBlend || wasActivated) - previous = float3(0, 0, 0); + bool wasActivated = probeState == DDGI_PROBE_STATE_ACTIVATED || ResetBlend; + if (wasActivated) + previous = result.rgb; + +#if DDGI_PROBE_UPDATE_MODE == 0 + // Calculate instability of the irradiance + float previousLuma = Luminance(previous.rgb); + float resultLuma = Luminance(result.rgb); + float instability = abs(previousLuma - resultLuma) / max(previousLuma, 1e-4f); // Percentage change in luminance of irradiance (denominator clamped to avoid 0/0 NaN on black texels) + instability = max(instability, Max3(abs(result.rgb - previous) / max(previous, 1e-4f))); // Percentage of color delta change of irradiance (per-channel denominator clamped to avoid NaN/Inf) + //instability *= saturate(result.a); // Reduce instability in areas with a small ray-coverage + //instability = pow(instability, 1.2f); // Increase contrast + instability *= 2.0f; // Make it stronger on scene changes + //instability = saturate(instability); + OutputInstability[GroupIndex] = instability; +#if DDGI_DEBUG_INSTABILITY + RWOutputInstability[outputCoords] = instability; + //RWOutputInstability[outputCoords] = probeAttention; // Debug test probe attention visualization +#endif +#endif // Blend current value with the previous probe data - float historyWeight = DDGI.ProbeHistoryWeight; - //historyWeight = 1.0f; - //historyWeight = 0.0f; - if (ResetBlend || wasActivated) - historyWeight = 0.0f; + float historyWeightFast = DDGI.ProbeHistoryWeight; + float historyWeightSlow = 0.97f; #if DDGI_PROBE_UPDATE_MODE == 0 - result *= DDGI.IndirectLightingIntensity; -#if DDGI_SRGB_BLENDING - result.rgb = pow(result.rgb, 1.0f / DDGI.IrradianceGamma); -#endif float3 irradianceDelta = result.rgb - previous; float irradianceDeltaMax = Max3(abs(irradianceDelta)); float 
irradianceDeltaLen = length(irradianceDelta); if (irradianceDeltaMax > 0.5f) { // Reduce history weight after significant lighting change - historyWeight = historyWeight * 0.5f; + historyWeightFast *= 0.5f; } +#endif + float historyWeight = lerp(historyWeightSlow, historyWeightFast, probeAttention * probeAttention * probeAttention); + //historyWeight = 1.0f; // Debug full-blend + //historyWeight = 0.0f; // Debug no-blend + if (wasActivated) + historyWeight = 0.0f; +#if DDGI_PROBE_UPDATE_MODE == 0 + result *= DDGI.IndirectLightingIntensity; +#if DDGI_SRGB_BLENDING + result.rgb = pow(max(result.rgb, 0), 1.0f / DDGI.IrradianceGamma); +#endif if (irradianceDeltaLen > 2.0f) { // Reduce flickering during rapid brightness changes @@ -530,6 +634,45 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ #endif RWOutput[outputCoords] = result; + +#if DDGI_PROBE_UPDATE_MODE == 0 + // The first thread updates the probe attention based on the instability of all texels + GroupMemoryBarrierWithGroupSync(); + BRANCH + if (GroupIndex == 0 && probeState != DDGI_PROBE_STATE_INACTIVE) + { + // Calculate instability statistics for a whole probe + float instabilityAvg = 0; + for (uint i = 0; i < DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION; i++) + instabilityAvg += OutputInstability[i]; + instabilityAvg *= 1.0f / float(DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION); + instabilityAvg = saturate(instabilityAvg); + instability = instabilityAvg; + + // Calculate probe attention + float taregAttention = lerp(0.5f, DDGI_PROBE_ATTENTION_MAX, instability); // Use some base level + if (taregAttention >= probeAttention) + probeAttention = taregAttention; // Quick jump up + else + probeAttention = lerp(probeAttention, taregAttention, 0.2f); // Slow blend down + if (probeState == DDGI_PROBE_STATE_ACTIVATED) + probeAttention = DDGI_PROBE_ATTENTION_MAX; + + // Update probe data for the next frame + probeState = DDGI_PROBE_STATE_ACTIVE; + RWProbesData[probeDataCoords] = 
EncodeDDGIProbeData(probeData.xyz, probeState, probeAttention); + } + +#if DDGI_DEBUG_INSTABILITY + // Copy border pixels + uint2 baseCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2); + for (uint borderIndex = GroupIndex; borderIndex < BorderOffsetsSize; borderIndex += DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION) + { + uint4 borderOffsets = BorderOffsets[borderIndex]; + RWOutputInstability[baseCoords + borderOffsets.zw] = RWOutputInstability[baseCoords + borderOffsets.xy]; + } +#endif +#endif } // Compute shader for updating probes irradiance or distance texture borders (fills gaps between probes to support bilinear filtering)