Optimize and refactor DDGI to use linear attention per-probe to dynamically control ray count and blend speed

Probes use a lower ray count when they are behind the camera, not aligned with the view direction, or far from geometry.
Probes near the camera or with high irradiance instability maintain a higher ray count.
Probes that use fewer rays blend more slowly to reduce artifacts.
Added probe instability, attention and stats debugging for devs.
This commit is contained in:
Wojtek Figat
2024-07-18 08:38:23 +02:00
parent ee02aa394a
commit aeff147b6d
3 changed files with 310 additions and 51 deletions

View File

@@ -40,6 +40,19 @@
#define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side) #define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side)
#define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8 #define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8
#define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32
#define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count
#define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging
#if DDGI_DEBUG_STATS
#include "Engine/Core/Collections/SamplesBuffer.h"
#define DDGI_DEBUG_STATS_FRAMES 60
// CPU-side mirror of the DDGI debug stats buffers (DDGI.StatsWrite / DDGI.StatsRead are both created with sizeof(StatsData) bytes).
// The ray tracing shader updates these counters through raw byte offsets, so the field order must stay in sync with it.
struct StatsData
{
// Byte offset 0: incremented once by every ray thread that finishes CS_TraceRays.
uint32 RaysCount;
// Byte offset 4: incremented once per traced probe (only by the thread with rayIndex == 0).
uint32 ProbesCount;
};
#endif
GPU_CB_STRUCT(Data0 { GPU_CB_STRUCT(Data0 {
DynamicDiffuseGlobalIlluminationPass::ConstantsData DDGI; DynamicDiffuseGlobalIlluminationPass::ConstantsData DDGI;
@@ -52,11 +65,13 @@ GPU_CB_STRUCT(Data0 {
float ResetBlend; float ResetBlend;
float TemporalTime; float TemporalTime;
Int4 ProbeScrollClears[4]; Int4 ProbeScrollClears[4];
Float3 ViewDir;
float Padding1;
}); });
GPU_CB_STRUCT(Data1 { GPU_CB_STRUCT(Data1 {
// TODO: use push constants on Vulkan or root signature data on DX12 to reduce overhead of changing single DWORD // TODO: use push constants on Vulkan or root signature data on DX12 to reduce overhead of changing single DWORD
Float2 Padding1; Float2 Padding2;
uint32 CascadeIndex; uint32 CascadeIndex;
uint32 ProbeIndexOffset; uint32 ProbeIndexOffset;
}); });
@@ -84,11 +99,21 @@ public:
int32 ProbesCountTotal = 0; int32 ProbesCountTotal = 0;
Int3 ProbeCounts = Int3::Zero; Int3 ProbeCounts = Int3::Zero;
GPUTexture* ProbesTrace = nullptr; // Probes ray tracing: (RGB: hit radiance, A: hit distance) GPUTexture* ProbesTrace = nullptr; // Probes ray tracing: (RGB: hit radiance, A: hit distance)
GPUTexture* ProbesData = nullptr; // Probes data: (RGB: world-space offset, A: state/data) GPUTexture* ProbesData = nullptr; // Probes data: (RGB: probe-space offset, A: state/data)
GPUTexture* ProbesIrradiance = nullptr; // Probes irradiance (RGB: sRGB color) GPUTexture* ProbesIrradiance = nullptr; // Probes irradiance (RGB: sRGB color)
GPUTexture* ProbesDistance = nullptr; // Probes distance (R: mean distance, G: mean distance^2) GPUTexture* ProbesDistance = nullptr; // Probes distance (R: mean distance, G: mean distance^2)
GPUBuffer* ActiveProbes = nullptr; // List with indices of the active probes (built during probes classification to use indirect dispatches for probes updating), counter at 0 GPUBuffer* ActiveProbes = nullptr; // List with indices of the active probes (built during probes classification to use indirect dispatches for probes updating), counter at 0
GPUBuffer* UpdateProbesInitArgs = nullptr; // Indirect dispatch buffer for active-only probes updating (trace+blend) GPUBuffer* UpdateProbesInitArgs = nullptr; // Indirect dispatch buffer for active-only probes updating (trace+blend)
#if DDGI_DEBUG_STATS
GPUBuffer* StatsWrite = nullptr;
GPUBuffer* StatsRead = nullptr;
SamplesBuffer<uint32, DDGI_DEBUG_STATS_FRAMES> StatsProbes;
SamplesBuffer<uint32, DDGI_DEBUG_STATS_FRAMES> StatsRays;
uint32 StatsFrames = 0;
#endif
#if DDGI_DEBUG_INSTABILITY
GPUTexture* ProbesInstability = nullptr;
#endif
DynamicDiffuseGlobalIlluminationPass::BindingData Result; DynamicDiffuseGlobalIlluminationPass::BindingData Result;
FORCE_INLINE void Release() FORCE_INLINE void Release()
@@ -99,6 +124,16 @@ public:
RenderTargetPool::Release(ProbesDistance); RenderTargetPool::Release(ProbesDistance);
SAFE_DELETE_GPU_RESOURCE(ActiveProbes); SAFE_DELETE_GPU_RESOURCE(ActiveProbes);
SAFE_DELETE_GPU_RESOURCE(UpdateProbesInitArgs); SAFE_DELETE_GPU_RESOURCE(UpdateProbesInitArgs);
#if DDGI_DEBUG_STATS
SAFE_DELETE_GPU_RESOURCE(StatsWrite);
SAFE_DELETE_GPU_RESOURCE(StatsRead);
StatsProbes.Clear();
StatsRays.Clear();
StatsFrames = 0;
#endif
#if DDGI_DEBUG_INSTABILITY
RenderTargetPool::Release(ProbesInstability);
#endif
} }
~DDGICustomBuffer() ~DDGICustomBuffer()
@@ -373,12 +408,21 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
INIT_TEXTURE(ProbesData, PixelFormat::R8G8B8A8_SNorm, probesCountTotalX, probesCountTotalY); INIT_TEXTURE(ProbesData, PixelFormat::R8G8B8A8_SNorm, probesCountTotalX, probesCountTotalY);
INIT_TEXTURE(ProbesIrradiance, PixelFormat::R11G11B10_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2)); INIT_TEXTURE(ProbesIrradiance, PixelFormat::R11G11B10_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2));
INIT_TEXTURE(ProbesDistance, PixelFormat::R16G16_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2)); INIT_TEXTURE(ProbesDistance, PixelFormat::R16G16_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2));
#if DDGI_DEBUG_INSTABILITY
INIT_TEXTURE(ProbesInstability, PixelFormat::R16_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2));
#endif
#undef INIT_TEXTURE #undef INIT_TEXTURE
#define INIT_BUFFER(buffer, name) ddgiData.buffer = GPUDevice::Instance->CreateBuffer(TEXT(name)); if (!ddgiData.buffer || ddgiData.buffer->Init(desc2)) return true; memUsage += ddgiData.buffer->GetMemoryUsage(); #define INIT_BUFFER(buffer, name) ddgiData.buffer = GPUDevice::Instance->CreateBuffer(TEXT(name)); if (!ddgiData.buffer || ddgiData.buffer->Init(desc2)) return true; memUsage += ddgiData.buffer->GetMemoryUsage();
GPUBufferDescription desc2 = GPUBufferDescription::Raw((probesCountCascade + 1) * sizeof(uint32), GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess); GPUBufferDescription desc2 = GPUBufferDescription::Raw((probesCountCascade + 1) * sizeof(uint32), GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess);
INIT_BUFFER(ActiveProbes, "DDGI.ActiveProbes"); INIT_BUFFER(ActiveProbes, "DDGI.ActiveProbes");
desc2 = GPUBufferDescription::Buffer(sizeof(GPUDispatchIndirectArgs) * Math::DivideAndRoundUp(probesCountCascade, DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT), GPUBufferFlags::Argument | GPUBufferFlags::UnorderedAccess, PixelFormat::R32_UInt, nullptr, sizeof(uint32)); desc2 = GPUBufferDescription::Buffer(sizeof(GPUDispatchIndirectArgs) * Math::DivideAndRoundUp(probesCountCascade, DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT), GPUBufferFlags::Argument | GPUBufferFlags::UnorderedAccess, PixelFormat::R32_UInt, nullptr, sizeof(uint32));
INIT_BUFFER(UpdateProbesInitArgs, "DDGI.UpdateProbesInitArgs"); INIT_BUFFER(UpdateProbesInitArgs, "DDGI.UpdateProbesInitArgs");
#if DDGI_DEBUG_STATS
desc2 = GPUBufferDescription::Raw(sizeof(StatsData), GPUBufferFlags::UnorderedAccess);
INIT_BUFFER(StatsWrite, "DDGI.StatsWrite");
desc2 = desc2.ToStagingReadback();
INIT_BUFFER(StatsRead, "DDGI.StatsRead");
#endif
#undef INIT_BUFFER #undef INIT_BUFFER
LOG(Info, "Dynamic Diffuse Global Illumination probes: {0}, memory usage: {1} MB", probesCountTotal, memUsage / (1024 * 1024)); LOG(Info, "Dynamic Diffuse Global Illumination probes: {0}, memory usage: {1} MB", probesCountTotal, memUsage / (1024 * 1024));
clear = true; clear = true;
@@ -393,6 +437,9 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
context->ClearUA(ddgiData.ProbesData, Float4::Zero); context->ClearUA(ddgiData.ProbesData, Float4::Zero);
context->ClearUA(ddgiData.ProbesIrradiance, Float4::Zero); context->ClearUA(ddgiData.ProbesIrradiance, Float4::Zero);
context->ClearUA(ddgiData.ProbesDistance, Float4::Zero); context->ClearUA(ddgiData.ProbesDistance, Float4::Zero);
#if DDGI_DEBUG_INSTABILITY
context->ClearUA(ddgiData.ProbesInstability, Float4::Zero);
#endif
} }
ddgiData.LastFrameUsed = Engine::FrameCount; ddgiData.LastFrameUsed = Engine::FrameCount;
@@ -486,6 +533,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
data.ProbeScrollClears[cascadeIndex] = Int4(cascade.ProbeScrollClears, 0); data.ProbeScrollClears[cascadeIndex] = Int4(cascade.ProbeScrollClears, 0);
} }
data.TemporalTime = renderContext.List->Setup.UseTemporalAAJitter ? RenderTools::ComputeTemporalTime() : 0.0f; data.TemporalTime = renderContext.List->Setup.UseTemporalAAJitter ? RenderTools::ComputeTemporalTime() : 0.0f;
data.ViewDir = renderContext.View.Direction;
GBufferPass::SetInputs(renderContext.View, data.GBuffer); GBufferPass::SetInputs(renderContext.View, data.GBuffer);
context->UpdateCB(_cb0, &data); context->UpdateCB(_cb0, &data);
context->BindCB(0, _cb0); context->BindCB(0, _cb0);
@@ -496,6 +544,10 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
PROFILE_GPU_CPU_NAMED("Probes Update"); PROFILE_GPU_CPU_NAMED("Probes Update");
bool anyDirty = false; bool anyDirty = false;
uint32 threadGroupsX, threadGroupsY; uint32 threadGroupsX, threadGroupsY;
#if DDGI_DEBUG_STATS
uint32 zero[4] = {};
context->ClearUA(ddgiData.StatsWrite, zero);
#endif
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
{ {
if (cascadeSkipUpdate[cascadeIndex]) if (cascadeSkipUpdate[cascadeIndex])
@@ -556,6 +608,9 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
context->BindSR(8, skybox); context->BindSR(8, skybox);
context->BindSR(9, ddgiData.ActiveProbes->View()); context->BindSR(9, ddgiData.ActiveProbes->View());
context->BindUA(0, ddgiData.ProbesTrace->View()); context->BindUA(0, ddgiData.ProbesTrace->View());
#if DDGI_DEBUG_STATS
context->BindUA(1, ddgiData.StatsWrite->View());
#endif
context->DispatchIndirect(_csTraceRays[(int32)Graphics::GIQuality], ddgiData.UpdateProbesInitArgs, arg); context->DispatchIndirect(_csTraceRays[(int32)Graphics::GIQuality], ddgiData.UpdateProbesInitArgs, arg);
context->ResetUA(); context->ResetUA();
context->ResetSR(); context->ResetSR();
@@ -564,21 +619,55 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
// Update probes irradiance and distance textures (one thread-group per probe) // Update probes irradiance and distance textures (one thread-group per probe)
{ {
PROFILE_GPU_CPU_NAMED("Update Probes"); PROFILE_GPU_CPU_NAMED("Update Probes");
// Distance
context->BindSR(0, ddgiData.Result.ProbesData); context->BindSR(0, ddgiData.Result.ProbesData);
context->BindSR(1, ddgiData.ProbesTrace->View()); context->BindSR(1, ddgiData.ProbesTrace->View());
context->BindSR(2, ddgiData.ActiveProbes->View()); context->BindSR(2, ddgiData.ActiveProbes->View());
context->BindUA(0, ddgiData.Result.ProbesIrradiance);
context->DispatchIndirect(_csUpdateProbesIrradiance, ddgiData.UpdateProbesInitArgs, arg);
context->BindUA(0, ddgiData.Result.ProbesDistance); context->BindUA(0, ddgiData.Result.ProbesDistance);
context->DispatchIndirect(_csUpdateProbesDistance, ddgiData.UpdateProbesInitArgs, arg); context->DispatchIndirect(_csUpdateProbesDistance, ddgiData.UpdateProbesInitArgs, arg);
context->ResetUA(); context->ResetUA();
context->ResetSR(); context->ResetSR();
// Irradiance
context->BindSR(1, ddgiData.ProbesTrace->View());
context->BindSR(2, ddgiData.ActiveProbes->View());
context->BindUA(0, ddgiData.Result.ProbesIrradiance);
context->BindUA(1, ddgiData.Result.ProbesData);
#if DDGI_DEBUG_INSTABILITY
context->BindUA(2, ddgiData.ProbesInstability->View());
#endif
context->DispatchIndirect(_csUpdateProbesIrradiance, ddgiData.UpdateProbesInitArgs, arg);
context->ResetUA();
context->ResetSR();
} }
arg += sizeof(GPUDispatchIndirectArgs); arg += sizeof(GPUDispatchIndirectArgs);
} }
} }
#if DDGI_DEBUG_STATS
// Update stats
{
StatsData stats;
if (void* mapped = ddgiData.StatsRead->Map(GPUResourceMapMode::Read))
{
Platform::MemoryCopy(&stats, mapped, sizeof(stats));
ddgiData.StatsRead->Unmap();
ddgiData.StatsProbes.Add(stats.ProbesCount);
ddgiData.StatsRays.Add(stats.RaysCount);
}
context->CopyBuffer(ddgiData.StatsRead, ddgiData.StatsWrite, sizeof(stats));
if (++ddgiData.StatsFrames >= DDGI_DEBUG_STATS_FRAMES)
{
ddgiData.StatsFrames = 0;
stats.ProbesCount = ddgiData.StatsProbes.Average();
stats.RaysCount = ddgiData.StatsRays.Average();
LOG(Info, "DDGI active probes: {}, traced rays: {} per frame, rays per probe: {}", stats.ProbesCount, stats.RaysCount, stats.ProbesCount > 0 ? stats.RaysCount / stats.ProbesCount : 0);
}
}
#endif
// Update probes border pixels // Update probes border pixels
if (anyDirty) if (anyDirty)
{ {
@@ -718,7 +807,11 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
{ {
// Pass DDGI data to the material // Pass DDGI data to the material
_debugMaterial->SetParameterValue(TEXT("ProbesData"), Variant(ddgiData.ProbesData)); _debugMaterial->SetParameterValue(TEXT("ProbesData"), Variant(ddgiData.ProbesData));
#if DDGI_DEBUG_INSTABILITY
_debugMaterial->SetParameterValue(TEXT("ProbesIrradiance"), Variant(ddgiData.ProbesInstability));
#else
_debugMaterial->SetParameterValue(TEXT("ProbesIrradiance"), Variant(ddgiData.ProbesIrradiance)); _debugMaterial->SetParameterValue(TEXT("ProbesIrradiance"), Variant(ddgiData.ProbesIrradiance));
#endif
_debugMaterial->SetParameterValue(TEXT("ProbesDistance"), Variant(ddgiData.ProbesDistance)); _debugMaterial->SetParameterValue(TEXT("ProbesDistance"), Variant(ddgiData.ProbesDistance));
auto cb = _debugMaterial->GetShader()->GetCB(3); auto cb = _debugMaterial->GetShader()->GetCB(3);
if (cb) if (cb)

View File

@@ -16,6 +16,8 @@
#define DDGI_PROBE_STATE_INACTIVE 0 #define DDGI_PROBE_STATE_INACTIVE 0
#define DDGI_PROBE_STATE_ACTIVATED 1 #define DDGI_PROBE_STATE_ACTIVATED 1
#define DDGI_PROBE_STATE_ACTIVE 2 #define DDGI_PROBE_STATE_ACTIVE 2
#define DDGI_PROBE_ATTENTION_MIN 0.02f // Minimum probe attention value that still makes it active.
#define DDGI_PROBE_ATTENTION_MAX 0.98f // Maximum probe attention value that still makes it active (but not activated which is 1.0f).
#define DDGI_PROBE_RESOLUTION_IRRADIANCE 6 // Resolution (in texels) for probe irradiance data (excluding 1px padding on each side) #define DDGI_PROBE_RESOLUTION_IRRADIANCE 6 // Resolution (in texels) for probe irradiance data (excluding 1px padding on each side)
#define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side) #define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side)
#define DDGI_CASCADE_BLEND_SIZE 2.5f // Distance in probes over which cascades blending happens #define DDGI_CASCADE_BLEND_SIZE 2.5f // Distance in probes over which cascades blending happens
@@ -99,15 +101,36 @@ float4 LoadDDGIProbeData(DDGIData data, Texture2D<snorm float4> probesData, uint
} }
// Encodes probe probe data // Encodes probe probe data
float4 EncodeDDGIProbeData(float3 probeOffset, uint probeState) float4 EncodeDDGIProbeData(float3 offset, uint state, float attention)
{ {
return float4(probeOffset, (float)probeState * (1.0f / 8.0f)); // [0;1] -> [-1;1]
attention = saturate(attention) * 2.0f - 1.0f;
if (state == DDGI_PROBE_STATE_INACTIVE)
attention = -1.0f;
else if (state == DDGI_PROBE_STATE_ACTIVATED)
attention = 1.0f;
return float4(offset, attention);
}
// Decodes probe attention value from the encoded state
float DecodeDDGIProbeAttention(float4 probeData)
{
// [-1;1] -> [0;1]
if (probeData.w <= -1.0f)
return 0.0f;
if (probeData.w >= 1.0f)
return 1.0f;
return probeData.w * 0.5f + 0.5f;
} }
// Decodes probe state from the encoded state // Decodes probe state from the encoded state
uint DecodeDDGIProbeState(float4 probeData) uint DecodeDDGIProbeState(float4 probeData)
{ {
return (uint)(probeData.w * 8.0f); if (probeData.w <= -1.0f)
return DDGI_PROBE_STATE_INACTIVE;
if (probeData.w >= 1.0f)
return DDGI_PROBE_STATE_ACTIVATED;
return DDGI_PROBE_STATE_ACTIVE;
} }
// Decodes probe world-space position (XYZ) from the encoded state // Decodes probe world-space position (XYZ) from the encoded state

View File

@@ -20,11 +20,14 @@
// This must match C++ // This must match C++
#define DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT 4096 // Maximum amount of probes to update at once during rays tracing and blending #define DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT 4096 // Maximum amount of probes to update at once during rays tracing and blending
#define DDGI_TRACE_RAYS_LIMIT 256 // Limit of rays per-probe (runtime value can be smaller) #define DDGI_TRACE_RAYS_LIMIT 256 // Limit of rays per-probe (runtime value can be smaller)
#define DDGI_TRACE_RAYS_MIN 16 // Minimum amount of rays to shoot for sleepy probes
#define DDGI_TRACE_NEGATIVE 0 // If true, rays that start inside geometry will use negative distance to indicate backface hit #define DDGI_TRACE_NEGATIVE 0 // If true, rays that start inside geometry will use negative distance to indicate backface hit
#define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8 #define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8
#define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32
#define DDGI_PROBE_RELOCATE_ITERATIVE 1 // If true, probes relocation algorithm tries to move them in additive way, otherwise all nearby locations are checked to find the best position #define DDGI_PROBE_RELOCATE_ITERATIVE 1 // If true, probes relocation algorithm tries to move them in additive way, otherwise all nearby locations are checked to find the best position
#define DDGI_PROBE_RELOCATE_FIND_BEST 1 // If true, probes relocation algorithm tries to move to the best matching location within nearby area #define DDGI_PROBE_RELOCATE_FIND_BEST 1 // If true, probes relocation algorithm tries to move to the best matching location within nearby area
#define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count
#define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging
META_CB_BEGIN(0, Data0) META_CB_BEGIN(0, Data0)
DDGIData DDGI; DDGIData DDGI;
@@ -37,10 +40,12 @@ uint ProbesCount;
float ResetBlend; float ResetBlend;
float TemporalTime; float TemporalTime;
int4 ProbeScrollClears[4]; int4 ProbeScrollClears[4];
float3 ViewDir;
float Padding1;
META_CB_END META_CB_END
META_CB_BEGIN(1, Data1) META_CB_BEGIN(1, Data1)
float2 Padding1; float2 Padding2;
uint CascadeIndex; uint CascadeIndex;
uint ProbeIndexOffset; uint ProbeIndexOffset;
META_CB_END META_CB_END
@@ -73,10 +78,11 @@ float3 GetProbeRayDirection(DDGIData data, uint rayIndex, uint raysCount, uint p
} }
// Calculates amount of rays to allocate for a probe // Calculates amount of rays to allocate for a probe
uint GetProbeRaysCount(DDGIData data, uint probeState) uint GetProbeRaysCount(DDGIData data, float probeAttention)
{ {
// TODO: implement variable ray count based on probe location relative to the view frustum (use probe state for storage) //return data.RaysCount;
return data.RaysCount; probeAttention = saturate((probeAttention - DDGI_PROBE_ATTENTION_MIN) / (DDGI_PROBE_ATTENTION_MAX - DDGI_PROBE_ATTENTION_MIN));
return DDGI_TRACE_RAYS_MIN + (uint)max(probeAttention * (float)(data.RaysCount - DDGI_TRACE_RAYS_MIN), 0.0f);
} }
#ifdef _CS_Classify #ifdef _CS_Classify
@@ -118,7 +124,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
if (prevCascadeWeight > 0.1f) if (prevCascadeWeight > 0.1f)
{ {
// Disable probe // Disable probe
RWProbesData[probeDataCoords] = EncodeDDGIProbeData(float3(0, 0, 0), DDGI_PROBE_STATE_INACTIVE); RWProbesData[probeDataCoords] = EncodeDDGIProbeData(float3(0, 0, 0), DDGI_PROBE_STATE_INACTIVE, 0.0f);
return; return;
} }
} }
@@ -140,11 +146,15 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
// Load probe state and position // Load probe state and position
float4 probeData = RWProbesData[probeDataCoords]; float4 probeData = RWProbesData[probeDataCoords];
float probeAttention = DecodeDDGIProbeAttention(probeData);
uint probeState = DecodeDDGIProbeState(probeData); uint probeState = DecodeDDGIProbeState(probeData);
uint probeStateOld = probeState; uint probeStateOld = probeState;
float3 probeOffset = probeData.xyz * probesSpacing; // Probe offset is [-1;1] within probes spacing float3 probeOffset = probeData.xyz * probesSpacing; // Probe offset is [-1;1] within probes spacing
if (wasScrolled || probeState == DDGI_PROBE_STATE_INACTIVE) if (wasScrolled || probeState == DDGI_PROBE_STATE_INACTIVE)
{
probeOffset = float3(0, 0, 0); // Clear offset for a new probe probeOffset = float3(0, 0, 0); // Clear offset for a new probe
probeAttention = 1.0f; // Wake-up
}
float3 probeOffsetOld = probeOffset; float3 probeOffsetOld = probeOffset;
float3 probePosition = probeBasePosition + probeOffset; float3 probePosition = probeBasePosition + probeOffset;
@@ -166,11 +176,24 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
// Disable it // Disable it
probeOffset = float3(0, 0, 0); probeOffset = float3(0, 0, 0);
probeState = DDGI_PROBE_STATE_INACTIVE; probeState = DDGI_PROBE_STATE_INACTIVE;
probeAttention = 0.0f;
} }
else else
{ {
// Relocate only if probe location is not good enough // Apply distance/view heuristics to probe attention
probeState = DDGI_PROBE_STATE_ACTIVE; probeState = DDGI_PROBE_STATE_ACTIVE;
float3 viewToProbe = probePosition - GBuffer.ViewPos;
float distanceToProbe = length(viewToProbe);
viewToProbe /= distanceToProbe;
float probeViewDot = dot(viewToProbe, ViewDir);
probeAttention *= lerp(0.1f, 1.0f, saturate(probeViewDot)); // Reduce quality for probes behind the camera (or away from view dir)
probeAttention *= lerp(1.0f, 0.5f, saturate(sdfDst / voxelLimit)); // Reduce quality for probes far away from geometry
probeAttention += (1.0f - saturate(distanceToProbe / 1000.0f)) * 1.2f; // Boost quality for probes nearby view
//probeAttention = 0.0f; // Debug test lowest ray count
//probeAttention = 1.0f; // Debug test highest ray count
probeAttention = clamp(probeAttention, DDGI_PROBE_ATTENTION_MIN, DDGI_PROBE_ATTENTION_MAX);
// Relocate only if probe location is not good enough
if (sdf <= voxelLimit) if (sdf <= voxelLimit)
{ {
#if DDGI_PROBE_RELOCATE_ITERATIVE #if DDGI_PROBE_RELOCATE_ITERATIVE
@@ -222,6 +245,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
// Disable probe that is too close to the geometry // Disable probe that is too close to the geometry
probeOffset = float3(0, 0, 0); probeOffset = float3(0, 0, 0);
probeState = DDGI_PROBE_STATE_INACTIVE; probeState = DDGI_PROBE_STATE_INACTIVE;
probeAttention = 0.0f;
} }
else else
{ {
@@ -232,6 +256,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
// Disable probe // Disable probe
probeOffset = float3(0, 0, 0); probeOffset = float3(0, 0, 0);
probeState = DDGI_PROBE_STATE_INACTIVE; probeState = DDGI_PROBE_STATE_INACTIVE;
probeAttention = 0.0f;
#endif #endif
} }
} }
@@ -254,12 +279,15 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
} }
#endif #endif
if ((wasActivated || wasScrolled || wasRelocated) && probeState == DDGI_PROBE_STATE_ACTIVE) if ((wasActivated || wasScrolled || wasRelocated) && probeState == DDGI_PROBE_STATE_ACTIVE)
{
probeState = DDGI_PROBE_STATE_ACTIVATED; probeState = DDGI_PROBE_STATE_ACTIVATED;
probeAttention = 1.0f;
}
} }
// Save probe state // Save probe state
probeOffset /= probesSpacing; // Move offset back to [-1;1] space probeOffset /= probesSpacing; // Move offset back to [-1;1] space
RWProbesData[probeDataCoords] = EncodeDDGIProbeData(probeOffset, probeState); RWProbesData[probeDataCoords] = EncodeDDGIProbeData(probeOffset, probeState, probeAttention);
// Collect active probes // Collect active probes
if (probeState != DDGI_PROBE_STATE_INACTIVE) if (probeState != DDGI_PROBE_STATE_INACTIVE)
@@ -282,7 +310,7 @@ META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(1, 1, 1)] [numthreads(1, 1, 1)]
void CS_UpdateProbesInitArgs() void CS_UpdateProbesInitArgs()
{ {
uint activeProbesCount = ActiveProbes.Load(0); uint activeProbesCount = ActiveProbes.Load(0); // Counter at 0
uint arg = 0; uint arg = 0;
for (uint probesOffset = 0; probesOffset < activeProbesCount; probesOffset += DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT) for (uint probesOffset = 0; probesOffset < activeProbesCount; probesOffset += DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT)
{ {
@@ -298,6 +326,9 @@ void CS_UpdateProbesInitArgs()
#ifdef _CS_TraceRays #ifdef _CS_TraceRays
RWTexture2D<float4> RWProbesTrace : register(u0); RWTexture2D<float4> RWProbesTrace : register(u0);
#if DDGI_DEBUG_STATS
RWByteAddressBuffer RWStats : register(u1);
#endif
Texture3D<snorm float> GlobalSDFTex : register(t0); Texture3D<snorm float> GlobalSDFTex : register(t0);
Texture3D<snorm float> GlobalSDFMip : register(t1); Texture3D<snorm float> GlobalSDFMip : register(t1);
@@ -326,12 +357,14 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID)
// Load current probe state and position // Load current probe state and position
float4 probeData = LoadDDGIProbeData(DDGI, ProbesData, CascadeIndex, probeIndex); float4 probeData = LoadDDGIProbeData(DDGI, ProbesData, CascadeIndex, probeIndex);
float probeAttention = DecodeDDGIProbeAttention(probeData);
uint probeState = DecodeDDGIProbeState(probeData); uint probeState = DecodeDDGIProbeState(probeData);
uint probeRaysCount = GetProbeRaysCount(DDGI, probeState); uint probeRaysCount = GetProbeRaysCount(DDGI, probeAttention);
if (probeState == DDGI_PROBE_STATE_INACTIVE || rayIndex >= probeRaysCount) if (probeState == DDGI_PROBE_STATE_INACTIVE || rayIndex >= probeRaysCount)
return; // Skip disabled probes or if current thread's ray is unused return; // Skip disabled probes or if current thread's ray is unused
float3 probePosition = DecodeDDGIProbePosition(DDGI, probeData, CascadeIndex, probeIndex, probeCoords); float3 probePosition = DecodeDDGIProbePosition(DDGI, probeData, CascadeIndex, probeIndex, probeCoords);
float3 probeRayDirection = GetProbeRayDirection(DDGI, rayIndex, probeRaysCount, probeIndex, probeCoords); float3 probeRayDirection = GetProbeRayDirection(DDGI, rayIndex, probeRaysCount, probeIndex, probeCoords);
// TODO: implement ray-guiding based on the probe irradiance (prioritize directions with high luminance)
// Trace ray with Global SDF // Trace ray with Global SDF
GlobalSDFTrace trace; GlobalSDFTrace trace;
@@ -370,6 +403,14 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID)
// Write into probes trace results // Write into probes trace results
RWProbesTrace[uint2(rayIndex, DispatchThreadId.x)] = radiance; RWProbesTrace[uint2(rayIndex, DispatchThreadId.x)] = radiance;
#if DDGI_DEBUG_STATS
// Update stats
uint tmp;
RWStats.InterlockedAdd(0, 1, tmp);
if (rayIndex == 0)
RWStats.InterlockedAdd(4, 1, tmp);
#endif
} }
#endif #endif
@@ -380,6 +421,44 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID)
// Update irradiance // Update irradiance
#define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_IRRADIANCE #define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_IRRADIANCE
groupshared float4 CachedProbesTraceRadiance[DDGI_TRACE_RAYS_LIMIT]; groupshared float4 CachedProbesTraceRadiance[DDGI_TRACE_RAYS_LIMIT];
groupshared float OutputInstability[DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION];
// Source: https://github.com/turanszkij/WickedEngine
// Lookup table with one entry per texel of the 1px border around an irradiance probe tile:
// 4 edges of DDGI_PROBE_RESOLUTION texels each plus 4 corners.
// The coordinates below are hard-coded for DDGI_PROBE_RESOLUTION == 6 (tile texels in range [0;7]).
// NOTE(review): presumably xy is the interior source texel and zw is the border texel that receives its value
// (xy always stays within [1;6], zw always touches 0 or 7) - confirm against the code that consumes this table.
#define BorderOffsetsSize (4 * DDGI_PROBE_RESOLUTION + 4)
static const uint4 BorderOffsets[BorderOffsetsSize] = {
// Edge with zw.y == 0 (x order is mirrored between xy and zw)
uint4(6, 1, 1, 0),
uint4(5, 1, 2, 0),
uint4(4, 1, 3, 0),
uint4(3, 1, 4, 0),
uint4(2, 1, 5, 0),
uint4(1, 1, 6, 0),
// Edge with zw.y == 7 (x order is mirrored between xy and zw)
uint4(6, 6, 1, 7),
uint4(5, 6, 2, 7),
uint4(4, 6, 3, 7),
uint4(3, 6, 4, 7),
uint4(2, 6, 5, 7),
uint4(1, 6, 6, 7),
// Edge with zw.x == 0 (y order is mirrored between xy and zw)
uint4(1, 1, 0, 6),
uint4(1, 2, 0, 5),
uint4(1, 3, 0, 4),
uint4(1, 4, 0, 3),
uint4(1, 5, 0, 2),
uint4(1, 6, 0, 1),
// Edge with zw.x == 7 (y order is mirrored between xy and zw)
uint4(6, 1, 7, 6),
uint4(6, 2, 7, 5),
uint4(6, 3, 7, 4),
uint4(6, 4, 7, 3),
uint4(6, 5, 7, 2),
uint4(6, 6, 7, 1),
// Corners (each zw corner is paired with the diagonally opposite interior corner in xy)
uint4(1, 1, 7, 7),
uint4(6, 1, 0, 7),
uint4(1, 6, 7, 0),
uint4(6, 6, 0, 0),
};
#else #else
// Update distance // Update distance
#define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_DISTANCE #define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_DISTANCE
@@ -389,7 +468,14 @@ groupshared float CachedProbesTraceDistance[DDGI_TRACE_RAYS_LIMIT];
groupshared float3 CachedProbesTraceDirection[DDGI_TRACE_RAYS_LIMIT]; groupshared float3 CachedProbesTraceDirection[DDGI_TRACE_RAYS_LIMIT];
RWTexture2D<float4> RWOutput : register(u0); RWTexture2D<float4> RWOutput : register(u0);
#if DDGI_PROBE_UPDATE_MODE == 0
RWTexture2D<snorm float4> RWProbesData : register(u1);
#if DDGI_DEBUG_INSTABILITY
RWTexture2D<float> RWOutputInstability : register(u2);
#endif
#else
Texture2D<snorm float4> ProbesData : register(t0); Texture2D<snorm float4> ProbesData : register(t0);
#endif
Texture2D<float4> ProbesTrace : register(t1); Texture2D<float4> ProbesTrace : register(t1);
ByteAddressBuffer ActiveProbes : register(t2); ByteAddressBuffer ActiveProbes : register(t2);
@@ -407,13 +493,16 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_
uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex); uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex);
probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords); probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords);
// Skip disabled probes // Load probe data
bool skip = false; #if DDGI_PROBE_UPDATE_MODE == 0
int2 probeDataCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex);
float4 probeData = RWProbesData[probeDataCoords];
#else
float4 probeData = LoadDDGIProbeData(DDGI, ProbesData, CascadeIndex, probeIndex); float4 probeData = LoadDDGIProbeData(DDGI, ProbesData, CascadeIndex, probeIndex);
#endif
float probeAttention = DecodeDDGIProbeAttention(probeData);
uint probeState = DecodeDDGIProbeState(probeData); uint probeState = DecodeDDGIProbeState(probeData);
uint probeRaysCount = GetProbeRaysCount(DDGI, probeState); uint probeRaysCount = GetProbeRaysCount(DDGI, probeAttention);
if (probeState == DDGI_PROBE_STATE_INACTIVE)
skip = true;
#if DDGI_PROBE_UPDATE_MODE == 0 #if DDGI_PROBE_UPDATE_MODE == 0
uint backfacesCount = 0; uint backfacesCount = 0;
@@ -423,30 +512,23 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_
float distanceLimit = probesSpacing * 1.5f; float distanceLimit = probesSpacing * 1.5f;
#endif #endif
BRANCH // Load trace rays results into shared memory to reuse across whole thread group (raysCount per thread)
if (!skip) uint raysCount = (uint)(ceil((float)probeRaysCount / (float)(DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION)));
uint raysStart = GroupIndex * raysCount;
raysCount = max(min(raysStart + raysCount, probeRaysCount), raysStart) - raysStart;
for (uint i = 0; i < raysCount; i++)
{ {
// Load trace rays results into shared memory to reuse across whole thread group (raysCount per thread) uint rayIndex = raysStart + i;
uint raysCount = (uint)(ceil((float)probeRaysCount / (float)(DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION)));
uint raysStart = GroupIndex * raysCount;
raysCount = max(min(raysStart + raysCount, probeRaysCount), raysStart) - raysStart;
for (uint i = 0; i < raysCount; i++)
{
uint rayIndex = raysStart + i;
#if DDGI_PROBE_UPDATE_MODE == 0 #if DDGI_PROBE_UPDATE_MODE == 0
CachedProbesTraceRadiance[rayIndex] = ProbesTrace[uint2(rayIndex, GroupId.x)]; CachedProbesTraceRadiance[rayIndex] = ProbesTrace[uint2(rayIndex, GroupId.x)];
#else #else
float rayDistance = ProbesTrace[uint2(rayIndex, GroupId.x)].w; float rayDistance = ProbesTrace[uint2(rayIndex, GroupId.x)].w;
CachedProbesTraceDistance[rayIndex] = min(abs(rayDistance), distanceLimit); CachedProbesTraceDistance[rayIndex] = min(abs(rayDistance), distanceLimit);
#endif #endif
CachedProbesTraceDirection[rayIndex] = GetProbeRayDirection(DDGI, rayIndex, probeRaysCount, probeIndex, probeCoords); CachedProbesTraceDirection[rayIndex] = GetProbeRayDirection(DDGI, rayIndex, probeRaysCount, probeIndex, probeCoords);
}
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
if (skip)
return;
probeCoords = GetDDGIProbeCoords(DDGI, probeIndex); probeCoords = GetDDGIProbeCoords(DDGI, probeIndex);
uint2 outputCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2) + 1 + GroupThreadId.xy;
// Calculate octahedral projection for probe (unwraps spherical projection into a square) // Calculate octahedral projection for probe (unwraps spherical projection into a square)
float2 octahedralCoords = GetOctahedralCoords(GroupThreadId.xy, DDGI_PROBE_RESOLUTION); float2 octahedralCoords = GetOctahedralCoords(GroupThreadId.xy, DDGI_PROBE_RESOLUTION);
@@ -495,30 +577,52 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_
result.rgb *= 1.0f / (2.0f * max(result.a, epsilon)); result.rgb *= 1.0f / (2.0f * max(result.a, epsilon));
// Load current probe value // Load current probe value
uint2 outputCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2) + 1 + GroupThreadId.xy;
float3 previous = RWOutput[outputCoords].rgb; float3 previous = RWOutput[outputCoords].rgb;
bool wasActivated = probeState == DDGI_PROBE_STATE_ACTIVATED; bool wasActivated = probeState == DDGI_PROBE_STATE_ACTIVATED || ResetBlend;
if (ResetBlend || wasActivated) if (wasActivated)
previous = float3(0, 0, 0); previous = result.rgb;
#if DDGI_PROBE_UPDATE_MODE == 0
// Calculate instability of the irradiance
float previousLuma = Luminance(previous.rgb);
float resultLuma = Luminance(result.rgb);
float instability = abs(previousLuma - resultLuma) / previousLuma; // Percentage change in luminance of irradiance
instability = max(instability, Max3(abs(result.rgb - previous) / previous)); // Percentage of color delta change of irradiance
//instability *= saturate(result.a); // Reduce instability in areas with a small ray-coverage
//instability = pow(instability, 1.2f); // Increase contrast
instability *= 2.0f; // Make it stronger on scene changes
//instability = saturate(instability);
OutputInstability[GroupIndex] = instability;
#if DDGI_DEBUG_INSTABILITY
RWOutputInstability[outputCoords] = instability;
//RWOutputInstability[outputCoords] = probeAttention; // Debug test probe attention visualization
#endif
#endif
// Blend current value with the previous probe data // Blend current value with the previous probe data
float historyWeight = DDGI.ProbeHistoryWeight; float historyWeightFast = DDGI.ProbeHistoryWeight;
//historyWeight = 1.0f; float historyWeightSlow = 0.97f;
//historyWeight = 0.0f;
if (ResetBlend || wasActivated)
historyWeight = 0.0f;
#if DDGI_PROBE_UPDATE_MODE == 0 #if DDGI_PROBE_UPDATE_MODE == 0
result *= DDGI.IndirectLightingIntensity;
#if DDGI_SRGB_BLENDING
result.rgb = pow(result.rgb, 1.0f / DDGI.IrradianceGamma);
#endif
float3 irradianceDelta = result.rgb - previous; float3 irradianceDelta = result.rgb - previous;
float irradianceDeltaMax = Max3(abs(irradianceDelta)); float irradianceDeltaMax = Max3(abs(irradianceDelta));
float irradianceDeltaLen = length(irradianceDelta); float irradianceDeltaLen = length(irradianceDelta);
if (irradianceDeltaMax > 0.5f) if (irradianceDeltaMax > 0.5f)
{ {
// Reduce history weight after significant lighting change // Reduce history weight after significant lighting change
historyWeight = historyWeight * 0.5f; historyWeightFast *= 0.5f;
} }
#endif
float historyWeight = lerp(historyWeightSlow, historyWeightFast, probeAttention * probeAttention * probeAttention);
//historyWeight = 1.0f; // Debug full-blend
//historyWeight = 0.0f; // Debug no-blend
if (wasActivated)
historyWeight = 0.0f;
#if DDGI_PROBE_UPDATE_MODE == 0
result *= DDGI.IndirectLightingIntensity;
#if DDGI_SRGB_BLENDING
result.rgb = pow(max(result.rgb, 0), 1.0f / DDGI.IrradianceGamma);
#endif
if (irradianceDeltaLen > 2.0f) if (irradianceDeltaLen > 2.0f)
{ {
// Reduce flickering during rapid brightness changes // Reduce flickering during rapid brightness changes
@@ -530,6 +634,45 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_
#endif #endif
RWOutput[outputCoords] = result; RWOutput[outputCoords] = result;
#if DDGI_PROBE_UPDATE_MODE == 0
// The first thread updates the probe attention based on the instability of all texels
GroupMemoryBarrierWithGroupSync();
BRANCH
if (GroupIndex == 0 && probeState != DDGI_PROBE_STATE_INACTIVE)
{
// Calculate instability statistics for a whole probe
float instabilityAvg = 0;
for (uint i = 0; i < DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION; i++)
instabilityAvg += OutputInstability[i];
instabilityAvg *= 1.0f / float(DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION);
instabilityAvg = saturate(instabilityAvg);
instability = instabilityAvg;
// Calculate probe attention
float taregAttention = lerp(0.5f, DDGI_PROBE_ATTENTION_MAX, instability); // Use some base level
if (taregAttention >= probeAttention)
probeAttention = taregAttention; // Quick jump up
else
probeAttention = lerp(probeAttention, taregAttention, 0.2f); // Slow blend down
if (probeState == DDGI_PROBE_STATE_ACTIVATED)
probeAttention = DDGI_PROBE_ATTENTION_MAX;
// Update probe data for the next frame
probeState = DDGI_PROBE_STATE_ACTIVE;
RWProbesData[probeDataCoords] = EncodeDDGIProbeData(probeData.xyz, probeState, probeAttention);
}
#if DDGI_DEBUG_INSTABILITY
// Copy border pixels
uint2 baseCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2);
for (uint borderIndex = GroupIndex; borderIndex < BorderOffsetsSize; borderIndex += DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION)
{
uint4 borderOffsets = BorderOffsets[borderIndex];
RWOutputInstability[baseCoords + borderOffsets.zw] = RWOutputInstability[baseCoords + borderOffsets.xy];
}
#endif
#endif
} }
// Compute shader for updating probes irradiance or distance texture borders (fills gaps between probes to support bilinear filtering) // Compute shader for updating probes irradiance or distance texture borders (fills gaps between probes to support bilinear filtering)