Optimize and refactor DDGI to use linear attention per-probe to dynamically control ray count and blend speed
Probes use a lower ray count when they are behind the camera, poorly aligned with the view direction, or far from geometry. Probes near the camera or with high irradiance instability keep a higher ray count. Probes that trace fewer rays blend more slowly to reduce artifacts. Added probe instability, attention and stats debugging for developers.
This commit is contained in:
@@ -40,6 +40,19 @@
|
|||||||
#define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side)
|
#define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side)
|
||||||
#define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8
|
#define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8
|
||||||
#define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32
|
#define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32
|
||||||
|
#define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count
|
||||||
|
#define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging
|
||||||
|
|
||||||
|
#if DDGI_DEBUG_STATS
|
||||||
|
#include "Engine/Core/Collections/SamplesBuffer.h"
|
||||||
|
#define DDGI_DEBUG_STATS_FRAMES 60
|
||||||
|
|
||||||
|
// CPU-side copy of the debug counters read back from the GPU stats buffer (only compiled with DDGI_DEBUG_STATS).
// The field order is significant: CS_TraceRays adds to byte offset 0 once per traced ray and to byte offset 4 once per probe (thread with rayIndex == 0).
struct StatsData
|
||||||
|
{
|
||||||
|
uint32 RaysCount; // First counter: rays traced
|
||||||
|
uint32 ProbesCount; // Second counter: probes that traced at least one ray
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
GPU_CB_STRUCT(Data0 {
|
GPU_CB_STRUCT(Data0 {
|
||||||
DynamicDiffuseGlobalIlluminationPass::ConstantsData DDGI;
|
DynamicDiffuseGlobalIlluminationPass::ConstantsData DDGI;
|
||||||
@@ -52,11 +65,13 @@ GPU_CB_STRUCT(Data0 {
|
|||||||
float ResetBlend;
|
float ResetBlend;
|
||||||
float TemporalTime;
|
float TemporalTime;
|
||||||
Int4 ProbeScrollClears[4];
|
Int4 ProbeScrollClears[4];
|
||||||
|
Float3 ViewDir;
|
||||||
|
float Padding1;
|
||||||
});
|
});
|
||||||
|
|
||||||
// Small constant buffer holding the cascade index and probe index offset.
// Mirrors the HLSL `META_CB_BEGIN(1, Data1)` declaration field-for-field, so keep both in sync when editing.
// The padding member is named Padding2 on the new side; Data0 gained a `float Padding1` in the same change (presumably renamed to avoid a name clash on the shader side).
GPU_CB_STRUCT(Data1 {
|
GPU_CB_STRUCT(Data1 {
|
||||||
// TODO: use push constants on Vulkan or root signature data on DX12 to reduce overhead of changing single DWORD
|
// TODO: use push constants on Vulkan or root signature data on DX12 to reduce overhead of changing single DWORD
|
||||||
Float2 Padding1;
|
Float2 Padding2;
|
||||||
uint32 CascadeIndex;
|
uint32 CascadeIndex;
|
||||||
uint32 ProbeIndexOffset;
|
uint32 ProbeIndexOffset;
|
||||||
});
|
});
|
||||||
@@ -84,11 +99,21 @@ public:
|
|||||||
int32 ProbesCountTotal = 0;
|
int32 ProbesCountTotal = 0;
|
||||||
Int3 ProbeCounts = Int3::Zero;
|
Int3 ProbeCounts = Int3::Zero;
|
||||||
GPUTexture* ProbesTrace = nullptr; // Probes ray tracing: (RGB: hit radiance, A: hit distance)
|
GPUTexture* ProbesTrace = nullptr; // Probes ray tracing: (RGB: hit radiance, A: hit distance)
|
||||||
GPUTexture* ProbesData = nullptr; // Probes data: (RGB: world-space offset, A: state/data)
|
GPUTexture* ProbesData = nullptr; // Probes data: (RGB: probe-space offset, A: state/data)
|
||||||
GPUTexture* ProbesIrradiance = nullptr; // Probes irradiance (RGB: sRGB color)
|
GPUTexture* ProbesIrradiance = nullptr; // Probes irradiance (RGB: sRGB color)
|
||||||
GPUTexture* ProbesDistance = nullptr; // Probes distance (R: mean distance, G: mean distance^2)
|
GPUTexture* ProbesDistance = nullptr; // Probes distance (R: mean distance, G: mean distance^2)
|
||||||
GPUBuffer* ActiveProbes = nullptr; // List with indices of the active probes (built during probes classification to use indirect dispatches for probes updating), counter at 0
|
GPUBuffer* ActiveProbes = nullptr; // List with indices of the active probes (built during probes classification to use indirect dispatches for probes updating), counter at 0
|
||||||
GPUBuffer* UpdateProbesInitArgs = nullptr; // Indirect dispatch buffer for active-only probes updating (trace+blend)
|
GPUBuffer* UpdateProbesInitArgs = nullptr; // Indirect dispatch buffer for active-only probes updating (trace+blend)
|
||||||
|
#if DDGI_DEBUG_STATS
|
||||||
|
GPUBuffer* StatsWrite = nullptr;
|
||||||
|
GPUBuffer* StatsRead = nullptr;
|
||||||
|
SamplesBuffer<uint32, DDGI_DEBUG_STATS_FRAMES> StatsProbes;
|
||||||
|
SamplesBuffer<uint32, DDGI_DEBUG_STATS_FRAMES> StatsRays;
|
||||||
|
uint32 StatsFrames = 0;
|
||||||
|
#endif
|
||||||
|
#if DDGI_DEBUG_INSTABILITY
|
||||||
|
GPUTexture* ProbesInstability = nullptr;
|
||||||
|
#endif
|
||||||
DynamicDiffuseGlobalIlluminationPass::BindingData Result;
|
DynamicDiffuseGlobalIlluminationPass::BindingData Result;
|
||||||
|
|
||||||
FORCE_INLINE void Release()
|
FORCE_INLINE void Release()
|
||||||
@@ -99,6 +124,16 @@ public:
|
|||||||
RenderTargetPool::Release(ProbesDistance);
|
RenderTargetPool::Release(ProbesDistance);
|
||||||
SAFE_DELETE_GPU_RESOURCE(ActiveProbes);
|
SAFE_DELETE_GPU_RESOURCE(ActiveProbes);
|
||||||
SAFE_DELETE_GPU_RESOURCE(UpdateProbesInitArgs);
|
SAFE_DELETE_GPU_RESOURCE(UpdateProbesInitArgs);
|
||||||
|
#if DDGI_DEBUG_STATS
|
||||||
|
SAFE_DELETE_GPU_RESOURCE(StatsWrite);
|
||||||
|
SAFE_DELETE_GPU_RESOURCE(StatsRead);
|
||||||
|
StatsProbes.Clear();
|
||||||
|
StatsRays.Clear();
|
||||||
|
StatsFrames = 0;
|
||||||
|
#endif
|
||||||
|
#if DDGI_DEBUG_INSTABILITY
|
||||||
|
RenderTargetPool::Release(ProbesInstability);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
~DDGICustomBuffer()
|
~DDGICustomBuffer()
|
||||||
@@ -373,12 +408,21 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
|
|||||||
INIT_TEXTURE(ProbesData, PixelFormat::R8G8B8A8_SNorm, probesCountTotalX, probesCountTotalY);
|
INIT_TEXTURE(ProbesData, PixelFormat::R8G8B8A8_SNorm, probesCountTotalX, probesCountTotalY);
|
||||||
INIT_TEXTURE(ProbesIrradiance, PixelFormat::R11G11B10_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2));
|
INIT_TEXTURE(ProbesIrradiance, PixelFormat::R11G11B10_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2));
|
||||||
INIT_TEXTURE(ProbesDistance, PixelFormat::R16G16_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2));
|
INIT_TEXTURE(ProbesDistance, PixelFormat::R16G16_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2));
|
||||||
|
#if DDGI_DEBUG_INSTABILITY
|
||||||
|
INIT_TEXTURE(ProbesInstability, PixelFormat::R16_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2));
|
||||||
|
#endif
|
||||||
#undef INIT_TEXTURE
|
#undef INIT_TEXTURE
|
||||||
#define INIT_BUFFER(buffer, name) ddgiData.buffer = GPUDevice::Instance->CreateBuffer(TEXT(name)); if (!ddgiData.buffer || ddgiData.buffer->Init(desc2)) return true; memUsage += ddgiData.buffer->GetMemoryUsage();
|
#define INIT_BUFFER(buffer, name) ddgiData.buffer = GPUDevice::Instance->CreateBuffer(TEXT(name)); if (!ddgiData.buffer || ddgiData.buffer->Init(desc2)) return true; memUsage += ddgiData.buffer->GetMemoryUsage();
|
||||||
GPUBufferDescription desc2 = GPUBufferDescription::Raw((probesCountCascade + 1) * sizeof(uint32), GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess);
|
GPUBufferDescription desc2 = GPUBufferDescription::Raw((probesCountCascade + 1) * sizeof(uint32), GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess);
|
||||||
INIT_BUFFER(ActiveProbes, "DDGI.ActiveProbes");
|
INIT_BUFFER(ActiveProbes, "DDGI.ActiveProbes");
|
||||||
desc2 = GPUBufferDescription::Buffer(sizeof(GPUDispatchIndirectArgs) * Math::DivideAndRoundUp(probesCountCascade, DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT), GPUBufferFlags::Argument | GPUBufferFlags::UnorderedAccess, PixelFormat::R32_UInt, nullptr, sizeof(uint32));
|
desc2 = GPUBufferDescription::Buffer(sizeof(GPUDispatchIndirectArgs) * Math::DivideAndRoundUp(probesCountCascade, DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT), GPUBufferFlags::Argument | GPUBufferFlags::UnorderedAccess, PixelFormat::R32_UInt, nullptr, sizeof(uint32));
|
||||||
INIT_BUFFER(UpdateProbesInitArgs, "DDGI.UpdateProbesInitArgs");
|
INIT_BUFFER(UpdateProbesInitArgs, "DDGI.UpdateProbesInitArgs");
|
||||||
|
#if DDGI_DEBUG_STATS
|
||||||
|
desc2 = GPUBufferDescription::Raw(sizeof(StatsData), GPUBufferFlags::UnorderedAccess);
|
||||||
|
INIT_BUFFER(StatsWrite, "DDGI.StatsWrite");
|
||||||
|
desc2 = desc2.ToStagingReadback();
|
||||||
|
INIT_BUFFER(StatsRead, "DDGI.StatsRead");
|
||||||
|
#endif
|
||||||
#undef INIT_BUFFER
|
#undef INIT_BUFFER
|
||||||
LOG(Info, "Dynamic Diffuse Global Illumination probes: {0}, memory usage: {1} MB", probesCountTotal, memUsage / (1024 * 1024));
|
LOG(Info, "Dynamic Diffuse Global Illumination probes: {0}, memory usage: {1} MB", probesCountTotal, memUsage / (1024 * 1024));
|
||||||
clear = true;
|
clear = true;
|
||||||
@@ -393,6 +437,9 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
|
|||||||
context->ClearUA(ddgiData.ProbesData, Float4::Zero);
|
context->ClearUA(ddgiData.ProbesData, Float4::Zero);
|
||||||
context->ClearUA(ddgiData.ProbesIrradiance, Float4::Zero);
|
context->ClearUA(ddgiData.ProbesIrradiance, Float4::Zero);
|
||||||
context->ClearUA(ddgiData.ProbesDistance, Float4::Zero);
|
context->ClearUA(ddgiData.ProbesDistance, Float4::Zero);
|
||||||
|
#if DDGI_DEBUG_INSTABILITY
|
||||||
|
context->ClearUA(ddgiData.ProbesInstability, Float4::Zero);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
ddgiData.LastFrameUsed = Engine::FrameCount;
|
ddgiData.LastFrameUsed = Engine::FrameCount;
|
||||||
|
|
||||||
@@ -486,6 +533,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
|
|||||||
data.ProbeScrollClears[cascadeIndex] = Int4(cascade.ProbeScrollClears, 0);
|
data.ProbeScrollClears[cascadeIndex] = Int4(cascade.ProbeScrollClears, 0);
|
||||||
}
|
}
|
||||||
data.TemporalTime = renderContext.List->Setup.UseTemporalAAJitter ? RenderTools::ComputeTemporalTime() : 0.0f;
|
data.TemporalTime = renderContext.List->Setup.UseTemporalAAJitter ? RenderTools::ComputeTemporalTime() : 0.0f;
|
||||||
|
data.ViewDir = renderContext.View.Direction;
|
||||||
GBufferPass::SetInputs(renderContext.View, data.GBuffer);
|
GBufferPass::SetInputs(renderContext.View, data.GBuffer);
|
||||||
context->UpdateCB(_cb0, &data);
|
context->UpdateCB(_cb0, &data);
|
||||||
context->BindCB(0, _cb0);
|
context->BindCB(0, _cb0);
|
||||||
@@ -496,6 +544,10 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
|
|||||||
PROFILE_GPU_CPU_NAMED("Probes Update");
|
PROFILE_GPU_CPU_NAMED("Probes Update");
|
||||||
bool anyDirty = false;
|
bool anyDirty = false;
|
||||||
uint32 threadGroupsX, threadGroupsY;
|
uint32 threadGroupsX, threadGroupsY;
|
||||||
|
#if DDGI_DEBUG_STATS
|
||||||
|
uint32 zero[4] = {};
|
||||||
|
context->ClearUA(ddgiData.StatsWrite, zero);
|
||||||
|
#endif
|
||||||
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
|
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
|
||||||
{
|
{
|
||||||
if (cascadeSkipUpdate[cascadeIndex])
|
if (cascadeSkipUpdate[cascadeIndex])
|
||||||
@@ -556,6 +608,9 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
|
|||||||
context->BindSR(8, skybox);
|
context->BindSR(8, skybox);
|
||||||
context->BindSR(9, ddgiData.ActiveProbes->View());
|
context->BindSR(9, ddgiData.ActiveProbes->View());
|
||||||
context->BindUA(0, ddgiData.ProbesTrace->View());
|
context->BindUA(0, ddgiData.ProbesTrace->View());
|
||||||
|
#if DDGI_DEBUG_STATS
|
||||||
|
context->BindUA(1, ddgiData.StatsWrite->View());
|
||||||
|
#endif
|
||||||
context->DispatchIndirect(_csTraceRays[(int32)Graphics::GIQuality], ddgiData.UpdateProbesInitArgs, arg);
|
context->DispatchIndirect(_csTraceRays[(int32)Graphics::GIQuality], ddgiData.UpdateProbesInitArgs, arg);
|
||||||
context->ResetUA();
|
context->ResetUA();
|
||||||
context->ResetSR();
|
context->ResetSR();
|
||||||
@@ -564,21 +619,55 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
|
|||||||
// Update probes irradiance and distance textures (one thread-group per probe)
|
// Update probes irradiance and distance textures (one thread-group per probe)
|
||||||
{
|
{
|
||||||
PROFILE_GPU_CPU_NAMED("Update Probes");
|
PROFILE_GPU_CPU_NAMED("Update Probes");
|
||||||
|
|
||||||
|
// Distance
|
||||||
context->BindSR(0, ddgiData.Result.ProbesData);
|
context->BindSR(0, ddgiData.Result.ProbesData);
|
||||||
context->BindSR(1, ddgiData.ProbesTrace->View());
|
context->BindSR(1, ddgiData.ProbesTrace->View());
|
||||||
context->BindSR(2, ddgiData.ActiveProbes->View());
|
context->BindSR(2, ddgiData.ActiveProbes->View());
|
||||||
context->BindUA(0, ddgiData.Result.ProbesIrradiance);
|
|
||||||
context->DispatchIndirect(_csUpdateProbesIrradiance, ddgiData.UpdateProbesInitArgs, arg);
|
|
||||||
context->BindUA(0, ddgiData.Result.ProbesDistance);
|
context->BindUA(0, ddgiData.Result.ProbesDistance);
|
||||||
context->DispatchIndirect(_csUpdateProbesDistance, ddgiData.UpdateProbesInitArgs, arg);
|
context->DispatchIndirect(_csUpdateProbesDistance, ddgiData.UpdateProbesInitArgs, arg);
|
||||||
context->ResetUA();
|
context->ResetUA();
|
||||||
context->ResetSR();
|
context->ResetSR();
|
||||||
|
|
||||||
|
// Irradiance
|
||||||
|
context->BindSR(1, ddgiData.ProbesTrace->View());
|
||||||
|
context->BindSR(2, ddgiData.ActiveProbes->View());
|
||||||
|
context->BindUA(0, ddgiData.Result.ProbesIrradiance);
|
||||||
|
context->BindUA(1, ddgiData.Result.ProbesData);
|
||||||
|
#if DDGI_DEBUG_INSTABILITY
|
||||||
|
context->BindUA(2, ddgiData.ProbesInstability->View());
|
||||||
|
#endif
|
||||||
|
context->DispatchIndirect(_csUpdateProbesIrradiance, ddgiData.UpdateProbesInitArgs, arg);
|
||||||
|
context->ResetUA();
|
||||||
|
context->ResetSR();
|
||||||
}
|
}
|
||||||
|
|
||||||
arg += sizeof(GPUDispatchIndirectArgs);
|
arg += sizeof(GPUDispatchIndirectArgs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if DDGI_DEBUG_STATS
|
||||||
|
// Update stats
|
||||||
|
{
|
||||||
|
StatsData stats;
|
||||||
|
if (void* mapped = ddgiData.StatsRead->Map(GPUResourceMapMode::Read))
|
||||||
|
{
|
||||||
|
Platform::MemoryCopy(&stats, mapped, sizeof(stats));
|
||||||
|
ddgiData.StatsRead->Unmap();
|
||||||
|
ddgiData.StatsProbes.Add(stats.ProbesCount);
|
||||||
|
ddgiData.StatsRays.Add(stats.RaysCount);
|
||||||
|
}
|
||||||
|
context->CopyBuffer(ddgiData.StatsRead, ddgiData.StatsWrite, sizeof(stats));
|
||||||
|
if (++ddgiData.StatsFrames >= DDGI_DEBUG_STATS_FRAMES)
|
||||||
|
{
|
||||||
|
ddgiData.StatsFrames = 0;
|
||||||
|
stats.ProbesCount = ddgiData.StatsProbes.Average();
|
||||||
|
stats.RaysCount = ddgiData.StatsRays.Average();
|
||||||
|
LOG(Info, "DDGI active probes: {}, traced rays: {} per frame, rays per probe: {}", stats.ProbesCount, stats.RaysCount, stats.ProbesCount > 0 ? stats.RaysCount / stats.ProbesCount : 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Update probes border pixels
|
// Update probes border pixels
|
||||||
if (anyDirty)
|
if (anyDirty)
|
||||||
{
|
{
|
||||||
@@ -718,7 +807,11 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
|
|||||||
{
|
{
|
||||||
// Pass DDGI data to the material
|
// Pass DDGI data to the material
|
||||||
_debugMaterial->SetParameterValue(TEXT("ProbesData"), Variant(ddgiData.ProbesData));
|
_debugMaterial->SetParameterValue(TEXT("ProbesData"), Variant(ddgiData.ProbesData));
|
||||||
|
#if DDGI_DEBUG_INSTABILITY
|
||||||
|
_debugMaterial->SetParameterValue(TEXT("ProbesIrradiance"), Variant(ddgiData.ProbesInstability));
|
||||||
|
#else
|
||||||
_debugMaterial->SetParameterValue(TEXT("ProbesIrradiance"), Variant(ddgiData.ProbesIrradiance));
|
_debugMaterial->SetParameterValue(TEXT("ProbesIrradiance"), Variant(ddgiData.ProbesIrradiance));
|
||||||
|
#endif
|
||||||
_debugMaterial->SetParameterValue(TEXT("ProbesDistance"), Variant(ddgiData.ProbesDistance));
|
_debugMaterial->SetParameterValue(TEXT("ProbesDistance"), Variant(ddgiData.ProbesDistance));
|
||||||
auto cb = _debugMaterial->GetShader()->GetCB(3);
|
auto cb = _debugMaterial->GetShader()->GetCB(3);
|
||||||
if (cb)
|
if (cb)
|
||||||
|
|||||||
@@ -16,6 +16,8 @@
|
|||||||
#define DDGI_PROBE_STATE_INACTIVE 0
|
#define DDGI_PROBE_STATE_INACTIVE 0
|
||||||
#define DDGI_PROBE_STATE_ACTIVATED 1
|
#define DDGI_PROBE_STATE_ACTIVATED 1
|
||||||
#define DDGI_PROBE_STATE_ACTIVE 2
|
#define DDGI_PROBE_STATE_ACTIVE 2
|
||||||
|
#define DDGI_PROBE_ATTENTION_MIN 0.02f // Minimum probe attention value that still makes it active.
|
||||||
|
#define DDGI_PROBE_ATTENTION_MAX 0.98f // Maximum probe attention value that still makes it active (but not activated which is 1.0f).
|
||||||
#define DDGI_PROBE_RESOLUTION_IRRADIANCE 6 // Resolution (in texels) for probe irradiance data (excluding 1px padding on each side)
|
#define DDGI_PROBE_RESOLUTION_IRRADIANCE 6 // Resolution (in texels) for probe irradiance data (excluding 1px padding on each side)
|
||||||
#define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side)
|
#define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side)
|
||||||
#define DDGI_CASCADE_BLEND_SIZE 2.5f // Distance in probes over which cascades blending happens
|
#define DDGI_CASCADE_BLEND_SIZE 2.5f // Distance in probes over which cascades blending happens
|
||||||
@@ -99,15 +101,36 @@ float4 LoadDDGIProbeData(DDGIData data, Texture2D<snorm float4> probesData, uint
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Encodes probe data for storage in the probes data texture.
// offset    - probe offset, written unchanged to XYZ (CS_Classify passes it divided by the probes spacing, i.e. in [-1;1])
// state     - one of DDGI_PROBE_STATE_*
// attention - probe attention in [0;1], only used for active probes
// W packs state and attention together: -1 = inactive, +1 = activated, any value in-between = active probe with attention remapped from [0;1] to [-1;1].
float4 EncodeDDGIProbeData(float3 offset, uint state, float attention)
{
    // Inactive and activated probes are stored as sentinel values so the state can be recovered when decoding
    if (state == DDGI_PROBE_STATE_INACTIVE)
        return float4(offset, -1.0f);
    if (state == DDGI_PROBE_STATE_ACTIVATED)
        return float4(offset, 1.0f);

    // Keep active probes strictly between the sentinels, otherwise attention of 0 or 1 would decode as an inactive or activated probe
    attention = clamp(attention, DDGI_PROBE_ATTENTION_MIN, DDGI_PROBE_ATTENTION_MAX);

    // [0;1] -> [-1;1]
    return float4(offset, attention * 2.0f - 1.0f);
}
|
||||||
|
|
||||||
|
// Extracts the probe attention ([0;1]) from the W component of the encoded probe data.
// W at or below -1 yields 0 and W at or above +1 yields 1, values in-between are remapped linearly.
float DecodeDDGIProbeAttention(float4 probeData)
{
    float w = probeData.w;

    // [-1;1] -> [0;1]
    float attention = w * 0.5f + 0.5f;

    // Pin the two out-of-range cases to the exact end values
    attention = w >= 1.0f ? 1.0f : attention;
    attention = w <= -1.0f ? 0.0f : attention;
    return attention;
}
|
||||||
|
|
||||||
// Extracts the probe state (DDGI_PROBE_STATE_*) from the W component of the encoded probe data.
uint DecodeDDGIProbeState(float4 probeData)
{
    float w = probeData.w;
    bool isInactive = w <= -1.0f;
    bool isActivated = w >= 1.0f;

    // Only the two end values carry a dedicated state, everything in-between is an active probe
    if (!isInactive && !isActivated)
        return DDGI_PROBE_STATE_ACTIVE;
    return isInactive ? DDGI_PROBE_STATE_INACTIVE : DDGI_PROBE_STATE_ACTIVATED;
}
|
||||||
|
|
||||||
// Decodes probe world-space position (XYZ) from the encoded state
|
// Decodes probe world-space position (XYZ) from the encoded state
|
||||||
|
|||||||
@@ -20,11 +20,14 @@
|
|||||||
// This must match C++
|
// This must match C++
|
||||||
#define DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT 4096 // Maximum amount of probes to update at once during rays tracing and blending
|
#define DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT 4096 // Maximum amount of probes to update at once during rays tracing and blending
|
||||||
#define DDGI_TRACE_RAYS_LIMIT 256 // Limit of rays per-probe (runtime value can be smaller)
|
#define DDGI_TRACE_RAYS_LIMIT 256 // Limit of rays per-probe (runtime value can be smaller)
|
||||||
|
#define DDGI_TRACE_RAYS_MIN 16 // Minimum amount of rays to shoot for sleepy probes
|
||||||
#define DDGI_TRACE_NEGATIVE 0 // If true, rays that start inside geometry will use negative distance to indicate backface hit
|
#define DDGI_TRACE_NEGATIVE 0 // If true, rays that start inside geometry will use negative distance to indicate backface hit
|
||||||
#define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8
|
#define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8
|
||||||
#define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32
|
#define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32
|
||||||
#define DDGI_PROBE_RELOCATE_ITERATIVE 1 // If true, probes relocation algorithm tries to move them in additive way, otherwise all nearby locations are checked to find the best position
|
#define DDGI_PROBE_RELOCATE_ITERATIVE 1 // If true, probes relocation algorithm tries to move them in additive way, otherwise all nearby locations are checked to find the best position
|
||||||
#define DDGI_PROBE_RELOCATE_FIND_BEST 1 // If true, probes relocation algorithm tries to move to the best matching location within nearby area
|
#define DDGI_PROBE_RELOCATE_FIND_BEST 1 // If true, probes relocation algorithm tries to move to the best matching location within nearby area
|
||||||
|
#define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count
|
||||||
|
#define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging
|
||||||
|
|
||||||
META_CB_BEGIN(0, Data0)
|
META_CB_BEGIN(0, Data0)
|
||||||
DDGIData DDGI;
|
DDGIData DDGI;
|
||||||
@@ -37,10 +40,12 @@ uint ProbesCount;
|
|||||||
float ResetBlend;
|
float ResetBlend;
|
||||||
float TemporalTime;
|
float TemporalTime;
|
||||||
int4 ProbeScrollClears[4];
|
int4 ProbeScrollClears[4];
|
||||||
|
float3 ViewDir;
|
||||||
|
float Padding1;
|
||||||
META_CB_END
|
META_CB_END
|
||||||
|
|
||||||
// Constant buffer 1: cascade index and probe index offset.
// Field order and sizes mirror the C++ `GPU_CB_STRUCT(Data1 ...)` declaration, so keep both in sync when editing.
// The padding member is named Padding2 on the new side; Data0 declares a `float Padding1` of its own (presumably renamed to avoid a name clash between the two buffers).
META_CB_BEGIN(1, Data1)
|
META_CB_BEGIN(1, Data1)
|
||||||
float2 Padding1;
|
float2 Padding2;
|
||||||
uint CascadeIndex;
|
uint CascadeIndex;
|
||||||
uint ProbeIndexOffset;
|
uint ProbeIndexOffset;
|
||||||
META_CB_END
|
META_CB_END
|
||||||
@@ -73,10 +78,11 @@ float3 GetProbeRayDirection(DDGIData data, uint rayIndex, uint raysCount, uint p
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Calculates amount of rays to allocate for a probe.
// Scales linearly with the probe attention: DDGI_PROBE_ATTENTION_MIN (or less) gives the minimum ray count, DDGI_PROBE_ATTENTION_MAX (or more) gives data.RaysCount.
// The minimum is DDGI_TRACE_RAYS_MIN, limited to data.RaysCount so the result never exceeds the configured ray budget.
uint GetProbeRaysCount(DDGIData data, float probeAttention)
{
    // Limit the minimum to the budget; this also keeps the subtraction below from wrapping around when the budget is smaller than DDGI_TRACE_RAYS_MIN
    // NOTE(review): DDGIData is declared outside this view - RaysCount is assumed to be a non-negative integer, confirm
    uint raysMax = (uint)data.RaysCount;
    uint raysMin = min((uint)DDGI_TRACE_RAYS_MIN, raysMax);
    uint raysRange = raysMax - raysMin;

    // Rescale attention from [DDGI_PROBE_ATTENTION_MIN;DDGI_PROBE_ATTENTION_MAX] to the full [0;1] range
    float t = saturate((probeAttention - DDGI_PROBE_ATTENTION_MIN) / (DDGI_PROBE_ATTENTION_MAX - DDGI_PROBE_ATTENTION_MIN));
    return raysMin + (uint)(t * (float)raysRange);
}
|
||||||
|
|
||||||
#ifdef _CS_Classify
|
#ifdef _CS_Classify
|
||||||
@@ -118,7 +124,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
|
|||||||
if (prevCascadeWeight > 0.1f)
|
if (prevCascadeWeight > 0.1f)
|
||||||
{
|
{
|
||||||
// Disable probe
|
// Disable probe
|
||||||
RWProbesData[probeDataCoords] = EncodeDDGIProbeData(float3(0, 0, 0), DDGI_PROBE_STATE_INACTIVE);
|
RWProbesData[probeDataCoords] = EncodeDDGIProbeData(float3(0, 0, 0), DDGI_PROBE_STATE_INACTIVE, 0.0f);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -140,11 +146,15 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
|
|||||||
|
|
||||||
// Load probe state and position
|
// Load probe state and position
|
||||||
float4 probeData = RWProbesData[probeDataCoords];
|
float4 probeData = RWProbesData[probeDataCoords];
|
||||||
|
float probeAttention = DecodeDDGIProbeAttention(probeData);
|
||||||
uint probeState = DecodeDDGIProbeState(probeData);
|
uint probeState = DecodeDDGIProbeState(probeData);
|
||||||
uint probeStateOld = probeState;
|
uint probeStateOld = probeState;
|
||||||
float3 probeOffset = probeData.xyz * probesSpacing; // Probe offset is [-1;1] within probes spacing
|
float3 probeOffset = probeData.xyz * probesSpacing; // Probe offset is [-1;1] within probes spacing
|
||||||
if (wasScrolled || probeState == DDGI_PROBE_STATE_INACTIVE)
|
if (wasScrolled || probeState == DDGI_PROBE_STATE_INACTIVE)
|
||||||
|
{
|
||||||
probeOffset = float3(0, 0, 0); // Clear offset for a new probe
|
probeOffset = float3(0, 0, 0); // Clear offset for a new probe
|
||||||
|
probeAttention = 1.0f; // Wake-up
|
||||||
|
}
|
||||||
float3 probeOffsetOld = probeOffset;
|
float3 probeOffsetOld = probeOffset;
|
||||||
float3 probePosition = probeBasePosition + probeOffset;
|
float3 probePosition = probeBasePosition + probeOffset;
|
||||||
|
|
||||||
@@ -166,11 +176,24 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
|
|||||||
// Disable it
|
// Disable it
|
||||||
probeOffset = float3(0, 0, 0);
|
probeOffset = float3(0, 0, 0);
|
||||||
probeState = DDGI_PROBE_STATE_INACTIVE;
|
probeState = DDGI_PROBE_STATE_INACTIVE;
|
||||||
|
probeAttention = 0.0f;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Relocate only if probe location is not good enough
|
// Apply distance/view heuristics to probe attention
|
||||||
probeState = DDGI_PROBE_STATE_ACTIVE;
|
probeState = DDGI_PROBE_STATE_ACTIVE;
|
||||||
|
float3 viewToProbe = probePosition - GBuffer.ViewPos;
|
||||||
|
float distanceToProbe = length(viewToProbe);
|
||||||
|
viewToProbe /= distanceToProbe;
|
||||||
|
float probeViewDot = dot(viewToProbe, ViewDir);
|
||||||
|
probeAttention *= lerp(0.1f, 1.0f, saturate(probeViewDot)); // Reduce quality for probes behind the camera (or away from view dir)
|
||||||
|
probeAttention *= lerp(1.0f, 0.5f, saturate(sdfDst / voxelLimit)); // Reduce quality for probes far away from geometry
|
||||||
|
probeAttention += (1.0f - saturate(distanceToProbe / 1000.0f)) * 1.2f; // Boost quality for probes nearby view
|
||||||
|
//probeAttention = 0.0f; // Debug test lowest ray count
|
||||||
|
//probeAttention = 1.0f; // Debug test highest ray count
|
||||||
|
probeAttention = clamp(probeAttention, DDGI_PROBE_ATTENTION_MIN, DDGI_PROBE_ATTENTION_MAX);
|
||||||
|
|
||||||
|
// Relocate only if probe location is not good enough
|
||||||
if (sdf <= voxelLimit)
|
if (sdf <= voxelLimit)
|
||||||
{
|
{
|
||||||
#if DDGI_PROBE_RELOCATE_ITERATIVE
|
#if DDGI_PROBE_RELOCATE_ITERATIVE
|
||||||
@@ -222,6 +245,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
|
|||||||
// Disable probe that is too close to the geometry
|
// Disable probe that is too close to the geometry
|
||||||
probeOffset = float3(0, 0, 0);
|
probeOffset = float3(0, 0, 0);
|
||||||
probeState = DDGI_PROBE_STATE_INACTIVE;
|
probeState = DDGI_PROBE_STATE_INACTIVE;
|
||||||
|
probeAttention = 0.0f;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -232,6 +256,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
|
|||||||
// Disable probe
|
// Disable probe
|
||||||
probeOffset = float3(0, 0, 0);
|
probeOffset = float3(0, 0, 0);
|
||||||
probeState = DDGI_PROBE_STATE_INACTIVE;
|
probeState = DDGI_PROBE_STATE_INACTIVE;
|
||||||
|
probeAttention = 0.0f;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -254,12 +279,15 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if ((wasActivated || wasScrolled || wasRelocated) && probeState == DDGI_PROBE_STATE_ACTIVE)
|
if ((wasActivated || wasScrolled || wasRelocated) && probeState == DDGI_PROBE_STATE_ACTIVE)
|
||||||
|
{
|
||||||
probeState = DDGI_PROBE_STATE_ACTIVATED;
|
probeState = DDGI_PROBE_STATE_ACTIVATED;
|
||||||
|
probeAttention = 1.0f;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save probe state
|
// Save probe state
|
||||||
probeOffset /= probesSpacing; // Move offset back to [-1;1] space
|
probeOffset /= probesSpacing; // Move offset back to [-1;1] space
|
||||||
RWProbesData[probeDataCoords] = EncodeDDGIProbeData(probeOffset, probeState);
|
RWProbesData[probeDataCoords] = EncodeDDGIProbeData(probeOffset, probeState, probeAttention);
|
||||||
|
|
||||||
// Collect active probes
|
// Collect active probes
|
||||||
if (probeState != DDGI_PROBE_STATE_INACTIVE)
|
if (probeState != DDGI_PROBE_STATE_INACTIVE)
|
||||||
@@ -282,7 +310,7 @@ META_CS(true, FEATURE_LEVEL_SM5)
|
|||||||
[numthreads(1, 1, 1)]
|
[numthreads(1, 1, 1)]
|
||||||
void CS_UpdateProbesInitArgs()
|
void CS_UpdateProbesInitArgs()
|
||||||
{
|
{
|
||||||
uint activeProbesCount = ActiveProbes.Load(0);
|
uint activeProbesCount = ActiveProbes.Load(0); // Counter at 0
|
||||||
uint arg = 0;
|
uint arg = 0;
|
||||||
for (uint probesOffset = 0; probesOffset < activeProbesCount; probesOffset += DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT)
|
for (uint probesOffset = 0; probesOffset < activeProbesCount; probesOffset += DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT)
|
||||||
{
|
{
|
||||||
@@ -298,6 +326,9 @@ void CS_UpdateProbesInitArgs()
|
|||||||
#ifdef _CS_TraceRays
|
#ifdef _CS_TraceRays
|
||||||
|
|
||||||
RWTexture2D<float4> RWProbesTrace : register(u0);
|
RWTexture2D<float4> RWProbesTrace : register(u0);
|
||||||
|
#if DDGI_DEBUG_STATS
|
||||||
|
RWByteAddressBuffer RWStats : register(u1);
|
||||||
|
#endif
|
||||||
|
|
||||||
Texture3D<snorm float> GlobalSDFTex : register(t0);
|
Texture3D<snorm float> GlobalSDFTex : register(t0);
|
||||||
Texture3D<snorm float> GlobalSDFMip : register(t1);
|
Texture3D<snorm float> GlobalSDFMip : register(t1);
|
||||||
@@ -326,12 +357,14 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID)
|
|||||||
|
|
||||||
// Load current probe state and position
|
// Load current probe state and position
|
||||||
float4 probeData = LoadDDGIProbeData(DDGI, ProbesData, CascadeIndex, probeIndex);
|
float4 probeData = LoadDDGIProbeData(DDGI, ProbesData, CascadeIndex, probeIndex);
|
||||||
|
float probeAttention = DecodeDDGIProbeAttention(probeData);
|
||||||
uint probeState = DecodeDDGIProbeState(probeData);
|
uint probeState = DecodeDDGIProbeState(probeData);
|
||||||
uint probeRaysCount = GetProbeRaysCount(DDGI, probeState);
|
uint probeRaysCount = GetProbeRaysCount(DDGI, probeAttention);
|
||||||
if (probeState == DDGI_PROBE_STATE_INACTIVE || rayIndex >= probeRaysCount)
|
if (probeState == DDGI_PROBE_STATE_INACTIVE || rayIndex >= probeRaysCount)
|
||||||
return; // Skip disabled probes or if current thread's ray is unused
|
return; // Skip disabled probes or if current thread's ray is unused
|
||||||
float3 probePosition = DecodeDDGIProbePosition(DDGI, probeData, CascadeIndex, probeIndex, probeCoords);
|
float3 probePosition = DecodeDDGIProbePosition(DDGI, probeData, CascadeIndex, probeIndex, probeCoords);
|
||||||
float3 probeRayDirection = GetProbeRayDirection(DDGI, rayIndex, probeRaysCount, probeIndex, probeCoords);
|
float3 probeRayDirection = GetProbeRayDirection(DDGI, rayIndex, probeRaysCount, probeIndex, probeCoords);
|
||||||
|
// TODO: implement ray-guiding based on the probe irradiance (prioritize directions with high luminance)
|
||||||
|
|
||||||
// Trace ray with Global SDF
|
// Trace ray with Global SDF
|
||||||
GlobalSDFTrace trace;
|
GlobalSDFTrace trace;
|
||||||
@@ -370,6 +403,14 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID)
|
|||||||
|
|
||||||
// Write into probes trace results
|
// Write into probes trace results
|
||||||
RWProbesTrace[uint2(rayIndex, DispatchThreadId.x)] = radiance;
|
RWProbesTrace[uint2(rayIndex, DispatchThreadId.x)] = radiance;
|
||||||
|
|
||||||
|
#if DDGI_DEBUG_STATS
|
||||||
|
// Update stats
|
||||||
|
uint tmp;
|
||||||
|
RWStats.InterlockedAdd(0, 1, tmp);
|
||||||
|
if (rayIndex == 0)
|
||||||
|
RWStats.InterlockedAdd(4, 1, tmp);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@@ -380,6 +421,44 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID)
|
|||||||
// Update irradiance
|
// Update irradiance
|
||||||
#define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_IRRADIANCE
|
#define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_IRRADIANCE
|
||||||
groupshared float4 CachedProbesTraceRadiance[DDGI_TRACE_RAYS_LIMIT];
|
groupshared float4 CachedProbesTraceRadiance[DDGI_TRACE_RAYS_LIMIT];
|
||||||
|
groupshared float OutputInstability[DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION];
|
||||||
|
|
||||||
|
// Source: https://github.com/turanszkij/WickedEngine
|
||||||
|
#define BorderOffsetsSize (4 * DDGI_PROBE_RESOLUTION + 4)
|
||||||
|
// Lookup table used to fill a probe tile's 1-texel border from its interior texels.
// Each entry is (srcX, srcY, dstX, dstY), relative to the probe tile origin (baseCoords):
// the border-copy loop reads the texel at baseCoords + .xy and writes it to baseCoords + .zw.
// The table holds 28 entries, which equals BorderOffsetsSize only when DDGI_PROBE_RESOLUTION is 6
// (interior texels 1..6, border texels 0 and 7) - presumably the irradiance resolution; TODO confirm
// and regenerate the table if that resolution ever changes.
static const uint4 BorderOffsets[BorderOffsetsSize] = {
|
||||||
|
// Border row y = 0: copied from interior row y = 1 with x mirrored
uint4(6, 1, 1, 0),
|
||||||
|
uint4(5, 1, 2, 0),
|
||||||
|
uint4(4, 1, 3, 0),
|
||||||
|
uint4(3, 1, 4, 0),
|
||||||
|
uint4(2, 1, 5, 0),
|
||||||
|
uint4(1, 1, 6, 0),
|
||||||
|
|
||||||
|
// Border row y = 7: copied from interior row y = 6 with x mirrored
uint4(6, 6, 1, 7),
|
||||||
|
uint4(5, 6, 2, 7),
|
||||||
|
uint4(4, 6, 3, 7),
|
||||||
|
uint4(3, 6, 4, 7),
|
||||||
|
uint4(2, 6, 5, 7),
|
||||||
|
uint4(1, 6, 6, 7),
|
||||||
|
|
||||||
|
// Border column x = 0: copied from interior column x = 1 with y mirrored
uint4(1, 1, 0, 6),
|
||||||
|
uint4(1, 2, 0, 5),
|
||||||
|
uint4(1, 3, 0, 4),
|
||||||
|
uint4(1, 4, 0, 3),
|
||||||
|
uint4(1, 5, 0, 2),
|
||||||
|
uint4(1, 6, 0, 1),
|
||||||
|
|
||||||
|
// Border column x = 7: copied from interior column x = 6 with y mirrored
uint4(6, 1, 7, 6),
|
||||||
|
uint4(6, 2, 7, 5),
|
||||||
|
uint4(6, 3, 7, 4),
|
||||||
|
uint4(6, 4, 7, 3),
|
||||||
|
uint4(6, 5, 7, 2),
|
||||||
|
uint4(6, 6, 7, 1),
|
||||||
|
|
||||||
|
// Four corner texels: each copied from the diagonally opposite interior corner
uint4(1, 1, 7, 7),
|
||||||
|
uint4(6, 1, 0, 7),
|
||||||
|
uint4(1, 6, 7, 0),
|
||||||
|
uint4(6, 6, 0, 0),
|
||||||
|
};
|
||||||
#else
|
#else
|
||||||
// Update distance
|
// Update distance
|
||||||
#define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_DISTANCE
|
#define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_DISTANCE
|
||||||
@@ -389,7 +468,14 @@ groupshared float CachedProbesTraceDistance[DDGI_TRACE_RAYS_LIMIT];
|
|||||||
groupshared float3 CachedProbesTraceDirection[DDGI_TRACE_RAYS_LIMIT];
|
groupshared float3 CachedProbesTraceDirection[DDGI_TRACE_RAYS_LIMIT];
|
||||||
|
|
||||||
RWTexture2D<float4> RWOutput : register(u0);
|
RWTexture2D<float4> RWOutput : register(u0);
|
||||||
|
#if DDGI_PROBE_UPDATE_MODE == 0
|
||||||
|
RWTexture2D<snorm float4> RWProbesData : register(u1);
|
||||||
|
#if DDGI_DEBUG_INSTABILITY
|
||||||
|
RWTexture2D<float> RWOutputInstability : register(u2);
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
Texture2D<snorm float4> ProbesData : register(t0);
|
Texture2D<snorm float4> ProbesData : register(t0);
|
||||||
|
#endif
|
||||||
Texture2D<float4> ProbesTrace : register(t1);
|
Texture2D<float4> ProbesTrace : register(t1);
|
||||||
ByteAddressBuffer ActiveProbes : register(t2);
|
ByteAddressBuffer ActiveProbes : register(t2);
|
||||||
|
|
||||||
@@ -407,13 +493,16 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_
|
|||||||
uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex);
|
uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex);
|
||||||
probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords);
|
probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords);
|
||||||
|
|
||||||
// Skip disabled probes
|
// Load probe data
|
||||||
bool skip = false;
|
#if DDGI_PROBE_UPDATE_MODE == 0
|
||||||
|
int2 probeDataCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex);
|
||||||
|
float4 probeData = RWProbesData[probeDataCoords];
|
||||||
|
#else
|
||||||
float4 probeData = LoadDDGIProbeData(DDGI, ProbesData, CascadeIndex, probeIndex);
|
float4 probeData = LoadDDGIProbeData(DDGI, ProbesData, CascadeIndex, probeIndex);
|
||||||
|
#endif
|
||||||
|
float probeAttention = DecodeDDGIProbeAttention(probeData);
|
||||||
uint probeState = DecodeDDGIProbeState(probeData);
|
uint probeState = DecodeDDGIProbeState(probeData);
|
||||||
uint probeRaysCount = GetProbeRaysCount(DDGI, probeState);
|
uint probeRaysCount = GetProbeRaysCount(DDGI, probeAttention);
|
||||||
if (probeState == DDGI_PROBE_STATE_INACTIVE)
|
|
||||||
skip = true;
|
|
||||||
|
|
||||||
#if DDGI_PROBE_UPDATE_MODE == 0
|
#if DDGI_PROBE_UPDATE_MODE == 0
|
||||||
uint backfacesCount = 0;
|
uint backfacesCount = 0;
|
||||||
@@ -423,30 +512,23 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_
|
|||||||
float distanceLimit = probesSpacing * 1.5f;
|
float distanceLimit = probesSpacing * 1.5f;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
BRANCH
|
// Load trace rays results into shared memory to reuse across whole thread group (raysCount per thread)
|
||||||
if (!skip)
|
uint raysCount = (uint)(ceil((float)probeRaysCount / (float)(DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION)));
|
||||||
|
uint raysStart = GroupIndex * raysCount;
|
||||||
|
raysCount = max(min(raysStart + raysCount, probeRaysCount), raysStart) - raysStart;
|
||||||
|
for (uint i = 0; i < raysCount; i++)
|
||||||
{
|
{
|
||||||
// Load trace rays results into shared memory to reuse across whole thread group (raysCount per thread)
|
uint rayIndex = raysStart + i;
|
||||||
uint raysCount = (uint)(ceil((float)probeRaysCount / (float)(DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION)));
|
|
||||||
uint raysStart = GroupIndex * raysCount;
|
|
||||||
raysCount = max(min(raysStart + raysCount, probeRaysCount), raysStart) - raysStart;
|
|
||||||
for (uint i = 0; i < raysCount; i++)
|
|
||||||
{
|
|
||||||
uint rayIndex = raysStart + i;
|
|
||||||
#if DDGI_PROBE_UPDATE_MODE == 0
|
#if DDGI_PROBE_UPDATE_MODE == 0
|
||||||
CachedProbesTraceRadiance[rayIndex] = ProbesTrace[uint2(rayIndex, GroupId.x)];
|
CachedProbesTraceRadiance[rayIndex] = ProbesTrace[uint2(rayIndex, GroupId.x)];
|
||||||
#else
|
#else
|
||||||
float rayDistance = ProbesTrace[uint2(rayIndex, GroupId.x)].w;
|
float rayDistance = ProbesTrace[uint2(rayIndex, GroupId.x)].w;
|
||||||
CachedProbesTraceDistance[rayIndex] = min(abs(rayDistance), distanceLimit);
|
CachedProbesTraceDistance[rayIndex] = min(abs(rayDistance), distanceLimit);
|
||||||
#endif
|
#endif
|
||||||
CachedProbesTraceDirection[rayIndex] = GetProbeRayDirection(DDGI, rayIndex, probeRaysCount, probeIndex, probeCoords);
|
CachedProbesTraceDirection[rayIndex] = GetProbeRayDirection(DDGI, rayIndex, probeRaysCount, probeIndex, probeCoords);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
if (skip)
|
|
||||||
return;
|
|
||||||
probeCoords = GetDDGIProbeCoords(DDGI, probeIndex);
|
probeCoords = GetDDGIProbeCoords(DDGI, probeIndex);
|
||||||
uint2 outputCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2) + 1 + GroupThreadId.xy;
|
|
||||||
|
|
||||||
// Calculate octahedral projection for probe (unwraps spherical projection into a square)
|
// Calculate octahedral projection for probe (unwraps spherical projection into a square)
|
||||||
float2 octahedralCoords = GetOctahedralCoords(GroupThreadId.xy, DDGI_PROBE_RESOLUTION);
|
float2 octahedralCoords = GetOctahedralCoords(GroupThreadId.xy, DDGI_PROBE_RESOLUTION);
|
||||||
@@ -495,30 +577,52 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_
|
|||||||
result.rgb *= 1.0f / (2.0f * max(result.a, epsilon));
|
result.rgb *= 1.0f / (2.0f * max(result.a, epsilon));
|
||||||
|
|
||||||
// Load current probe value
|
// Load current probe value
|
||||||
|
uint2 outputCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2) + 1 + GroupThreadId.xy;
|
||||||
float3 previous = RWOutput[outputCoords].rgb;
|
float3 previous = RWOutput[outputCoords].rgb;
|
||||||
bool wasActivated = probeState == DDGI_PROBE_STATE_ACTIVATED;
|
bool wasActivated = probeState == DDGI_PROBE_STATE_ACTIVATED || ResetBlend;
|
||||||
if (ResetBlend || wasActivated)
|
if (wasActivated)
|
||||||
previous = float3(0, 0, 0);
|
previous = result.rgb;
|
||||||
|
|
||||||
|
#if DDGI_PROBE_UPDATE_MODE == 0
|
||||||
|
// Calculate instability of the irradiance
|
||||||
|
float previousLuma = Luminance(previous.rgb);
|
||||||
|
float resultLuma = Luminance(result.rgb);
|
||||||
|
float instability = abs(previousLuma - resultLuma) / previousLuma; // Percentage change in luminance of irradiance
|
||||||
|
instability = max(instability, Max3(abs(result.rgb - previous) / previous)); // Percentage of color delta change of irradiance
|
||||||
|
//instability *= saturate(result.a); // Reduce instability in areas with a small ray-coverage
|
||||||
|
//instability = pow(instability, 1.2f); // Increase contrast
|
||||||
|
instability *= 2.0f; // Make it stronger on scene changes
|
||||||
|
//instability = saturate(instability);
|
||||||
|
OutputInstability[GroupIndex] = instability;
|
||||||
|
#if DDGI_DEBUG_INSTABILITY
|
||||||
|
RWOutputInstability[outputCoords] = instability;
|
||||||
|
//RWOutputInstability[outputCoords] = probeAttention; // Debug test probe attention visualization
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
// Blend current value with the previous probe data
|
// Blend current value with the previous probe data
|
||||||
float historyWeight = DDGI.ProbeHistoryWeight;
|
float historyWeightFast = DDGI.ProbeHistoryWeight;
|
||||||
//historyWeight = 1.0f;
|
float historyWeightSlow = 0.97f;
|
||||||
//historyWeight = 0.0f;
|
|
||||||
if (ResetBlend || wasActivated)
|
|
||||||
historyWeight = 0.0f;
|
|
||||||
#if DDGI_PROBE_UPDATE_MODE == 0
|
#if DDGI_PROBE_UPDATE_MODE == 0
|
||||||
result *= DDGI.IndirectLightingIntensity;
|
|
||||||
#if DDGI_SRGB_BLENDING
|
|
||||||
result.rgb = pow(result.rgb, 1.0f / DDGI.IrradianceGamma);
|
|
||||||
#endif
|
|
||||||
float3 irradianceDelta = result.rgb - previous;
|
float3 irradianceDelta = result.rgb - previous;
|
||||||
float irradianceDeltaMax = Max3(abs(irradianceDelta));
|
float irradianceDeltaMax = Max3(abs(irradianceDelta));
|
||||||
float irradianceDeltaLen = length(irradianceDelta);
|
float irradianceDeltaLen = length(irradianceDelta);
|
||||||
if (irradianceDeltaMax > 0.5f)
|
if (irradianceDeltaMax > 0.5f)
|
||||||
{
|
{
|
||||||
// Reduce history weight after significant lighting change
|
// Reduce history weight after significant lighting change
|
||||||
historyWeight = historyWeight * 0.5f;
|
historyWeightFast *= 0.5f;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
float historyWeight = lerp(historyWeightSlow, historyWeightFast, probeAttention * probeAttention * probeAttention);
|
||||||
|
//historyWeight = 1.0f; // Debug full-blend
|
||||||
|
//historyWeight = 0.0f; // Debug no-blend
|
||||||
|
if (wasActivated)
|
||||||
|
historyWeight = 0.0f;
|
||||||
|
#if DDGI_PROBE_UPDATE_MODE == 0
|
||||||
|
result *= DDGI.IndirectLightingIntensity;
|
||||||
|
#if DDGI_SRGB_BLENDING
|
||||||
|
result.rgb = pow(max(result.rgb, 0), 1.0f / DDGI.IrradianceGamma);
|
||||||
|
#endif
|
||||||
if (irradianceDeltaLen > 2.0f)
|
if (irradianceDeltaLen > 2.0f)
|
||||||
{
|
{
|
||||||
// Reduce flickering during rapid brightness changes
|
// Reduce flickering during rapid brightness changes
|
||||||
@@ -530,6 +634,45 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
RWOutput[outputCoords] = result;
|
RWOutput[outputCoords] = result;
|
||||||
|
|
||||||
|
#if DDGI_PROBE_UPDATE_MODE == 0
|
||||||
|
// The first thread updates the probe attention based on the instability of all texels
|
||||||
|
GroupMemoryBarrierWithGroupSync();
|
||||||
|
BRANCH
|
||||||
|
if (GroupIndex == 0 && probeState != DDGI_PROBE_STATE_INACTIVE)
|
||||||
|
{
|
||||||
|
// Calculate instability statistics for a whole probe
|
||||||
|
float instabilityAvg = 0;
|
||||||
|
for (uint i = 0; i < DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION; i++)
|
||||||
|
instabilityAvg += OutputInstability[i];
|
||||||
|
instabilityAvg *= 1.0f / float(DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION);
|
||||||
|
instabilityAvg = saturate(instabilityAvg);
|
||||||
|
instability = instabilityAvg;
|
||||||
|
|
||||||
|
// Calculate probe attention
|
||||||
|
float taregAttention = lerp(0.5f, DDGI_PROBE_ATTENTION_MAX, instability); // Use some base level
|
||||||
|
if (taregAttention >= probeAttention)
|
||||||
|
probeAttention = taregAttention; // Quick jump up
|
||||||
|
else
|
||||||
|
probeAttention = lerp(probeAttention, taregAttention, 0.2f); // Slow blend down
|
||||||
|
if (probeState == DDGI_PROBE_STATE_ACTIVATED)
|
||||||
|
probeAttention = DDGI_PROBE_ATTENTION_MAX;
|
||||||
|
|
||||||
|
// Update probe data for the next frame
|
||||||
|
probeState = DDGI_PROBE_STATE_ACTIVE;
|
||||||
|
RWProbesData[probeDataCoords] = EncodeDDGIProbeData(probeData.xyz, probeState, probeAttention);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if DDGI_DEBUG_INSTABILITY
|
||||||
|
// Copy border pixels
|
||||||
|
uint2 baseCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2);
|
||||||
|
for (uint borderIndex = GroupIndex; borderIndex < BorderOffsetsSize; borderIndex += DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION)
|
||||||
|
{
|
||||||
|
uint4 borderOffsets = BorderOffsets[borderIndex];
|
||||||
|
RWOutputInstability[baseCoords + borderOffsets.zw] = RWOutputInstability[baseCoords + borderOffsets.xy];
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compute shader for updating probes irradiance or distance texture borders (fills gaps between probes to support bilinear filtering)
|
// Compute shader for updating probes irradiance or distance texture borders (fills gaps between probes to support bilinear filtering)
|
||||||
|
|||||||
Reference in New Issue
Block a user