800 lines
32 KiB
GLSL
800 lines
32 KiB
GLSL
// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved.
|
|
|
|
// Implementation based on:
|
|
// "Dynamic Diffuse Global Illumination with Ray-Traced Irradiance Probes", Journal of Computer Graphics Tools, April 2019
|
|
// Zander Majercik, Jean-Philippe Guertin, Derek Nowrouzezahrai, and Morgan McGuire
|
|
// https://morgan3d.github.io/articles/2019-04-01-ddgi/index.html and https://gdcvault.com/play/1026182/
|
|
//
|
|
// Additional references:
|
|
// "Scaling Probe-Based Real-Time Dynamic Global Illumination for Production", https://jcgt.org/published/0010/02/01/
|
|
// "Dynamic Diffuse Global Illumination with Ray-Traced Irradiance Fields", https://jcgt.org/published/0008/02/01/
|
|
|
|
#include "./Flax/Common.hlsl"
|
|
#include "./Flax/Math.hlsl"
|
|
#include "./Flax/Noise.hlsl"
|
|
#include "./Flax/Quaternion.hlsl"
|
|
#include "./Flax/GlobalSignDistanceField.hlsl"
|
|
#include "./Flax/GI/GlobalSurfaceAtlas.hlsl"
|
|
#include "./Flax/GI/DDGI.hlsl"
|
|
|
|
// This must match C++
|
|
#define DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT 4096 // Maximum amount of probes to update at once during rays tracing and blending
|
|
#define DDGI_TRACE_RAYS_LIMIT 256 // Limit of rays per-probe (runtime value can be smaller)
|
|
#define DDGI_TRACE_RAYS_MIN 16 // Minimum amount of rays to shoot for sleepy probes
|
|
#define DDGI_TRACE_NEGATIVE 0 // If true, rays that start inside geometry will use negative distance to indicate backface hit
|
|
#define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8
|
|
#define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32
|
|
#define DDGI_PROBE_RELOCATE_ITERATIVE 1 // If true, probes relocation algorithm tries to move them in additive way, otherwise all nearby locations are checked to find the best position
|
|
#define DDGI_PROBE_RELOCATE_FIND_BEST 1 // If true, probes relocation algorithm tries to move to the best matching location within nearby area
|
|
#define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count
|
|
#define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging
|
|
|
|
META_CB_BEGIN(0, Data0)
|
|
DDGIData DDGI;
|
|
GlobalSDFData GlobalSDF;
|
|
GlobalSurfaceAtlasData GlobalSurfaceAtlas;
|
|
GBufferData GBuffer;
|
|
float4 RaysRotation;
|
|
float SkyboxIntensity;
|
|
uint ProbesCount;
|
|
float ResetBlend;
|
|
float TemporalTime;
|
|
int4 ProbeScrollClears[4];
|
|
float3 ViewDir;
|
|
float Padding1;
|
|
META_CB_END
|
|
|
|
META_CB_BEGIN(1, Data1)
|
|
float2 Padding2;
|
|
uint CascadeIndex;
|
|
uint ProbeIndexOffset;
|
|
META_CB_END
|
|
|
|
// Returns the direction of the given sample from a Spherical Fibonacci lattice (evenly distributed points on a unit sphere).
// sampleIndex - index of the sample, samplesCount - total amount of samples in the lattice (must be non-zero).
float3 GetSphericalFibonacci(float sampleIndex, float samplesCount)
{
    // Fractional part of the golden ratio drives the azimuth so consecutive samples never line up
    float goldenFraction = (sqrt(5.0f) * 0.5f + 0.5f) - 1.0f;
    float turns = sampleIndex * goldenFraction;
    float azimuth = (2.0f * PI) * (turns - floor(turns));

    // Z coordinate goes from +1 towards -1 as the sample index grows
    float z = 1.0f - (2.0f * sampleIndex + 1.0f) * (1.0f / samplesCount);
    float ringRadius = sqrt(saturate(1.0f - (z * z)));

    float3 direction;
    direction.x = cos(azimuth) * ringRadius;
    direction.y = sin(azimuth) * ringRadius;
    direction.z = z;
    return direction;
}
|
|
|
|
// Calculates a normalized ray direction for a probe ray (based on the ray index, the current per-frame rays rotation phase and a per-probe rotation).
// data - DDGI constants, rayIndex/raysCount - ray within the probe rays set, probeIndex/probeCoords - probe used to decorrelate the rotation between probes.
// Tracing and blending passes must call this with the same arguments to get matching directions.
float3 GetProbeRayDirection(DDGIData data, uint rayIndex, uint raysCount, uint probeIndex, uint3 probeCoords)
{
    float4 rotation = RaysRotation;

    // Randomize rotation per-probe (otherwise all probes are in sync)
    float3 probePos = (float3)probeCoords / (float3)data.ProbesCounts;
    float3 axisSeed = Mod289(probePos);
    // Probe at coords (0,0,0) has a zero probePos; if the seed ends up zero-length (presumably Mod289(0) is 0 - confirm against Noise.hlsl)
    // then normalize() would divide by zero and every ray of that probe would become NaN, so fall back to a fixed axis in that case
    float3 randomAxis = float3(0, 0, 1);
    if (dot(axisSeed, axisSeed) > 0.0f)
        randomAxis = normalize(axisSeed);
    float randomAngle = (float)probeIndex / (float)ProbesCount * (2.0f * PI);
    rotation = QuaternionMultiply(rotation, QuaternionFromAxisAngle(randomAxis, randomAngle));

    // Per-ray direction from the Spherical Fibonacci lattice - rotated by the combined per-frame and per-probe rotation
    float3 direction = GetSphericalFibonacci((float)rayIndex, (float)raysCount);
    return normalize(QuaternionRotate(rotation, direction));
}
|
|
|
|
// Calculates amount of rays to allocate for a probe.
// probeAttention is remapped from [DDGI_PROBE_ATTENTION_MIN; DDGI_PROBE_ATTENTION_MAX] to [0; 1] and used to blend between DDGI_TRACE_RAYS_MIN and data.RaysCount.
uint GetProbeRaysCount(DDGIData data, float probeAttention)
{
    //return data.RaysCount;
    probeAttention = saturate((probeAttention - DDGI_PROBE_ATTENTION_MIN) / (DDGI_PROBE_ATTENTION_MAX - DDGI_PROBE_ATTENTION_MIN));

    // Subtract in floating-point: data.RaysCount is declared outside this file and if it is unsigned then an integer subtraction
    // would wrap around for RaysCount < DDGI_TRACE_RAYS_MIN (making the max() guard below ineffective)
    float extraRaysMax = (float)data.RaysCount - (float)DDGI_TRACE_RAYS_MIN;
    return DDGI_TRACE_RAYS_MIN + (uint)max(probeAttention * extraRaysMax, 0.0f);
}
|
|
|
|
#ifdef _CS_Classify
|
|
|
|
RWTexture2D<snorm float4> RWProbesData : register(u0);
|
|
RWByteAddressBuffer RWActiveProbes : register(u1);
|
|
|
|
Texture3D<snorm float> GlobalSDFTex : register(t0);
|
|
Texture3D<snorm float> GlobalSDFMip : register(t1);
|
|
|
|
// Linearly remaps the value from [fromMin; fromMax] range into [toMin; toMax] range (per-component, result is not clamped).
float3 Remap(float3 value, float3 fromMin, float3 fromMax, float3 toMin, float3 toMax)
{
    float3 normalized = (value - fromMin) / (fromMax - fromMin);
    float3 targetRange = toMax - toMin;
    return normalized * targetRange + toMin;
}
|
|
|
|
// Compute shader that classifies probes as active or inactive, relocates them with the Global SDF and appends every non-inactive probe to the active probes list.
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(DDGI_PROBE_CLASSIFY_GROUP_SIZE, 1, 1)]
void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
{
    // One thread per probe (threads beyond the probes count have nothing to do)
    uint probeIndex = DispatchThreadId.x;
    if (probeIndex >= ProbesCount)
        return;
    uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex);
    probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords);
    float3 probeBasePosition = GetDDGIProbeWorldPosition(DDGI, CascadeIndex, probeCoords);
    float probesSpacing = DDGI.ProbesOriginAndSpacing[CascadeIndex].w;
    int2 probeDataCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex);

    // Disable probes that are in the range of the higher-quality cascade (the one with lower index)
    if (CascadeIndex > 0)
    {
        uint lowerCascade = CascadeIndex - 1;
        float lowerSpacing = DDGI.ProbesOriginAndSpacing[lowerCascade].w;
        float3 lowerOrigin = DDGI.ProbesScrollOffsets[lowerCascade].xyz * lowerSpacing + DDGI.ProbesOriginAndSpacing[lowerCascade].xyz;
        float3 lowerExtent = (DDGI.ProbesCounts - 1) * (lowerSpacing * 0.5f);
        lowerExtent -= probesSpacing * ceil(DDGI_CASCADE_BLEND_SIZE); // Apply safe margin to allow probes on cascade edges
        float3 insideMargin = lowerExtent - abs(probeBasePosition - lowerOrigin);
        if (Min3(insideMargin) > 0.1f)
        {
            // Probe is covered by the other cascade so store it as inactive and skip the rest
            RWProbesData[probeDataCoords] = EncodeDDGIProbeData(float3(0, 0, 0), DDGI_PROBE_STATE_INACTIVE, 0.0f);
            return;
        }
    }

    // Check if probe was scrolled (probe coordinate shifted by the scroll clear amount in either direction leaves the grid)
    int3 scrollClears = ProbeScrollClears[CascadeIndex].xyz;
    bool wasScrolled = false;
    UNROLL
    for (uint axis = 0; axis < 3; axis++)
    {
        int axisProbesCount = (int)DDGI.ProbesCounts[axis];
        int coord = (int)probeCoords[axis];
        int coordForward = coord + scrollClears[axis];
        int coordBackward = coord - scrollClears[axis];
        if (coordForward < 0 || coordForward >= axisProbesCount || coordBackward < 0 || coordBackward >= axisProbesCount)
            wasScrolled = true;
    }

    // Load probe state and position
    float4 probeData = RWProbesData[probeDataCoords];
    uint probeState = DecodeDDGIProbeState(probeData);
    uint probeStateOld = probeState;
    float probeAttention = DecodeDDGIProbeAttention(probeData);
    float3 probeOffset = probeData.xyz * probesSpacing; // Probe offset is [-1;1] within probes spacing
    if (wasScrolled || probeState == DDGI_PROBE_STATE_INACTIVE)
    {
        // New probe starts at the grid location with full attention
        probeOffset = float3(0, 0, 0);
        probeAttention = 1.0f;
    }
    float3 probeOffsetOld = probeOffset;
    float3 probePosition = probeBasePosition + probeOffset;

    // Use Global SDF to quickly get distance and direction to the scene geometry
#if DDGI_PROBE_RELOCATE_ITERATIVE
    float sdf;
    float3 sdfNormal = normalize(SampleGlobalSDFGradient(GlobalSDF, GlobalSDFTex, GlobalSDFMip, probePosition, sdf));
#else
    float sdf = SampleGlobalSDF(GlobalSDF, GlobalSDFTex, GlobalSDFMip, probePosition);
#endif
    float sdfDst = abs(sdf);

    // Per-cascade limits (in probes spacing units)
    const float ProbesDistanceLimits[4] = { 1.1f, 2.3f, 2.5f, 2.5f };
    const float ProbesRelocateLimits[4] = { 0.4f, 0.5f, 0.6f, 0.7f };
    float voxelLimit = GlobalSDF.CascadeVoxelSize[CascadeIndex] * 0.8f;
    float distanceLimit = probesSpacing * ProbesDistanceLimits[CascadeIndex];
    float relocateLimit = probesSpacing * ProbesRelocateLimits[CascadeIndex];

    if (sdfDst > distanceLimit + length(probeOffset)) // Probe is too far from geometry (or deep inside)
    {
        // Disable it
        probeState = DDGI_PROBE_STATE_INACTIVE;
        probeAttention = 0.0f;
        probeOffset = float3(0, 0, 0);
    }
    else
    {
        probeState = DDGI_PROBE_STATE_ACTIVE;

        // Apply distance/view heuristics to probe attention
        float3 viewToProbe = probePosition - GBuffer.ViewPos;
        float distanceToProbe = length(viewToProbe);
        float probeViewDot = dot(viewToProbe / distanceToProbe, ViewDir);
        float viewFactor = lerp(0.1f, 1.0f, saturate(probeViewDot)); // Reduce quality for probes behind the camera (or away from view dir)
        float geometryFactor = lerp(1.0f, 0.5f, saturate(sdfDst / voxelLimit)); // Reduce quality for probes far away from geometry
        float nearbyBoost = (1.0f - saturate(distanceToProbe / 1000.0f)) * 1.2f; // Boost quality for probes nearby view
        probeAttention *= viewFactor;
        probeAttention *= geometryFactor;
        probeAttention += nearbyBoost;
        //probeAttention = 0.0f; // Debug test lowest ray count
        //probeAttention = 1.0f; // Debug test highest ray count
        probeAttention = clamp(probeAttention, DDGI_PROBE_ATTENTION_MIN, DDGI_PROBE_ATTENTION_MAX);

        // Relocate only if probe location is not good enough
        if (sdf <= voxelLimit)
        {
#if DDGI_PROBE_RELOCATE_ITERATIVE
            {
                // Use SDF gradient to push the probe away from the surface (full step for probes that were not active before)
                float relocateSpeed = probeStateOld == DDGI_PROBE_STATE_ACTIVE ? 0.3f : 1.0f;
                float3 candidateOffset = probeOffset + sdfNormal * ((sdf + voxelLimit) * relocateSpeed);

                // Accept the new offset only within the relocation range, otherwise go back to the grid location
                probeOffset = length(candidateOffset) < relocateLimit ? candidateOffset : float3(0, 0, 0);

                // Read SDF at the new position for additional check
                probePosition = probeBasePosition + probeOffset;
                sdf = SampleGlobalSDF(GlobalSDF, GlobalSDFTex, GlobalSDFMip, probePosition);
                sdfDst = abs(sdf);
            }
            if (sdf <= voxelLimit * 1.1f) // Add some safe-bias to reduce artifacts
#endif
            {
#if DDGI_PROBE_RELOCATE_FIND_BEST
                // Sample Global SDF on a 4x4x4 grid of offsets around the probe base location
                uint sdfCascade = GetGlobalSDFCascade(GlobalSDF, probeBasePosition);
                float4 CachedProbeOffsets[64];
                for (uint sampleIndex = 0; sampleIndex < 64; sampleIndex++)
                {
                    // Unpack flat index into the grid cell (x is the slowest axis, z is the fastest one)
                    uint3 cell = uint3(sampleIndex / 16, (sampleIndex / 4) % 4, sampleIndex % 4);
                    float3 offset = Remap(float3(cell.x, cell.y, cell.z), 0, 3, -0.707f, 0.707f) * relocateLimit;
                    float offsetSdf = SampleGlobalSDFCascade(GlobalSDF, GlobalSDFTex, probeBasePosition + offset, sdfCascade);
                    CachedProbeOffsets[sampleIndex] = float4(offset, offsetSdf);
                }

                // Select the location with the highest SDF value (the first one wins on ties)
                float4 bestOffset = CachedProbeOffsets[0];
                for (uint candidateIndex = 1; candidateIndex < 64; candidateIndex++)
                {
                    float4 candidate = CachedProbeOffsets[candidateIndex];
                    if (candidate.w > bestOffset.w)
                        bestOffset = candidate;
                }
                if (bestOffset.w <= voxelLimit)
                {
                    // Even the best location is too close to the geometry so disable the probe
                    probeState = DDGI_PROBE_STATE_INACTIVE;
                    probeAttention = 0.0f;
                    probeOffset = float3(0, 0, 0);
                }
                else
                {
                    // Relocate the probe to the best found location
                    probeOffset = bestOffset.xyz;
                }
#elif DDGI_PROBE_RELOCATE_ITERATIVE
                // Iterative relocation was not enough so disable the probe
                probeState = DDGI_PROBE_STATE_INACTIVE;
                probeAttention = 0.0f;
                probeOffset = float3(0, 0, 0);
#endif
            }
        }

        // If probe was in a different location or was activated now then mark it as activated
        bool wasActivated = probeStateOld == DDGI_PROBE_STATE_INACTIVE;
        bool wasRelocated = distance(probeOffset, probeOffsetOld) > 2.0f;
#if DDGI_PROBE_RELOCATE_FIND_BEST || DDGI_PROBE_RELOCATE_ITERATIVE
        if (wasRelocated && !wasActivated)
        {
            // If the previous location is visible from the new one then don't re-activate the probe (smoother blend)
            float3 toOldLocation = probeOffsetOld - probeOffset;
            float toOldLocationLen = length(toOldLocation);
            GlobalSDFTrace trace;
            trace.Init(probeBasePosition + probeOffset, toOldLocation / toOldLocationLen, 0.0f, toOldLocationLen);
            GlobalSDFHit hit = RayTraceGlobalSDF(GlobalSDF, GlobalSDFTex, GlobalSDFMip, trace);
            wasRelocated = hit.IsHit();
        }
#endif
        if ((wasActivated || wasScrolled || wasRelocated) && probeState == DDGI_PROBE_STATE_ACTIVE)
        {
            probeState = DDGI_PROBE_STATE_ACTIVATED;
            probeAttention = 1.0f;
        }
    }

    // Save probe state
    probeOffset /= probesSpacing; // Move offset back to [-1;1] space
    RWProbesData[probeDataCoords] = EncodeDDGIProbeData(probeOffset, probeState, probeAttention);

    // Collect active probes
    if (probeState != DDGI_PROBE_STATE_INACTIVE)
    {
        // Counter is stored in the first 4 bytes, followed by the probe indices (index before the scrolling is stored)
        uint activeProbeIndex;
        RWActiveProbes.InterlockedAdd(0, 1, activeProbeIndex);
        RWActiveProbes.Store(activeProbeIndex * 4 + 4, DispatchThreadId.x);
    }
}
|
|
|
|
#endif
|
|
|
|
#ifdef _CS_UpdateProbesInitArgs
|
|
|
|
RWBuffer<uint> UpdateProbesInitArgs : register(u0);
|
|
ByteAddressBuffer ActiveProbes : register(t0);
|
|
|
|
// Compute shader for building indirect dispatch arguments for CS_TraceRays and CS_UpdateProbes.
// Writes one (batchSize, 1, 1) triple per batch of up to DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT active probes (nothing is written when there are no active probes).
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(1, 1, 1)]
void CS_UpdateProbesInitArgs()
{
    uint activeProbesCount = ActiveProbes.Load(0); // Counter at 0
    uint argsOffset = 0;
    uint probesOffset = 0;
    while (probesOffset < activeProbesCount)
    {
        // The last batch can be smaller than the limit
        UpdateProbesInitArgs[argsOffset + 0] = min(activeProbesCount - probesOffset, DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT);
        UpdateProbesInitArgs[argsOffset + 1] = 1;
        UpdateProbesInitArgs[argsOffset + 2] = 1;
        argsOffset += 3;
        probesOffset += DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT;
    }
}
|
|
|
|
#endif
|
|
|
|
#ifdef _CS_TraceRays
|
|
|
|
RWTexture2D<float4> RWProbesTrace : register(u0);
|
|
#if DDGI_DEBUG_STATS
|
|
RWByteAddressBuffer RWStats : register(u1);
|
|
#endif
|
|
|
|
Texture3D<snorm float> GlobalSDFTex : register(t0);
|
|
Texture3D<snorm float> GlobalSDFMip : register(t1);
|
|
ByteAddressBuffer GlobalSurfaceAtlasChunks : register(t2);
|
|
ByteAddressBuffer RWGlobalSurfaceAtlasCulledObjects : register(t3);
|
|
Buffer<float4> GlobalSurfaceAtlasObjects : register(t4);
|
|
Texture2D GlobalSurfaceAtlasDepth : register(t5);
|
|
Texture2D GlobalSurfaceAtlasTex : register(t6);
|
|
Texture2D<snorm float4> ProbesData : register(t7);
|
|
TextureCube Skybox : register(t8);
|
|
ByteAddressBuffer ActiveProbes : register(t9);
|
|
|
|
// Compute shader for tracing rays for probes using Global SDF and Global Surface Atlas (1 ray per-thread).
// DispatchThreadId.x - index of the probe within the current batch, DispatchThreadId.y - index of the ray within the probe.
META_CS(true, FEATURE_LEVEL_SM5)
META_PERMUTATION_1(DDGI_TRACE_RAYS_COUNT=96)
META_PERMUTATION_1(DDGI_TRACE_RAYS_COUNT=128)
META_PERMUTATION_1(DDGI_TRACE_RAYS_COUNT=192)
META_PERMUTATION_1(DDGI_TRACE_RAYS_COUNT=256)
[numthreads(1, DDGI_TRACE_RAYS_COUNT, 1)]
void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID)
{
    uint batchProbeIndex = DispatchThreadId.x;
    uint rayIndex = DispatchThreadId.y;

    // Read the probe index from the active probes list (the first 4 bytes hold the counter)
    uint probeIndex = ActiveProbes.Load((batchProbeIndex + ProbeIndexOffset + 1) * 4);
    uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex);
    probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords);

    // Load current probe state and the amount of rays it uses
    float4 probeData = LoadDDGIProbeData(DDGI, ProbesData, CascadeIndex, probeIndex);
    uint probeState = DecodeDDGIProbeState(probeData);
    uint probeRaysCount = GetProbeRaysCount(DDGI, DecodeDDGIProbeAttention(probeData));
    if (probeState == DDGI_PROBE_STATE_INACTIVE || rayIndex >= probeRaysCount)
        return; // Skip disabled probes or if current thread's ray is unused

    // Setup the ray
    float3 probePosition = DecodeDDGIProbePosition(DDGI, probeData, CascadeIndex, probeIndex, probeCoords);
    float3 probeRayDirection = GetProbeRayDirection(DDGI, rayIndex, probeRaysCount, probeIndex, probeCoords);
    // TODO: implement ray-guiding based on the probe irradiance (prioritize directions with high luminance)

    // Trace ray with Global SDF
    GlobalSDFTrace trace;
    trace.Init(probePosition, probeRayDirection, 0.0f, DDGI.RayMaxDistance);
    GlobalSDFHit hit = RayTraceGlobalSDF(GlobalSDF, GlobalSDFTex, GlobalSDFMip, trace);

    // Calculate radiance (RGB) and distance (A)
    float4 radiance;
    if (!hit.IsHit())
    {
        // Ray hits sky
        float3 skyColor = Skybox.SampleLevel(SamplerLinearClamp, probeRayDirection, 0).rgb * SkyboxIntensity;
        radiance = float4(skyColor, 1e27f); // Sky is the limit
    }
    else
#if DDGI_TRACE_NEGATIVE
    if (hit.HitSDF <= 0.0f && hit.HitTime <= GlobalSDF.CascadeVoxelSize[0])
    {
        // Ray starts inside geometry (mark as negative distance and reduce its influence during irradiance blending)
        radiance = float4(0, 0, 0, hit.HitTime * -0.25f);
    }
    else
#endif
    {
        // Sample Global Surface Atlas to get the lighting at the hit location
        float3 hitPosition = hit.GetHitPosition(trace);
        float surfaceThreshold = GetGlobalSurfaceAtlasThreshold(GlobalSDF, hit);
        float4 surfaceColor = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, RWGlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hitPosition, -probeRayDirection, surfaceThreshold);

        // Add some bias to the hit distance to prevent self occlusion artifacts in Chebyshev due to Global SDF being very incorrect in small scale
        float biasedDistance = max(hit.HitTime + GlobalSDF.CascadeVoxelSize[hit.HitCascade] * 0.5f, 0);
        radiance = float4(surfaceColor.rgb, biasedDistance);
    }

    // Write into probes trace results (row per probe in the batch, column per ray)
    RWProbesTrace[uint2(rayIndex, batchProbeIndex)] = radiance;

#if DDGI_DEBUG_STATS
    // Update stats (traced rays counter at 0, traced probes counter at 4)
    uint unusedOriginal;
    RWStats.InterlockedAdd(0, 1, unusedOriginal);
    if (rayIndex == 0)
        RWStats.InterlockedAdd(4, 1, unusedOriginal);
#endif
}
|
|
|
|
#endif
|
|
|
|
#if defined(_CS_UpdateProbes)
|
|
|
|
#if DDGI_PROBE_UPDATE_MODE == 0
|
|
// Update irradiance
|
|
#define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_IRRADIANCE
|
|
groupshared float4 CachedProbesTraceRadiance[DDGI_TRACE_RAYS_LIMIT];
|
|
groupshared float OutputInstability[DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION];
|
|
#else
|
|
// Update distance
|
|
#define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_DISTANCE
|
|
groupshared float CachedProbesTraceDistance[DDGI_TRACE_RAYS_LIMIT];
|
|
#endif
|
|
|
|
// Source: https://github.com/turanszkij/WickedEngine
|
|
#define BorderOffsetsSize (4 * DDGI_PROBE_RESOLUTION + 4)
|
|
#if DDGI_PROBE_RESOLUTION == 6
|
|
static const uint4 BorderOffsets[BorderOffsetsSize] = {
|
|
uint4(6, 1, 1, 0),
|
|
uint4(5, 1, 2, 0),
|
|
uint4(4, 1, 3, 0),
|
|
uint4(3, 1, 4, 0),
|
|
uint4(2, 1, 5, 0),
|
|
uint4(1, 1, 6, 0),
|
|
|
|
uint4(6, 6, 1, 7),
|
|
uint4(5, 6, 2, 7),
|
|
uint4(4, 6, 3, 7),
|
|
uint4(3, 6, 4, 7),
|
|
uint4(2, 6, 5, 7),
|
|
uint4(1, 6, 6, 7),
|
|
|
|
uint4(1, 1, 0, 6),
|
|
uint4(1, 2, 0, 5),
|
|
uint4(1, 3, 0, 4),
|
|
uint4(1, 4, 0, 3),
|
|
uint4(1, 5, 0, 2),
|
|
uint4(1, 6, 0, 1),
|
|
|
|
uint4(6, 1, 7, 6),
|
|
uint4(6, 2, 7, 5),
|
|
uint4(6, 3, 7, 4),
|
|
uint4(6, 4, 7, 3),
|
|
uint4(6, 5, 7, 2),
|
|
uint4(6, 6, 7, 1),
|
|
|
|
uint4(1, 1, 7, 7),
|
|
uint4(6, 1, 0, 7),
|
|
uint4(1, 6, 7, 0),
|
|
uint4(6, 6, 0, 0)
|
|
};
|
|
#elif DDGI_PROBE_RESOLUTION == 14
|
|
static const uint4 BorderOffsets[BorderOffsetsSize] = {
|
|
uint4(14, 1, 1, 0),
|
|
uint4(13, 1, 2, 0),
|
|
uint4(12, 1, 3, 0),
|
|
uint4(11, 1, 4, 0),
|
|
uint4(10, 1, 5, 0),
|
|
uint4(9, 1, 6, 0),
|
|
uint4(8, 1, 7, 0),
|
|
uint4(7, 1, 8, 0),
|
|
uint4(6, 1, 9, 0),
|
|
uint4(5, 1, 10, 0),
|
|
uint4(4, 1, 11, 0),
|
|
uint4(3, 1, 12, 0),
|
|
uint4(2, 1, 13, 0),
|
|
uint4(1, 1, 14, 0),
|
|
|
|
uint4(14, 14, 1, 15),
|
|
uint4(13, 14, 2, 15),
|
|
uint4(12, 14, 3, 15),
|
|
uint4(11, 14, 4, 15),
|
|
uint4(10, 14, 5, 15),
|
|
uint4(9, 14, 6, 15),
|
|
uint4(8, 14, 7, 15),
|
|
uint4(7, 14, 8, 15),
|
|
uint4(6, 14, 9, 15),
|
|
uint4(5, 14, 10, 15),
|
|
uint4(4, 14, 11, 15),
|
|
uint4(3, 14, 12, 15),
|
|
uint4(2, 14, 13, 15),
|
|
uint4(1, 14, 14, 15),
|
|
|
|
uint4(1, 14, 0, 1),
|
|
uint4(1, 13, 0, 2),
|
|
uint4(1, 12, 0, 3),
|
|
uint4(1, 11, 0, 4),
|
|
uint4(1, 10, 0, 5),
|
|
uint4(1, 9, 0, 6),
|
|
uint4(1, 8, 0, 7),
|
|
uint4(1, 7, 0, 8),
|
|
uint4(1, 6, 0, 9),
|
|
uint4(1, 5, 0, 10),
|
|
uint4(1, 4, 0, 11),
|
|
uint4(1, 3, 0, 12),
|
|
uint4(1, 2, 0, 13),
|
|
uint4(1, 1, 0, 14),
|
|
|
|
uint4(14, 14, 15, 1),
|
|
uint4(14, 13, 15, 2),
|
|
uint4(14, 12, 15, 3),
|
|
uint4(14, 11, 15, 4),
|
|
uint4(14, 10, 15, 5),
|
|
uint4(14, 9, 15, 6),
|
|
uint4(14, 8, 15, 7),
|
|
uint4(14, 7, 15, 8),
|
|
uint4(14, 6, 15, 9),
|
|
uint4(14, 5, 15, 10),
|
|
uint4(14, 4, 15, 11),
|
|
uint4(14, 3, 15, 12),
|
|
uint4(14, 2, 15, 13),
|
|
uint4(14, 1, 15, 14),
|
|
|
|
uint4(14, 14, 0, 0),
|
|
uint4(1, 14, 15, 0),
|
|
uint4(14, 1, 0, 15),
|
|
uint4(1, 1, 15, 15)
|
|
};
|
|
#else
|
|
#error "Unsupported probe size for border values copy."
|
|
#endif
|
|
|
|
groupshared float3 CachedProbesTraceDirection[DDGI_TRACE_RAYS_LIMIT];
|
|
|
|
RWTexture2D<float4> RWOutput : register(u0);
|
|
#if DDGI_PROBE_UPDATE_MODE == 0
|
|
RWTexture2D<snorm float4> RWProbesData : register(u1);
|
|
#if DDGI_DEBUG_INSTABILITY
|
|
RWTexture2D<float> RWOutputInstability : register(u2);
|
|
#endif
|
|
#else
|
|
Texture2D<snorm float4> ProbesData : register(t0);
|
|
#endif
|
|
Texture2D<float4> ProbesTrace : register(t1);
|
|
ByteAddressBuffer ActiveProbes : register(t2);
|
|
|
|
// Compute shader for updating probes irradiance (DDGI_PROBE_UPDATE_MODE=0) or distance (DDGI_PROBE_UPDATE_MODE=1) texture.
// One thread group per active probe and one thread per probe texel: traced rays are blended into the texel, mixed with the history and the probe border texels are refreshed.
META_CS(true, FEATURE_LEVEL_SM5)
META_PERMUTATION_1(DDGI_PROBE_UPDATE_MODE=0)
META_PERMUTATION_1(DDGI_PROBE_UPDATE_MODE=1)
[numthreads(DDGI_PROBE_RESOLUTION, DDGI_PROBE_RESOLUTION, 1)]
void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex)
{
    // GroupThreadId.xy - coordinates of the probe texel: [0; DDGI_PROBE_RESOLUTION)
    // GroupId.x - index of the thread group which is probe index within a batch: [0; batchSize)
    // GroupIndex - index of the thread within a thread group: [0; DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION)
    uint probeIndex = ActiveProbes.Load((GroupId.x + ProbeIndexOffset + 1) * 4);
    uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex);
    probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords);

    // Load probe data
#if DDGI_PROBE_UPDATE_MODE == 0
    int2 probeDataCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex);
    float4 probeData = RWProbesData[probeDataCoords];
#else
    float4 probeData = LoadDDGIProbeData(DDGI, ProbesData, CascadeIndex, probeIndex);
#endif
    float probeAttention = DecodeDDGIProbeAttention(probeData);
    uint probeState = DecodeDDGIProbeState(probeData);
    uint probeRaysCount = GetProbeRaysCount(DDGI, probeAttention);

#if DDGI_PROBE_UPDATE_MODE == 0
    uint backfacesCount = 0;
    uint backfacesLimit = uint(probeRaysCount * 0.1f);
#else
    float probesSpacing = DDGI.ProbesOriginAndSpacing[CascadeIndex].w;
    float distanceLimit = probesSpacing * 1.5f;
#endif

    // Load trace rays results into shared memory to reuse across whole thread group (raysCount per thread)
    uint raysCount = (uint)(ceil((float)probeRaysCount / (float)(DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION)));
    uint raysStart = GroupIndex * raysCount;
    raysCount = max(min(raysStart + raysCount, probeRaysCount), raysStart) - raysStart; // Clamp the range of this thread to the probe rays count
    for (uint i = 0; i < raysCount; i++)
    {
        uint rayIndex = raysStart + i;
#if DDGI_PROBE_UPDATE_MODE == 0
        CachedProbesTraceRadiance[rayIndex] = ProbesTrace[uint2(rayIndex, GroupId.x)];
#else
        float rayDistance = ProbesTrace[uint2(rayIndex, GroupId.x)].w;
        CachedProbesTraceDistance[rayIndex] = min(abs(rayDistance), distanceLimit);
#endif
        // Same arguments as used during rays tracing so the direction matches the traced ray
        CachedProbesTraceDirection[rayIndex] = GetProbeRayDirection(DDGI, rayIndex, probeRaysCount, probeIndex, probeCoords);
    }
    GroupMemoryBarrierWithGroupSync();

    // Calculate octahedral projection for probe (unwraps spherical projection into a square)
    float2 octahedralCoords = GetOctahedralCoords(GroupThreadId.xy, DDGI_PROBE_RESOLUTION);
    float3 octahedralDirection = GetOctahedralDirection(octahedralCoords);

    // Loop over rays
    float4 result = float4(0, 0, 0, 0);
    LOOP
    for (uint rayIndex = 0; rayIndex < probeRaysCount; rayIndex++)
    {
        float3 rayDirection = CachedProbesTraceDirection[rayIndex];
        float rayWeight = max(dot(octahedralDirection, rayDirection), 0.0f);

#if DDGI_PROBE_UPDATE_MODE == 0
        float4 rayRadiance = CachedProbesTraceRadiance[rayIndex];
#if DDGI_TRACE_NEGATIVE
        if (rayRadiance.w < 0.0f)
        {
            // Count backface hits
            backfacesCount++;

            // Skip further blending after reaching backfaces limit
            if (backfacesCount >= backfacesLimit)
            {
                result = float4(0, 0, 0, 1);
                break;
            }
            continue;
        }
#endif

        // Add radiance (RGB) and weight (A)
        result += float4(rayRadiance.rgb * rayWeight, rayWeight);
#else
        // Increase reaction speed for depth discontinuities
        rayWeight = pow(rayWeight, 10.0f);

        // Add distance (R), distance^2 (G) and weight (A)
        float rayDistance = CachedProbesTraceDistance[rayIndex];
        result += float4(rayDistance * rayWeight, (rayDistance * rayDistance) * rayWeight, 0.0f, rayWeight);
#endif
    }

    // Normalize results
    float epsilon = (float)probeRaysCount * 1e-9f;
    result.rgb *= 1.0f / (2.0f * max(result.a, epsilon));

    // Load current probe value (probe texels are surrounded by 1-texel border)
    uint2 outputCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2) + 1 + GroupThreadId.xy;
    float3 previous = RWOutput[outputCoords].rgb;
    bool wasActivated = probeState == DDGI_PROBE_STATE_ACTIVATED || ResetBlend;
    if (wasActivated)
        previous = result.rgb;

#if DDGI_PROBE_UPDATE_MODE == 0
    // Calculate instability of the irradiance
    // Denominators are clamped: a black previous value would otherwise produce 0/0 (NaN) or x/0 (Inf) that poisons the per-probe average summed below
    const float instabilityEpsilon = 1e-5f;
    float previousLuma = Luminance(previous.rgb);
    float resultLuma = Luminance(result.rgb);
    float instability = abs(previousLuma - resultLuma) / max(previousLuma, instabilityEpsilon); // Percentage change in luminance of irradiance
    instability = max(instability, Max3(abs(result.rgb - previous) / max(previous, instabilityEpsilon))); // Percentage of color delta change of irradiance
    //instability *= saturate(result.a); // Reduce instability in areas with a small ray-coverage
    //instability = pow(instability, 1.2f); // Increase contrast
    instability *= 2.0f; // Make it stronger on scene changes
    //instability = saturate(instability);
    OutputInstability[GroupIndex] = instability;
#if DDGI_DEBUG_INSTABILITY
    RWOutputInstability[outputCoords] = instability;
    //RWOutputInstability[outputCoords] = probeAttention; // Debug test probe attention visualization
#endif
#endif

    // Blend current value with the previous probe data
    float historyWeightFast = DDGI.ProbeHistoryWeight;
    float historyWeightSlow = 0.97f;
#if DDGI_PROBE_UPDATE_MODE == 0
    float3 irradianceDelta = result.rgb - previous;
    float irradianceDeltaMax = Max3(abs(irradianceDelta));
    if (irradianceDeltaMax > 0.5f)
    {
        // Reduce history weight after significant lighting change
        historyWeightFast *= 0.5f;
    }
#endif
    // Probes with high attention use the fast history weight, sleepy ones blend slowly
    float historyWeight = lerp(historyWeightSlow, historyWeightFast, probeAttention * probeAttention * probeAttention);
    //historyWeight = 1.0f; // Debug full-blend
    //historyWeight = 0.0f; // Debug no-blend
    if (wasActivated)
        historyWeight = 0.0f;
#if DDGI_PROBE_UPDATE_MODE == 0
    result *= DDGI.IndirectLightingIntensity;
#if DDGI_SRGB_BLENDING
    result.rgb = pow(max(result.rgb, 0), 1.0f / DDGI.IrradianceGamma);
#endif
    // Disabled tweak (kept for reference): reduce flickering during rapid brightness changes when length(irradianceDelta) > 2
    //result.rgb = previous + (irradianceDelta * 0.25f);
    result = float4(lerp(result.rgb, previous.rgb, historyWeight), 1.0f);
#else
    result = float4(lerp(result.rg, previous.rg, historyWeight), 0.0f, 1.0f);
#endif

    RWOutput[outputCoords] = result;

    // The code below reads OutputInstability (groupshared) and RWOutput/RWOutputInstability texels (UAVs) written by the other threads of this group,
    // so the barrier has to cover both groupshared and device memory (group-only memory barrier does not order the UAV writes)
    AllMemoryBarrierWithGroupSync();
    uint2 baseCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2);

#if DDGI_PROBE_UPDATE_MODE == 0
    // The first thread updates the probe attention based on the instability of all texels
    BRANCH
    if (GroupIndex == 0 && probeState != DDGI_PROBE_STATE_INACTIVE)
    {
        // Calculate instability statistics for a whole probe
        float instabilityAvg = 0;
        for (uint i = 0; i < DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION; i++)
            instabilityAvg += OutputInstability[i];
        instabilityAvg *= 1.0f / float(DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION);
        instabilityAvg = saturate(instabilityAvg);
        instability = instabilityAvg;

        // Calculate probe attention
        float targetAttention = lerp(0.5f, DDGI_PROBE_ATTENTION_MAX, instability); // Use some base level
        if (targetAttention >= probeAttention)
            probeAttention = targetAttention; // Quick jump up
        else
            probeAttention = lerp(probeAttention, targetAttention, 0.2f); // Slow blend down
        if (probeState == DDGI_PROBE_STATE_ACTIVATED)
            probeAttention = DDGI_PROBE_ATTENTION_MAX;

        // Update probe data for the next frame (activated probe becomes a regular active one)
        probeState = DDGI_PROBE_STATE_ACTIVE;
        RWProbesData[probeDataCoords] = EncodeDDGIProbeData(probeData.xyz, probeState, probeAttention);
    }

#if DDGI_DEBUG_INSTABILITY
    // Copy border pixels
    for (uint borderIndex = GroupIndex; borderIndex < BorderOffsetsSize; borderIndex += DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION)
    {
        uint4 borderOffsets = BorderOffsets[borderIndex];
        RWOutputInstability[baseCoords + borderOffsets.zw] = RWOutputInstability[baseCoords + borderOffsets.xy];
    }
#endif
#endif

    // Copy border pixels (xy is the source texel inside the probe, zw is the destination texel on the border)
    for (uint borderIndex = GroupIndex; borderIndex < BorderOffsetsSize; borderIndex += DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION)
    {
        uint4 borderOffsets = BorderOffsets[borderIndex];
        RWOutput[baseCoords + borderOffsets.zw] = RWOutput[baseCoords + borderOffsets.xy];
    }
}
|
|
|
|
#endif
|
|
|
|
#ifdef _PS_IndirectLighting
|
|
|
|
#include "./Flax/GBuffer.hlsl"
|
|
#include "./Flax/Random.hlsl"
|
|
#include "./Flax/LightingCommon.hlsl"
|
|
|
|
Texture2D<snorm float4> ProbesData : register(t4);
|
|
Texture2D<float4> ProbesDistance : register(t5);
|
|
Texture2D<float4> ProbesIrradiance : register(t6);
|
|
|
|
// Pixel shader for drawing indirect lighting in fullscreen (DDGI irradiance applied to the GBuffer diffuse color, alpha stays zero).
META_PS(true, FEATURE_LEVEL_SM5)
META_PERMUTATION_1(DDGI_CASCADE_BLEND_SMOOTH=0)
META_PERMUTATION_1(DDGI_CASCADE_BLEND_SMOOTH=1)
void PS_IndirectLighting(Quad_VS2PS input, out float4 output : SV_Target0)
{
    output = 0;

    // Sample GBuffer and skip pixels using unlit shading model
    GBufferSample gBuffer = SampleGBuffer(GBuffer, input.TexCoord);
    BRANCH
    if (gBuffer.ShadingModel == SHADING_MODEL_UNLIT)
    {
        discard;
        return;
    }

    // Sample irradiance (dither value changes with TemporalTime)
    float ditherValue = RandN2(input.TexCoord + TemporalTime).x;
    float3 irradiance = SampleDDGIIrradiance(DDGI, ProbesData, ProbesDistance, ProbesIrradiance, gBuffer.WorldPos, gBuffer.Normal, 0.2f, ditherValue);

    // Calculate lighting
    float3 diffuseTerm = Diffuse_Lambert(GetDiffuseColor(gBuffer));
    output.rgb = diffuseTerm * irradiance * gBuffer.AO;
}
|
|
|
|
#endif
|