Add support for cascades to DDGI

This commit is contained in:
Wojciech Figat
2022-06-09 08:55:45 +02:00
parent 73d762cf0c
commit 6a74ebd62e
14 changed files with 395 additions and 234 deletions

Binary file not shown.

BIN
Content/Shaders/GI/DDGI.flax (Stored with Git LFS)

Binary file not shown.

BIN
Content/Shaders/SSR.flax (Stored with Git LFS)

Binary file not shown.

View File

@@ -202,13 +202,13 @@ DECLARE_SCRIPTING_TYPE_NO_SPAWN(AmbientOcclusionSettings);
/// <summary>
/// Ambient occlusion intensity.
/// </summary>
API_FIELD(Attributes="DefaultValue(0.8f), Limit(0, 5.0f, 0.01f), EditorOrder(1), PostProcessSetting((int)AmbientOcclusionSettingsOverride.Intensity)")
API_FIELD(Attributes="DefaultValue(0.8f), Limit(0, 10.0f, 0.01f), EditorOrder(1), PostProcessSetting((int)AmbientOcclusionSettingsOverride.Intensity)")
float Intensity = 0.8f;
/// <summary>
/// Ambient occlusion power.
/// </summary>
API_FIELD(Attributes="DefaultValue(0.75f), Limit(0, 4.0f, 0.01f), EditorOrder(2), PostProcessSetting((int)AmbientOcclusionSettingsOverride.Power)")
API_FIELD(Attributes="DefaultValue(0.75f), Limit(0, 10.0f, 0.01f), EditorOrder(2), PostProcessSetting((int)AmbientOcclusionSettingsOverride.Power)")
float Power = 0.75f;
/// <summary>

View File

@@ -12,6 +12,7 @@
#include "Engine/Engine/Engine.h"
#include "Engine/Content/Content.h"
#include "Engine/Debug/DebugDraw.h"
#include "Engine/Engine/Time.h"
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Graphics/RenderTask.h"
#include "Engine/Graphics/RenderBuffers.h"
@@ -43,35 +44,51 @@ PACK_STRUCT(struct Data0
GlobalSignDistanceFieldPass::ConstantsData GlobalSDF;
GlobalSurfaceAtlasPass::ConstantsData GlobalSurfaceAtlas;
GBufferData GBuffer;
Vector2 Padding0;
float ResetBlend;
float TemporalTime;
float IndirectLightingIntensity;
float Padding0;
});
// Small constants block uploaded through the pass's second constant buffer (_cb1, bound at slot 1)
// right before each per-cascade dispatch, telling the shader which DDGI cascade to process.
PACK_STRUCT(struct Data1
{
// Unused filler placed ahead of CascadeIndex.
// NOTE(review): presumably pads the block to a 16-byte constant buffer size and must mirror the shader-side declaration - confirm.
Vector3 Padding1;
// Index of the cascade handled by the current dispatch.
uint32 CascadeIndex; // TODO: use push constants on Vulkan or root signature data on DX12 to reduce overhead of changing single DWORD
});
class DDGICustomBuffer : public RenderBuffers::CustomBuffer
{
public:
struct
{
Vector3 ProbesOrigin;
float ProbesSpacing = 0.0f;
Int3 ProbeScrollOffsets;
Int3 ProbeScrollDirections;
bool ProbeScrollClear[3];
void Clear()
{
ProbesOrigin = Vector3::Zero;
ProbeScrollOffsets = Int3::Zero;
ProbeScrollDirections = Int3::Zero;
ProbeScrollClear[0] = false;
ProbeScrollClear[1] = false;
ProbeScrollClear[2] = false;
}
} Cascades[4];
int32 CascadesCount = 0;
int32 ProbeRaysCount = 0;
float ProbesSpacing = 0.0f;
Int3 ProbeCounts = Int3::Zero;
Vector3 ProbesOrigin;
Int3 ProbeScrollOffsets;
Int3 ProbeScrollDirections;
bool ProbeScrollClear[3];
GPUTexture* ProbesTrace = nullptr; // Probes ray tracing: (RGB: hit radiance, A: hit distance)
GPUTexture* ProbesState = nullptr; // Probes state: (RGB: world-space offset, A: state)
GPUTexture* ProbesIrradiance = nullptr; // Probes irradiance (RGB: sRGB color)
GPUTexture* ProbesDistance = nullptr; // Probes distance (R: mean distance, G: mean distance^2)
DynamicDiffuseGlobalIlluminationPass::BindingData Result;
FORCE_INLINE void Clear()
FORCE_INLINE void Release()
{
ProbesOrigin = Vector3::Zero;
ProbeScrollOffsets = Int3::Zero;
ProbeScrollDirections = Int3::Zero;
ProbeScrollClear[0] = false;
ProbeScrollClear[1] = false;
ProbeScrollClear[2] = false;
RenderTargetPool::Release(ProbesTrace);
RenderTargetPool::Release(ProbesState);
RenderTargetPool::Release(ProbesIrradiance);
@@ -80,7 +97,7 @@ public:
~DDGICustomBuffer()
{
Clear();
Release();
}
};
@@ -150,7 +167,8 @@ bool DynamicDiffuseGlobalIlluminationPass::setupResources()
// Initialize resources
const auto shader = _shader->GetShader();
_cb0 = shader->GetCB(0);
if (!_cb0)
_cb1 = shader->GetCB(1);
if (!_cb0 || !_cb1)
return true;
_csClassify = shader->GetCS("CS_Classify");
_csTraceRays = shader->GetCS("CS_TraceRays");
@@ -199,6 +217,7 @@ void DynamicDiffuseGlobalIlluminationPass::Dispose()
// Cleanup
_cb0 = nullptr;
_cb1 = nullptr;
_csTraceRays = nullptr;
_shader = nullptr;
SAFE_DELETE_GPU_RESOURCE(_psIndirectLighting);
@@ -250,51 +269,77 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
// TODO: configurable via postFx settings (maybe use Global SDF distance?)
const float indirectLightingIntensity = 1.0f;
const float probeHistoryWeight = 0.8f;
const Vector3 giDistance(2000, 2000, 2000); // GI distance around the view (in each direction)
const float giResolution = 100.0f; // GI probes placement spacing
const Int3 probesCounts(Vector3::Ceil(giDistance / giResolution));
const Vector3 probesDistance = Vector3(probesCounts) * giResolution;
const int32 cascadesCount = 4; // in range 1-4
// TODO: use GI.Distance as an easier-to-adjust total distance and automatically calculate distanceExtent from it
const float distance = 20000.0f; // GI distance around the view (in each direction)
const float cascadesDistanceScales[] = { 1.0f, 3.0f, 6.0f, 10.0f }; // Scales each cascade further away from the camera origin
const float distanceExtent = distance / cascadesDistanceScales[cascadesCount - 1];
const float verticalRangeScale = 0.8f; // Scales the probes volume size at Y axis (horizontal aspect ratio makes the DDGI use less probes vertically to cover whole screen)
const float probesSpacing = 200.0f; // GI probes placement spacing nearby camera (for closest cascade; gets automatically reduced for further cascades)
const Int3 probesCounts(Vector3::Ceil(Vector3(distanceExtent, distanceExtent * verticalRangeScale, distanceExtent) / probesSpacing));
const int32 probeRaysCount = Math::Min(Math::AlignUp(256, DDGI_TRACE_RAYS_GROUP_SIZE_X), DDGI_TRACE_RAYS_LIMIT); // TODO: make it based on the GI Quality
// Calculate view origin
Vector3 viewOrigin = renderContext.View.Position;
Vector3 viewDirection = renderContext.View.Direction;
const float probesDistanceMax = probesDistance.MaxValue();
const Vector2 viewRayHit = CollisionsHelper::LineHitsBox(viewOrigin, viewOrigin + viewDirection * (probesDistanceMax * 2.0f), viewOrigin - probesDistance, viewOrigin + probesDistance);
const float viewOriginOffset = viewRayHit.Y * probesDistanceMax * 0.8f;
viewOrigin += viewDirection * viewOriginOffset;
const float viewOriginSnapping = giResolution;
viewOrigin = Vector3::Floor(viewOrigin / viewOriginSnapping) * viewOriginSnapping;
//viewOrigin = Vector3::Zero;
// Initialize cascades
float probesSpacings[4];
Vector3 viewOrigins[4];
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
{
// Each cascade has higher spacing between probes
float cascadeDistanceScale = cascadesDistanceScales[cascadeIndex];
float cascadeProbesSpacing = probesSpacing * cascadeDistanceScale;
probesSpacings[cascadeIndex] = cascadeProbesSpacing;
// Calculate view origin for cascade by shifting it towards the view direction to account for better view frustum coverage
Vector3 viewOrigin = renderContext.View.Position;
Vector3 viewDirection = renderContext.View.Direction;
const Vector3 probesDistance = Vector3(probesCounts) * cascadeProbesSpacing;
const float probesDistanceMax = probesDistance.MaxValue();
const Vector2 viewRayHit = CollisionsHelper::LineHitsBox(viewOrigin, viewOrigin + viewDirection * (probesDistanceMax * 2.0f), viewOrigin - probesDistance, viewOrigin + probesDistance);
const float viewOriginOffset = viewRayHit.Y * probesDistanceMax * 0.6f;
viewOrigin += viewDirection * viewOriginOffset;
const float viewOriginSnapping = cascadeProbesSpacing;
viewOrigin = Vector3::Floor(viewOrigin / viewOriginSnapping) * viewOriginSnapping;
//viewOrigin = Vector3::Zero;
viewOrigins[cascadeIndex] = viewOrigin;
}
// Init buffers
const int32 probesCount = probesCounts.X * probesCounts.Y * probesCounts.Z;
if (probesCount == 0 || indirectLightingIntensity <= ZeroTolerance)
const int32 probesCountCascade = probesCounts.X * probesCounts.Y * probesCounts.Z;
const int32 probesCountTotal = probesCountCascade * cascadesCount;
if (probesCountTotal == 0 || indirectLightingIntensity <= ZeroTolerance)
return true;
int32 probesCountX = probesCounts.X * probesCounts.Y;
int32 probesCountY = probesCounts.Z;
int32 probesCountCascadeX = probesCounts.X * probesCounts.Y;
int32 probesCountCascadeY = probesCounts.Z;
int32 probesCountTotalX = probesCountCascadeX;
int32 probesCountTotalY = probesCountCascadeY * cascadesCount;
bool clear = false;
if (Math::NotNearEqual(ddgiData.ProbesSpacing, giResolution) || ddgiData.ProbeCounts != probesCounts || ddgiData.ProbeRaysCount != probeRaysCount)
if (ddgiData.CascadesCount != cascadesCount || Math::NotNearEqual(ddgiData.Cascades[0].ProbesSpacing, probesSpacing) || ddgiData.ProbeCounts != probesCounts || ddgiData.ProbeRaysCount != probeRaysCount)
{
PROFILE_CPU_NAMED("Init");
ddgiData.Clear();
ddgiData.Release();
ddgiData.CascadesCount = cascadesCount;
ddgiData.ProbeRaysCount = probeRaysCount;
ddgiData.ProbesSpacing = giResolution;
ddgiData.ProbeCounts = probesCounts;
ddgiData.ProbesOrigin = viewOrigin;
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
{
auto& cascade = ddgiData.Cascades[cascadeIndex];
cascade.Clear();
cascade.ProbesSpacing = probesSpacings[cascadeIndex];
cascade.ProbesOrigin = viewOrigins[cascadeIndex];
}
// Allocate probes textures
uint64 memUsage = 0;
auto desc = GPUTextureDescription::New2D(probesCountX, probesCountY, PixelFormat::Unknown);
auto desc = GPUTextureDescription::New2D(probesCountTotalX, probesCountTotalY, PixelFormat::Unknown);
// TODO rethink probes data placement in memory -> what if we get [50x50x30] resolution? That's 75000 probes! Use sparse storage with active-only probes
#define INIT_TEXTURE(texture, format, width, height) desc.Format = format; desc.Width = width; desc.Height = height; ddgiData.texture = RenderTargetPool::Get(desc); if (!ddgiData.texture) return true; memUsage += ddgiData.texture->GetMemoryUsage()
desc.Flags = GPUTextureFlags::ShaderResource | GPUTextureFlags::UnorderedAccess;
INIT_TEXTURE(ProbesTrace, PixelFormat::R16G16B16A16_Float, probeRaysCount, probesCount);
INIT_TEXTURE(ProbesState, PixelFormat::R16G16B16A16_Float, probesCountX, probesCountY); // TODO: optimize to a RGBA32 (pos offset can be normalized to [0-0.5] range of ProbesSpacing and packed with state flag)
INIT_TEXTURE(ProbesIrradiance, PixelFormat::R11G11B10_Float, probesCountX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), probesCountY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2));
INIT_TEXTURE(ProbesDistance, PixelFormat::R16G16_Float, probesCountX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), probesCountY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2));
INIT_TEXTURE(ProbesTrace, PixelFormat::R16G16B16A16_Float, probeRaysCount, probesCountTotal); // TODO: limit to 4k probes for a single batch to trace
INIT_TEXTURE(ProbesState, PixelFormat::R16G16B16A16_Float, probesCountTotalX, probesCountTotalY); // TODO: optimize to a RGBA32 (pos offset can be normalized to [0-0.5] range of ProbesSpacing and packed with state flag)
INIT_TEXTURE(ProbesIrradiance, PixelFormat::R11G11B10_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2));
INIT_TEXTURE(ProbesDistance, PixelFormat::R16G16_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2));
#undef INIT_TEXTURE
LOG(Info, "Dynamic Diffuse Global Illumination memory usage: {0} MB, probes: {1}", memUsage / 1024 / 1024, probesCount);
LOG(Info, "Dynamic Diffuse Global Illumination memory usage: {0} MB, probes: {1}", memUsage / 1024 / 1024, probesCountTotal);
clear = true;
}
#if USE_EDITOR
@@ -309,46 +354,62 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
context->ClearUA(ddgiData.ProbesDistance, Vector4::Zero);
}
// Compute scrolling (probes are placed around camera but are scrolling to increase stability during movement)
// Calculate which cascades should be updated this frame
//const uint64 cascadeFrequencies[] = { 1, 2, 3, 5 };
// TODO: prevent updating 2 cascades at once on Low quality
const uint64 cascadeFrequencies[] = { 1, 1, 1, 1 };
bool cascadeSkipUpdate[4];
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
{
cascadeSkipUpdate[cascadeIndex] = !clear && (currentFrame % cascadeFrequencies[cascadeIndex]) != 0;
}
// Compute scrolling (probes are placed around camera but are scrolling to increase stability during movement)
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
{
if (cascadeSkipUpdate[cascadeIndex])
continue;
auto& cascade = ddgiData.Cascades[cascadeIndex];
// Reset the volume origin and scroll offsets for each axis
for (int32 axis = 0; axis < 3; axis++)
{
if (ddgiData.ProbeScrollOffsets.Raw[axis] != 0 && (ddgiData.ProbeScrollOffsets.Raw[axis] % ddgiData.ProbeCounts.Raw[axis] == 0))
if (cascade.ProbeScrollOffsets.Raw[axis] != 0 && (cascade.ProbeScrollOffsets.Raw[axis] % ddgiData.ProbeCounts.Raw[axis] == 0))
{
ddgiData.ProbesOrigin.Raw[axis] += (float)ddgiData.ProbeCounts.Raw[axis] * ddgiData.ProbesSpacing * (float)ddgiData.ProbeScrollDirections.Raw[axis];
ddgiData.ProbeScrollOffsets.Raw[axis] = 0;
cascade.ProbesOrigin.Raw[axis] += (float)ddgiData.ProbeCounts.Raw[axis] * cascade.ProbesSpacing * (float)cascade.ProbeScrollDirections.Raw[axis];
cascade.ProbeScrollOffsets.Raw[axis] = 0;
}
}
// Calculate the count of grid cells between the view origin and the scroll anchor
const Vector3 volumeOrigin = ddgiData.ProbesOrigin + Vector3(ddgiData.ProbeScrollOffsets) * ddgiData.ProbesSpacing;
const Vector3 translation = viewOrigin - volumeOrigin;
const Vector3 volumeOrigin = cascade.ProbesOrigin + Vector3(cascade.ProbeScrollOffsets) * cascade.ProbesSpacing;
const Vector3 translation = viewOrigins[cascadeIndex] - volumeOrigin;
for (int32 axis = 0; axis < 3; axis++)
{
const float value = translation.Raw[axis] / ddgiData.ProbesSpacing;
const float value = translation.Raw[axis] / cascade.ProbesSpacing;
const int32 scroll = value >= 0.0f ? (int32)Math::Floor(value) : (int32)Math::Ceil(value);
ddgiData.ProbeScrollOffsets.Raw[axis] += scroll;
ddgiData.ProbeScrollClear[axis] = scroll != 0;
ddgiData.ProbeScrollDirections.Raw[axis] = translation.Raw[axis] >= 0.0f ? 1 : -1;
cascade.ProbeScrollOffsets.Raw[axis] += scroll;
cascade.ProbeScrollClear[axis] = scroll != 0;
cascade.ProbeScrollDirections.Raw[axis] = translation.Raw[axis] >= 0.0f ? 1 : -1;
}
}
// Upload constants
{
ddgiData.Result.Constants.ProbesOrigin = ddgiData.ProbesOrigin;
ddgiData.Result.Constants.ProbesSpacing = ddgiData.ProbesSpacing;
ddgiData.Result.Constants.CascadesCount = cascadesCount;
ddgiData.Result.Constants.ProbesCounts[0] = probesCounts.X;
ddgiData.Result.Constants.ProbesCounts[1] = probesCounts.Y;
ddgiData.Result.Constants.ProbesCounts[2] = probesCounts.Z;
ddgiData.Result.Constants.ProbesScrollOffsets = ddgiData.ProbeScrollOffsets;
ddgiData.Result.Constants.ProbeScrollDirections = ddgiData.ProbeScrollDirections;
ddgiData.Result.Constants.ProbeScrollClear[0] = ddgiData.ProbeScrollClear[0] != 0;
ddgiData.Result.Constants.ProbeScrollClear[1] = ddgiData.ProbeScrollClear[1] != 0;
ddgiData.Result.Constants.ProbeScrollClear[2] = ddgiData.ProbeScrollClear[2] != 0;
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
{
auto& cascade = ddgiData.Cascades[cascadeIndex];
int32 probeScrollClear = cascade.ProbeScrollClear[0] + cascade.ProbeScrollClear[1] * 2 + cascade.ProbeScrollClear[2] * 4; // Pack clear flags into bits
ddgiData.Result.Constants.ProbesOriginAndSpacing[cascadeIndex] = Vector4(cascade.ProbesOrigin, cascade.ProbesSpacing);
ddgiData.Result.Constants.ProbesScrollOffsets[cascadeIndex] = Int4(cascade.ProbeScrollOffsets, probeScrollClear);
ddgiData.Result.Constants.ProbeScrollDirections[cascadeIndex] = Int4(cascade.ProbeScrollDirections, 0);
}
ddgiData.Result.Constants.RayMaxDistance = 10000.0f; // TODO: adjust to match perf/quality ratio (make it based on Global SDF and Global Surface Atlas distance)
ddgiData.Result.Constants.ViewDir = viewDirection;
ddgiData.Result.Constants.ViewDir = renderContext.View.Direction;
ddgiData.Result.Constants.RaysCount = probeRaysCount;
ddgiData.Result.Constants.ProbeHistoryWeight = probeHistoryWeight;
ddgiData.Result.Constants.IrradianceGamma = 5.0f;
@@ -368,6 +429,18 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
data.GlobalSDF = bindingDataSDF.Constants;
data.GlobalSurfaceAtlas = bindingDataSurfaceAtlas.Constants;
data.ResetBlend = clear ? 1.0f : 0.0f;
if (renderContext.List->Settings.AntiAliasing.Mode == AntialiasingMode::TemporalAntialiasing)
{
// Use temporal offset in the dithering factor (gets cleaned out by TAA)
const float time = Time::Draw.UnscaledTime.GetTotalSeconds();
const float scale = 10;
const float integral = roundf(time / scale) * scale;
data.TemporalTime = time - integral;
}
else
{
data.TemporalTime = 0.0f;
}
data.IndirectLightingIntensity = indirectLightingIntensity;
GBufferPass::SetInputs(renderContext.View, data.GBuffer);
context->UpdateCB(_cb0, &data);
@@ -377,72 +450,117 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
// Classify probes (activation/deactivation and relocation)
{
PROFILE_GPU_CPU("Probes Classification");
uint32 threadGroups = Math::DivideAndRoundUp(probesCount, DDGI_PROBE_CLASSIFY_GROUP_SIZE);
uint32 threadGroups = Math::DivideAndRoundUp(probesCountCascade, DDGI_PROBE_CLASSIFY_GROUP_SIZE);
for (int32 i = 0; i < 4; i++)
{
context->BindSR(i, bindingDataSDF.Cascades[i]->ViewVolume());
}
context->BindUA(0, ddgiData.Result.ProbesState);
context->Dispatch(_csClassify, threadGroups, 1, 1);
context->ResetUA();
}
// Trace rays from probes
{
PROFILE_GPU_CPU("Trace Rays");
// Global SDF with Global Surface Atlas software raytracing (X - per probe ray, Y - per probe)
ASSERT_LOW_LAYER((probeRaysCount % DDGI_TRACE_RAYS_GROUP_SIZE_X) == 0);
for (int32 i = 0; i < 4; i++)
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
{
context->BindSR(i, bindingDataSDF.Cascades[i]->ViewVolume());
context->BindSR(i + 4, bindingDataSDF.CascadeMips[i]->ViewVolume());
if (cascadeSkipUpdate[cascadeIndex])
continue;
Data1 data;
data.CascadeIndex = cascadeIndex;
context->UpdateCB(_cb1, &data);
context->BindCB(1, _cb1);
context->Dispatch(_csClassify, threadGroups, 1, 1);
}
context->BindSR(8, bindingDataSurfaceAtlas.Chunks ? bindingDataSurfaceAtlas.Chunks->View() : nullptr);
context->BindSR(9, bindingDataSurfaceAtlas.CulledObjects ? bindingDataSurfaceAtlas.CulledObjects->View() : nullptr);
context->BindSR(10, bindingDataSurfaceAtlas.AtlasDepth->View());
context->BindSR(11, bindingDataSurfaceAtlas.AtlasLighting->View());
context->BindSR(12, ddgiData.Result.ProbesState);
context->BindSR(13, skybox);
context->BindUA(0, ddgiData.ProbesTrace->View());
context->Dispatch(_csTraceRays, probeRaysCount / DDGI_TRACE_RAYS_GROUP_SIZE_X, probesCount, 1);
context->ResetUA();
context->ResetSR();
#if 0
// Probes trace debug preview
context->SetViewportAndScissors(renderContext.View.ScreenSize.X, renderContext.View.ScreenSize.Y);
context->SetRenderTarget(lightBuffer);
context->Draw(ddgiData.ProbesTrace);
return false;
#endif
}
// Update probes
{
PROFILE_GPU_CPU("Update Probes");
context->BindSR(0, ddgiData.Result.ProbesState);
context->BindSR(1, ddgiData.ProbesTrace->View());
PROFILE_GPU_CPU("Probes Update");
bool anyDirty = false;
uint32 threadGroupsX, threadGroupsY;
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
{
if (cascadeSkipUpdate[cascadeIndex])
continue;
anyDirty = true;
Data1 data;
data.CascadeIndex = cascadeIndex;
context->UpdateCB(_cb1, &data);
context->BindCB(1, _cb1);
// Update irradiance
context->BindUA(0, ddgiData.Result.ProbesIrradiance);
context->Dispatch(_csUpdateProbesIrradiance, probesCountX, probesCountY, 1);
uint32 threadGroupsX = Math::DivideAndRoundUp(probesCountX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
uint32 threadGroupsY = Math::DivideAndRoundUp(probesCountY, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
context->Dispatch(_csUpdateBordersIrradianceRow, threadGroupsX, threadGroupsY, 1);
threadGroupsX = Math::DivideAndRoundUp(probesCountX, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
threadGroupsY = Math::DivideAndRoundUp(probesCountY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
context->Dispatch(_csUpdateBordersIrradianceCollumn, threadGroupsX, threadGroupsY, 1);
// TODO: run probes tracing+update in 4k batches
// Update distance
context->BindUA(0, ddgiData.Result.ProbesDistance);
context->Dispatch(_csUpdateProbesDistance, probesCountX, probesCountY, 1);
threadGroupsX = Math::DivideAndRoundUp(probesCountX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
threadGroupsY = Math::DivideAndRoundUp(probesCountY, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
context->Dispatch(_csUpdateBordersDistanceRow, threadGroupsX, threadGroupsY, 1);
threadGroupsX = Math::DivideAndRoundUp(probesCountX, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
threadGroupsY = Math::DivideAndRoundUp(probesCountY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
context->Dispatch(_csUpdateBordersDistanceCollumn, threadGroupsX, threadGroupsY, 1);
// Trace rays from probes
{
PROFILE_GPU_CPU("Trace Rays");
// Global SDF with Global Surface Atlas software raytracing (thread X - per probe ray, thread Y - per probe)
ASSERT_LOW_LAYER((probeRaysCount % DDGI_TRACE_RAYS_GROUP_SIZE_X) == 0);
for (int32 i = 0; i < 4; i++)
{
context->BindSR(i, bindingDataSDF.Cascades[i]->ViewVolume());
context->BindSR(i + 4, bindingDataSDF.CascadeMips[i]->ViewVolume());
}
context->BindSR(8, bindingDataSurfaceAtlas.Chunks ? bindingDataSurfaceAtlas.Chunks->View() : nullptr);
context->BindSR(9, bindingDataSurfaceAtlas.CulledObjects ? bindingDataSurfaceAtlas.CulledObjects->View() : nullptr);
context->BindSR(10, bindingDataSurfaceAtlas.AtlasDepth->View());
context->BindSR(11, bindingDataSurfaceAtlas.AtlasLighting->View());
context->BindSR(12, ddgiData.Result.ProbesState);
context->BindSR(13, skybox);
context->BindUA(0, ddgiData.ProbesTrace->View());
context->Dispatch(_csTraceRays, probeRaysCount / DDGI_TRACE_RAYS_GROUP_SIZE_X, probesCountCascade, 1);
context->ResetUA();
context->ResetSR();
#if 0
// Probes trace debug preview
context->SetViewportAndScissors(renderContext.View.ScreenSize.X, renderContext.View.ScreenSize.Y);
context->SetRenderTarget(lightBuffer);
context->Draw(ddgiData.ProbesTrace);
return false;
#endif
}
context->BindSR(0, ddgiData.Result.ProbesState);
context->BindSR(1, ddgiData.ProbesTrace->View());
// Update probes irradiance texture
{
PROFILE_GPU_CPU("Update Irradiance");
context->BindUA(0, ddgiData.Result.ProbesIrradiance);
context->Dispatch(_csUpdateProbesIrradiance, probesCountCascadeX, probesCountCascadeY, 1);
}
// Update probes distance texture
{
PROFILE_GPU_CPU("Update Distance");
context->BindUA(0, ddgiData.Result.ProbesDistance);
context->Dispatch(_csUpdateProbesDistance, probesCountCascadeX, probesCountCascadeY, 1);
}
}
// Update probes border pixels
if (anyDirty)
{
PROFILE_GPU_CPU("Update Borders");
// Irradiance
context->BindUA(0, ddgiData.Result.ProbesIrradiance);
threadGroupsX = Math::DivideAndRoundUp(probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
threadGroupsY = Math::DivideAndRoundUp(probesCountTotalY, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
context->Dispatch(_csUpdateBordersIrradianceRow, threadGroupsX, threadGroupsY, 1);
threadGroupsX = Math::DivideAndRoundUp(probesCountTotalX, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
threadGroupsY = Math::DivideAndRoundUp(probesCountTotalY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
context->Dispatch(_csUpdateBordersIrradianceCollumn, threadGroupsX, threadGroupsY, 1);
// Distance
context->BindUA(0, ddgiData.Result.ProbesDistance);
threadGroupsX = Math::DivideAndRoundUp(probesCountTotalX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
threadGroupsY = Math::DivideAndRoundUp(probesCountTotalY, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
context->Dispatch(_csUpdateBordersDistanceRow, threadGroupsX, threadGroupsY, 1);
threadGroupsX = Math::DivideAndRoundUp(probesCountTotalX, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
threadGroupsY = Math::DivideAndRoundUp(probesCountTotalY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
context->Dispatch(_csUpdateBordersDistanceCollumn, threadGroupsX, threadGroupsY, 1);
context->ResetUA();
context->ResetSR();
}
}
// Render indirect lighting
@@ -453,7 +571,6 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
// DDGI indirect lighting debug preview
context->Clear(lightBuffer, Color::Transparent);
#endif
context->ResetUA();
context->BindSR(0, renderContext.Buffers->GBuffer0->View());
context->BindSR(1, renderContext.Buffers->GBuffer1->View());
context->BindSR(2, renderContext.Buffers->GBuffer2->View());
@@ -485,7 +602,7 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
Matrix world;
Matrix::Scaling(Vector3(0.2f), world);
const Mesh& debugMesh = _debugModel->LODs[0].Meshes[0];
for (int32 probeIndex = 0; probeIndex < probesCount; probeIndex++)
for (int32 probeIndex = 0; probeIndex < probesCountTotal; probeIndex++)
debugMesh.Draw(debugRenderContext, _debugMaterial, world, StaticFlags::None, true, DrawPass::GBuffer, (float)probeIndex);
debugRenderContext.List->SortDrawCalls(debugRenderContext, false, DrawCallsListType::GBuffer);
context->SetViewportAndScissors(debugRenderContext.View.ScreenSize.X, debugRenderContext.View.ScreenSize.Y);

View File

@@ -3,7 +3,7 @@
#pragma once
#include "../RendererPass.h"
#include "Engine/Core/Math/Int3.h"
#include "Engine/Core/Math/Int4.h"
#include "Engine/Graphics/Textures/GPUTexture.h"
/// <summary>
@@ -15,19 +15,18 @@ public:
// Constant buffer data for DDGI access on a GPU.
PACK_STRUCT(struct ConstantsData
{
Vector3 ProbesOrigin;
float ProbesSpacing;
Vector4 RaysRotation;
Vector4 ProbesOriginAndSpacing[4];
Int4 ProbesScrollOffsets[4];
Int4 ProbeScrollDirections[4];
uint32 ProbesCounts[3];
uint32 CascadesCount;
float IrradianceGamma;
Int3 ProbesScrollOffsets;
float ProbeHistoryWeight;
float RayMaxDistance;
float Padding0;
Vector4 RaysRotation;
Vector3 ViewDir;
uint32 RaysCount;
Int3 ProbeScrollDirections;
float RayMaxDistance;
uint32 ProbeScrollClear[3];
uint32 Padding0;
});
// Binding data for the GPU.
@@ -43,6 +42,7 @@ private:
bool _supported = false;
AssetReference<Shader> _shader;
GPUConstantBuffer* _cb0 = nullptr;
GPUConstantBuffer* _cb1 = nullptr;
GPUShaderProgramCS* _csClassify;
GPUShaderProgramCS* _csTraceRays;
GPUShaderProgramCS* _csUpdateProbesIrradiance;

View File

@@ -346,8 +346,8 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
// TODO: configurable via graphics settings
const int32 resolution = 2048;
const float resolutionInv = 1.0f / resolution;
// TODO: configurable via postFx settings (maybe use Global SDF distance?)
const float distance = 20000;
// TODO: configurable via postFx settings (use GI distance)
const float distance = 20000.0f;
// Initialize buffers
bool noCache = surfaceAtlasData.Resolution != resolution;

View File

@@ -383,8 +383,10 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
const int32 resolution = 256;
const int32 resolutionMip = Math::DivideAndRoundUp(resolution, GLOBAL_SDF_RASTERIZE_MIP_FACTOR);
// TODO: configurable via postFx settings
const float distanceExtent = 2000.0f;
const float cascadesDistances[] = { distanceExtent, distanceExtent * 2.0f, distanceExtent * 4.0f, distanceExtent * 8.0f };
const int32 cascadesCount = 4; // in range 1-4
const float distance = true ? 20000.0f : 16000.0f; // TODO: switch based if using GI, then use GI range
const float cascadesDistanceScales[] = { 1.0f, 2.0f, 4.0f, 8.0f };
const float distanceExtent = distance / cascadesDistanceScales[cascadesCount - 1];
// Initialize buffers
auto desc = GPUTextureDescription::New3D(resolution, resolution, resolution, GLOBAL_SDF_FORMAT, GPUTextureFlags::ShaderResource | GPUTextureFlags::UnorderedAccess, 1);
@@ -449,23 +451,22 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
bool anyDraw = false;
const uint64 cascadeFrequencies[] = { 2, 3, 5, 11 };
//const uint64 cascadeFrequencies[] = { 1, 1, 1, 1 };
for (int32 cascade = 0; cascade < 4; cascade++)
for (int32 cascadeIndex = 0; cascadeIndex < 4; cascadeIndex++)
{
// Reduce frequency of the updates
if (useCache && (Engine::FrameCount % cascadeFrequencies[cascadeIndex]) != 0)
continue;
auto& cascade = sdfData.Cascades[cascadeIndex];
const float distance = cascadesDistances[cascadeIndex];
const float maxDistance = distance * 2;
const float voxelSize = maxDistance / resolution;
const float chunkSize = voxelSize * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE;
const float cascadeDistance = distanceExtent * cascadesDistanceScales[cascadeIndex];
const float cascadeMaxDistance = cascadeDistance * 2;
const float cascadeVoxelSize = cascadeMaxDistance / resolution;
const float cascadeChunkSize = cascadeVoxelSize * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE;
static_assert(GLOBAL_SDF_RASTERIZE_CHUNK_SIZE % GLOBAL_SDF_RASTERIZE_MIP_FACTOR == 0, "Adjust chunk size to match the mip factor scale.");
const Vector3 center = Vector3::Floor(renderContext.View.Position / chunkSize) * chunkSize;
const Vector3 center = Vector3::Floor(renderContext.View.Position / cascadeChunkSize) * cascadeChunkSize;
//const Vector3 center = Vector3::Zero;
BoundingBox cascadeBounds(center - distance, center + distance);
BoundingBox cascadeBounds(center - cascadeDistance, center + cascadeDistance);
// TODO: add scene detail scale factor to PostFx settings (eg. to increase or decrease scene details and quality)
const float minObjectRadius = Math::Max(20.0f, voxelSize * 0.5f); // Skip too small objects for this cascade
const float minObjectRadius = Math::Max(20.0f, cascadeVoxelSize * 0.5f); // Skip too small objects for this cascade
GPUTextureView* cascadeView = cascade.Texture->ViewVolume();
GPUTextureView* cascadeMipView = cascade.Mip->ViewVolume();
@@ -478,18 +479,18 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
}
// Check if cascade center has been moved
if (!(useCache && Vector3::NearEqual(cascade.Position, center, voxelSize)))
if (!(useCache && Vector3::NearEqual(cascade.Position, center, cascadeVoxelSize)))
{
// TODO: optimize for moving camera (copy sdf for cached chunks)
cascade.StaticChunks.Clear();
}
cascade.Position = center;
cascade.VoxelSize = voxelSize;
cascade.VoxelSize = cascadeVoxelSize;
cascade.Bounds = cascadeBounds;
// Draw all objects from all scenes into the cascade
_objectsBufferCount = 0;
_voxelSize = voxelSize;
_voxelSize = cascadeVoxelSize;
_cascadeBounds = cascadeBounds;
_cascadeIndex = cascadeIndex;
_sdfData = &sdfData;
@@ -518,12 +519,12 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
}
ModelsRasterizeData data;
data.CascadeCoordToPosMul = cascadeBounds.GetSize() / resolution;
data.CascadeCoordToPosAdd = cascadeBounds.Minimum + voxelSize * 0.5f;
data.MaxDistance = maxDistance;
data.CascadeCoordToPosAdd = cascadeBounds.Minimum + cascadeVoxelSize * 0.5f;
data.MaxDistance = cascadeMaxDistance;
data.CascadeResolution = resolution;
data.CascadeMipResolution = resolutionMip;
data.CascadeMipFactor = GLOBAL_SDF_RASTERIZE_MIP_FACTOR;
data.CascadeVoxelSize = voxelSize;
data.CascadeVoxelSize = cascadeVoxelSize;
context->BindUA(0, cascadeView);
context->BindCB(1, _cb1);
const int32 chunkDispatchGroups = GLOBAL_SDF_RASTERIZE_CHUNK_SIZE / GLOBAL_SDF_RASTERIZE_GROUP_SIZE;
@@ -728,12 +729,12 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
for (int32 cascadeIndex = 0; cascadeIndex < 4; cascadeIndex++)
{
auto& cascade = sdfData.Cascades[cascadeIndex];
const float distance = cascadesDistances[cascadeIndex];
const float maxDistance = distance * 2;
const float voxelSize = maxDistance / resolution;
const float cascadeDistance = distanceExtent * cascadesDistanceScales[cascadeIndex];
const float cascadeMaxDistance = cascadeDistance * 2;
const float cascadeVoxelSize = cascadeMaxDistance / resolution;
const Vector3 center = cascade.Position;
result.Constants.CascadePosDistance[cascadeIndex] = Vector4(center, distance);
result.Constants.CascadeVoxelSize.Raw[cascadeIndex] = voxelSize;
result.Constants.CascadePosDistance[cascadeIndex] = Vector4(center, cascadeDistance);
result.Constants.CascadeVoxelSize.Raw[cascadeIndex] = cascadeVoxelSize;
result.Cascades[cascadeIndex] = cascade.Texture;
result.CascadeMips[cascadeIndex] = cascade.Mip;
}

View File

@@ -22,19 +22,18 @@
// DDGI data for a constant buffer
// NOTE(review): this listing shows some members twice (e.g. ProbesScrollOffsets, RaysRotation, RayMaxDistance, Padding0) -
// it appears to interleave the single-volume layout with the per-cascade layout; confirm which set is current.
struct DDGIData
{
float3 ProbesOrigin;
float ProbesSpacing;
float4 RaysRotation;
// Per-cascade entry: XYZ is read as the probes grid origin, W as the probes spacing
float4 ProbesOriginAndSpacing[4];
// Per-cascade entry: XYZ is read as the scroll offsets; W is read by CS_UpdateProbes as a bit mask (one bit per axis) of planes to clear
int4 ProbesScrollOffsets[4];
// Per-cascade entry: XYZ is read as the per-axis scroll direction (W is not read by the visible code)
int4 ProbeScrollDirections[4];
// Probes count along each axis (single value used for every cascade)
uint3 ProbesCounts;
// Number of cascades; upper bound of the cascade selection loop and the cascade rows count in the probes atlas
uint CascadesCount;
float IrradianceGamma;
int3 ProbesScrollOffsets;
float ProbeHistoryWeight;
float RayMaxDistance;
float Padding0;
float4 RaysRotation;
float3 ViewDir;
uint RaysCount;
int3 ProbeScrollDirections;
float RayMaxDistance;
uint3 ProbeScrollClear; // TODO: pack into bits
uint Padding0;
};
uint GetDDGIProbeIndex(DDGIData data, uint3 probeCoords)
@@ -62,88 +61,106 @@ uint3 GetDDGIProbeCoords(DDGIData data, uint probeIndex)
return probeCoords;
}
// Maps a linear probe index to the 2D texel coordinates of that probe in the probes data texture.
// Planes of (ProbesCounts.x * ProbesCounts.z) probes are placed next to each other along X,
// and (in the cascaded form) every cascade is shifted by ProbesCounts.z rows along Y.
uint2 GetDDGIProbeTexelCoords(DDGIData data, uint probeIndex)
uint2 GetDDGIProbeTexelCoords(DDGIData data, uint cascadeIndex, uint probeIndex)
{
uint probesPerPlane = data.ProbesCounts.x * data.ProbesCounts.z;
uint planeIndex = probeIndex / probesPerPlane;
uint gridSpaceX = probeIndex % data.ProbesCounts.x;
uint gridSpaceY = probeIndex / data.ProbesCounts.x;
// Each plane occupies its own block of ProbesCounts.x columns
uint x = gridSpaceX + (planeIndex * data.ProbesCounts.x);
uint y = gridSpaceY % data.ProbesCounts.z;
// Cascaded variant: offset the row by the cascade index so cascades do not overlap in the texture
uint y = gridSpaceY % data.ProbesCounts.z + cascadeIndex * data.ProbesCounts.z;
return uint2(x, y);
}
// Converts probe grid coordinates into a linear probe index after applying the scroll offsets
// (of the given cascade in the cascaded form). Coordinates wrap around the grid via modulo ProbesCounts.
uint GetDDGIScrollingProbeIndex(DDGIData data, uint3 probeCoords)
uint GetDDGIScrollingProbeIndex(DDGIData data, uint cascadeIndex, uint3 probeCoords)
{
// Probes are scrolled on edges to stabilize GI when camera moves
// ProbesCounts is added before the modulo - presumably to keep the sum non-negative for negative scroll offsets (TODO confirm offsets range)
return GetDDGIProbeIndex(data, (probeCoords + data.ProbesScrollOffsets + data.ProbesCounts) % data.ProbesCounts);
return GetDDGIProbeIndex(data, (probeCoords + data.ProbesScrollOffsets[cascadeIndex].xyz + data.ProbesCounts) % data.ProbesCounts);
}
// Calculates the world-space position of the probe at the given grid coordinates (without any per-probe offset stored in the probes state texture).
// The grid is centered around the probes origin and shifted by the scroll offsets multiplied by the probes spacing.
float3 GetDDGIProbeWorldPosition(DDGIData data, uint3 probeCoords)
float3 GetDDGIProbeWorldPosition(DDGIData data, uint cascadeIndex, uint3 probeCoords)
{
float3 probePosition = probeCoords * data.ProbesSpacing;
float3 probeGridOffset = (data.ProbesSpacing * (data.ProbesCounts - 1)) * 0.5f;
return data.ProbesOrigin + probePosition - probeGridOffset + (data.ProbesScrollOffsets * data.ProbesSpacing);
// Cascaded variant: origin (XYZ) and spacing (W) are packed per cascade
float3 probesOrigin = data.ProbesOriginAndSpacing[cascadeIndex].xyz;
float probesSpacing = data.ProbesOriginAndSpacing[cascadeIndex].w;
float3 probePosition = probeCoords * probesSpacing;
// Half of the grid size, used to center the grid around the origin
float3 probeGridOffset = (probesSpacing * (data.ProbesCounts - 1)) * 0.5f;
return probesOrigin + probePosition - probeGridOffset + (data.ProbesScrollOffsets[cascadeIndex].xyz * probesSpacing);
}
// Loads probe state (W channel of the probe texel in the probes state texture)
float LoadDDGIProbeState(DDGIData data, Texture2D<float4> probesState, uint probeIndex)
float LoadDDGIProbeState(DDGIData data, Texture2D<float4> probesState, uint cascadeIndex, uint probeIndex)
{
int2 probeDataCoords = GetDDGIProbeTexelCoords(data, probeIndex);
int2 probeDataCoords = GetDDGIProbeTexelCoords(data, cascadeIndex, probeIndex);
float4 probeState = probesState.Load(int3(probeDataCoords, 0));
return probeState.w;
}
// Loads probe world-space position (XYZ) and probe state (W)
// probeIndex selects the texel to load, probeCoords are the grid coordinates used to compute the probe grid position.
float4 LoadDDGIProbePositionAndState(DDGIData data, Texture2D<float4> probesState, uint probeIndex, uint3 probeCoords)
float4 LoadDDGIProbePositionAndState(DDGIData data, Texture2D<float4> probesState, uint cascadeIndex, uint probeIndex, uint3 probeCoords)
{
int2 probeDataCoords = GetDDGIProbeTexelCoords(data, probeIndex);
int2 probeDataCoords = GetDDGIProbeTexelCoords(data, cascadeIndex, probeIndex);
float4 probeState = probesState.Load(int3(probeDataCoords, 0));
// XYZ stored in the texture is an offset that gets added to the probe grid position
probeState.xyz += GetDDGIProbeWorldPosition(data, probeCoords);
probeState.xyz += GetDDGIProbeWorldPosition(data, cascadeIndex, probeCoords);
return probeState;
}
// Calculates texture UVs for sampling probes atlas texture (irradiance or distance)
// octahedralCoords - coordinates within the probe tile (scaled by half of the resolution around the tile center)
// resolution - size (in texels) of the probe tile interior; each tile takes (resolution + 2) texels in the atlas
float2 GetDDGIProbeUV(DDGIData data, uint probeIndex, float2 octahedralCoords, uint resolution)
float2 GetDDGIProbeUV(DDGIData data, uint cascadeIndex, uint probeIndex, float2 octahedralCoords, uint resolution)
{
uint2 coords = GetDDGIProbeTexelCoords(data, probeIndex);
uint2 coords = GetDDGIProbeTexelCoords(data, cascadeIndex, probeIndex);
// Two extra texels per tile - presumably a 1-texel border on each side (TODO confirm against the borders update pass)
float probeTexelSize = resolution + 2.0f;
float textureWidth = probeTexelSize * (data.ProbesCounts.x * data.ProbesCounts.y);
float textureHeight = probeTexelSize * data.ProbesCounts.z;
// Cascaded variant: atlas height covers all cascades placed one after another along Y
float2 textureSize = float2(data.ProbesCounts.x * data.ProbesCounts.y, data.ProbesCounts.z * data.CascadesCount) * probeTexelSize;
// Start from the center of the probe tile (in texels)
float2 uv = float2(coords.x * probeTexelSize, coords.y * probeTexelSize) + (probeTexelSize * 0.5f);
uv += octahedralCoords.xy * (resolution * 0.5f);
// Normalize texel position into 0-1 UV range
uv /= float2(textureWidth, textureHeight);
uv /= textureSize;
return uv;
}
// Samples DDGI probes volume at the given world-space position and returns the irradiance.
float3 SampleDDGIIrradiance(DDGIData data, Texture2D<float4> probesState, Texture2D<float4> probesDistance, Texture2D<float4> probesIrradiance, float3 worldPosition, float3 worldNormal, float bias)
// dither - randomized per-pixel value in range 0-1, used to dither (smooth) the transition between cascades
float3 SampleDDGIIrradiance(DDGIData data, Texture2D<float4> probesState, Texture2D<float4> probesDistance, Texture2D<float4> probesIrradiance, float3 worldPosition, float3 worldNormal, float bias, float dither = 0.0f)
{
float4 irradiance = float4(0, 0, 0, 0);
float3 probesOrigin = data.ProbesScrollOffsets * data.ProbesSpacing + data.ProbesOrigin;
float3 probesExtent = (data.ProbesCounts - 1) * (data.ProbesSpacing * 0.5f);
// Select the highest cascade that contains the sample location
uint cascadeIndex = 0;
for (; cascadeIndex < data.CascadesCount; cascadeIndex++)
{
float probesSpacing = data.ProbesOriginAndSpacing[cascadeIndex].w;
float3 probesOrigin = data.ProbesScrollOffsets[cascadeIndex].xyz * probesSpacing + data.ProbesOriginAndSpacing[cascadeIndex].xyz;
float3 probesExtent = (data.ProbesCounts - 1) * (probesSpacing * 0.5f);
float fadeDistance = probesSpacing * 0.5f;
float cascadeWeight = saturate(Min3(probesExtent - abs(worldPosition - probesOrigin)) / fadeDistance);
if (cascadeWeight > dither) // Use dither to make transition smoother
break;
}
if (cascadeIndex == data.CascadesCount)
return float3(0, 0, 0);
float probesSpacing = data.ProbesOriginAndSpacing[cascadeIndex].w;
float3 probesOrigin = data.ProbesScrollOffsets[cascadeIndex].xyz * probesSpacing + data.ProbesOriginAndSpacing[cascadeIndex].xyz;
float3 probesExtent = (data.ProbesCounts - 1) * (probesSpacing * 0.5f);
// Bias the world-space position to reduce artifacts
float3 surfaceBias = (worldNormal * bias) + (data.ViewDir * (bias * -4.0f));
float3 biasedWorldPosition = worldPosition + surfaceBias;
// Get the grid coordinates of the probe nearest the biased world position
uint3 baseProbeCoords = clamp(uint3((worldPosition - probesOrigin + probesExtent) / data.ProbesSpacing), 0, data.ProbesCounts - 1);
float3 baseProbeWorldPosition = GetDDGIProbeWorldPosition(data, baseProbeCoords);
float3 biasAlpha = saturate((biasedWorldPosition - baseProbeWorldPosition) / data.ProbesSpacing);
uint3 baseProbeCoords = clamp(uint3((worldPosition - probesOrigin + probesExtent) / probesSpacing), 0, data.ProbesCounts - 1);
float3 baseProbeWorldPosition = GetDDGIProbeWorldPosition(data, cascadeIndex, baseProbeCoords);
float3 biasAlpha = saturate((biasedWorldPosition - baseProbeWorldPosition) / probesSpacing);
// Loop over the closest probes to accumulate their contributions
float4 irradiance = float4(0, 0, 0, 0);
for (uint i = 0; i < 8; i++)
{
uint3 probeCoordsOffset = uint3(i, i >> 1, i >> 2) & 1;
uint3 probeCoords = clamp(baseProbeCoords + probeCoordsOffset, 0, data.ProbesCounts - 1);
uint probeIndex = GetDDGIScrollingProbeIndex(data, probeCoords);
uint probeIndex = GetDDGIScrollingProbeIndex(data, cascadeIndex, probeCoords);
// Load probe position and state
float4 probeState = probesState.Load(int3(GetDDGIProbeTexelCoords(data, probeIndex), 0));
float4 probeState = probesState.Load(int3(GetDDGIProbeTexelCoords(data, cascadeIndex, probeIndex), 0));
if (probeState.w == DDGI_PROBE_STATE_INACTIVE)
continue;
float3 probeBasePosition = baseProbeWorldPosition + ((probeCoords - baseProbeCoords) * data.ProbesSpacing);
float3 probeBasePosition = baseProbeWorldPosition + ((probeCoords - baseProbeCoords) * probesSpacing);
float3 probePosition = probeBasePosition + probeState.xyz;
// Calculate the distance and direction from the (biased and non-biased) shading point and the probe
@@ -156,7 +173,7 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D<float4> probesState, Textur
// Sample distance texture
float2 octahedralCoords = GetOctahedralCoords(-biasedPosToProbe);
float2 uv = GetDDGIProbeUV(data, probeIndex, octahedralCoords, DDGI_PROBE_RESOLUTION_DISTANCE);
float2 uv = GetDDGIProbeUV(data, cascadeIndex, probeIndex, octahedralCoords, DDGI_PROBE_RESOLUTION_DISTANCE);
float2 probeDistance = probesDistance.SampleLevel(SamplerLinearClamp, uv, 0).rg * 2.0f;
float probeDistanceMean = probeDistance.x;
float probeDistanceMean2 = probeDistance.y;
@@ -183,7 +200,7 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D<float4> probesState, Textur
// Sample irradiance texture
octahedralCoords = GetOctahedralCoords(worldNormal);
uv = GetDDGIProbeUV(data, probeIndex, octahedralCoords, DDGI_PROBE_RESOLUTION_IRRADIANCE);
uv = GetDDGIProbeUV(data, cascadeIndex, probeIndex, octahedralCoords, DDGI_PROBE_RESOLUTION_IRRADIANCE);
float3 probeIrradiance = probesIrradiance.SampleLevel(SamplerLinearClamp, uv, 0).rgb;
#if DDGI_SRGB_BLENDING
probeIrradiance = pow(probeIrradiance, data.IrradianceGamma * 0.5f);
@@ -196,6 +213,18 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D<float4> probesState, Textur
irradiance += float4(probeIrradiance * weight, weight);
}
#if 0
// Debug DDGI cascades with colors
if (cascadeIndex == 0)
irradiance = float4(1, 0, 0, 1);
else if (cascadeIndex == 1)
irradiance = float4(0, 1, 0, 1);
else if (cascadeIndex == 2)
irradiance = float4(0, 0, 1, 1);
else
irradiance = float4(1, 0, 1, 1);
#endif
if (irradiance.a > 0.0f)
{
// Normalize irradiance
@@ -204,10 +233,6 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D<float4> probesState, Textur
irradiance.rgb *= irradiance.rgb;
#endif
irradiance.rgb *= 2.0f * PI;
// Fade-out outside the probes volume
float fadeDistance = data.ProbesSpacing * 0.5f;
irradiance.rgb *= saturate(Min3(probesExtent - abs(worldPosition - probesOrigin)) / fadeDistance);
}
return irradiance.rgb;
}

View File

@@ -22,14 +22,20 @@
#define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8
#define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32
// Constant buffer (slot 0) with the data shared by the DDGI shader passes.
// NOTE(review): the C++ Data0 structure ends with a single 'float Padding0' after IndirectLightingIntensity while this buffer declares 'float2 Padding0' there - confirm both layouts have the same size.
META_CB_BEGIN(0, Data)
META_CB_BEGIN(0, Data0)
DDGIData DDGI;
GlobalSDFData GlobalSDF;
GlobalSurfaceAtlasData GlobalSurfaceAtlas;
GBufferData GBuffer;
float2 Padding0;
float ResetBlend;
// Added to the pixel texture coordinates to vary the dither noise used by PS_IndirectLighting
float TemporalTime;
float IndirectLightingIntensity;
float2 Padding0;
META_CB_END
// Constant buffer (slot 1) with the index of the cascade being processed (read by CS_Classify, CS_TraceRays and CS_UpdateProbes).
META_CB_BEGIN(1, Data1)
// Padding placed before the index so the buffer takes a full 16 bytes
float3 Padding1;
uint CascadeIndex;
META_CB_END
// Calculates the evenly distributed direction ray on a sphere (Spherical Fibonacci lattice)
@@ -66,22 +72,24 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
if (probeIndex >= probesCount)
return;
uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex);
probeIndex = GetDDGIScrollingProbeIndex(DDGI, probeCoords);
int2 probeDataCoords = GetDDGIProbeTexelCoords(DDGI, probeIndex);
probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords);
int2 probeDataCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex);
float probesSpacing = DDGI.ProbesOriginAndSpacing[CascadeIndex].w;
// Load probe state and position
float4 probeState = RWProbesState[probeDataCoords];
float3 probeBasePosition = GetDDGIProbeWorldPosition(DDGI, probeCoords);
float3 probeBasePosition = GetDDGIProbeWorldPosition(DDGI, CascadeIndex, probeCoords);
float3 probePosition = probeBasePosition + probeState.xyz;
probeState.w = DDGI_PROBE_STATE_ACTIVE;
// Use Global SDF to quickly get distance and direction to the scene geometry
float sdf;
float3 sdfNormal = normalize(SampleGlobalSDFGradient(GlobalSDF, GlobalSDFTex, probePosition.xyz, sdf));
float threshold = GlobalSDF.CascadeVoxelSize[0] * 0.5f;
float distanceLimit = length(DDGI.ProbesSpacing) * 2.0f;
float relocateLimit = length(DDGI.ProbesSpacing) * 0.6f;
if (abs(sdf) > distanceLimit) // Probe is too far from geometry
float sdfDst = abs(sdf);
float threshold = GlobalSDF.CascadeVoxelSize[CascadeIndex] * 0.5f;
float distanceLimit = length(probesSpacing) * 2.0f;
float relocateLimit = length(probesSpacing) * 0.6f;
if (sdfDst > distanceLimit) // Probe is too far from geometry
{
// Disable it
probeState = float4(0, 0, 0, DDGI_PROBE_STATE_INACTIVE);
@@ -90,9 +98,9 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
{
if (sdf < threshold) // Probe is inside geometry
{
if (abs(sdf) < relocateLimit)
if (sdfDst < relocateLimit)
{
float3 offsetToAdd = sdfNormal * sdf;
float3 offsetToAdd = sdfNormal * (sdf + threshold);
if (distance(probeState.xyz, offsetToAdd) < relocateLimit)
{
// Relocate it
@@ -105,7 +113,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
probeState.xyz = float3(0, 0, 0);
}
}
else if (sdf > threshold * 2.0f) // Probe is far enough any geometry
else if (sdf > threshold * 4.0f) // Probe is far enough any geometry
{
// Reset relocation
probeState.xyz = float3(0, 0, 0);
@@ -146,10 +154,10 @@ void CS_TraceRays(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_Dispat
uint rayIndex = DispatchThreadId.x;
uint probeIndex = DispatchThreadId.y;
uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex);
probeIndex = GetDDGIScrollingProbeIndex(DDGI, probeCoords);
probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords);
// Load current probe state and position
float4 probePositionAndState = LoadDDGIProbePositionAndState(DDGI, ProbesState, probeIndex, probeCoords);
float4 probePositionAndState = LoadDDGIProbePositionAndState(DDGI, ProbesState, CascadeIndex, probeIndex, probeCoords);
if (probePositionAndState.w == DDGI_PROBE_STATE_INACTIVE)
return; // Skip disabled probes
float3 probeRayDirection = GetProbeRayDirection(DDGI, rayIndex);
@@ -222,16 +230,20 @@ void CS_UpdateProbes(uint3 DispatchThreadId : SV_DispatchThreadID, uint GroupInd
uint probesCount = DDGI.ProbesCounts.x * DDGI.ProbesCounts.y * DDGI.ProbesCounts.z;
bool skip = probeIndex >= probesCount;
uint2 outputCoords = uint2(1, 1) + DispatchThreadId.xy + (DispatchThreadId.xy / DDGI_PROBE_RESOLUTION) * 2;
outputCoords.y += CascadeIndex * DDGI.ProbesCounts.z * (DDGI_PROBE_RESOLUTION + 2);
// Clear probes that have been scrolled to new positions (blending with the current irradiance will happen in the next frame)
uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex);
int3 probesScrollOffsets = DDGI.ProbesScrollOffsets[CascadeIndex].xyz;
int probeScrollClear = DDGI.ProbesScrollOffsets[CascadeIndex].w;
int3 probeScrollDirections = DDGI.ProbeScrollDirections[CascadeIndex].xyz;
UNROLL
for (uint planeIndex = 0; planeIndex < 3; planeIndex++)
{
if (DDGI.ProbeScrollClear[planeIndex])
if (probeScrollClear & (1 << planeIndex) && !skip)
{
int scrollOffset = DDGI.ProbesScrollOffsets[planeIndex];
int scrollDirection = DDGI.ProbeScrollDirections[planeIndex];
int scrollOffset = probesScrollOffsets[planeIndex];
int scrollDirection = probeScrollDirections[planeIndex];
uint probeCount = DDGI.ProbesCounts[planeIndex];
uint coord = (probeCount + (scrollDirection ? (scrollOffset - 1) : (scrollOffset % probeCount))) % probeCount;
if (probeCoords[planeIndex] == coord)
@@ -244,7 +256,7 @@ void CS_UpdateProbes(uint3 DispatchThreadId : SV_DispatchThreadID, uint GroupInd
}
// Skip disabled probes
float probeState = LoadDDGIProbeState(DDGI, ProbesState, probeIndex);
float probeState = LoadDDGIProbeState(DDGI, ProbesState, CascadeIndex, probeIndex);
if (probeState == DDGI_PROBE_STATE_INACTIVE)
skip = true;
@@ -275,7 +287,8 @@ void CS_UpdateProbes(uint3 DispatchThreadId : SV_DispatchThreadID, uint GroupInd
uint backfacesCount = 0;
uint backfacesLimit = uint(DDGI.RaysCount * 0.1f);
#else
float distanceLimit = length(DDGI.ProbesSpacing) * 1.5f;
float probesSpacing = DDGI.ProbesOriginAndSpacing[CascadeIndex].w;
float distanceLimit = length(probesSpacing) * 1.5f;
#endif
LOOP
for (uint rayIndex = 0; rayIndex < DDGI.RaysCount; rayIndex++)
@@ -420,6 +433,7 @@ void CS_UpdateBorders(uint3 DispatchThreadId : SV_DispatchThreadID)
#ifdef _PS_IndirectLighting
#include "./Flax/GBuffer.hlsl"
#include "./Flax/Random.hlsl"
#include "./Flax/LightingCommon.hlsl"
Texture2D<float4> ProbesState : register(t4);
@@ -445,8 +459,9 @@ void PS_IndirectLighting(Quad_VS2PS input, out float4 output : SV_Target0)
// Sample irradiance
float bias = 1.0f;
float3 irradiance = SampleDDGIIrradiance(DDGI, ProbesState, ProbesDistance, ProbesIrradiance, gBuffer.WorldPos, gBuffer.Normal, bias);
float dither = RandN2(input.TexCoord + TemporalTime).x;
float3 irradiance = SampleDDGIIrradiance(DDGI, ProbesState, ProbesDistance, ProbesIrradiance, gBuffer.WorldPos, gBuffer.Normal, bias, dither);
// Calculate lighting
float3 diffuseColor = GetDiffuseColor(gBuffer);
float3 diffuse = Diffuse_Lambert(diffuseColor);

View File

@@ -209,5 +209,5 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D<float> tex[4]
// Gets the threshold value for the given Global SDF hit; it grows linearly with the index of the cascade that was hit.
// NOTE(review): two return statements are listed (different scale/bias constants) - only the first one would execute; confirm which one is current.
float GetGlobalSurfaceAtlasThreshold(GlobalSDFHit hit)
{
// Scale the threshold based on the hit cascade (less precision)
return hit.HitCascade * 10.0f + 20.0f;
return hit.HitCascade * 20.0f + 25.0f;
}

View File

@@ -9,6 +9,18 @@ float PseudoRandom(float2 xy)
return frac(dot(p.xyx * p.xyy, float3(20.390625f, 60.703125f, 2.4281209f)));
}
// Generic 1-component noise: fractional part of a scaled sine of the input (result in range 0-1).
float RandN1(float n)
{
    float scaledSine = sin(n) * 43758.5453123;
    return frac(scaledSine);
}
// Generic 2-component noise: hashes the 2D input into a single sine phase and
// scales it by two different factors to get two values in range 0-1.
float2 RandN2(float2 n)
{
    float phase = sin(dot(n, float2(12.9898, 78.233)));
    return frac(phase * float2(43758.5453123, 28001.8384));
}
void FindBestAxisVectors(float3 input, out float3 axis1, out float3 axis2)
{
const float3 a = abs(input);

View File

@@ -6,16 +6,6 @@
#include "./Flax/MonteCarlo.hlsl"
#include "./Flax/GBufferCommon.hlsl"
// Returns the larger of the two vector components.
float max2(float2 v)
{
return max(v.x, v.y);
}
// 2-component noise in range 0-1 computed from the position offset by the given random value
// (sine of a dot product, scaled by two factors, fractional part taken).
float2 RandN2(float2 pos, float2 random)
{
return frac(sin(dot(pos.xy + random, float2(12.9898, 78.233))) * float2(43758.5453, 28001.8384));
}
// 1:-1 to 0:1
float2 ClipToUv(float2 clipPos)
{
@@ -62,7 +52,7 @@ float3 TraceSceenSpaceReflection(float2 uv, GBufferSample gBuffer, Texture2D dep
float3 normalVS = mul(gBuffer.Normal, (float3x3)viewMatrix);
// Randomize it a little
float2 jitter = RandN2(uv, temporalTime);
float2 jitter = RandN2(uv + temporalTime);
float2 Xi = jitter;
Xi.y = lerp(Xi.y, 0.0, brdfBias);
float3 H = temporal ? TangentToWorld(gBuffer.Normal, ImportanceSampleGGX(Xi, gBuffer.Roughness)) : gBuffer.Normal;
@@ -80,7 +70,8 @@ float3 TraceSceenSpaceReflection(float2 uv, GBufferSample gBuffer, Texture2D dep
float3 endUV = ProjectWorldToUv(startWS + reflectWS, viewProjectionMatrix);
float3 rayUV = endUV - startUV;
rayUV *= stepSize / max2(abs(rayUV.xy));
float2 rayUVAbs = abs(rayUV.xy);
rayUV *= stepSize / max(rayUVAbs.x, rayUVAbs.y);
float3 startUv = startUV + rayUV * 2;
float3 currOffset = startUv;

View File

@@ -138,7 +138,7 @@ float4 PS_ResolvePass(Quad_VS2PS input) : SV_Target0
float3 viewVector = normalize(gBufferData.ViewPos - gBuffer.WorldPos);
// Randomize it a little
float2 random = RandN2(uv, TemporalTime);
float2 random = RandN2(uv + TemporalTime);
float2 blueNoise = random.xy * 2.0 - 1.0;
float2x2 offsetRotationMatrix = float2x2(blueNoise.x, blueNoise.y, -blueNoise.y, blueNoise.x);