Initial DDGI implementation

This commit is contained in:
Wojciech Figat
2022-05-23 10:15:02 +02:00
parent 1a64df9116
commit 375222a089
14 changed files with 1306 additions and 9 deletions

BIN
Content/Editor/Camera/O_Camera.flax (Stored with Git LFS)

Binary file not shown.

BIN
Content/Editor/DebugMaterials/DDGIDebugProbes.flax (Stored with Git LFS) Normal file

Binary file not shown.

BIN
Content/Engine/Models/SphereLowPoly.flax (Stored with Git LFS)

Binary file not shown.

BIN
Content/Shaders/GI/DDGI.flax (Stored with Git LFS) Normal file

Binary file not shown.

BIN
Content/Shaders/GI/GlobalSurfaceAtlas.flax (Stored with Git LFS)

Binary file not shown.

View File

@@ -70,6 +70,7 @@ bool DeployDataStep::Perform(CookingData& data)
data.AddRootEngineAsset(TEXT("Shaders/GPUParticlesSorting")); data.AddRootEngineAsset(TEXT("Shaders/GPUParticlesSorting"));
data.AddRootEngineAsset(TEXT("Shaders/GlobalSignDistanceField")); data.AddRootEngineAsset(TEXT("Shaders/GlobalSignDistanceField"));
data.AddRootEngineAsset(TEXT("Shaders/GI/GlobalSurfaceAtlas")); data.AddRootEngineAsset(TEXT("Shaders/GI/GlobalSurfaceAtlas"));
data.AddRootEngineAsset(TEXT("Shaders/GI/DDGI"));
data.AddRootEngineAsset(TEXT("Shaders/Quad")); data.AddRootEngineAsset(TEXT("Shaders/Quad"));
data.AddRootEngineAsset(TEXT("Shaders/Reflections")); data.AddRootEngineAsset(TEXT("Shaders/Reflections"));
data.AddRootEngineAsset(TEXT("Shaders/Shadows")); data.AddRootEngineAsset(TEXT("Shaders/Shadows"));

View File

@@ -0,0 +1,528 @@
// Copyright (c) 2012-2022 Wojciech Figat. All rights reserved.
#include "DynamicDiffuseGlobalIllumination.h"
#include "GlobalSurfaceAtlasPass.h"
#include "../GlobalSignDistanceFieldPass.h"
#include "../RenderList.h"
#include "Engine/Core/Random.h"
#include "Engine/Core/Types/Variant.h"
#include "Engine/Core/Math/Int3.h"
#include "Engine/Core/Math/Matrix3x3.h"
#include "Engine/Core/Math/Quaternion.h"
#include "Engine/Engine/Engine.h"
#include "Engine/Content/Content.h"
#include "Engine/Debug/DebugDraw.h"
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Graphics/RenderTask.h"
#include "Engine/Graphics/RenderBuffers.h"
#include "Engine/Graphics/RenderTargetPool.h"
#include "Engine/Graphics/Shaders/GPUShader.h"
#include "Engine/Level/Actors/BrushMode.h"
#include "Engine/Renderer/GBufferPass.h"
// Implementation based on:
// "Dynamic Diffuse Global Illumination with Ray-Traced Irradiance Probes", Journal of Computer Graphics Tools, April 2019
// Zander Majercik, Jean-Philippe Guertin, Derek Nowrouzezahrai, and Morgan McGuire
// https://morgan3d.github.io/articles/2019-04-01-ddgi/index.html and https://gdcvault.com/play/1026182/
//
// Additional references:
// "Scaling Probe-Based Real-Time Dynamic Global Illumination for Production", https://jcgt.org/published/0010/02/01/
// "Dynamic Diffuse Global Illumination with Ray-Traced Irradiance Fields", https://jcgt.org/published/0008/02/01/
// Compile-time configuration of the DDGI pass.
// This must match HLSL
// Divisor used to compute the X thread groups count of the rays tracing dispatch (the rays count per probe has to be a multiple of it)
#define DDGI_TRACE_RAYS_GROUP_SIZE_X 32
#define DDGI_TRACE_RAYS_LIMIT 512 // Limit of rays per-probe (runtime value can be smaller)
#define DDGI_PROBE_RESOLUTION_IRRADIANCE 6 // Resolution (in texels) for probe irradiance data (excluding 1px padding on each side)
#define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side)
// Divisor used to compute the thread groups count of the probes borders update dispatches
#define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8
// Divisor used to compute the thread groups count of the probes classification dispatch
#define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32
// Data uploaded to the DDGI shader constant buffer (the buffer fetched with GetCB(0) and bound at slot 0 during rendering).
PACK_STRUCT(struct Data0
{
// DDGI volume constants (copied from the binding data of the rendered view)
DynamicDiffuseGlobalIlluminationPass::ConstantsData DDGI;
// Global SDF constants (taken from the Global SDF pass binding data)
GlobalSignDistanceFieldPass::ConstantsData GlobalSDF;
// Global Surface Atlas constants (taken from the Global Surface Atlas pass binding data)
GlobalSurfaceAtlasPass::ConstantsData GlobalSurfaceAtlas;
// GBuffer inputs (filled with GBufferPass::SetInputs)
GBufferData GBuffer;
// Explicit padding placed before the trailing float - NOTE(review): presumably keeps the layout in sync with the shader-side buffer, confirm against the shader source
Vector3 Padding0;
// Scale of the indirect lighting
float IndirectLightingIntensity;
});
// Persistent DDGI probes volume state stored as a custom buffer inside the RenderBuffers of the view.
// Holds the volume configuration, the scrolling state and the probes textures taken from the RenderTargetPool.
class DDGICustomBuffer : public RenderBuffers::CustomBuffer
{
public:
    // Configuration the textures were allocated for (used to detect when the volume has to be reinitialized)
    int32 ProbeRaysCount = 0;
    float ProbesSpacing = 0.0f;
    Int3 ProbeCounts = Int3::Zero;

    // Scrolling state (always initialized so it is valid even before the first Clear call)
    Vector3 ProbesOrigin = Vector3::Zero;
    Int3 ProbeScrollOffsets = Int3::Zero;
    Int3 ProbeScrollDirections = Int3::Zero;
    bool ProbeScrollClear[3] = { false, false, false };

    GPUTexture* ProbesTrace = nullptr; // Probes ray tracing: (RGB: hit radiance, A: hit distance)
    GPUTexture* ProbesState = nullptr; // Probes state: (RGB: world-space offset, A: state)
    GPUTexture* ProbesIrradiance = nullptr; // Probes irradiance (RGB: sRGB color)
    GPUTexture* ProbesDistance = nullptr; // Probes distance (R: mean distance, G: mean distance^2)

    // Binding data exposed to other passes
    DynamicDiffuseGlobalIlluminationPass::BindingData Result;

    // Resets the scrolling state and gives the probes textures back to the pool.
    FORCE_INLINE void Clear()
    {
        ProbesOrigin = Vector3::Zero;
        ProbeScrollOffsets = Int3::Zero;
        ProbeScrollDirections = Int3::Zero;
        ProbeScrollClear[0] = false;
        ProbeScrollClear[1] = false;
        ProbeScrollClear[2] = false;

        // Release the textures and forget them, otherwise the pointers would keep referencing pooled textures
        // (another Clear or the destructor would release them for the second time)
        RenderTargetPool::Release(ProbesTrace);
        ProbesTrace = nullptr;
        RenderTargetPool::Release(ProbesState);
        ProbesState = nullptr;
        RenderTargetPool::Release(ProbesIrradiance);
        ProbesIrradiance = nullptr;
        RenderTargetPool::Release(ProbesDistance);
        ProbesDistance = nullptr;

        // The cached views were created from the released textures
        Result.ProbesState = nullptr;
        Result.ProbesDistance = nullptr;
        Result.ProbesIrradiance = nullptr;
    }

    ~DDGICustomBuffer()
    {
        Clear();
    }
};
// Fills the given matrix with a random rotation built from three random values.
void CalculateVolumeRandomRotation(Matrix3x3& matrix)
{
    // Reference: James Arvo's algorithm Graphics Gems 3 (pages 117-120)
    // http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.53.1357&rep=rep1&type=pdf

    // Draw the random values (two angles and a single scalar)
    const float angleA = TWO_PI * Random::Rand();
    const float angleB = TWO_PI * Random::Rand();
    const float scalar = Random::Rand();

    const float cosA = Math::Cos(angleA);
    const float sinA = Math::Sin(angleA);
    const float cosB = Math::Cos(angleB);
    const float sinB = Math::Sin(angleB);

    // Shared terms
    const float root = 2.0f * sqrtf(scalar * (1.0f - scalar));
    const float doubled = 2.0f * scalar;
    const float sinTerm = doubled * sinB * sinB - 1.0f;
    const float cosTerm = doubled * cosB * cosB - 1.0f;
    const float mixTerm = doubled * sinB * cosB;
    const float rootCos = root * cosB;
    const float rootSin = root * sinB;

    // First row
    matrix.M11 = cosA * cosTerm - sinA * mixTerm;
    matrix.M12 = sinA * cosTerm + cosA * mixTerm;
    matrix.M13 = rootCos;

    // Second row
    matrix.M21 = cosA * mixTerm - sinA * sinTerm;
    matrix.M22 = sinA * mixTerm + cosA * sinTerm;
    matrix.M23 = rootSin;

    // Third row
    matrix.M31 = cosA * rootCos - sinA * rootSin;
    matrix.M32 = sinA * rootCos + cosA * rootSin;
    matrix.M33 = 1.0f - doubled;
}
// Rounds the value towards zero (floor for non-negative values, ceil for the negative ones).
int32 AbsFloor(const float value)
{
    if (value >= 0.0f)
        return (int32)Math::Floor(value);
    return (int32)Math::Ceil(value);
}
// Gets the sign of the value but never zero (1 for values greater or equal zero, otherwise -1).
int32 GetSignNotZero(const float value)
{
    if (value >= 0.0f)
        return 1;
    return -1;
}
// Gets the volume origin with the current probes scrolling applied (scroll offsets are expressed in probe cells).
Vector3 GetVolumeOrigin(DDGICustomBuffer& ddgiData)
{
    const Vector3 scrollTranslation = Vector3(ddgiData.ProbeScrollOffsets) * ddgiData.ProbesSpacing;
    return ddgiData.ProbesOrigin + scrollTranslation;
}
// Updates the probes volume scrolling state so the volume follows the given view origin.
// @param ddgiData The DDGI state to update (scroll offsets, scroll directions, scroll clear flags and the origin).
// @param viewOrigin The world-space position the volume should be centered around.
void CalculateVolumeScrolling(DDGICustomBuffer& ddgiData, const Vector3& viewOrigin)
{
    // Fold whole volume wraps of the scroll offsets into the volume origin for each axis.
    // The signed offset is used (not the last scroll direction) so the result of GetVolumeOrigin stays unchanged,
    // also when the offset skipped over the probes count (scroll by more than one cell) or reached it from the other side.
    for (int32 axis = 0; axis < 3; axis++)
    {
        const int32 count = ddgiData.ProbeCounts.Raw[axis];
        if (count <= 0)
            continue; // Nothing to wrap (also prevents division by zero)
        int32& offset = ddgiData.ProbeScrollOffsets.Raw[axis];
        if (offset >= count || offset <= -count)
        {
            const int32 wrappedCells = (offset / count) * count; // Division truncates towards zero so the sign is kept
            ddgiData.ProbesOrigin.Raw[axis] += (float)wrappedCells * ddgiData.ProbesSpacing;
            offset -= wrappedCells;
        }
    }

    // Calculate the count of grid cells between the view origin and the scroll anchor
    const Vector3 translation = viewOrigin - GetVolumeOrigin(ddgiData);
    for (int32 axis = 0; axis < 3; axis++)
    {
        const int32 scroll = AbsFloor(translation.Raw[axis] / ddgiData.ProbesSpacing);
        ddgiData.ProbeScrollOffsets.Raw[axis] += scroll;
        ddgiData.ProbeScrollClear[axis] = scroll != 0; // Flag the axis that scrolled during this update
        ddgiData.ProbeScrollDirections.Raw[axis] = GetSignNotZero(translation.Raw[axis]);
    }
}
// Gets the name of the pass.
String DynamicDiffuseGlobalIlluminationPass::ToString() const
{
    return String(TEXT("DynamicDiffuseGlobalIlluminationPass"));
}
// Initializes the pass by caching the platform support flag. Always returns false.
bool DynamicDiffuseGlobalIlluminationPass::Init()
{
    // Check platform support (Shader Model 5 with compute shaders and typed UAV loads)
    const auto gpu = GPUDevice::Instance;
    const bool hasFeatureLevel = gpu->GetFeatureLevel() >= FeatureLevel::SM5;
    const bool hasComputeFeatures = gpu->Limits.HasCompute && gpu->Limits.HasTypedUAVLoad;
    _supported = hasFeatureLevel && hasComputeFeatures;

    return false;
}
// Loads the DDGI shader and caches the GPU objects used by the pass.
// Returns true if the resources are not ready (unsupported platform, shader not loaded yet or invalid shader), otherwise false.
bool DynamicDiffuseGlobalIlluminationPass::setupResources()
{
    if (!_supported)
        return true;

    // Load shader
    if (!_shader)
    {
        _shader = Content::LoadAsyncInternal<Shader>(TEXT("Shaders/GI/DDGI"));
        if (_shader == nullptr)
            return true;
#if COMPILE_WITH_DEV_ENV
        _shader.Get()->OnReloading.Bind<DynamicDiffuseGlobalIlluminationPass, &DynamicDiffuseGlobalIlluminationPass::OnShaderReloading>(this);
#endif
    }
    if (!_shader->IsLoaded())
        return true;

    // Initialize resources
    const auto shader = _shader->GetShader();
    _cb0 = shader->GetCB(0);
    if (!_cb0)
        return true;
    _csClassify = shader->GetCS("CS_Classify");
    _csTraceRays = shader->GetCS("CS_TraceRays");
    _csUpdateProbesIrradiance = shader->GetCS("CS_UpdateProbes", 0);
    _csUpdateProbesDistance = shader->GetCS("CS_UpdateProbes", 1);
    _csUpdateBordersIrradianceRow = shader->GetCS("CS_UpdateBorders", 0);
    _csUpdateBordersIrradianceCollumn = shader->GetCS("CS_UpdateBorders", 1);
    _csUpdateBordersDistanceRow = shader->GetCS("CS_UpdateBorders", 2);
    _csUpdateBordersDistanceCollumn = shader->GetCS("CS_UpdateBorders", 3);

    // Every compute program is dispatched without further checks during rendering so fail here if any of them is missing
    if (!_csClassify
        || !_csTraceRays
        || !_csUpdateProbesIrradiance
        || !_csUpdateProbesDistance
        || !_csUpdateBordersIrradianceRow
        || !_csUpdateBordersIrradianceCollumn
        || !_csUpdateBordersDistanceRow
        || !_csUpdateBordersDistanceCollumn)
        return true;

    // Create the pipeline state for the fullscreen indirect lighting pass (blended additively into the target)
    if (!_psIndirectLighting)
    {
        auto psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle;
        psDesc.PS = shader->GetPS("PS_IndirectLighting");
        psDesc.BlendMode = BlendingMode::Additive;
        _psIndirectLighting = GPUDevice::Instance->CreatePipelineState();
        if (_psIndirectLighting->Init(psDesc))
            return true;
    }

    return false;
}
#if COMPILE_WITH_DEV_ENV
// Called when the DDGI shader asset is reloading. Drops everything created from the shader and stores the reload frame index.
void DynamicDiffuseGlobalIlluminationPass::OnShaderReloading(Asset* obj)
{
    // Remember the frame of the reload
    LastFrameShaderReload = Engine::FrameCount;

    // Probes updating programs
    _csUpdateProbesIrradiance = nullptr;
    _csUpdateProbesDistance = nullptr;

    // Probes borders updating programs
    _csUpdateBordersIrradianceRow = nullptr;
    _csUpdateBordersIrradianceCollumn = nullptr;
    _csUpdateBordersDistanceRow = nullptr;
    _csUpdateBordersDistanceCollumn = nullptr;

    // Probes classification and tracing programs
    _csClassify = nullptr;
    _csTraceRays = nullptr;

    // The pipeline state uses the pixel shader from the old shader
    SAFE_DELETE_GPU_RESOURCE(_psIndirectLighting);

    invalidateResources();
}
#endif
void DynamicDiffuseGlobalIlluminationPass::Dispose()
{
RendererPass::Dispose();
// Cleanup
_cb0 = nullptr;
_csTraceRays = nullptr;
_shader = nullptr;
SAFE_DELETE_GPU_RESOURCE(_psIndirectLighting);
#if USE_EDITOR
_debugModel = nullptr;
_debugMaterial = nullptr;
#endif
}
// Gets the DDGI binding data stored in the given render buffers.
// Returns true if there is no data or it is older than the previous frame, otherwise false (result is written only then).
bool DynamicDiffuseGlobalIlluminationPass::Get(const RenderBuffers* buffers, BindingData& result)
{
    if (!buffers)
        return true;
    auto* storedData = buffers->FindCustomBuffer<DDGICustomBuffer>(TEXT("DDGI"));
    if (!storedData)
        return true;

    // Allow to use data from the previous frame (eg. particles in Editor using the Editor viewport in Game viewport - Game render task runs first)
    if (storedData->LastFrameUsed + 1 < Engine::FrameCount)
        return true;

    result = storedData->Result;
    return false;
}
// Renders the DDGI for the given view: updates the probes volume (scrolling, classification, rays tracing, irradiance and distance blending)
// and adds the indirect lighting to the light buffer.
// @param renderContext The rendering context.
// @param context The GPU context.
// @param lightBuffer The light accumulation buffer (input and output).
// @returns True if failed to render (pass not ready, no scenes, failed dependency pass or failed textures allocation), otherwise false.
bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext, GPUContext* context, GPUTextureView* lightBuffer)
{
    // Skip if not supported
    if (checkIfSkipPass())
        return true;
    if (renderContext.List->Scenes.Count() == 0)
        return true;
    auto& ddgiData = *renderContext.Buffers->GetCustomBuffer<DDGICustomBuffer>(TEXT("DDGI"));

    // Render Global SDF and Global Surface Atlas for software raytracing
    GlobalSignDistanceFieldPass::BindingData bindingDataSDF;
    if (GlobalSignDistanceFieldPass::Instance()->Render(renderContext, context, bindingDataSDF))
        return true;
    GlobalSurfaceAtlasPass::BindingData bindingDataSurfaceAtlas;
    if (GlobalSurfaceAtlasPass::Instance()->Render(renderContext, context, bindingDataSurfaceAtlas))
        return true;

    // Skip if already done in the current frame
    const auto currentFrame = Engine::FrameCount;
    if (ddgiData.LastFrameUsed == currentFrame)
        return false;
    ddgiData.LastFrameUsed = currentFrame;
    PROFILE_GPU_CPU("Dynamic Diffuse Global Illumination");

    // TODO: configurable via graphics settings
    const Quality quality = Quality::Ultra;
    bool debugProbes = true; // TODO: add debug option to draw probes locations -> in Graphics window - Editor-only
    // TODO: configurable via postFx settings (maybe use Global SDF distance?)
    const float indirectLightingIntensity = 1.0f;
    const Vector3 giDistance(2000, 2000, 2000); // GI distance around the view (in each direction)
    const float giResolution = 100.0f; // GI probes placement spacing
    const Int3 probesCounts(Vector3::Ceil(giDistance / giResolution));
    const Vector3 probesDistance = Vector3(probesCounts) * giResolution;
    const int32 probeRaysCount = Math::Min(Math::AlignUp(256, DDGI_TRACE_RAYS_GROUP_SIZE_X), DDGI_TRACE_RAYS_LIMIT); // TODO: make it based on the GI Quality
    const float probeHistoryWeight = 0.97f;

    // Init buffers
    const int32 probesCount = probesCounts.X * probesCounts.Y * probesCounts.Z;
    if (probesCount == 0 || indirectLightingIntensity <= ZeroTolerance)
        return true;
    // Probes are stored in 2D textures: X-Y planes are placed side by side along the width, Z goes along the height
    int32 probesCountX = probesCounts.X * probesCounts.Y;
    int32 probesCountY = probesCounts.Z;
    bool clear = false;
    if (Math::NotNearEqual(ddgiData.ProbesSpacing, giResolution) || ddgiData.ProbeCounts != probesCounts || ddgiData.ProbeRaysCount != probeRaysCount)
    {
        PROFILE_CPU_NAMED("Init");
        ddgiData.Clear();
        // Forget the released textures so a failed allocation below never leaves pointers to the pooled textures
        ddgiData.ProbesTrace = nullptr;
        ddgiData.ProbesState = nullptr;
        ddgiData.ProbesIrradiance = nullptr;
        ddgiData.ProbesDistance = nullptr;
        ddgiData.ProbeRaysCount = probeRaysCount;
        ddgiData.ProbesSpacing = giResolution;
        ddgiData.ProbeCounts = probesCounts;

        // Allocate probes textures
        uint64 memUsage = 0;
        auto desc = GPUTextureDescription::New2D(probesCountX, probesCountY, PixelFormat::Unknown);
        // TODO rethink probes data placement in memory -> what if we get [50x50x30] resolution? That's 75000 probes! Use sparse storage with active-only probes
        // On allocation failure the cached rays count is reset so the whole initialization is performed again in the next frame (instead of using missing textures)
#define INIT_TEXTURE(texture, format, width, height) desc.Format = format; desc.Width = width; desc.Height = height; ddgiData.texture = RenderTargetPool::Get(desc); if (!ddgiData.texture) { ddgiData.ProbeRaysCount = 0; return true; } memUsage += ddgiData.texture->GetMemoryUsage()
        desc.Flags = GPUTextureFlags::ShaderResource | GPUTextureFlags::UnorderedAccess;
        INIT_TEXTURE(ProbesTrace, PixelFormat::R16G16B16A16_Float, probeRaysCount, probesCount);
        INIT_TEXTURE(ProbesState, PixelFormat::R16G16B16A16_Float, probesCountX, probesCountY); // TODO: optimize to a RGBA32 (pos offset can be normalized to [0-0.5] range of ProbesSpacing and packed with state flag)
        INIT_TEXTURE(ProbesIrradiance, PixelFormat::R11G11B10_Float, probesCountX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), probesCountY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2));
        INIT_TEXTURE(ProbesDistance, PixelFormat::R16G16_Float, probesCountX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), probesCountY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2));
#undef INIT_TEXTURE
        LOG(Info, "Dynamic Diffuse Global Illumination memory usage: {0} MB, probes: {1}", memUsage / 1024 / 1024, probesCount);
        clear = true;
    }
#if COMPILE_WITH_DEV_ENV
    // Restart the probes after the shader reload (uses the same guard as the LastFrameShaderReload declaration)
    clear |= ddgiData.LastFrameUsed <= LastFrameShaderReload;
#endif
    if (clear)
    {
        // Clear probes
        PROFILE_GPU("Clear");
        context->ClearUA(ddgiData.ProbesState, Vector4::Zero);
        context->ClearUA(ddgiData.ProbesIrradiance, Vector4::Zero);
        context->ClearUA(ddgiData.ProbesDistance, Vector4::Zero);
    }

    // Compute random rotation matrix for probe rays orientation (randomized every frame)
    Matrix3x3 raysRotationMatrix;
    CalculateVolumeRandomRotation(raysRotationMatrix);

    // Compute scrolling (probes are placed around camera but are scrolling to increase stability during movement)
    Vector3 viewOrigin = renderContext.View.Position;
    Vector3 viewDirection = renderContext.View.Direction;
    const float probesDistanceMax = probesDistance.MaxValue();
    const Vector2 viewRayHit = CollisionsHelper::LineHitsBox(viewOrigin, viewOrigin + viewDirection * (probesDistanceMax * 2.0f), viewOrigin - probesDistance, viewOrigin + probesDistance);
    const float viewOriginOffset = viewRayHit.Y * probesDistanceMax * 0.8f;
    viewOrigin += viewDirection * viewOriginOffset;
    const float viewOriginSnapping = giResolution;
    viewOrigin = Vector3::Floor(viewOrigin / viewOriginSnapping) * viewOriginSnapping;
    CalculateVolumeScrolling(ddgiData, viewOrigin);

    // Upload constants
    {
        ddgiData.Result.Constants.ProbesOrigin = ddgiData.ProbesOrigin;
        ddgiData.Result.Constants.ProbesSpacing = ddgiData.ProbesSpacing;
        // Rays rotation is stored in the constants as a quaternion written into the Vector4 field
        Quaternion& raysRotation = *(Quaternion*)&ddgiData.Result.Constants.RaysRotation;
        Quaternion::RotationMatrix(raysRotationMatrix, raysRotation);
        raysRotation.Conjugate();
        ddgiData.Result.Constants.ProbesCounts[0] = probesCounts.X;
        ddgiData.Result.Constants.ProbesCounts[1] = probesCounts.Y;
        ddgiData.Result.Constants.ProbesCounts[2] = probesCounts.Z;
        ddgiData.Result.Constants.ProbesScrollOffsets = ddgiData.ProbeScrollOffsets;
        ddgiData.Result.Constants.ProbeScrollDirections = ddgiData.ProbeScrollDirections;
        ddgiData.Result.Constants.ProbeScrollClear[0] = ddgiData.ProbeScrollClear[0] != 0;
        ddgiData.Result.Constants.ProbeScrollClear[1] = ddgiData.ProbeScrollClear[1] != 0;
        ddgiData.Result.Constants.ProbeScrollClear[2] = ddgiData.ProbeScrollClear[2] != 0;
        ddgiData.Result.Constants.RayMaxDistance = 10000.0f; // TODO: adjust to match perf/quality ratio (make it based on Global SDF and Global Surface Atlas distance)
        ddgiData.Result.Constants.ViewDir = viewDirection;
        ddgiData.Result.Constants.RaysCount = probeRaysCount;
        ddgiData.Result.Constants.ProbeHistoryWeight = probeHistoryWeight;
        ddgiData.Result.Constants.IrradianceGamma = 5.0f;
        ddgiData.Result.ProbesState = ddgiData.ProbesState->View();
        ddgiData.Result.ProbesDistance = ddgiData.ProbesDistance->View();
        ddgiData.Result.ProbesIrradiance = ddgiData.ProbesIrradiance->View();
        Data0 data;
        data.DDGI = ddgiData.Result.Constants;
        data.GlobalSDF = bindingDataSDF.Constants;
        data.GlobalSurfaceAtlas = bindingDataSurfaceAtlas.Constants;
        data.Padding0 = Vector3::Zero; // Don't upload uninitialized memory
        data.IndirectLightingIntensity = indirectLightingIntensity;
        GBufferPass::SetInputs(renderContext.View, data.GBuffer);
        context->UpdateCB(_cb0, &data);
        context->BindCB(0, _cb0);
    }

    // Classify probes (activation/deactivation and relocation)
    {
        PROFILE_GPU_CPU("Probes Classification");
        uint32 threadGroups = Math::DivideAndRoundUp(probesCount, DDGI_PROBE_CLASSIFY_GROUP_SIZE);
        for (int32 i = 0; i < 4; i++)
        {
            context->BindSR(i, bindingDataSDF.Cascades[i]->ViewVolume());
        }
        context->BindUA(0, ddgiData.Result.ProbesState);
        context->Dispatch(_csClassify, threadGroups, 1, 1);
        context->ResetUA();
    }

    // Trace rays from probes
    {
        PROFILE_GPU_CPU("Trace Rays");

        // Global SDF with Global Surface Atlas software raytracing (X - per probe ray, Y - per probe)
        ASSERT_LOW_LAYER((probeRaysCount % DDGI_TRACE_RAYS_GROUP_SIZE_X) == 0);
        for (int32 i = 0; i < 4; i++)
        {
            context->BindSR(i, bindingDataSDF.Cascades[i]->ViewVolume());
            context->BindSR(i + 4, bindingDataSDF.CascadeMips[i]->ViewVolume());
        }
        context->BindSR(8, bindingDataSurfaceAtlas.Chunks ? bindingDataSurfaceAtlas.Chunks->View() : nullptr);
        context->BindSR(9, bindingDataSurfaceAtlas.CulledObjects ? bindingDataSurfaceAtlas.CulledObjects->View() : nullptr);
        context->BindSR(10, bindingDataSurfaceAtlas.AtlasDepth->View());
        context->BindSR(11, bindingDataSurfaceAtlas.AtlasLighting->View());
        context->BindSR(12, ddgiData.Result.ProbesState);
        context->BindUA(0, ddgiData.ProbesTrace->View());
        context->Dispatch(_csTraceRays, probeRaysCount / DDGI_TRACE_RAYS_GROUP_SIZE_X, probesCount, 1);
        context->ResetUA();
        context->ResetSR();

#if 0
        // Probes trace debug preview
        context->SetViewportAndScissors(renderContext.View.ScreenSize.X, renderContext.View.ScreenSize.Y);
        context->SetRenderTarget(lightBuffer);
        context->Draw(ddgiData.ProbesTrace);
        return false;
#endif
    }

    // Update probes
    {
        PROFILE_GPU_CPU("Update Probes");
        context->BindSR(0, ddgiData.Result.ProbesState);
        context->BindSR(1, ddgiData.ProbesTrace->View());

        // Update irradiance
        context->BindUA(0, ddgiData.Result.ProbesIrradiance);
        context->Dispatch(_csUpdateProbesIrradiance, probesCountX, probesCountY, 1);
        uint32 threadGroupsX = Math::DivideAndRoundUp(probesCountX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
        uint32 threadGroupsY = Math::DivideAndRoundUp(probesCountY, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
        context->Dispatch(_csUpdateBordersIrradianceRow, threadGroupsX, threadGroupsY, 1);
        threadGroupsX = Math::DivideAndRoundUp(probesCountX, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
        threadGroupsY = Math::DivideAndRoundUp(probesCountY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
        context->Dispatch(_csUpdateBordersIrradianceCollumn, threadGroupsX, threadGroupsY, 1);

        // Update distance
        context->BindUA(0, ddgiData.Result.ProbesDistance);
        context->Dispatch(_csUpdateProbesDistance, probesCountX, probesCountY, 1);
        threadGroupsX = Math::DivideAndRoundUp(probesCountX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
        threadGroupsY = Math::DivideAndRoundUp(probesCountY, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
        context->Dispatch(_csUpdateBordersDistanceRow, threadGroupsX, threadGroupsY, 1);
        threadGroupsX = Math::DivideAndRoundUp(probesCountX, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
        threadGroupsY = Math::DivideAndRoundUp(probesCountY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
        context->Dispatch(_csUpdateBordersDistanceCollumn, threadGroupsX, threadGroupsY, 1);
    }

    // Render indirect lighting
    {
        PROFILE_GPU_CPU("Indirect Lighting");
#if 0
        // DDGI indirect lighting debug preview
        context->Clear(lightBuffer, Color::Transparent);
#endif
        // Unbind the probes textures from the compute output before sampling them
        context->ResetUA();
        context->BindSR(0, renderContext.Buffers->GBuffer0->View());
        context->BindSR(1, renderContext.Buffers->GBuffer1->View());
        context->BindSR(2, renderContext.Buffers->GBuffer2->View());
        context->BindSR(3, renderContext.Buffers->DepthBuffer->View());
        context->BindSR(4, ddgiData.Result.ProbesState);
        context->BindSR(5, ddgiData.Result.ProbesDistance);
        context->BindSR(6, ddgiData.Result.ProbesIrradiance);
        context->SetViewportAndScissors(renderContext.View.ScreenSize.X, renderContext.View.ScreenSize.Y);
        context->SetRenderTarget(lightBuffer);
        context->SetState(_psIndirectLighting);
        context->DrawFullscreenTriangle();
    }

#if USE_EDITOR
    // Probes debug drawing
    if (debugProbes)
    {
        PROFILE_GPU_CPU("Debug Probes");
        if (!_debugModel)
            _debugModel = Content::LoadAsyncInternal<Model>(TEXT("Editor/Primitives/Sphere"));
        if (!_debugMaterial)
            _debugMaterial = Content::LoadAsyncInternal<MaterialBase>(TEXT("Editor/DebugMaterials/DDGIDebugProbes"));
        if (_debugModel && _debugModel->IsLoaded() && _debugModel->CanBeRendered() && _debugMaterial && _debugMaterial->IsLoaded())
        {
            // Draw a sphere per probe via a temporary render list (the probe index is passed as the last Draw argument)
            RenderContext debugRenderContext(renderContext);
            debugRenderContext.List = RenderList::GetFromPool();
            debugRenderContext.View.Pass = DrawPass::GBuffer;
            debugRenderContext.View.Prepare(debugRenderContext);
            Matrix world;
            Matrix::Scaling(Vector3(0.2f), world);
            const Mesh& debugMesh = _debugModel->LODs[0].Meshes[0];
            for (int32 probeIndex = 0; probeIndex < probesCount; probeIndex++)
                debugMesh.Draw(debugRenderContext, _debugMaterial, world, StaticFlags::None, true, DrawPass::GBuffer, (float)probeIndex);
            debugRenderContext.List->SortDrawCalls(debugRenderContext, false, DrawCallsListType::GBuffer);
            context->SetViewportAndScissors(debugRenderContext.View.ScreenSize.X, debugRenderContext.View.ScreenSize.Y);
            GPUTextureView* targetBuffers[5] =
            {
                lightBuffer,
                renderContext.Buffers->GBuffer0->View(),
                renderContext.Buffers->GBuffer1->View(),
                renderContext.Buffers->GBuffer2->View(),
                renderContext.Buffers->GBuffer3->View(),
            };
            context->SetRenderTarget(*renderContext.Buffers->DepthBuffer, ToSpan(targetBuffers, ARRAY_COUNT(targetBuffers)));
            {
                // Pass DDGI data to the material
                _debugMaterial->SetParameterValue(TEXT("ProbesState"), Variant(ddgiData.ProbesState));
                _debugMaterial->SetParameterValue(TEXT("ProbesIrradiance"), Variant(ddgiData.ProbesIrradiance));
                _debugMaterial->SetParameterValue(TEXT("ProbesDistance"), Variant(ddgiData.ProbesDistance));
                auto cb = _debugMaterial->GetShader()->GetCB(3);
                if (cb)
                {
                    context->UpdateCB(cb, &ddgiData.Result.Constants);
                    context->BindCB(3, cb);
                }
            }
            debugRenderContext.List->ExecuteDrawCalls(debugRenderContext, DrawCallsListType::GBuffer);
            RenderList::ReturnToPool(debugRenderContext.List);
            context->UnBindCB(3);
            context->ResetRenderTarget();
        }
    }
#endif

    return false;
}

View File

@@ -0,0 +1,93 @@
// Copyright (c) 2012-2022 Wojciech Figat. All rights reserved.
#pragma once
#include "../RendererPass.h"
#include "Engine/Core/Math/Int3.h"
#include "Engine/Graphics/Textures/GPUTexture.h"
/// <summary>
/// Dynamic Diffuse Global Illumination rendering pass.
/// </summary>
class FLAXENGINE_API DynamicDiffuseGlobalIlluminationPass : public RendererPass<DynamicDiffuseGlobalIlluminationPass>
{
public:
    // Constant buffer data for DDGI access on a GPU (fields order and types mirror the DDGIData structure in DDGI.hlsl).
    PACK_STRUCT(struct ConstantsData
        {
        Vector3 ProbesOrigin;
        float ProbesSpacing;
        Vector4 RaysRotation;
        uint32 ProbesCounts[3];
        float IrradianceGamma;
        Int3 ProbesScrollOffsets;
        float ProbeHistoryWeight;
        Vector3 ViewDir;
        uint32 RaysCount;
        Int3 ProbeScrollDirections;
        float RayMaxDistance;
        uint32 ProbeScrollClear[3];
        uint32 Padding0;
        });

    // Binding data for the GPU.
    struct BindingData
    {
        ConstantsData Constants;
        GPUTextureView* ProbesState = nullptr;
        GPUTextureView* ProbesDistance = nullptr;
        GPUTextureView* ProbesIrradiance = nullptr;
    };

private:
    // All the GPU object pointers start as null so the 'not created yet' checks never read indeterminate values
    bool _supported = false;
    AssetReference<Shader> _shader;
    GPUConstantBuffer* _cb0 = nullptr;
    GPUShaderProgramCS* _csClassify = nullptr;
    GPUShaderProgramCS* _csTraceRays = nullptr;
    GPUShaderProgramCS* _csUpdateProbesIrradiance = nullptr;
    GPUShaderProgramCS* _csUpdateProbesDistance = nullptr;
    GPUShaderProgramCS* _csUpdateBordersIrradianceRow = nullptr;
    GPUShaderProgramCS* _csUpdateBordersIrradianceCollumn = nullptr;
    GPUShaderProgramCS* _csUpdateBordersDistanceRow = nullptr;
    GPUShaderProgramCS* _csUpdateBordersDistanceCollumn = nullptr;
    GPUPipelineState* _psIndirectLighting = nullptr;
#if USE_EDITOR
    AssetReference<Model> _debugModel;
    AssetReference<MaterialBase> _debugMaterial;
#endif

public:
    /// <summary>
    /// Gets the DDGI binding data (only if enabled).
    /// </summary>
    /// <param name="buffers">The rendering context buffers.</param>
    /// <param name="result">The result DDGI data for binding to the shaders.</param>
    /// <returns>True if failed to render (platform doesn't support it, out of video memory, disabled feature or effect is not ready), otherwise false.</returns>
    bool Get(const RenderBuffers* buffers, BindingData& result);

    /// <summary>
    /// Renders the DDGI.
    /// </summary>
    /// <param name="renderContext">The rendering context.</param>
    /// <param name="context">The GPU context.</param>
    /// <param name="lightBuffer">The light accumulation buffer (input and output).</param>
    /// <returns>True if failed to render (platform doesn't support it, out of video memory, disabled feature or effect is not ready), otherwise false.</returns>
    bool Render(RenderContext& renderContext, GPUContext* context, GPUTextureView* lightBuffer);

private:
#if COMPILE_WITH_DEV_ENV
    // Index of the frame when the shader was reloaded for the last time
    uint64 LastFrameShaderReload = 0;
    void OnShaderReloading(Asset* obj);
#endif

public:
    // [RendererPass]
    String ToString() const override;
    bool Init() override;
    void Dispose() override;

protected:
    // [RendererPass]
    bool setupResources() override;
};

View File

@@ -23,6 +23,7 @@
#include "AtmospherePreCompute.h" #include "AtmospherePreCompute.h"
#include "GlobalSignDistanceFieldPass.h" #include "GlobalSignDistanceFieldPass.h"
#include "GI/GlobalSurfaceAtlasPass.h" #include "GI/GlobalSurfaceAtlasPass.h"
#include "GI/DynamicDiffuseGlobalIllumination.h"
#include "Utils/MultiScaler.h" #include "Utils/MultiScaler.h"
#include "Utils/BitonicSort.h" #include "Utils/BitonicSort.h"
#include "AntiAliasing/FXAA.h" #include "AntiAliasing/FXAA.h"
@@ -85,6 +86,7 @@ bool RendererService::Init()
PassList.Add(HistogramPass::Instance()); PassList.Add(HistogramPass::Instance());
PassList.Add(GlobalSignDistanceFieldPass::Instance()); PassList.Add(GlobalSignDistanceFieldPass::Instance());
PassList.Add(GlobalSurfaceAtlasPass::Instance()); PassList.Add(GlobalSurfaceAtlasPass::Instance());
PassList.Add(DynamicDiffuseGlobalIlluminationPass::Instance());
#if USE_EDITOR #if USE_EDITOR
PassList.Add(QuadOverdrawPass::Instance()); PassList.Add(QuadOverdrawPass::Instance());
#endif #endif
@@ -397,6 +399,11 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext)
// Render lighting // Render lighting
LightPass::Instance()->RenderLight(renderContext, *lightBuffer); LightPass::Instance()->RenderLight(renderContext, *lightBuffer);
if (renderContext.View.Flags & ViewFlags::GI)
{
// TODO: add option to PostFx Volume for realtime GI type (None, DDGI)
DynamicDiffuseGlobalIlluminationPass::Instance()->Render(renderContext, context, *lightBuffer);
}
if (renderContext.View.Mode == ViewMode::LightBuffer) if (renderContext.View.Mode == ViewMode::LightBuffer)
{ {
auto colorGradingLUT = ColorGradingPass::Instance()->RenderLUT(renderContext); auto colorGradingLUT = ColorGradingPass::Instance()->RenderLUT(renderContext);
@@ -499,7 +506,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext)
context->ResetRenderTarget(); context->ResetRenderTarget();
context->ResetSR(); context->ResetSR();
context->FlushState(); context->FlushState();
// Custom Post Processing // Custom Post Processing
renderContext.List->RunMaterialPostFxPass(context, renderContext, MaterialPostFxLocation::AfterPostProcessingPass, frameBuffer, tempBuffer); renderContext.List->RunMaterialPostFxPass(context, renderContext, MaterialPostFxLocation::AfterPostProcessingPass, frameBuffer, tempBuffer);
renderContext.List->RunCustomPostFxPass(context, renderContext, PostProcessEffectLocation::Default, frameBuffer, tempBuffer); renderContext.List->RunCustomPostFxPass(context, renderContext, PostProcessEffectLocation::Default, frameBuffer, tempBuffer);

214
Source/Shaders/GI/DDGI.hlsl Normal file
View File

@@ -0,0 +1,214 @@
// Copyright (c) 2012-2022 Wojciech Figat. All rights reserved.
// Implementation based on:
// "Dynamic Diffuse Global Illumination with Ray-Traced Irradiance Probes", Journal of Computer Graphics Tools, April 2019
// Zander Majercik, Jean-Philippe Guertin, Derek Nowrouzezahrai, and Morgan McGuire
// https://morgan3d.github.io/articles/2019-04-01-ddgi/index.html and https://gdcvault.com/play/1026182/
//
// Additional references:
// "Scaling Probe-Based Real-Time Dynamic Global Illumination for Production", https://jcgt.org/published/0010/02/01/
// "Dynamic Diffuse Global Illumination with Ray-Traced Irradiance Fields", https://jcgt.org/published/0008/02/01/
#include "./Flax/Common.hlsl"
#include "./Flax/Math.hlsl"
#include "./Flax/Octahedral.hlsl"
// Probe classification states - NOTE(review): presumably compared against the W channel of the probes state texture, confirm in the classification shader
#define DDGI_PROBE_STATE_ACTIVE 0
#define DDGI_PROBE_STATE_INACTIVE 1
// Probe atlas tile resolutions (the same values are defined by the C++ side of the pass)
#define DDGI_PROBE_RESOLUTION_IRRADIANCE 6 // Resolution (in texels) for probe irradiance data (excluding 1px padding on each side)
#define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side)
#define DDGI_SRGB_BLENDING 1 // Enables blending in sRGB color space, otherwise irradiance blending is done in linear space
// DDGI data for a constant buffer (fields order and types mirror the ConstantsData structure of the C++ pass)
struct DDGIData
{
// Origin of the probes volume (without the scroll offsets applied)
float3 ProbesOrigin;
// Distance between the neighbouring probes
float ProbesSpacing;
// Rotation applied to the probe rays - NOTE(review): filled from a quaternion on the C++ side
float4 RaysRotation;
// Amount of probes along each axis of the volume
uint3 ProbesCounts;
float IrradianceGamma;
// Scroll offsets of the volume (in probe cells)
int3 ProbesScrollOffsets;
float ProbeHistoryWeight;
float3 ViewDir;
// Amount of rays traced per probe
uint RaysCount;
int3 ProbeScrollDirections;
float RayMaxDistance;
uint3 ProbeScrollClear; // TODO: pack into bits
uint Padding0;
};
// Calculates the linear probe index from the 3D probe coordinates (probes are grouped into X-Z planes, one plane per Y).
uint GetDDGIProbeIndex(DDGIData data, uint3 probeCoords)
{
    uint planeSize = data.ProbesCounts.x * data.ProbesCounts.z;
    uint indexInPlane = probeCoords.x + (data.ProbesCounts.x * probeCoords.z);
    return probeCoords.y * planeSize + indexInPlane;
}
// Calculates the linear probe index from the texel coordinates inside the probes atlas made of tiles of the given resolution.
uint GetDDGIProbeIndex(DDGIData data, uint2 texCoords, uint texResolution)
{
    // Tile (probe) coordinates inside the atlas
    uint tileX = texCoords.x / texResolution;
    uint tileY = texCoords.y / texResolution;

    // Planes are placed side by side along the atlas width
    uint planeSize = data.ProbesCounts.x * data.ProbesCounts.z;
    uint plane = texCoords.x / (data.ProbesCounts.x * texResolution);
    uint indexInPlane = tileX - (plane * data.ProbesCounts.x) + (data.ProbesCounts.x * tileY);
    return plane * planeSize + indexInPlane;
}
// Calculates the 3D probe coordinates from the linear probe index.
uint3 GetDDGIProbeCoords(DDGIData data, uint probeIndex)
{
    uint coordX = probeIndex % data.ProbesCounts.x;
    uint coordY = probeIndex / (data.ProbesCounts.x * data.ProbesCounts.z);
    uint coordZ = (probeIndex / data.ProbesCounts.x) % data.ProbesCounts.z;
    return uint3(coordX, coordY, coordZ);
}
// Calculates the coordinates of the probe inside the probes textures (one texel or one tile per probe) from the linear probe index.
uint2 GetDDGIProbeTexelCoords(DDGIData data, uint probeIndex)
{
    // Plane that contains the probe
    uint planeSize = data.ProbesCounts.x * data.ProbesCounts.z;
    uint plane = probeIndex / planeSize;

    // Column and row of the probe (planes are placed side by side along the width)
    uint column = (probeIndex % data.ProbesCounts.x) + (plane * data.ProbesCounts.x);
    uint row = (probeIndex / data.ProbesCounts.x) % data.ProbesCounts.z;
    return uint2(column, row);
}
// Calculates the linear probe index from the 3D probe coordinates with the volume scrolling applied.
uint GetDDGIScrollingProbeIndex(DDGIData data, uint3 probeCoords)
{
    // Probes are scrolled on edges to stabilize GI when camera moves
    uint3 scrolledCoords = (probeCoords + data.ProbesScrollOffsets + data.ProbesCounts) % data.ProbesCounts;
    return GetDDGIProbeIndex(data, scrolledCoords);
}
// Calculates the world-space position of the probe at the given grid coordinates (without the relocation offset)
float3 GetDDGIProbeWorldPosition(DDGIData data, uint3 probeCoords)
{
    float spacing = data.ProbesSpacing;
    // Half of the grid size so the grid is centered around the origin
    float3 halfGridSize = (spacing * (data.ProbesCounts - 1)) * 0.5f;
    float3 localPosition = probeCoords * spacing;
    float3 scrollShift = data.ProbesScrollOffsets * spacing;
    return data.ProbesOrigin + localPosition - halfGridSize + scrollShift;
}
// Loads the probe state (W channel of the probes state texture)
float LoadDDGIProbeState(DDGIData data, Texture2D<float4> probesState, uint probeIndex)
{
    int2 texel = GetDDGIProbeTexelCoords(data, probeIndex);
    return probesState.Load(int3(texel, 0)).w;
}
// Loads the probe world-space position (XYZ, with the relocation offset applied) and the probe state (W)
float4 LoadDDGIProbePositionAndState(DDGIData data, Texture2D<float4> probesState, uint probeIndex, uint3 probeCoords)
{
    // Probes state texture stores the relocation offset (XYZ) and the classification state (W)
    int2 texel = GetDDGIProbeTexelCoords(data, probeIndex);
    float4 state = probesState.Load(int3(texel, 0));
    float3 position = GetDDGIProbeWorldPosition(data, probeCoords) + state.xyz;
    return float4(position, state.w);
}
// Calculates the normalized UVs for sampling the probes atlas texture (irradiance or distance) for the given probe and octahedral coordinates
float2 GetDDGIProbeUV(DDGIData data, uint probeIndex, float2 octahedralCoords, uint resolution)
{
    // Every probe tile has 1 texel of padding on each side
    float tileSize = resolution + 2.0f;
    float2 atlasSize = float2(tileSize * (data.ProbesCounts.x * data.ProbesCounts.y), tileSize * data.ProbesCounts.z);
    // Start from the center of the probe tile and offset by the octahedral coordinates
    uint2 tile = GetDDGIProbeTexelCoords(data, probeIndex);
    float2 tileCenter = float2(tile.x * tileSize, tile.y * tileSize) + (tileSize * 0.5f);
    float2 texel = tileCenter + octahedralCoords.xy * (resolution * 0.5f);
    return texel / atlasSize;
}
// Samples DDGI probes volume at the given world-space position and returns the irradiance.
// Blends up to 8 probes surrounding the position using backface, visibility (Chebyshev) and trilinear weights.
// data - probes volume constants
// probesState - probes state texture (relocation offset in XYZ, state in W)
// probesDistance - probes distance atlas (mean distance in R, mean squared distance in G)
// probesIrradiance - probes irradiance atlas
// worldPosition, worldNormal - shading point location and normal (world-space)
// bias - scale of the surface bias applied along the normal and against the view direction
// Returns the irradiance (zero when all of the surrounding probes are inactive, faded out near the volume bounds).
float3 SampleDDGIIrradiance(DDGIData data, Texture2D<float4> probesState, Texture2D<float4> probesDistance, Texture2D<float4> probesIrradiance, float3 worldPosition, float3 worldNormal, float bias)
{
// RGB accumulates weighted irradiance, A accumulates the weights sum
float4 irradiance = float4(0, 0, 0, 0);
// Center and half-size of the probes volume (with scrolling applied)
float3 probesOrigin = data.ProbesScrollOffsets * data.ProbesSpacing + data.ProbesOrigin;
float3 probesExtent = (data.ProbesCounts - 1) * (data.ProbesSpacing * 0.5f);
// Bias the world-space position to reduce artifacts
float3 surfaceBias = (worldNormal * bias) + (data.ViewDir * (bias * -4.0f));
float3 biasedWorldPosition = worldPosition + surfaceBias;
// Get the grid coordinates of the base probe (lowest corner of the 8-probe cell) for the world position
// NOTE(review): this uses the non-biased worldPosition while the trilinear alpha below uses the biased one - confirm it is intended
uint3 baseProbeCoords = clamp(uint3((worldPosition - probesOrigin + probesExtent) / data.ProbesSpacing), 0, data.ProbesCounts - 1);
float3 baseProbeWorldPosition = GetDDGIProbeWorldPosition(data, baseProbeCoords);
// Normalized location of the biased position inside the cell (0-1 per axis)
float3 biasAlpha = saturate((biasedWorldPosition - baseProbeWorldPosition) / data.ProbesSpacing);
// Loop over the closest probes to accumulate their contributions
for (uint i = 0; i < 8; i++)
{
// Bits of the loop index select the cell corner (0 or 1 per axis)
uint3 probeCoordsOffset = uint3(i, i >> 1, i >> 2) & 1;
uint3 probeCoords = clamp(baseProbeCoords + probeCoordsOffset, 0, data.ProbesCounts - 1);
uint probeIndex = GetDDGIScrollingProbeIndex(data, probeCoords);
// Load probe position and state
float4 probePositionAndState = LoadDDGIProbePositionAndState(data, probesState, probeIndex, probeCoords);
if (probePositionAndState.w == DDGI_PROBE_STATE_INACTIVE)
continue;
// Calculate the distance and direction from the (biased and non-biased) shading point and the probe
float3 worldPosToProbe = normalize(probePositionAndState.xyz - worldPosition);
float3 biasedPosToProbe = normalize(probePositionAndState.xyz - biasedWorldPosition);
float biasedPosToProbeDist = length(probePositionAndState.xyz - biasedWorldPosition);
// Smooth backface test
float weight = Square(dot(worldPosToProbe, worldNormal) * 0.5f + 0.5f);
// Sample distance texture
float2 octahedralCoords = GetOctahedralCoords(-biasedPosToProbe);
float2 uv = GetDDGIProbeUV(data, probeIndex, octahedralCoords, DDGI_PROBE_RESOLUTION_DISTANCE);
// Stored values are scaled by 0.5 during the probes update (see normalization in the update shader) so scale them back
float2 probeDistance = probesDistance.SampleLevel(SamplerLinearClamp, uv, 0).rg * 2.0f;
float probeDistanceMean = probeDistance.x;
float probeDistanceMean2 = probeDistance.y;
// Visibility weight (Chebyshev)
if (biasedPosToProbeDist > probeDistanceMean)
{
float probeDistanceVariance = abs(Square(probeDistanceMean) - probeDistanceMean2);
float chebyshevWeight = probeDistanceVariance / (probeDistanceVariance + Square(biasedPosToProbeDist - probeDistanceMean));
weight *= max(chebyshevWeight * chebyshevWeight * chebyshevWeight, 0.05f);
}
// Avoid a weight of zero
weight = max(weight, 0.000001f);
// Adjust weight curve to inject a small portion of light
const float minWeightThreshold = 0.2f;
if (weight < minWeightThreshold)
weight *= Square(weight) * (1.0f / (minWeightThreshold * minWeightThreshold));
// Calculate trilinear weights based on the distance to each probe to smoothly transition between grid of 8 probes
float3 trilinear = lerp(1.0f - biasAlpha, biasAlpha, probeCoordsOffset);
weight *= max(trilinear.x * trilinear.y * trilinear.z, 0.001f);
// Sample irradiance texture
octahedralCoords = GetOctahedralCoords(worldNormal);
uv = GetDDGIProbeUV(data, probeIndex, octahedralCoords, DDGI_PROBE_RESOLUTION_IRRADIANCE);
float3 probeIrradiance = probesIrradiance.SampleLevel(SamplerLinearClamp, uv, 0).rgb;
#if DDGI_SRGB_BLENDING
// Half of the gamma exponent is applied here, the blended result is squared after the loop
probeIrradiance = pow(probeIrradiance, data.IrradianceGamma * 0.5f);
#endif
// Accumulate weighted irradiance
irradiance += float4(probeIrradiance * weight, weight);
}
// Weights sum is zero only when all of the probes were skipped as inactive
if (irradiance.a > 0.0f)
{
// Normalize irradiance
irradiance.rgb *= 1.f / irradiance.a;
#if DDGI_SRGB_BLENDING
// Finish decoding (second half of the gamma exponent)
irradiance.rgb *= irradiance.rgb;
#endif
irradiance.rgb *= 2.0f * PI;
// Fade-out outside the probes volume
float fadeDistance = data.ProbesSpacing * 0.5f;
irradiance.rgb *= saturate(Min3(probesExtent - abs(worldPosition - probesOrigin)) / fadeDistance);
}
return irradiance.rgb;
}

View File

@@ -0,0 +1,439 @@
// Copyright (c) 2012-2022 Wojciech Figat. All rights reserved.
// Implementation based on:
// "Dynamic Diffuse Global Illumination with Ray-Traced Irradiance Probes", Journal of Computer Graphics Tools, April 2019
// Zander Majercik, Jean-Philippe Guertin, Derek Nowrouzezahrai, and Morgan McGuire
// https://morgan3d.github.io/articles/2019-04-01-ddgi/index.html and https://gdcvault.com/play/1026182/
//
// Additional references:
// "Scaling Probe-Based Real-Time Dynamic Global Illumination for Production", https://jcgt.org/published/0010/02/01/
// "Dynamic Diffuse Global Illumination with Ray-Traced Irradiance Fields", https://jcgt.org/published/0008/02/01/
#include "./Flax/Common.hlsl"
#include "./Flax/Math.hlsl"
#include "./Flax/Quaternion.hlsl"
#include "./Flax/GlobalSignDistanceField.hlsl"
#include "./Flax/GI/GlobalSurfaceAtlas.hlsl"
#include "./Flax/GI/DDGI.hlsl"
// This must match C++
#define DDGI_TRACE_RAYS_LIMIT 512 // Limit of rays per-probe (runtime value can be smaller), defines the size of the groupshared rays cache
#define DDGI_TRACE_RAYS_GROUP_SIZE_X 32 // Thread group size (X axis, rays) of CS_TraceRays
#define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8 // Thread group size (X and Y axis) of CS_UpdateBorders
#define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 // Thread group size (X axis, probes) of CS_Classify
// Constant buffer (slot 0) with the data used by the DDGI shaders in this file
META_CB_BEGIN(0, Data)
// Probes volume constants
DDGIData DDGI;
// Global SDF constants used for probes classification and rays tracing
GlobalSDFData GlobalSDF;
// Global Surface Atlas constants used for sampling lighting at ray hit locations
GlobalSurfaceAtlasData GlobalSurfaceAtlas;
// GBuffer constants used by the indirect lighting pixel shader
GBufferData GBuffer;
// Unused, presumably aligns the following scalar to the end of a 16-byte register - TODO confirm
float3 Padding0;
// Scale applied to the probes irradiance during the update
float IndirectLightingIntensity;
META_CB_END
// Calculates the direction of the given sample from a set of directions evenly distributed on a unit sphere (Spherical Fibonacci lattice)
float3 GetSphericalFibonacci(float sampleIndex, float samplesCount)
{
    // Fractional part of the golden ratio drives the azimuth angle
    const float goldenRatioFrac = (sqrt(5.0f) * 0.5f + 0.5f) - 1.0f;
    float turns = sampleIndex * goldenRatioFrac;
    float azimuth = (2.0f * PI) * (turns - floor(turns));
    // Height goes uniformly from 1 to -1 across the samples
    float height = 1.0f - (2.0f * sampleIndex + 1.0f) * (1.0f / samplesCount);
    float radius = sqrt(saturate(1.0f - (height * height)));
    float3 direction;
    direction.x = cos(azimuth) * radius;
    direction.y = sin(azimuth) * radius;
    direction.z = height;
    return direction;
}
// Calculates the normalized probe ray direction (Spherical Fibonacci direction for the ray index rotated by the current probes rays rotation)
float3 GetProbeRayDirection(DDGIData data, uint rayIndex)
{
    float3 fibonacciDirection = GetSphericalFibonacci(rayIndex, data.RaysCount);
    float3 rotatedDirection = QuaternionRotate(data.RaysRotation, fibonacciDirection);
    return normalize(rotatedDirection);
}
#ifdef _CS_Classify
RWTexture2D<float4> RWProbesState : register(u0);
Texture3D<float> GlobalSDFTex[4] : register(t0);
// Compute shader for classifying probes (active or inactive) and relocating them out of the geometry with Global SDF.
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(DDGI_PROBE_CLASSIFY_GROUP_SIZE, 1, 1)]
void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
{
    // One thread per probe (skip threads outside the probes range)
    uint totalProbes = DDGI.ProbesCounts.x * DDGI.ProbesCounts.y * DDGI.ProbesCounts.z;
    if (DispatchThreadId.x >= totalProbes)
        return;
    uint3 probeCoords = GetDDGIProbeCoords(DDGI, DispatchThreadId.x);
    uint probeIndex = GetDDGIScrollingProbeIndex(DDGI, probeCoords);
    int2 stateTexel = GetDDGIProbeTexelCoords(DDGI, probeIndex);

    // Read the current state and evaluate the relocated probe position
    // TODO: reset probe offset for scrolled probes
    float4 state = RWProbesState[stateTexel];
    float3 position = GetDDGIProbeWorldPosition(DDGI, probeCoords);
    position.xyz += state.xyz;
    state.w = DDGI_PROBE_STATE_ACTIVE;

    // Global SDF gives the distance and the direction to the closest scene geometry
    float sdf;
    float3 sdfNormal = normalize(SampleGlobalSDFGradient(GlobalSDF, GlobalSDFTex, position.xyz, sdf));
    float threshold = GlobalSDF.CascadeVoxelSize[0] * 0.5f;
    float distanceLimit = length(DDGI.ProbesSpacing) * 1.5f + threshold;
    float relocateLimit = length(DDGI.ProbesSpacing) * 0.6f;
    float sdfAbs = abs(sdf);

    if (sdfAbs > distanceLimit + threshold)
    {
        // Too far from any geometry so disable the probe (and drop its relocation)
        state = float4(0, 0, 0, DDGI_PROBE_STATE_INACTIVE);
    }
    else if (sdf < threshold)
    {
        // Inside (or almost touching) the geometry
        if (sdfAbs < relocateLimit)
        {
            // Move the probe along the SDF gradient
            state.xyz = state.xyz + sdfNormal * (sdf + threshold);
        }
        else
        {
            // Too deep to relocate so use the original location
            state.xyz = float3(0, 0, 0);
        }
    }
    else if (sdf > relocateLimit)
    {
        // Far enough from any geometry so use the original location
        state.xyz = float3(0, 0, 0);
    }

    RWProbesState[stateTexel] = state;
}
#endif
#ifdef _CS_TraceRays
RWTexture2D<float4> RWProbesTrace : register(u0);
Texture3D<float> GlobalSDFTex[4] : register(t0);
Texture3D<float> GlobalSDFMip[4] : register(t4);
ByteAddressBuffer GlobalSurfaceAtlasChunks : register(t8);
Buffer<float4> GlobalSurfaceAtlasCulledObjects : register(t9);
Texture2D GlobalSurfaceAtlasDepth : register(t10);
Texture2D GlobalSurfaceAtlasTex : register(t11);
Texture2D<float4> ProbesState : register(t12);
// Compute shader for tracing probe rays with Global SDF and shading the hits with Global Surface Atlas.
// Writes radiance (RGB) and hit distance (A, negative when the ray starts inside the geometry) per ray.
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(DDGI_TRACE_RAYS_GROUP_SIZE_X, 1, 1)]
void CS_TraceRays(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_DispatchThreadID, uint3 GroupThreadId : SV_GroupThreadID)
{
    // Dispatch X axis enumerates rays, Y axis enumerates probes
    uint rayIndex = DispatchThreadId.x;
    uint3 probeCoords = GetDDGIProbeCoords(DDGI, DispatchThreadId.y);
    uint probeIndex = GetDDGIScrollingProbeIndex(DDGI, probeCoords);

    // Disabled probes don't trace any rays
    float4 probe = LoadDDGIProbePositionAndState(DDGI, ProbesState, probeIndex, probeCoords);
    if (probe.w == DDGI_PROBE_STATE_INACTIVE)
        return;

    // Trace the ray from the probe location with Global SDF
    float3 rayDirection = GetProbeRayDirection(DDGI, rayIndex);
    GlobalSDFTrace trace;
    trace.Init(probe.xyz, rayDirection, 0.0f, DDGI.RayMaxDistance);
    GlobalSDFHit hit = RayTraceGlobalSDF(GlobalSDF, GlobalSDFTex, GlobalSDFMip, trace);

    float4 traceResult;
    if (!hit.IsHit())
    {
        // Ray hits sky
        traceResult.rgb = float3(0, 0, 0); // TODO: sample sky/skybox with a fallback radiance
        traceResult.a = 1e27f; // Sky is the limit
    }
    else if (hit.HitSDF <= 0.0f && hit.HitTime <= GlobalSDF.CascadeVoxelSize[0])
    {
        // Ray starts inside geometry (mark as negative distance and reduce its influence during irradiance blending)
        traceResult = float4(0, 0, 0, hit.HitTime * -0.25f);
    }
    else
    {
        // Sample Global Surface Atlas to get the lighting at the hit location
        float3 hitPosition = hit.GetHitPosition(trace);
        float surfaceThreshold = GetGlobalSurfaceAtlasThreshold(hit);
        float4 surfaceColor = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, GlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hitPosition, -rayDirection, surfaceThreshold);
        // Add some bias to prevent self occlusion artifacts in Chebyshev due to Global SDF being very incorrect in small scale
        float hitDistance = hit.HitTime + GlobalSDF.CascadeVoxelSize[hit.HitCascade] * 0.5f;
        traceResult = float4(surfaceColor.rgb, max(hitDistance, 0));
    }

    // Write into probes trace results
    RWProbesTrace[uint2(rayIndex, probeIndex)] = traceResult;
}
#endif
#if defined(_CS_UpdateProbes) || defined(_CS_UpdateBorders)
// Select the probe resolution (in texels, without borders) based on the shader permutation
#if DDGI_PROBE_UPDATE_MODE == 0
// Update irradiance
#define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_IRRADIANCE
#else
// Update distance
#define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_DISTANCE
#endif
// Per-group cache of the probe trace results: radiance (RGB) with hit distance (A) and the direction of every ray
groupshared float4 CachedProbesTraceRadiance[DDGI_TRACE_RAYS_LIMIT];
groupshared float3 CachedProbesTraceDirection[DDGI_TRACE_RAYS_LIMIT];
// Probes atlas that is being updated (irradiance or distance)
RWTexture2D<float4> RWOutput : register(u0);
// Probes state texture (relocation offset in XYZ, state in W)
Texture2D<float4> ProbesState : register(t0);
// Results of the rays tracing (X axis is a ray index, Y axis is a probe index)
Texture2D<float4> ProbesTrace : register(t1);
// Compute shader for updating probes irradiance or distance texture.
// Every thread group processes a single probe and every thread updates a single texel of the probe octahedral map (without borders):
// 1. Clears probes that have been scrolled to a new location.
// 2. Caches the probe rays (trace results and directions) in the groupshared memory.
// 3. Accumulates rays into the texel (cosine-weighted) and blends the result with the previous texel value.
META_CS(true, FEATURE_LEVEL_SM5)
META_PERMUTATION_1(DDGI_PROBE_UPDATE_MODE=0)
META_PERMUTATION_1(DDGI_PROBE_UPDATE_MODE=1)
[numthreads(DDGI_PROBE_RESOLUTION, DDGI_PROBE_RESOLUTION, 1)]
void CS_UpdateProbes(uint3 DispatchThreadId : SV_DispatchThreadID, uint GroupIndex : SV_GroupIndex)
{
    // Get probe index and atlas location in the atlas
    uint probeIndex = GetDDGIProbeIndex(DDGI, DispatchThreadId.xy, DDGI_PROBE_RESOLUTION);
    uint probesCount = DDGI.ProbesCounts.x * DDGI.ProbesCounts.y * DDGI.ProbesCounts.z;
    bool skip = probeIndex >= probesCount;
    // Output texel skips 1px border around every probe tile
    uint2 outputCoords = uint2(1, 1) + DispatchThreadId.xy + (DispatchThreadId.xy / DDGI_PROBE_RESOLUTION) * 2;

    // Clear probes that have been scrolled to a new positions (blending with current irradiance will happen the next frame)
    uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex);
    UNROLL
    for (uint planeIndex = 0; planeIndex < 3; planeIndex++)
    {
        if (DDGI.ProbeScrollClear[planeIndex])
        {
            // Use signed math here: scroll offset can be negative and mixing it with unsigned probes count
            // would convert it into a huge unsigned value before the modulo (wrong plane for non-power-of-two counts)
            int scrollOffset = DDGI.ProbesScrollOffsets[planeIndex];
            int scrollDirection = DDGI.ProbeScrollDirections[planeIndex];
            int probeCount = (int)DDGI.ProbesCounts[planeIndex];
            int scrolledPlane = scrollDirection ? (scrollOffset - 1) : scrollOffset;
            // Wrap around the grid size (handles negative values)
            int coord = ((scrolledPlane % probeCount) + probeCount) % probeCount;
            if ((int)probeCoords[planeIndex] == coord)
            {
                // Clear probe (threads outside the probes range don't write anything)
                if (!skip)
                    RWOutput[outputCoords] = float4(0, 0, 0, 0);
                skip = true;
            }
        }
    }

    // Skip disabled probes
    float probeState = LoadDDGIProbeState(DDGI, ProbesState, probeIndex);
    if (probeState == DDGI_PROBE_STATE_INACTIVE)
        skip = true;

    // Calculate octahedral projection for probe (unwraps spherical projection into a square)
    float2 octahedralCoords = GetOctahedralCoords(DispatchThreadId.xy, DDGI_PROBE_RESOLUTION);
    float3 octahedralDirection = GetOctahedralDirection(octahedralCoords);

    // Load trace rays results into shared memory to reuse across whole thread group
    // (skipped threads still take part in it so they cannot return before the barrier)
    uint count = (uint)(ceil((float)(DDGI_TRACE_RAYS_LIMIT) / (float)(DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION)));
    for (uint i = 0; i < count; i++)
    {
        uint rayIndex = (GroupIndex * count) + i;
        if (rayIndex >= DDGI.RaysCount)
            break;
        CachedProbesTraceRadiance[rayIndex] = ProbesTrace[uint2(rayIndex, probeIndex)];
        CachedProbesTraceDirection[rayIndex] = GetProbeRayDirection(DDGI, rayIndex);
    }
    GroupMemoryBarrierWithGroupSync();

    // TODO: optimize probes updating to build indirect dispatch args and probes indices list before tracing rays and blending irradiance/distance
    if (skip)
        return;

    // Loop over rays
    float4 result = float4(0, 0, 0, 0);
#if DDGI_PROBE_UPDATE_MODE == 0
    uint backfacesCount = 0;
    uint backfacesLimit = uint(DDGI.RaysCount * 0.1f);
#else
    float distanceLimit = length(DDGI.ProbesSpacing) * 1.5f;
#endif
    LOOP
    for (uint rayIndex = 0; rayIndex < DDGI.RaysCount; rayIndex++)
    {
        float3 rayDirection = CachedProbesTraceDirection[rayIndex];
        float rayWeight = max(dot(octahedralDirection, rayDirection), 0.0f);
        float4 rayRadiance = CachedProbesTraceRadiance[rayIndex];
#if DDGI_PROBE_UPDATE_MODE == 0
        // Negative distance marks rays that started inside the geometry
        if (rayRadiance.w < 0.0f)
        {
            // Count backface hits
            backfacesCount++;
            // Skip further blending after reaching backfaces limit (keeps the previous texel value)
            if (backfacesCount >= backfacesLimit)
                return;
            continue;
        }
        // Add radiance (RGB) and weight (A)
        result += float4(rayRadiance.rgb * rayWeight, rayWeight);
#else
        // Increase reaction speed for depth discontinuities
        rayWeight = pow(rayWeight, 4.0f);
        // Add distance (R), distance^2 (G) and weight (A)
        float rayDistance = min(abs(rayRadiance.w), distanceLimit);
        result += float4(rayDistance * rayWeight, (rayDistance * rayDistance) * rayWeight, 0.0f, rayWeight);
#endif
    }

    // Normalize results (epsilon prevents division by zero when no ray contributed)
    float epsilon = (float)DDGI.RaysCount * 1e-9f;
    result.rgb *= 1.0f / (2.0f * max(result.a, epsilon));

    // Blend current value with the previous probe data
    float3 previous = RWOutput[outputCoords].rgb;
    float historyWeight = DDGI.ProbeHistoryWeight;
    if (dot(previous, previous) == 0)
    {
        // Cut any blend from zero
        historyWeight = 0.0f;
    }
#if DDGI_PROBE_UPDATE_MODE == 0
    result *= IndirectLightingIntensity;
#if DDGI_SRGB_BLENDING
    result.rgb = pow(result.rgb, 1.0f / DDGI.IrradianceGamma);
#endif
    float3 irradianceDelta = result.rgb - previous.rgb;
    float irradianceDeltaMax = Max3(abs(irradianceDelta));
    if (irradianceDeltaMax > 0.25f)
    {
        // Reduce history weight after significant lighting change
        historyWeight = max(historyWeight - 0.2f, 0.0f);
    }
    if (irradianceDeltaMax > 0.8f)
    {
        // Reduce flickering during rapid brightness changes
        result.rgb = previous.rgb + (irradianceDelta * 0.25f);
    }
    float3 resultDelta = (1.0f - historyWeight) * irradianceDelta;
    // When getting darker ensure a minimal step so the value can reach the target (limited by the full delta)
    if (Max3(result.rgb) < Max3(previous.rgb))
        resultDelta = min(max(abs(resultDelta), 1.0f / 1024.0f), abs(irradianceDelta)) * sign(resultDelta);
    result = float4(previous.rgb + resultDelta, 1.0f);
#else
    result = float4(lerp(result.rg, previous.rg, historyWeight), 0.0f, 1.0f);
#endif
    RWOutput[outputCoords] = result;
}
// Compute shader for updating probes irradiance or distance texture borders (fills gaps between probes to support bilinear filtering).
// BORDER_ROW=1 permutation fills the top and bottom border rows of every probe tile,
// BORDER_ROW=0 permutation fills the left and right border columns and the tile corners.
META_CS(true, FEATURE_LEVEL_SM5)
META_PERMUTATION_2(DDGI_PROBE_UPDATE_MODE=0, BORDER_ROW=1)
META_PERMUTATION_2(DDGI_PROBE_UPDATE_MODE=0, BORDER_ROW=0)
META_PERMUTATION_2(DDGI_PROBE_UPDATE_MODE=1, BORDER_ROW=1)
META_PERMUTATION_2(DDGI_PROBE_UPDATE_MODE=1, BORDER_ROW=0)
[numthreads(DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE, 1)]
void CS_UpdateBorders(uint3 DispatchThreadId : SV_DispatchThreadID)
{
    // Probe tile size with 1px border on each side
    uint probeSideLength = DDGI_PROBE_RESOLUTION + 2;
    uint probeSideLengthMinusOne = probeSideLength - 1;
    uint2 copyCoordinates = uint2(0, 0);
    uint2 threadCoordinates = DispatchThreadId.xy;
#if BORDER_ROW
    threadCoordinates.y *= probeSideLength;
    uint corner = DispatchThreadId.x % probeSideLength;
#else
    threadCoordinates.x *= probeSideLength;
    uint corner = threadCoordinates.y % probeSideLength;
#endif
    if (corner == 0 || corner == probeSideLengthMinusOne)
    {
#if !BORDER_ROW
        // Left corner
        copyCoordinates.x = threadCoordinates.x + DDGI_PROBE_RESOLUTION;
        // Convert to signed before the subtraction: unsigned 'corner - 1' underflows for the first corner
        // and the sign of an unsigned value is never negative (the copy would go in the wrong direction)
        copyCoordinates.y = threadCoordinates.y - sign((int)corner - 1) * DDGI_PROBE_RESOLUTION;
        RWOutput[threadCoordinates] = RWOutput[copyCoordinates];
        // Right corner
        threadCoordinates.x += probeSideLengthMinusOne;
        copyCoordinates.x = threadCoordinates.x - DDGI_PROBE_RESOLUTION;
        RWOutput[threadCoordinates] = RWOutput[copyCoordinates];
#endif
        return;
    }
#if BORDER_ROW
    // Top row
    uint probeStart = uint(threadCoordinates.x / probeSideLength) * probeSideLength;
    uint offset = probeSideLengthMinusOne - (threadCoordinates.x % probeSideLength);
    copyCoordinates = uint2(probeStart + offset, threadCoordinates.y + 1);
#else
    // Left column
    uint probeStart = uint(threadCoordinates.y / probeSideLength) * probeSideLength;
    uint offset = probeSideLengthMinusOne - (threadCoordinates.y % probeSideLength);
    copyCoordinates = uint2(threadCoordinates.x + 1, probeStart + offset);
#endif
    RWOutput[threadCoordinates] = RWOutput[copyCoordinates];
#if BORDER_ROW
    // Bottom row
    threadCoordinates.y += probeSideLengthMinusOne;
    copyCoordinates = uint2(probeStart + offset, threadCoordinates.y - 1);
#else
    // Right column
    threadCoordinates.x += probeSideLengthMinusOne;
    copyCoordinates = uint2(threadCoordinates.x - 1, probeStart + offset);
#endif
    RWOutput[threadCoordinates] = RWOutput[copyCoordinates];
}
#endif
#ifdef _PS_IndirectLighting
#include "./Flax/GBuffer.hlsl"
#include "./Flax/LightingCommon.hlsl"
Texture2D<float4> ProbesState : register(t4);
Texture2D<float4> ProbesDistance : register(t5);
Texture2D<float4> ProbesIrradiance : register(t6);
// Pixel shader for drawing the DDGI indirect diffuse lighting in fullscreen
META_PS(true, FEATURE_LEVEL_SM5)
void PS_IndirectLighting(Quad_VS2PS input, out float4 output : SV_Target0)
{
    output = 0;

    // Read the surface properties from GBuffer
    GBufferSample gBuffer = SampleGBuffer(GBuffer, input.TexCoord);

    // Unlit pixels don't receive any indirect lighting
    BRANCH
    if (gBuffer.ShadingModel == SHADING_MODEL_UNLIT)
    {
        discard;
        return;
    }

    // Gather irradiance from the probes around the pixel
    float bias = 1.0f;
    float3 irradiance = SampleDDGIIrradiance(DDGI, ProbesState, ProbesDistance, ProbesIrradiance, gBuffer.WorldPos, gBuffer.Normal, bias);

    // Apply the diffuse response of the surface
    float3 diffuse = Diffuse_Lambert(GetDiffuseColor(gBuffer));
    output = float4(diffuse * irradiance, 1);
}
#endif

View File

@@ -7,7 +7,7 @@
#define GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION 40 // Amount of chunks (in each direction) to split atlas draw distance for objects culling #define GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION 40 // Amount of chunks (in each direction) to split atlas draw distance for objects culling
#define GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE 4 #define GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE 4
#define GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE 5 // Amount of float4s per-tile #define GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE 5 // Amount of float4s per-tile
#define GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD 0.1f // Cut-off value for tiles transitions blending during sampling #define GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD 0.05f // Cut-off value for tiles transitions blending during sampling
#define GLOBAL_SURFACE_ATLAS_TILE_PROJ_PLANE_OFFSET 0.1f // Small offset to prevent clipping with the closest triangles (shifts near and far planes) #define GLOBAL_SURFACE_ATLAS_TILE_PROJ_PLANE_OFFSET 0.1f // Small offset to prevent clipping with the closest triangles (shifts near and far planes)
struct GlobalSurfaceTile struct GlobalSurfaceTile

View File

@@ -267,7 +267,7 @@ float4 PS_Debug(Quad_VS2PS input) : SV_Target
//return float4(hit.HitNormal * 0.5f + 0.5f, 1); //return float4(hit.HitNormal * 0.5f + 0.5f, 1);
// Sample Global Surface Atlas at the hit location // Sample Global Surface Atlas at the hit location
float surfaceThreshold = hit.HitCascade * 10.0f + 20.0f; // Scale the threshold based on the hit cascade (less precision) float surfaceThreshold = GetGlobalSurfaceAtlasThreshold(hit);
float4 surfaceColor = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, GlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hit.GetHitPosition(trace), -viewRay, surfaceThreshold); float4 surfaceColor = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, GlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hit.GetHitPosition(trace), -viewRay, surfaceThreshold);
return float4(surfaceColor.rgb, 1); return float4(surfaceColor.rgb, 1);
} }

View File

@@ -45,6 +45,7 @@ struct GlobalSDFHit
float HitTime; float HitTime;
uint HitCascade; uint HitCascade;
uint StepsCount; uint StepsCount;
float HitSDF;
bool IsHit() bool IsHit()
{ {
@@ -180,6 +181,7 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D<float> tex[4]
// Surface hit // Surface hit
hit.HitTime = max(stepTime + stepDistance - minSurfaceThickness, 0.0f); hit.HitTime = max(stepTime + stepDistance - minSurfaceThickness, 0.0f);
hit.HitCascade = cascade; hit.HitCascade = cascade;
hit.HitSDF = stepDistance;
if (trace.NeedsHitNormal) if (trace.NeedsHitNormal)
{ {
// Calculate hit normal from SDF gradient // Calculate hit normal from SDF gradient
@@ -202,3 +204,10 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D<float> tex[4]
} }
return hit; return hit;
} }
// Calculates the surface threshold for Global Surface Atlas sampling which matches the Global SDF trace to reduce artifacts
float GetGlobalSurfaceAtlasThreshold(GlobalSDFHit hit)
{
    // Further cascades have less precision so the threshold grows with the hit cascade index
    const float thresholdPerCascade = 10.0f;
    const float thresholdBase = 20.0f;
    return hit.HitCascade * thresholdPerCascade + thresholdBase;
}