diff --git a/Content/Editor/Camera/O_Camera.flax b/Content/Editor/Camera/O_Camera.flax index 443f7502f..5e0940624 100644 --- a/Content/Editor/Camera/O_Camera.flax +++ b/Content/Editor/Camera/O_Camera.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4c356aa9250b8d42c6ba44fa23bad29a4b2c216f31061df03f20efd3c871414a -size 88395 +oid sha256:a2ec3410338bc342f7de1c4af6ae0f6310c739140e83de45632f3a3bc7c47f12 +size 88720 diff --git a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax new file mode 100644 index 000000000..f38eb04bd --- /dev/null +++ b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd7232c07e5969b4db266528006dc0bad9b737f77da7bca56730fcf652509e9a +size 37759 diff --git a/Content/Engine/Models/SphereLowPoly.flax b/Content/Engine/Models/SphereLowPoly.flax index 57dc378b2..57dfc621c 100644 --- a/Content/Engine/Models/SphereLowPoly.flax +++ b/Content/Engine/Models/SphereLowPoly.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0cd1093c7666a9a8df46b7b1d5cb1fe302bdf64eabc8d85c1cf1b2f91d35526d -size 3448 +oid sha256:572d0b6d951c1fe3a39aa2bfc6007b951a89abb010b72d138dd9d51ec12ec617 +size 3757 diff --git a/Content/Shaders/GI/DDGI.flax b/Content/Shaders/GI/DDGI.flax new file mode 100644 index 000000000..a8463dbec --- /dev/null +++ b/Content/Shaders/GI/DDGI.flax @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97b8d640c0f47b0eedba8bb90190043081ae26a5e371eba1c5e5c1271e411ddf +size 18125 diff --git a/Content/Shaders/GI/GlobalSurfaceAtlas.flax b/Content/Shaders/GI/GlobalSurfaceAtlas.flax index 2376e451e..460f42dc1 100644 --- a/Content/Shaders/GI/GlobalSurfaceAtlas.flax +++ b/Content/Shaders/GI/GlobalSurfaceAtlas.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ec7fc26caf2d2c9c216cc47684ac2adc6b872a5a61cdd7028a59e9230eae0f4 -size 10668 +oid 
sha256:d10f34a19ddbeea914082a23362317a2fc6ca31ca18d9307447023c9f2789414 +size 10610 diff --git a/Source/Editor/Cooker/Steps/DeployDataStep.cpp b/Source/Editor/Cooker/Steps/DeployDataStep.cpp index b41b28e43..4a027096e 100644 --- a/Source/Editor/Cooker/Steps/DeployDataStep.cpp +++ b/Source/Editor/Cooker/Steps/DeployDataStep.cpp @@ -70,6 +70,7 @@ bool DeployDataStep::Perform(CookingData& data) data.AddRootEngineAsset(TEXT("Shaders/GPUParticlesSorting")); data.AddRootEngineAsset(TEXT("Shaders/GlobalSignDistanceField")); data.AddRootEngineAsset(TEXT("Shaders/GI/GlobalSurfaceAtlas")); + data.AddRootEngineAsset(TEXT("Shaders/GI/DDGI")); data.AddRootEngineAsset(TEXT("Shaders/Quad")); data.AddRootEngineAsset(TEXT("Shaders/Reflections")); data.AddRootEngineAsset(TEXT("Shaders/Shadows")); diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp new file mode 100644 index 000000000..45fb8ba58 --- /dev/null +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -0,0 +1,528 @@ +// Copyright (c) 2012-2022 Wojciech Figat. All rights reserved. 
+ +#include "DynamicDiffuseGlobalIllumination.h" +#include "GlobalSurfaceAtlasPass.h" +#include "../GlobalSignDistanceFieldPass.h" +#include "../RenderList.h" +#include "Engine/Core/Random.h" +#include "Engine/Core/Types/Variant.h" +#include "Engine/Core/Math/Int3.h" +#include "Engine/Core/Math/Matrix3x3.h" +#include "Engine/Core/Math/Quaternion.h" +#include "Engine/Engine/Engine.h" +#include "Engine/Content/Content.h" +#include "Engine/Debug/DebugDraw.h" +#include "Engine/Graphics/GPUDevice.h" +#include "Engine/Graphics/RenderTask.h" +#include "Engine/Graphics/RenderBuffers.h" +#include "Engine/Graphics/RenderTargetPool.h" +#include "Engine/Graphics/Shaders/GPUShader.h" +#include "Engine/Level/Actors/BrushMode.h" +#include "Engine/Renderer/GBufferPass.h" + +// Implementation based on: +// "Dynamic Diffuse Global Illumination with Ray-Traced Irradiance Probes", Journal of Computer Graphics Tools, April 2019 +// Zander Majercik, Jean-Philippe Guertin, Derek Nowrouzezahrai, and Morgan McGuire +// https://morgan3d.github.io/articles/2019-04-01-ddgi/index.html and https://gdcvault.com/play/1026182/ +// +// Additional references: +// "Scaling Probe-Based Real-Time Dynamic Global Illumination for Production", https://jcgt.org/published/0010/02/01/ +// "Dynamic Diffuse Global Illumination with Ray-Traced Irradiance Fields", https://jcgt.org/published/0008/02/01/ + +// This must match HLSL +#define DDGI_TRACE_RAYS_GROUP_SIZE_X 32 +#define DDGI_TRACE_RAYS_LIMIT 512 // Limit of rays per-probe (runtime value can be smaller) +#define DDGI_PROBE_RESOLUTION_IRRADIANCE 6 // Resolution (in texels) for probe irradiance data (excluding 1px padding on each side) +#define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side) +#define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8 +#define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 + +PACK_STRUCT(struct Data0 + { + DynamicDiffuseGlobalIlluminationPass::ConstantsData DDGI; + 
GlobalSignDistanceFieldPass::ConstantsData GlobalSDF; + GlobalSurfaceAtlasPass::ConstantsData GlobalSurfaceAtlas; + GBufferData GBuffer; + Vector3 Padding0; + float IndirectLightingIntensity; + }); + +class DDGICustomBuffer : public RenderBuffers::CustomBuffer +{ +public: + int32 ProbeRaysCount = 0; + float ProbesSpacing = 0.0f; + Int3 ProbeCounts = Int3::Zero; + Vector3 ProbesOrigin; + Int3 ProbeScrollOffsets; + Int3 ProbeScrollDirections; + bool ProbeScrollClear[3]; + GPUTexture* ProbesTrace = nullptr; // Probes ray tracing: (RGB: hit radiance, A: hit distance) + GPUTexture* ProbesState = nullptr; // Probes state: (RGB: world-space offset, A: state) + GPUTexture* ProbesIrradiance = nullptr; // Probes irradiance (RGB: sRGB color) + GPUTexture* ProbesDistance = nullptr; // Probes distance (R: mean distance, G: mean distance^2) + DynamicDiffuseGlobalIlluminationPass::BindingData Result; + + FORCE_INLINE void Clear() + { + ProbesOrigin = Vector3::Zero; + ProbeScrollOffsets = Int3::Zero; + ProbeScrollDirections = Int3::Zero; + ProbeScrollClear[0] = false; + ProbeScrollClear[1] = false; + ProbeScrollClear[2] = false; + RenderTargetPool::Release(ProbesTrace); + RenderTargetPool::Release(ProbesState); + RenderTargetPool::Release(ProbesIrradiance); + RenderTargetPool::Release(ProbesDistance); + } + + ~DDGICustomBuffer() + { + Clear(); + } +}; + +void CalculateVolumeRandomRotation(Matrix3x3& matrix) +{ + // Reference: James Arvo's algorithm Graphics Gems 3 (pages 117-120) + // http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.53.1357&rep=rep1&type=pdf + + float u1 = TWO_PI * Random::Rand(); + float cos1 = Math::Cos(u1); + float sin1 = Math::Sin(u1); + float u2 = TWO_PI * Random::Rand(); + float cos2 = Math::Cos(u2); + float sin2 = Math::Sin(u2); + + float u3 = Random::Rand(); + float sq3 = 2.0f * sqrtf(u3 * (1.0f - u3)); + + float s2 = 2.0f * u3 * sin2 * sin2 - 1.0f; + float c2 = 2.0f * u3 * cos2 * cos2 - 1.0f; + float sc = 2.0f * u3 * sin2 * cos2; + + matrix.M11 = 
cos1 * c2 - sin1 * sc; + matrix.M12 = sin1 * c2 + cos1 * sc; + matrix.M13 = sq3 * cos2; + + matrix.M21 = cos1 * sc - sin1 * s2; + matrix.M22 = sin1 * sc + cos1 * s2; + matrix.M23 = sq3 * sin2; + + matrix.M31 = cos1 * (sq3 * cos2) - sin1 * (sq3 * sin2); + matrix.M32 = sin1 * (sq3 * cos2) + cos1 * (sq3 * sin2); + matrix.M33 = 1.0f - 2.0f * u3; +} + +int32 AbsFloor(const float value) +{ + return value >= 0.0f ? (int32)Math::Floor(value) : (int32)Math::Ceil(value); +} + +int32 GetSignNotZero(const float value) +{ + return value >= 0.0f ? 1 : -1; +} + +Vector3 GetVolumeOrigin(DDGICustomBuffer& ddgiData) +{ + return ddgiData.ProbesOrigin + Vector3(ddgiData.ProbeScrollOffsets) * ddgiData.ProbesSpacing; +} + +void CalculateVolumeScrolling(DDGICustomBuffer& ddgiData, const Vector3& viewOrigin) +{ + // Reset the volume origin and scroll offsets for each axis + for (int32 axis = 0; axis < 3; axis++) + { + if (ddgiData.ProbeScrollOffsets.Raw[axis] != 0 && (ddgiData.ProbeScrollOffsets.Raw[axis] % ddgiData.ProbeCounts.Raw[axis] == 0)) + { + ddgiData.ProbesOrigin.Raw[axis] += (float)ddgiData.ProbeCounts.Raw[axis] * ddgiData.ProbesSpacing * (float)ddgiData.ProbeScrollDirections.Raw[axis]; + ddgiData.ProbeScrollOffsets.Raw[axis] = 0; + } + } + + // Calculate the count of grid cells between the view origin and the scroll anchor + const Vector3 translation = viewOrigin - GetVolumeOrigin(ddgiData); + for (int32 axis = 0; axis < 3; axis++) + { + const int32 scroll = AbsFloor(translation.Raw[axis] / ddgiData.ProbesSpacing); + ddgiData.ProbeScrollOffsets.Raw[axis] += scroll; + ddgiData.ProbeScrollClear[axis] = scroll != 0; + ddgiData.ProbeScrollDirections.Raw[axis] = GetSignNotZero(translation.Raw[axis]); + } +} + +String DynamicDiffuseGlobalIlluminationPass::ToString() const +{ + return TEXT("DynamicDiffuseGlobalIlluminationPass"); +} + +bool DynamicDiffuseGlobalIlluminationPass::Init() +{ + // Check platform support + const auto device = GPUDevice::Instance; + _supported = 
device->GetFeatureLevel() >= FeatureLevel::SM5 && device->Limits.HasCompute && device->Limits.HasTypedUAVLoad; + return false; +} + +bool DynamicDiffuseGlobalIlluminationPass::setupResources() +{ + if (!_supported) + return true; + + // Load shader + if (!_shader) + { + _shader = Content::LoadAsyncInternal(TEXT("Shaders/GI/DDGI")); + if (_shader == nullptr) + return true; +#if COMPILE_WITH_DEV_ENV + _shader.Get()->OnReloading.Bind(this); +#endif + } + if (!_shader->IsLoaded()) + return true; + + // Initialize resources + const auto shader = _shader->GetShader(); + _cb0 = shader->GetCB(0); + if (!_cb0) + return true; + _csClassify = shader->GetCS("CS_Classify"); + _csTraceRays = shader->GetCS("CS_TraceRays"); + _csUpdateProbesIrradiance = shader->GetCS("CS_UpdateProbes", 0); + _csUpdateProbesDistance = shader->GetCS("CS_UpdateProbes", 1); + _csUpdateBordersIrradianceRow = shader->GetCS("CS_UpdateBorders", 0); + _csUpdateBordersIrradianceCollumn = shader->GetCS("CS_UpdateBorders", 1); + _csUpdateBordersDistanceRow = shader->GetCS("CS_UpdateBorders", 2); + _csUpdateBordersDistanceCollumn = shader->GetCS("CS_UpdateBorders", 3); + auto device = GPUDevice::Instance; + auto psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; + if (!_psIndirectLighting) + { + _psIndirectLighting = device->CreatePipelineState(); + psDesc.PS = shader->GetPS("PS_IndirectLighting"); + psDesc.BlendMode = BlendingMode::Additive; + if (_psIndirectLighting->Init(psDesc)) + return true; + } + + return false; +} + +#if COMPILE_WITH_DEV_ENV + +void DynamicDiffuseGlobalIlluminationPass::OnShaderReloading(Asset* obj) +{ + LastFrameShaderReload = Engine::FrameCount; + _csClassify = nullptr; + _csTraceRays = nullptr; + _csUpdateProbesIrradiance = nullptr; + _csUpdateProbesDistance = nullptr; + _csUpdateBordersIrradianceRow = nullptr; + _csUpdateBordersIrradianceCollumn = nullptr; + _csUpdateBordersDistanceRow = nullptr; + _csUpdateBordersDistanceCollumn = nullptr; + 
SAFE_DELETE_GPU_RESOURCE(_psIndirectLighting); + invalidateResources(); +} + +#endif + +void DynamicDiffuseGlobalIlluminationPass::Dispose() +{ + RendererPass::Dispose(); + + // Cleanup + _cb0 = nullptr; + _csTraceRays = nullptr; + _shader = nullptr; + SAFE_DELETE_GPU_RESOURCE(_psIndirectLighting); +#if USE_EDITOR + _debugModel = nullptr; + _debugMaterial = nullptr; +#endif +} + +bool DynamicDiffuseGlobalIlluminationPass::Get(const RenderBuffers* buffers, BindingData& result) +{ + auto* ddgiData = buffers ? buffers->FindCustomBuffer(TEXT("DDGI")) : nullptr; + if (ddgiData && ddgiData->LastFrameUsed + 1 >= Engine::FrameCount) // Allow to use data from the previous frame (eg. particles in Editor using the Editor viewport in Game viewport - Game render task runs first) + { + result = ddgiData->Result; + return false; + } + return true; +} + +bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext, GPUContext* context, GPUTextureView* lightBuffer) +{ + // Skip if not supported + if (checkIfSkipPass()) + return true; + if (renderContext.List->Scenes.Count() == 0) + return true; + auto& ddgiData = *renderContext.Buffers->GetCustomBuffer(TEXT("DDGI")); + + // Render Global SDF and Global Surface Atlas for software raytracing + GlobalSignDistanceFieldPass::BindingData bindingDataSDF; + if (GlobalSignDistanceFieldPass::Instance()->Render(renderContext, context, bindingDataSDF)) + return true; + GlobalSurfaceAtlasPass::BindingData bindingDataSurfaceAtlas; + if (GlobalSurfaceAtlasPass::Instance()->Render(renderContext, context, bindingDataSurfaceAtlas)) + return true; + + // Skip if already done in the current frame + const auto currentFrame = Engine::FrameCount; + if (ddgiData.LastFrameUsed == currentFrame) + return false; + ddgiData.LastFrameUsed = currentFrame; + PROFILE_GPU_CPU("Dynamic Diffuse Global Illumination"); + + // TODO: configurable via graphics settings + const Quality quality = Quality::Ultra; + bool debugProbes = true; // TODO: add 
debug option to draw probes locations -> in Graphics window - Editor-only + // TODO: configurable via postFx settings (maybe use Global SDF distance?) + const float indirectLightingIntensity = 1.0f; + const Vector3 giDistance(2000, 2000, 2000); // GI distance around the view (in each direction) + const float giResolution = 100.0f; // GI probes placement spacing + const Int3 probesCounts(Vector3::Ceil(giDistance / giResolution)); + const Vector3 probesDistance = Vector3(probesCounts) * giResolution; + const int32 probeRaysCount = Math::Min(Math::AlignUp(256, DDGI_TRACE_RAYS_GROUP_SIZE_X), DDGI_TRACE_RAYS_LIMIT); // TODO: make it based on the GI Quality + const float probeHistoryWeight = 0.97f; + + // Init buffers + const int32 probesCount = probesCounts.X * probesCounts.Y * probesCounts.Z; + if (probesCount == 0 || indirectLightingIntensity <= ZeroTolerance) + return true; + int32 probesCountX = probesCounts.X * probesCounts.Y; + int32 probesCountY = probesCounts.Z; + bool clear = false; + if (Math::NotNearEqual(ddgiData.ProbesSpacing, giResolution) || ddgiData.ProbeCounts != probesCounts || ddgiData.ProbeRaysCount != probeRaysCount) + { + PROFILE_CPU_NAMED("Init"); + ddgiData.Clear(); + ddgiData.ProbeRaysCount = probeRaysCount; + ddgiData.ProbesSpacing = giResolution; + ddgiData.ProbeCounts = probesCounts; + + // Allocate probes textures + uint64 memUsage = 0; + auto desc = GPUTextureDescription::New2D(probesCountX, probesCountY, PixelFormat::Unknown); + // TODO rethink probes data placement in memory -> what if we get [50x50x30] resolution? That's 75000 probes! 
Use sparse storage with active-only probes +#define INIT_TEXTURE(texture, format, width, height) desc.Format = format; desc.Width = width; desc.Height = height; ddgiData.texture = RenderTargetPool::Get(desc); if (!ddgiData.texture) { ddgiData.ProbeRaysCount = 0; return true; } memUsage += ddgiData.texture->GetMemoryUsage() // On allocation failure the cached rays count is reset so the next frame re-runs this init instead of using null textures + desc.Flags = GPUTextureFlags::ShaderResource | GPUTextureFlags::UnorderedAccess; + INIT_TEXTURE(ProbesTrace, PixelFormat::R16G16B16A16_Float, probeRaysCount, probesCount); + INIT_TEXTURE(ProbesState, PixelFormat::R16G16B16A16_Float, probesCountX, probesCountY); // TODO: optimize to a RGBA32 (pos offset can be normalized to [0-0.5] range of ProbesSpacing and packed with state flag) + INIT_TEXTURE(ProbesIrradiance, PixelFormat::R11G11B10_Float, probesCountX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), probesCountY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2)); + INIT_TEXTURE(ProbesDistance, PixelFormat::R16G16_Float, probesCountX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), probesCountY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2)); +#undef INIT_TEXTURE + LOG(Info, "Dynamic Diffuse Global Illumination memory usage: {0} MB, probes: {1}", memUsage / 1024 / 1024, probesCount); + clear = true; + } +#if COMPILE_WITH_DEV_ENV // Same guard as the LastFrameShaderReload declaration in the header + clear |= ddgiData.LastFrameUsed <= LastFrameShaderReload; +#endif + if (clear) + { + // Clear probes + PROFILE_GPU("Clear"); + context->ClearUA(ddgiData.ProbesState, Vector4::Zero); + context->ClearUA(ddgiData.ProbesIrradiance, Vector4::Zero); + context->ClearUA(ddgiData.ProbesDistance, Vector4::Zero); + } + + // Compute random rotation matrix for probe rays orientation (randomized every frame) + Matrix3x3 raysRotationMatrix; + CalculateVolumeRandomRotation(raysRotationMatrix); + + // Compute scrolling (probes are placed around camera but are scrolling to increase stability during movement) + Vector3 viewOrigin = renderContext.View.Position; + Vector3 viewDirection = renderContext.View.Direction; + const float probesDistanceMax = probesDistance.MaxValue(); + const Vector2 viewRayHit = 
CollisionsHelper::LineHitsBox(viewOrigin, viewOrigin + viewDirection * (probesDistanceMax * 2.0f), viewOrigin - probesDistance, viewOrigin + probesDistance); + const float viewOriginOffset = viewRayHit.Y * probesDistanceMax * 0.8f; + viewOrigin += viewDirection * viewOriginOffset; + const float viewOriginSnapping = giResolution; + viewOrigin = Vector3::Floor(viewOrigin / viewOriginSnapping) * viewOriginSnapping; + CalculateVolumeScrolling(ddgiData, viewOrigin); + + // Upload constants + { + ddgiData.Result.Constants.ProbesOrigin = ddgiData.ProbesOrigin; + ddgiData.Result.Constants.ProbesSpacing = ddgiData.ProbesSpacing; + Quaternion& raysRotation = *(Quaternion*)&ddgiData.Result.Constants.RaysRotation; + Quaternion::RotationMatrix(raysRotationMatrix, raysRotation); + raysRotation.Conjugate(); + ddgiData.Result.Constants.ProbesCounts[0] = probesCounts.X; + ddgiData.Result.Constants.ProbesCounts[1] = probesCounts.Y; + ddgiData.Result.Constants.ProbesCounts[2] = probesCounts.Z; + ddgiData.Result.Constants.ProbesScrollOffsets = ddgiData.ProbeScrollOffsets; + ddgiData.Result.Constants.ProbeScrollDirections = ddgiData.ProbeScrollDirections; + ddgiData.Result.Constants.ProbeScrollClear[0] = ddgiData.ProbeScrollClear[0] != 0; + ddgiData.Result.Constants.ProbeScrollClear[1] = ddgiData.ProbeScrollClear[1] != 0; + ddgiData.Result.Constants.ProbeScrollClear[2] = ddgiData.ProbeScrollClear[2] != 0; + ddgiData.Result.Constants.RayMaxDistance = 10000.0f; // TODO: adjust to match perf/quality ratio (make it based on Global SDF and Global Surface Atlas distance) + ddgiData.Result.Constants.ViewDir = viewDirection; + ddgiData.Result.Constants.RaysCount = probeRaysCount; + ddgiData.Result.Constants.ProbeHistoryWeight = probeHistoryWeight; + ddgiData.Result.Constants.IrradianceGamma = 5.0f; + ddgiData.Result.ProbesState = ddgiData.ProbesState->View(); + ddgiData.Result.ProbesDistance = ddgiData.ProbesDistance->View(); + ddgiData.Result.ProbesIrradiance = 
ddgiData.ProbesIrradiance->View(); + + Data0 data; + data.DDGI = ddgiData.Result.Constants; + data.GlobalSDF = bindingDataSDF.Constants; + data.GlobalSurfaceAtlas = bindingDataSurfaceAtlas.Constants; + data.IndirectLightingIntensity = indirectLightingIntensity; + GBufferPass::SetInputs(renderContext.View, data.GBuffer); + context->UpdateCB(_cb0, &data); + context->BindCB(0, _cb0); + } + + // Classify probes (activation/deactivation and relocation) + { + PROFILE_GPU_CPU("Probes Classification"); + uint32 threadGroups = Math::DivideAndRoundUp(probesCount, DDGI_PROBE_CLASSIFY_GROUP_SIZE); + for (int32 i = 0; i < 4; i++) + { + context->BindSR(i, bindingDataSDF.Cascades[i]->ViewVolume()); + } + context->BindUA(0, ddgiData.Result.ProbesState); + context->Dispatch(_csClassify, threadGroups, 1, 1); + context->ResetUA(); + } + + // Trace rays from probes + { + PROFILE_GPU_CPU("Trace Rays"); + + // Global SDF with Global Surface Atlas software raytracing (X - per probe ray, Y - per probe) + ASSERT_LOW_LAYER((probeRaysCount % DDGI_TRACE_RAYS_GROUP_SIZE_X) == 0); + for (int32 i = 0; i < 4; i++) + { + context->BindSR(i, bindingDataSDF.Cascades[i]->ViewVolume()); + context->BindSR(i + 4, bindingDataSDF.CascadeMips[i]->ViewVolume()); + } + context->BindSR(8, bindingDataSurfaceAtlas.Chunks ? bindingDataSurfaceAtlas.Chunks->View() : nullptr); + context->BindSR(9, bindingDataSurfaceAtlas.CulledObjects ? 
bindingDataSurfaceAtlas.CulledObjects->View() : nullptr); + context->BindSR(10, bindingDataSurfaceAtlas.AtlasDepth->View()); + context->BindSR(11, bindingDataSurfaceAtlas.AtlasLighting->View()); + context->BindSR(12, ddgiData.Result.ProbesState); + context->BindUA(0, ddgiData.ProbesTrace->View()); + context->Dispatch(_csTraceRays, probeRaysCount / DDGI_TRACE_RAYS_GROUP_SIZE_X, probesCount, 1); + context->ResetUA(); + context->ResetSR(); + +#if 0 + // Probes trace debug preview + context->SetViewportAndScissors(renderContext.View.ScreenSize.X, renderContext.View.ScreenSize.Y); + context->SetRenderTarget(lightBuffer); + context->Draw(ddgiData.ProbesTrace); + return false; +#endif + } + + // Update probes + { + PROFILE_GPU_CPU("Update Probes"); + context->BindSR(0, ddgiData.Result.ProbesState); + context->BindSR(1, ddgiData.ProbesTrace->View()); + + // Update irradiance + context->BindUA(0, ddgiData.Result.ProbesIrradiance); + context->Dispatch(_csUpdateProbesIrradiance, probesCountX, probesCountY, 1); + uint32 threadGroupsX = Math::DivideAndRoundUp(probesCountX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); + uint32 threadGroupsY = Math::DivideAndRoundUp(probesCountY, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); + context->Dispatch(_csUpdateBordersIrradianceRow, threadGroupsX, threadGroupsY, 1); + threadGroupsX = Math::DivideAndRoundUp(probesCountX, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); + threadGroupsY = Math::DivideAndRoundUp(probesCountY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); + context->Dispatch(_csUpdateBordersIrradianceCollumn, threadGroupsX, threadGroupsY, 1); + + // Update distance + context->BindUA(0, ddgiData.Result.ProbesDistance); + context->Dispatch(_csUpdateProbesDistance, probesCountX, probesCountY, 1); + threadGroupsX = Math::DivideAndRoundUp(probesCountX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); + threadGroupsY = 
Math::DivideAndRoundUp(probesCountY, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); + context->Dispatch(_csUpdateBordersDistanceRow, threadGroupsX, threadGroupsY, 1); + threadGroupsX = Math::DivideAndRoundUp(probesCountX, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); + threadGroupsY = Math::DivideAndRoundUp(probesCountY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); + context->Dispatch(_csUpdateBordersDistanceCollumn, threadGroupsX, threadGroupsY, 1); + } + + // Render indirect lighting + { + PROFILE_GPU_CPU("Indirect Lighting"); +#if 0 + // DDGI indirect lighting debug preview + context->Clear(lightBuffer, Color::Transparent); +#endif + context->ResetUA(); + context->BindSR(0, renderContext.Buffers->GBuffer0->View()); + context->BindSR(1, renderContext.Buffers->GBuffer1->View()); + context->BindSR(2, renderContext.Buffers->GBuffer2->View()); + context->BindSR(3, renderContext.Buffers->DepthBuffer->View()); + context->BindSR(4, ddgiData.Result.ProbesState); + context->BindSR(5, ddgiData.Result.ProbesDistance); + context->BindSR(6, ddgiData.Result.ProbesIrradiance); + context->SetViewportAndScissors(renderContext.View.ScreenSize.X, renderContext.View.ScreenSize.Y); + context->SetRenderTarget(lightBuffer); + context->SetState(_psIndirectLighting); + context->DrawFullscreenTriangle(); + } + +#if USE_EDITOR + // Probes debug drawing + if (debugProbes) + { + PROFILE_GPU_CPU("Debug Probes"); + if (!_debugModel) + _debugModel = Content::LoadAsyncInternal(TEXT("Editor/Primitives/Sphere")); + if (!_debugMaterial) + _debugMaterial = Content::LoadAsyncInternal(TEXT("Editor/DebugMaterials/DDGIDebugProbes")); + if (_debugModel && _debugModel->IsLoaded() && _debugModel->CanBeRendered() && _debugMaterial && _debugMaterial->IsLoaded()) + { + RenderContext debugRenderContext(renderContext); + debugRenderContext.List = RenderList::GetFromPool(); + debugRenderContext.View.Pass = DrawPass::GBuffer; + debugRenderContext.View.Prepare(debugRenderContext); + Matrix world; 
+ Matrix::Scaling(Vector3(0.2f), world); + const Mesh& debugMesh = _debugModel->LODs[0].Meshes[0]; + for (int32 probeIndex = 0; probeIndex < probesCount; probeIndex++) + debugMesh.Draw(debugRenderContext, _debugMaterial, world, StaticFlags::None, true, DrawPass::GBuffer, (float)probeIndex); + debugRenderContext.List->SortDrawCalls(debugRenderContext, false, DrawCallsListType::GBuffer); + context->SetViewportAndScissors(debugRenderContext.View.ScreenSize.X, debugRenderContext.View.ScreenSize.Y); + GPUTextureView* targetBuffers[5] = + { + lightBuffer, + renderContext.Buffers->GBuffer0->View(), + renderContext.Buffers->GBuffer1->View(), + renderContext.Buffers->GBuffer2->View(), + renderContext.Buffers->GBuffer3->View(), + }; + context->SetRenderTarget(*renderContext.Buffers->DepthBuffer, ToSpan(targetBuffers, ARRAY_COUNT(targetBuffers))); + { + // Pass DDGI data to the material + _debugMaterial->SetParameterValue(TEXT("ProbesState"), Variant(ddgiData.ProbesState)); + _debugMaterial->SetParameterValue(TEXT("ProbesIrradiance"), Variant(ddgiData.ProbesIrradiance)); + _debugMaterial->SetParameterValue(TEXT("ProbesDistance"), Variant(ddgiData.ProbesDistance)); + auto cb = _debugMaterial->GetShader()->GetCB(3); + if (cb) + { + context->UpdateCB(cb, &ddgiData.Result.Constants); + context->BindCB(3, cb); + } + } + debugRenderContext.List->ExecuteDrawCalls(debugRenderContext, DrawCallsListType::GBuffer); + RenderList::ReturnToPool(debugRenderContext.List); + context->UnBindCB(3); + context->ResetRenderTarget(); + } + } +#endif + + return false; +} diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h new file mode 100644 index 000000000..5440cf7be --- /dev/null +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h @@ -0,0 +1,93 @@ +// Copyright (c) 2012-2022 Wojciech Figat. All rights reserved. 
+ +#pragma once + +#include "../RendererPass.h" +#include "Engine/Core/Math/Int3.h" +#include "Engine/Graphics/Textures/GPUTexture.h" + +/// +/// Dynamic Diffuse Global Illumination rendering pass. +/// +class FLAXENGINE_API DynamicDiffuseGlobalIlluminationPass : public RendererPass +{ +public: + // Constant buffer data for DDGI access on a GPU. + PACK_STRUCT(struct ConstantsData + { + Vector3 ProbesOrigin; + float ProbesSpacing; + Vector4 RaysRotation; + uint32 ProbesCounts[3]; + float IrradianceGamma; + Int3 ProbesScrollOffsets; + float ProbeHistoryWeight; + Vector3 ViewDir; + uint32 RaysCount; + Int3 ProbeScrollDirections; + float RayMaxDistance; + uint32 ProbeScrollClear[3]; + uint32 Padding0; + }); + + // Binding data for the GPU. + struct BindingData + { + ConstantsData Constants; + GPUTextureView* ProbesState; + GPUTextureView* ProbesDistance; + GPUTextureView* ProbesIrradiance; + }; + +private: + bool _supported = false; + AssetReference _shader; + GPUConstantBuffer* _cb0 = nullptr; + GPUShaderProgramCS* _csClassify = nullptr; + GPUShaderProgramCS* _csTraceRays = nullptr; + GPUShaderProgramCS* _csUpdateProbesIrradiance = nullptr; + GPUShaderProgramCS* _csUpdateProbesDistance = nullptr; + GPUShaderProgramCS* _csUpdateBordersIrradianceRow = nullptr; + GPUShaderProgramCS* _csUpdateBordersIrradianceCollumn = nullptr; + GPUShaderProgramCS* _csUpdateBordersDistanceRow = nullptr; + GPUShaderProgramCS* _csUpdateBordersDistanceCollumn = nullptr; + GPUPipelineState* _psIndirectLighting = nullptr; // Tested for null in setupResources() before its first assignment +#if USE_EDITOR + AssetReference _debugModel; + AssetReference _debugMaterial; +#endif + +public: + /// + /// Gets the DDGI binding data (only if enabled). + /// + /// The rendering context buffers. + /// The result DDGI data for binding to the shaders. + /// True if failed to render (platform doesn't support it, out of video memory, disabled feature or effect is not ready), otherwise false. + bool Get(const RenderBuffers* buffers, BindingData& result); + + /// + /// Renders the DDGI. + /// + /// The rendering context. + /// The GPU context. 
+ /// The light accumulation buffer (input and output). + /// True if failed to render (platform doesn't support it, out of video memory, disabled feature or effect is not ready), otherwise false. + bool Render(RenderContext& renderContext, GPUContext* context, GPUTextureView* lightBuffer); + +private: +#if COMPILE_WITH_DEV_ENV + uint64 LastFrameShaderReload = 0; + void OnShaderReloading(Asset* obj); +#endif + +public: + // [RendererPass] + String ToString() const override; + bool Init() override; + void Dispose() override; + +protected: + // [RendererPass] + bool setupResources() override; +}; diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index 056870571..ea3d127f0 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -23,6 +23,7 @@ #include "AtmospherePreCompute.h" #include "GlobalSignDistanceFieldPass.h" #include "GI/GlobalSurfaceAtlasPass.h" +#include "GI/DynamicDiffuseGlobalIllumination.h" #include "Utils/MultiScaler.h" #include "Utils/BitonicSort.h" #include "AntiAliasing/FXAA.h" @@ -85,6 +86,7 @@ bool RendererService::Init() PassList.Add(HistogramPass::Instance()); PassList.Add(GlobalSignDistanceFieldPass::Instance()); PassList.Add(GlobalSurfaceAtlasPass::Instance()); + PassList.Add(DynamicDiffuseGlobalIlluminationPass::Instance()); #if USE_EDITOR PassList.Add(QuadOverdrawPass::Instance()); #endif @@ -397,6 +399,11 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext) // Render lighting LightPass::Instance()->RenderLight(renderContext, *lightBuffer); + if (renderContext.View.Flags & ViewFlags::GI) + { + // TODO: add option to PostFx Volume for realtime GI type (None, DDGI) + DynamicDiffuseGlobalIlluminationPass::Instance()->Render(renderContext, context, *lightBuffer); + } if (renderContext.View.Mode == ViewMode::LightBuffer) { auto colorGradingLUT = ColorGradingPass::Instance()->RenderLUT(renderContext); @@ -499,7 +506,7 @@ void RenderInner(SceneRenderTask* 
task, RenderContext& renderContext) context->ResetRenderTarget(); context->ResetSR(); context->FlushState(); - + // Custom Post Processing renderContext.List->RunMaterialPostFxPass(context, renderContext, MaterialPostFxLocation::AfterPostProcessingPass, frameBuffer, tempBuffer); renderContext.List->RunCustomPostFxPass(context, renderContext, PostProcessEffectLocation::Default, frameBuffer, tempBuffer); diff --git a/Source/Shaders/GI/DDGI.hlsl b/Source/Shaders/GI/DDGI.hlsl new file mode 100644 index 000000000..b36cc5d3d --- /dev/null +++ b/Source/Shaders/GI/DDGI.hlsl @@ -0,0 +1,214 @@ +// Copyright (c) 2012-2022 Wojciech Figat. All rights reserved. + +// Implementation based on: +// "Dynamic Diffuse Global Illumination with Ray-Traced Irradiance Probes", Journal of Computer Graphics Tools, April 2019 +// Zander Majercik, Jean-Philippe Guertin, Derek Nowrouzezahrai, and Morgan McGuire +// https://morgan3d.github.io/articles/2019-04-01-ddgi/index.html and https://gdcvault.com/play/1026182/ +// +// Additional references: +// "Scaling Probe-Based Real-Time Dynamic Global Illumination for Production", https://jcgt.org/published/0010/02/01/ +// "Dynamic Diffuse Global Illumination with Ray-Traced Irradiance Fields", https://jcgt.org/published/0008/02/01/ + +#include "./Flax/Common.hlsl" +#include "./Flax/Math.hlsl" +#include "./Flax/Octahedral.hlsl" + +#define DDGI_PROBE_STATE_ACTIVE 0 +#define DDGI_PROBE_STATE_INACTIVE 1 +#define DDGI_PROBE_RESOLUTION_IRRADIANCE 6 // Resolution (in texels) for probe irradiance data (excluding 1px padding on each side) +#define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side) +#define DDGI_SRGB_BLENDING 1 // Enables blending in sRGB color space, otherwise irradiance blending is done in linear space + +// DDGI data for a constant buffer +struct DDGIData +{ + float3 ProbesOrigin; + float ProbesSpacing; + float4 RaysRotation; + uint3 ProbesCounts; + float 
IrradianceGamma; + int3 ProbesScrollOffsets; + float ProbeHistoryWeight; + float3 ViewDir; + uint RaysCount; + int3 ProbeScrollDirections; + float RayMaxDistance; + uint3 ProbeScrollClear; // TODO: pack into bits + uint Padding0; +}; + +uint GetDDGIProbeIndex(DDGIData data, uint3 probeCoords) +{ + uint probesPerPlane = data.ProbesCounts.x * data.ProbesCounts.z; + uint planeIndex = probeCoords.y; + uint probeIndexInPlane = probeCoords.x + (data.ProbesCounts.x * probeCoords.z); + return planeIndex * probesPerPlane + probeIndexInPlane; +} + +uint GetDDGIProbeIndex(DDGIData data, uint2 texCoords, uint texResolution) +{ + uint probesPerPlane = data.ProbesCounts.x * data.ProbesCounts.z; + uint planeIndex = texCoords.x / (data.ProbesCounts.x * texResolution); + uint probeIndexInPlane = (texCoords.x / texResolution) - (planeIndex * data.ProbesCounts.x) + (data.ProbesCounts.x * (texCoords.y / texResolution)); + return planeIndex * probesPerPlane + probeIndexInPlane; +} + +uint3 GetDDGIProbeCoords(DDGIData data, uint probeIndex) +{ + uint3 probeCoords; + probeCoords.x = probeIndex % data.ProbesCounts.x; + probeCoords.y = probeIndex / (data.ProbesCounts.x * data.ProbesCounts.z); + probeCoords.z = (probeIndex / data.ProbesCounts.x) % data.ProbesCounts.z; + return probeCoords; +} + +uint2 GetDDGIProbeTexelCoords(DDGIData data, uint probeIndex) +{ + uint probesPerPlane = data.ProbesCounts.x * data.ProbesCounts.z; + uint planeIndex = probeIndex / probesPerPlane; + uint gridSpaceX = probeIndex % data.ProbesCounts.x; + uint gridSpaceY = probeIndex / data.ProbesCounts.x; + uint x = gridSpaceX + (planeIndex * data.ProbesCounts.x); + uint y = gridSpaceY % data.ProbesCounts.z; + return uint2(x, y); +} + +uint GetDDGIScrollingProbeIndex(DDGIData data, uint3 probeCoords) +{ + // Probes are scrolled on edges to stabilize GI when camera moves + return GetDDGIProbeIndex(data, (probeCoords + data.ProbesScrollOffsets + data.ProbesCounts) % data.ProbesCounts); +} + +float3 
GetDDGIProbeWorldPosition(DDGIData data, uint3 probeCoords) +{ + float3 probePosition = probeCoords * data.ProbesSpacing; + float3 probeGridOffset = (data.ProbesSpacing * (data.ProbesCounts - 1)) * 0.5f; + return data.ProbesOrigin + probePosition - probeGridOffset + (data.ProbesScrollOffsets * data.ProbesSpacing); +} + +// Loads probe state (W component of the probe texel in the probes state texture) +float LoadDDGIProbeState(DDGIData data, Texture2D probesState, uint probeIndex) +{ + int2 probeDataCoords = GetDDGIProbeTexelCoords(data, probeIndex); + float4 probeState = probesState.Load(int3(probeDataCoords, 0)); + return probeState.w; +} + +// Loads probe world-space position (XYZ) and probe state (W) +float4 LoadDDGIProbePositionAndState(DDGIData data, Texture2D probesState, uint probeIndex, uint3 probeCoords) +{ + float4 result; + result.xyz = GetDDGIProbeWorldPosition(data, probeCoords); + + // Probe state contains relocation's offset and the classification's state + int2 probeDataCoords = GetDDGIProbeTexelCoords(data, probeIndex); + float4 probeState = probesState.Load(int3(probeDataCoords, 0)); + result.xyz += probeState.xyz; + result.w = probeState.w; + + return result; +} + +// Calculates texture UVs for sampling probes atlas texture (irradiance or distance) +float2 GetDDGIProbeUV(DDGIData data, uint probeIndex, float2 octahedralCoords, uint resolution) +{ + uint2 coords = GetDDGIProbeTexelCoords(data, probeIndex); + float probeTexelSize = resolution + 2.0f; + float textureWidth = probeTexelSize * (data.ProbesCounts.x * data.ProbesCounts.y); + float textureHeight = probeTexelSize * data.ProbesCounts.z; + float2 uv = float2(coords.x * probeTexelSize, coords.y * probeTexelSize) + (probeTexelSize * 0.5f); + uv += octahedralCoords.xy * (resolution * 0.5f); + uv /= float2(textureWidth, textureHeight); + return uv; +} + +// Samples DDGI probes volume at the given world-space position and returns the irradiance.
+float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesState, Texture2D probesDistance, Texture2D probesIrradiance, float3 worldPosition, float3 worldNormal, float bias) +{ + float4 irradiance = float4(0, 0, 0, 0); + float3 probesOrigin = data.ProbesScrollOffsets * data.ProbesSpacing + data.ProbesOrigin; + float3 probesExtent = (data.ProbesCounts - 1) * (data.ProbesSpacing * 0.5f); + + // Bias the world-space position to reduce artifacts + float3 surfaceBias = (worldNormal * bias) + (data.ViewDir * (bias * -4.0f)); + float3 biasedWorldPosition = worldPosition + surfaceBias; + + // Get the grid coordinates of the probe nearest the biased world position + uint3 baseProbeCoords = clamp(uint3((worldPosition - probesOrigin + probesExtent) / data.ProbesSpacing), 0, data.ProbesCounts - 1); + float3 baseProbeWorldPosition = GetDDGIProbeWorldPosition(data, baseProbeCoords); + float3 biasAlpha = saturate((biasedWorldPosition - baseProbeWorldPosition) / data.ProbesSpacing); + + // Loop over the closest probes to accumulate their contributions + for (uint i = 0; i < 8; i++) + { + uint3 probeCoordsOffset = uint3(i, i >> 1, i >> 2) & 1; + uint3 probeCoords = clamp(baseProbeCoords + probeCoordsOffset, 0, data.ProbesCounts - 1); + uint probeIndex = GetDDGIScrollingProbeIndex(data, probeCoords); + + // Load probe position and state + float4 probePositionAndState = LoadDDGIProbePositionAndState(data, probesState, probeIndex, probeCoords); + if (probePositionAndState.w == DDGI_PROBE_STATE_INACTIVE) + continue; + + // Calculate the distance and direction from the (biased and non-biased) shading point and the probe + float3 worldPosToProbe = normalize(probePositionAndState.xyz - worldPosition); + float3 biasedPosToProbe = normalize(probePositionAndState.xyz - biasedWorldPosition); + float biasedPosToProbeDist = length(probePositionAndState.xyz - biasedWorldPosition); + + // Smooth backface test + float weight = Square(dot(worldPosToProbe, worldNormal) * 0.5f + 0.5f); + + // Sample 
distance texture + float2 octahedralCoords = GetOctahedralCoords(-biasedPosToProbe); + float2 uv = GetDDGIProbeUV(data, probeIndex, octahedralCoords, DDGI_PROBE_RESOLUTION_DISTANCE); + float2 probeDistance = probesDistance.SampleLevel(SamplerLinearClamp, uv, 0).rg * 2.0f; + float probeDistanceMean = probeDistance.x; + float probeDistanceMean2 = probeDistance.y; + + // Visibility weight (Chebyshev) + if (biasedPosToProbeDist > probeDistanceMean) + { + float probeDistanceVariance = abs(Square(probeDistanceMean) - probeDistanceMean2); + float chebyshevWeight = probeDistanceVariance / (probeDistanceVariance + Square(biasedPosToProbeDist - probeDistanceMean)); + weight *= max(chebyshevWeight * chebyshevWeight * chebyshevWeight, 0.05f); + } + + // Avoid a weight of zero + weight = max(weight, 0.000001f); + + // Adjust weight curve to inject a small portion of light + const float minWeightThreshold = 0.2f; + if (weight < minWeightThreshold) + weight *= Square(weight) * (1.0f / (minWeightThreshold * minWeightThreshold)); + + // Calculate trilinear weights based on the distance to each probe to smoothly transition between grid of 8 probes + float3 trilinear = lerp(1.0f - biasAlpha, biasAlpha, probeCoordsOffset); + weight *= max(trilinear.x * trilinear.y * trilinear.z, 0.001f); + + // Sample irradiance texture + octahedralCoords = GetOctahedralCoords(worldNormal); + uv = GetDDGIProbeUV(data, probeIndex, octahedralCoords, DDGI_PROBE_RESOLUTION_IRRADIANCE); + float3 probeIrradiance = probesIrradiance.SampleLevel(SamplerLinearClamp, uv, 0).rgb; +#if DDGI_SRGB_BLENDING + probeIrradiance = pow(probeIrradiance, data.IrradianceGamma * 0.5f); +#endif + + // Accumulate weighted irradiance + irradiance += float4(probeIrradiance * weight, weight); + } + + if (irradiance.a > 0.0f) + { + // Normalize irradiance + irradiance.rgb *= 1.f / irradiance.a; +#if DDGI_SRGB_BLENDING + irradiance.rgb *= irradiance.rgb; +#endif + irradiance.rgb *= 2.0f * PI; + + // Fade-out outside the probes 
volume + float fadeDistance = data.ProbesSpacing * 0.5f; + irradiance.rgb *= saturate(Min3(probesExtent - abs(worldPosition - probesOrigin)) / fadeDistance); + } + return irradiance.rgb; +} diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader new file mode 100644 index 000000000..59e7b0468 --- /dev/null +++ b/Source/Shaders/GI/DDGI.shader @@ -0,0 +1,439 @@ +// Copyright (c) 2012-2022 Wojciech Figat. All rights reserved. + +// Implementation based on: +// "Dynamic Diffuse Global Illumination with Ray-Traced Irradiance Probes", Journal of Computer Graphics Tools, April 2019 +// Zander Majercik, Jean-Philippe Guertin, Derek Nowrouzezahrai, and Morgan McGuire +// https://morgan3d.github.io/articles/2019-04-01-ddgi/index.html and https://gdcvault.com/play/1026182/ +// +// Additional references: +// "Scaling Probe-Based Real-Time Dynamic Global Illumination for Production", https://jcgt.org/published/0010/02/01/ +// "Dynamic Diffuse Global Illumination with Ray-Traced Irradiance Fields", https://jcgt.org/published/0008/02/01/ + +#include "./Flax/Common.hlsl" +#include "./Flax/Math.hlsl" +#include "./Flax/Quaternion.hlsl" +#include "./Flax/GlobalSignDistanceField.hlsl" +#include "./Flax/GI/GlobalSurfaceAtlas.hlsl" +#include "./Flax/GI/DDGI.hlsl" + +// This must match C++ +#define DDGI_TRACE_RAYS_LIMIT 512 // Limit of rays per-probe (runtime value can be smaller) +#define DDGI_TRACE_RAYS_GROUP_SIZE_X 32 +#define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8 +#define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 + +META_CB_BEGIN(0, Data) +DDGIData DDGI; +GlobalSDFData GlobalSDF; +GlobalSurfaceAtlasData GlobalSurfaceAtlas; +GBufferData GBuffer; +float3 Padding0; +float IndirectLightingIntensity; +META_CB_END + +// Calculates the evenly distributed direction ray on a sphere (Spherical Fibonacci lattice) +float3 GetSphericalFibonacci(float sampleIndex, float samplesCount) +{ + float b = (sqrt(5.0f) * 0.5f + 0.5f) - 1.0f; + float s = sampleIndex * b; + float phi = (2.0f * 
PI) * (s - floor(s)); + float cosTheta = 1.0f - (2.0f * sampleIndex + 1.0f) * (1.0f / samplesCount); + float sinTheta = sqrt(saturate(1.0f - (cosTheta * cosTheta))); + return float3(cos(phi) * sinTheta, sin(phi) * sinTheta, cosTheta); +} + +// Calculates a normalized ray direction (Spherical Fibonacci sample for the ray index, rotated by the current probes rays rotation phase) +float3 GetProbeRayDirection(DDGIData data, uint rayIndex) +{ + float3 direction = GetSphericalFibonacci(rayIndex, data.RaysCount); + return normalize(QuaternionRotate(data.RaysRotation, direction)); +} + +#ifdef _CS_Classify + +RWTexture2D RWProbesState : register(u0); + +Texture3D GlobalSDFTex[4] : register(t0); + +// Compute shader for updating probes state between active and inactive. +META_CS(true, FEATURE_LEVEL_SM5) +[numthreads(DDGI_PROBE_CLASSIFY_GROUP_SIZE, 1, 1)] +void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) +{ + uint probeIndex = DispatchThreadId.x; + uint probesCount = DDGI.ProbesCounts.x * DDGI.ProbesCounts.y * DDGI.ProbesCounts.z; + if (probeIndex >= probesCount) + return; + uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex); + probeIndex = GetDDGIScrollingProbeIndex(DDGI, probeCoords); + int2 probeDataCoords = GetDDGIProbeTexelCoords(DDGI, probeIndex); + + // Load probe state and position + float4 probeState = RWProbesState[probeDataCoords]; + float3 probePosition = GetDDGIProbeWorldPosition(DDGI, probeCoords); + // TODO: reset probe offset for scrolled probes + probePosition.xyz += probeState.xyz; + probeState.w = DDGI_PROBE_STATE_ACTIVE; + + // Use Global SDF to quickly get distance and direction to the scene geometry + float sdf; + float3 sdfNormal = normalize(SampleGlobalSDFGradient(GlobalSDF, GlobalSDFTex, probePosition.xyz, sdf)); + float threshold = GlobalSDF.CascadeVoxelSize[0] * 0.5f; + float distanceLimit = length(DDGI.ProbesSpacing) * 1.5f + threshold; + float relocateLimit = length(DDGI.ProbesSpacing) * 0.6f; + if (abs(sdf) > distanceLimit + threshold) // Probe is too
far from geometry + { + // Disable it + probeState = float4(0, 0, 0, DDGI_PROBE_STATE_INACTIVE); + } + else if (sdf < threshold) // Probe is inside geometry (or closer to the surface than the threshold) + { + if (abs(sdf) < relocateLimit) + { + // Relocate it + probeState.xyz = probeState.xyz + sdfNormal * (sdf + threshold); + } + else + { + // Reset relocation + probeState.xyz = float3(0, 0, 0); + } + } + else if (sdf > relocateLimit) // Probe is far enough from any geometry + { + // Reset relocation + probeState.xyz = float3(0, 0, 0); + } + + RWProbesState[probeDataCoords] = probeState; +} + +#endif + +#ifdef _CS_TraceRays + +RWTexture2D RWProbesTrace : register(u0); + +Texture3D GlobalSDFTex[4] : register(t0); +Texture3D GlobalSDFMip[4] : register(t4); +ByteAddressBuffer GlobalSurfaceAtlasChunks : register(t8); +Buffer GlobalSurfaceAtlasCulledObjects : register(t9); +Texture2D GlobalSurfaceAtlasDepth : register(t10); +Texture2D GlobalSurfaceAtlasTex : register(t11); +Texture2D ProbesState : register(t12); + +// Compute shader for tracing rays for probes using Global SDF and Global Surface Atlas.
+META_CS(true, FEATURE_LEVEL_SM5) +[numthreads(DDGI_TRACE_RAYS_GROUP_SIZE_X, 1, 1)] +void CS_TraceRays(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_DispatchThreadID, uint3 GroupThreadId : SV_GroupThreadID) +{ + uint rayIndex = DispatchThreadId.x; + uint probeIndex = DispatchThreadId.y; + uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex); + probeIndex = GetDDGIScrollingProbeIndex(DDGI, probeCoords); + + // Load current probe state and position + float4 probePositionAndState = LoadDDGIProbePositionAndState(DDGI, ProbesState, probeIndex, probeCoords); + if (probePositionAndState.w == DDGI_PROBE_STATE_INACTIVE) + return; // Skip disabled probes + float3 probeRayDirection = GetProbeRayDirection(DDGI, rayIndex); + + // Trace ray with Global SDF + GlobalSDFTrace trace; + trace.Init(probePositionAndState.xyz, probeRayDirection, 0.0f, DDGI.RayMaxDistance); + GlobalSDFHit hit = RayTraceGlobalSDF(GlobalSDF, GlobalSDFTex, GlobalSDFMip, trace); + + // Calculate radiance and distance + float4 radiance; + if (hit.IsHit()) + { + if (hit.HitSDF <= 0.0f && hit.HitTime <= GlobalSDF.CascadeVoxelSize[0]) + { + // Ray starts inside geometry (mark as negative distance and reduce its influence during irradiance blending) + radiance = float4(0, 0, 0, hit.HitTime * -0.25f); + } + else + { + // Sample Global Surface Atlas to get the lighting at the hit location + float3 hitPosition = hit.GetHitPosition(trace); + float surfaceThreshold = GetGlobalSurfaceAtlasThreshold(hit); + float4 surfaceColor = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, GlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hitPosition, -probeRayDirection, surfaceThreshold); + radiance = float4(surfaceColor.rgb, hit.HitTime); + + // Add some bias to prevent self-occlusion artifacts in Chebyshev due to Global SDF being very imprecise at small scale + radiance.w = max(radiance.w + GlobalSDF.CascadeVoxelSize[hit.HitCascade] * 0.5f, 0); + } + } + else + {
+ // Ray hits sky + radiance.rgb = float3(0, 0, 0); // TODO: sample sky/skybox with a fallback radiance + radiance.a = 1e27f; // Sky is the limit + } + + // Write into probes trace results + RWProbesTrace[uint2(rayIndex, probeIndex)] = radiance; +} + +#endif + +#if defined(_CS_UpdateProbes) || defined(_CS_UpdateBorders) + +#if DDGI_PROBE_UPDATE_MODE == 0 +// Update irradiance +#define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_IRRADIANCE +#else +// Update distance +#define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_DISTANCE +#endif + +groupshared float4 CachedProbesTraceRadiance[DDGI_TRACE_RAYS_LIMIT]; +groupshared float3 CachedProbesTraceDirection[DDGI_TRACE_RAYS_LIMIT]; + +RWTexture2D RWOutput : register(u0); +Texture2D ProbesState : register(t0); +Texture2D ProbesTrace : register(t1); + +// Compute shader for updating probes irradiance or distance texture. +META_CS(true, FEATURE_LEVEL_SM5) +META_PERMUTATION_1(DDGI_PROBE_UPDATE_MODE=0) +META_PERMUTATION_1(DDGI_PROBE_UPDATE_MODE=1) +[numthreads(DDGI_PROBE_RESOLUTION, DDGI_PROBE_RESOLUTION, 1)] +void CS_UpdateProbes(uint3 DispatchThreadId : SV_DispatchThreadID, uint GroupIndex : SV_GroupIndex) +{ + // Get probe index and output texel location in the atlas (offset by the 1px padding around each probe) + uint probeIndex = GetDDGIProbeIndex(DDGI, DispatchThreadId.xy, DDGI_PROBE_RESOLUTION); + uint probesCount = DDGI.ProbesCounts.x * DDGI.ProbesCounts.y * DDGI.ProbesCounts.z; + bool skip = probeIndex >= probesCount; + uint2 outputCoords = uint2(1, 1) + DispatchThreadId.xy + (DispatchThreadId.xy / DDGI_PROBE_RESOLUTION) * 2; + + // Clear probes that have been scrolled to new positions (blending with current irradiance will happen the next frame) + uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex); + UNROLL + for (uint planeIndex = 0; planeIndex < 3; planeIndex++) + { + if (DDGI.ProbeScrollClear[planeIndex]) + { + int scrollOffset = DDGI.ProbesScrollOffsets[planeIndex]; + int scrollDirection = DDGI.ProbeScrollDirections[planeIndex]; + uint probeCount =
DDGI.ProbesCounts[planeIndex]; + uint coord = (probeCount + (scrollDirection ? (scrollOffset - 1) : (scrollOffset % probeCount))) % probeCount; + if (probeCoords[planeIndex] == coord) + { + // Clear probe and return + //RWOutput[outputCoords] = float4(0, 0, 0, 0); + if (!skip) + RWOutput[outputCoords] = float4(0, 0, 0, 0); + skip = true; + } + } + } + + // Skip disabled probes + float probeState = LoadDDGIProbeState(DDGI, ProbesState, probeIndex); + if (probeState == DDGI_PROBE_STATE_INACTIVE) + skip = true; + + // Calculate octahedral projection for probe (unwraps spherical projection into a square) + float2 octahedralCoords = GetOctahedralCoords(DispatchThreadId.xy, DDGI_PROBE_RESOLUTION); + float3 octahedralDirection = GetOctahedralDirection(octahedralCoords); + + // Load trace rays results into shared memory to reuse across whole thread group + uint count = (uint)(ceil((float)(DDGI_TRACE_RAYS_LIMIT) / (float)(DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION))); + for (uint i = 0; i < count; i++) + { + uint rayIndex = (GroupIndex * count) + i; + if (rayIndex >= DDGI.RaysCount) + break; + CachedProbesTraceRadiance[rayIndex] = ProbesTrace[uint2(rayIndex, probeIndex)]; + CachedProbesTraceDirection[rayIndex] = GetProbeRayDirection(DDGI, rayIndex); + } + GroupMemoryBarrierWithGroupSync(); + + // TODO: optimize probes updating to build indirect dispatch args and probes indices list before tracing rays and blending irradiance/distance + if (skip) + { + // Clear probe + //RWOutput[outputCoords] = float4(0, 0, 0, 0); + return; + } + + // Loop over rays + float4 result = float4(0, 0, 0, 0); +#if DDGI_PROBE_UPDATE_MODE == 0 + uint backfacesCount = 0; + uint backfacesLimit = uint(DDGI.RaysCount * 0.1f); +#else + float distanceLimit = length(DDGI.ProbesSpacing) * 1.5f; +#endif + LOOP + for (uint rayIndex = 0; rayIndex < DDGI.RaysCount; rayIndex++) + { + float3 rayDirection = CachedProbesTraceDirection[rayIndex]; + float rayWeight = max(dot(octahedralDirection, rayDirection), 
0.0f); + float4 rayRadiance = CachedProbesTraceRadiance[rayIndex]; + +#if DDGI_PROBE_UPDATE_MODE == 0 + if (rayRadiance.w < 0.0f) + { + // Count backface hits + backfacesCount++; + + // Skip further blending after reaching backfaces limit + if (backfacesCount >= backfacesLimit) + return; + continue; + } + + // Add radiance (RGB) and weight (A) + result += float4(rayRadiance.rgb * rayWeight, rayWeight); +#else + // Increase reaction speed for depth discontinuities + rayWeight = pow(rayWeight, 4.0f); + + // Add distance (R), distance^2 (G) and weight (A) + float rayDistance = min(abs(rayRadiance.w), distanceLimit); + result += float4(rayDistance * rayWeight, (rayDistance * rayDistance) * rayWeight, 0.0f, rayWeight); +#endif + } + + // Normalize results + float epsilon = (float)DDGI.RaysCount * 1e-9f; + result.rgb *= 1.0f / (2.0f * max(result.a, epsilon)); + + // Blend current value with the previous probe data + float3 previous = RWOutput[outputCoords].rgb; + float historyWeight = DDGI.ProbeHistoryWeight; + if (dot(previous, previous) == 0) + { + // Cut any blend from zero + historyWeight = 0.0f; + } +#if DDGI_PROBE_UPDATE_MODE == 0 + result *= IndirectLightingIntensity; +#if DDGI_SRGB_BLENDING + result.rgb = pow(result.rgb, 1.0f / DDGI.IrradianceGamma); +#endif + float3 irradianceDelta = result.rgb - previous.rgb; + float irradianceDeltaMax = Max3(abs(irradianceDelta)); + if (irradianceDeltaMax > 0.25f) + { + // Reduce history weight after significant lighting change + historyWeight = max(historyWeight - 0.2f, 0.0f); + } + if (irradianceDeltaMax > 0.8f) + { + // Reduce flickering during rapid brightness changes + result.rgb = previous.rgb + (irradianceDelta * 0.25f); + } + float3 resultDelta = (1.0f - historyWeight) * irradianceDelta; + if (Max3(result.rgb) < Max3(previous.rgb)) + resultDelta = min(max(abs(resultDelta), 1.0f / 1024.0f), abs(irradianceDelta)) * sign(resultDelta); + result = float4(previous.rgb + resultDelta, 1.0f); +#else + result = 
float4(lerp(result.rg, previous.rg, historyWeight), 0.0f, 1.0f); +#endif + + RWOutput[outputCoords] = result; +} + +// Compute shader for updating probes irradiance or distance texture borders (fills gaps between probes to support bilinear filtering) +META_CS(true, FEATURE_LEVEL_SM5) +META_PERMUTATION_2(DDGI_PROBE_UPDATE_MODE=0, BORDER_ROW=1) +META_PERMUTATION_2(DDGI_PROBE_UPDATE_MODE=0, BORDER_ROW=0) +META_PERMUTATION_2(DDGI_PROBE_UPDATE_MODE=1, BORDER_ROW=1) +META_PERMUTATION_2(DDGI_PROBE_UPDATE_MODE=1, BORDER_ROW=0) +[numthreads(DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE, 1)] +void CS_UpdateBorders(uint3 DispatchThreadId : SV_DispatchThreadID) +{ + uint probeSideLength = DDGI_PROBE_RESOLUTION + 2; + uint probeSideLengthMinusOne = probeSideLength - 1; + uint2 copyCoordinates = uint2(0, 0); + uint2 threadCoordinates = DispatchThreadId.xy; +#if BORDER_ROW + threadCoordinates.y *= probeSideLength; + uint corner = DispatchThreadId.x % probeSideLength; +#else + threadCoordinates.x *= probeSideLength; + uint corner = threadCoordinates.y % probeSideLength; +#endif + if (corner == 0 || corner == probeSideLengthMinusOne) + { +#if !BORDER_ROW + // Left corner + copyCoordinates.x = threadCoordinates.x + DDGI_PROBE_RESOLUTION; + copyCoordinates.y = threadCoordinates.y - sign(corner - 1) * DDGI_PROBE_RESOLUTION; + RWOutput[threadCoordinates] = RWOutput[copyCoordinates]; + + // Right corner + threadCoordinates.x += probeSideLengthMinusOne; + copyCoordinates.x = threadCoordinates.x - DDGI_PROBE_RESOLUTION; + RWOutput[threadCoordinates] = RWOutput[copyCoordinates]; +#endif + return; + } + +#if BORDER_ROW + // Top row + uint probeStart = uint(threadCoordinates.x / probeSideLength) * probeSideLength; + uint offset = probeSideLengthMinusOne - (threadCoordinates.x % probeSideLength); + copyCoordinates = uint2(probeStart + offset, threadCoordinates.y + 1); +#else + // Left column + uint probeStart = uint(threadCoordinates.y / probeSideLength) * 
probeSideLength; + uint offset = probeSideLengthMinusOne - (threadCoordinates.y % probeSideLength); + copyCoordinates = uint2(threadCoordinates.x + 1, probeStart + offset); +#endif + RWOutput[threadCoordinates] = RWOutput[copyCoordinates]; + +#if BORDER_ROW + // Bottom row + threadCoordinates.y += probeSideLengthMinusOne; + copyCoordinates = uint2(probeStart + offset, threadCoordinates.y - 1); +#else + // Right column + threadCoordinates.x += probeSideLengthMinusOne; + copyCoordinates = uint2(threadCoordinates.x - 1, probeStart + offset); +#endif + RWOutput[threadCoordinates] = RWOutput[copyCoordinates]; +} + +#endif + +#ifdef _PS_IndirectLighting + +#include "./Flax/GBuffer.hlsl" +#include "./Flax/LightingCommon.hlsl" + +Texture2D ProbesState : register(t4); +Texture2D ProbesDistance : register(t5); +Texture2D ProbesIrradiance : register(t6); + +// Pixel shader for drawing indirect lighting in fullscreen +META_PS(true, FEATURE_LEVEL_SM5) +void PS_IndirectLighting(Quad_VS2PS input, out float4 output : SV_Target0) +{ + output = 0; + + // Sample GBuffer + GBufferSample gBuffer = SampleGBuffer(GBuffer, input.TexCoord); + + // Skip unlit pixels (no indirect lighting is applied to them) + BRANCH + if (gBuffer.ShadingModel == SHADING_MODEL_UNLIT) + { + discard; + return; + } + + // Sample irradiance + float bias = 1.0f; + float3 irradiance = SampleDDGIIrradiance(DDGI, ProbesState, ProbesDistance, ProbesIrradiance, gBuffer.WorldPos, gBuffer.Normal, bias); + + // Calculate lighting + float3 diffuseColor = GetDiffuseColor(gBuffer); + float3 diffuse = Diffuse_Lambert(diffuseColor); + output = float4(diffuse * irradiance, 1); +} + +#endif diff --git a/Source/Shaders/GI/GlobalSurfaceAtlas.hlsl b/Source/Shaders/GI/GlobalSurfaceAtlas.hlsl index b1ea4d5d5..6d1d0adbb 100644 --- a/Source/Shaders/GI/GlobalSurfaceAtlas.hlsl +++ b/Source/Shaders/GI/GlobalSurfaceAtlas.hlsl @@ -7,7 +7,7 @@ #define GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION 40 // Amount of chunks (in each direction) to split atlas draw distance for
objects culling #define GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE 4 #define GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE 5 // Amount of float4s per-tile -#define GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD 0.1f // Cut-off value for tiles transitions blending during sampling +#define GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD 0.05f // Cut-off value for tiles transitions blending during sampling #define GLOBAL_SURFACE_ATLAS_TILE_PROJ_PLANE_OFFSET 0.1f // Small offset to prevent clipping with the closest triangles (shifts near and far planes) struct GlobalSurfaceTile diff --git a/Source/Shaders/GI/GlobalSurfaceAtlas.shader b/Source/Shaders/GI/GlobalSurfaceAtlas.shader index b9e805e80..c36f99b8f 100644 --- a/Source/Shaders/GI/GlobalSurfaceAtlas.shader +++ b/Source/Shaders/GI/GlobalSurfaceAtlas.shader @@ -267,7 +267,7 @@ float4 PS_Debug(Quad_VS2PS input) : SV_Target //return float4(hit.HitNormal * 0.5f + 0.5f, 1); // Sample Global Surface Atlas at the hit location - float surfaceThreshold = hit.HitCascade * 10.0f + 20.0f; // Scale the threshold based on the hit cascade (less precision) + float surfaceThreshold = GetGlobalSurfaceAtlasThreshold(hit); float4 surfaceColor = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, GlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hit.GetHitPosition(trace), -viewRay, surfaceThreshold); return float4(surfaceColor.rgb, 1); } diff --git a/Source/Shaders/GlobalSignDistanceField.hlsl b/Source/Shaders/GlobalSignDistanceField.hlsl index f851fc192..0d561db69 100644 --- a/Source/Shaders/GlobalSignDistanceField.hlsl +++ b/Source/Shaders/GlobalSignDistanceField.hlsl @@ -45,6 +45,7 @@ struct GlobalSDFHit float HitTime; uint HitCascade; uint StepsCount; + float HitSDF; bool IsHit() { @@ -180,6 +181,7 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D tex[4] // Surface hit hit.HitTime = max(stepTime + stepDistance - minSurfaceThickness, 0.0f); hit.HitCascade = cascade; + hit.HitSDF = 
stepDistance; if (trace.NeedsHitNormal) { // Calculate hit normal from SDF gradient @@ -202,3 +204,10 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D tex[4] } return hit; } + +// Calculates the surface threshold for Global Surface Atlas sampling which matches the Global SDF trace to reduce artifacts +float GetGlobalSurfaceAtlasThreshold(GlobalSDFHit hit) +{ + // Scale the threshold based on the hit cascade (less precision) + return hit.HitCascade * 10.0f + 20.0f; +}