Files
FlaxEngine/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp
2024-07-03 08:51:58 +02:00

741 lines
35 KiB
C++

// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved.
#include "DynamicDiffuseGlobalIllumination.h"
#include "GlobalSurfaceAtlasPass.h"
#include "../GlobalSignDistanceFieldPass.h"
#include "../RenderList.h"
#include "Engine/Core/Random.h"
#include "Engine/Core/Types/Variant.h"
#include "Engine/Core/Math/Int3.h"
#include "Engine/Core/Math/Matrix3x3.h"
#include "Engine/Core/Math/Quaternion.h"
#include "Engine/Core/Config/GraphicsSettings.h"
#include "Engine/Engine/Engine.h"
#include "Engine/Content/Content.h"
#include "Engine/Debug/DebugDraw.h"
#include "Engine/Graphics/GPUContext.h"
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Graphics/Graphics.h"
#include "Engine/Graphics/RenderTask.h"
#include "Engine/Graphics/RenderBuffers.h"
#include "Engine/Graphics/RenderTargetPool.h"
#include "Engine/Graphics/RenderTools.h"
#include "Engine/Graphics/Shaders/GPUShader.h"
#include "Engine/Level/Actors/BrushMode.h"
#include "Engine/Renderer/GBufferPass.h"
// Implementation based on:
// "Dynamic Diffuse Global Illumination with Ray-Traced Irradiance Probes", Journal of Computer Graphics Tools, April 2019
// Zander Majercik, Jean-Philippe Guertin, Derek Nowrouzezahrai, and Morgan McGuire
// https://morgan3d.github.io/articles/2019-04-01-ddgi/index.html and https://gdcvault.com/play/1026182/
//
// Additional references:
// "Scaling Probe-Based Real-Time Dynamic Global Illumination for Production", https://jcgt.org/published/0010/02/01/
// "Dynamic Diffuse Global Illumination with Ray-Traced Irradiance Fields", https://jcgt.org/published/0008/02/01/
// This must match HLSL
#define DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT 4096 // Maximum amount of probes to update at once during rays tracing and blending
#define DDGI_TRACE_RAYS_LIMIT 256 // Limit of rays per-probe (runtime value can be smaller)
#define DDGI_PROBE_RESOLUTION_IRRADIANCE 6 // Resolution (in texels) for probe irradiance data (excluding 1px padding on each side)
#define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side)
#define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8
#define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32
GPU_CB_STRUCT(Data0 {
DynamicDiffuseGlobalIlluminationPass::ConstantsData DDGI;
GlobalSignDistanceFieldPass::ConstantsData GlobalSDF;
GlobalSurfaceAtlasPass::ConstantsData GlobalSurfaceAtlas;
ShaderGBufferData GBuffer;
float Padding0;
uint32 ProbesCount;
float ResetBlend;
float TemporalTime;
Int4 ProbeScrollClears[4];
});
GPU_CB_STRUCT(Data1 {
// TODO: use push constants on Vulkan or root signature data on DX12 to reduce overhead of changing single DWORD
Float2 Padding1;
uint32 CascadeIndex;
uint32 ProbeIndexOffset;
});
class DDGICustomBuffer : public RenderBuffers::CustomBuffer
{
public:
struct
{
Float3 ProbesOrigin;
float ProbesSpacing = 0.0f;
Int3 ProbeScrollOffsets;
Int3 ProbeScrollClears;
void Clear()
{
ProbesOrigin = Float3::Zero;
ProbeScrollOffsets = Int3::Zero;
ProbeScrollClears = Int3::Zero;
}
} Cascades[4];
int32 CascadesCount = 0;
int32 ProbeRaysCount = 0;
int32 ProbesCountTotal = 0;
Int3 ProbeCounts = Int3::Zero;
GPUTexture* ProbesTrace = nullptr; // Probes ray tracing: (RGB: hit radiance, A: hit distance)
GPUTexture* ProbesData = nullptr; // Probes data: (RGB: world-space offset, A: state/data)
GPUTexture* ProbesIrradiance = nullptr; // Probes irradiance (RGB: sRGB color)
GPUTexture* ProbesDistance = nullptr; // Probes distance (R: mean distance, G: mean distance^2)
GPUBuffer* ActiveProbes = nullptr; // List with indices of the active probes (built during probes classification to use indirect dispatches for probes updating), counter at 0
GPUBuffer* UpdateProbesInitArgs = nullptr; // Indirect dispatch buffer for active-only probes updating (trace+blend)
DynamicDiffuseGlobalIlluminationPass::BindingData Result;
FORCE_INLINE void Release()
{
RenderTargetPool::Release(ProbesTrace);
RenderTargetPool::Release(ProbesData);
RenderTargetPool::Release(ProbesIrradiance);
RenderTargetPool::Release(ProbesDistance);
SAFE_DELETE_GPU_RESOURCE(ActiveProbes);
SAFE_DELETE_GPU_RESOURCE(UpdateProbesInitArgs);
}
~DDGICustomBuffer()
{
Release();
}
};
void CalculateVolumeRandomRotation(Matrix3x3& matrix)
{
// Reference: James Arvo's algorithm Graphics Gems 3 (pages 117-120)
// http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.53.1357&rep=rep1&type=pdf
float u1 = TWO_PI * Random::Rand();
float cos1 = Math::Cos(u1);
float sin1 = Math::Sin(u1);
float u2 = TWO_PI * Random::Rand();
float cos2 = Math::Cos(u2);
float sin2 = Math::Sin(u2);
float u3 = Random::Rand();
float sq3 = 2.0f * sqrtf(u3 * (1.0f - u3));
float s2 = 2.0f * u3 * sin2 * sin2 - 1.0f;
float c2 = 2.0f * u3 * cos2 * cos2 - 1.0f;
float sc = 2.0f * u3 * sin2 * cos2;
matrix.M11 = cos1 * c2 - sin1 * sc;
matrix.M12 = sin1 * c2 + cos1 * sc;
matrix.M13 = sq3 * cos2;
matrix.M21 = cos1 * sc - sin1 * s2;
matrix.M22 = sin1 * sc + cos1 * s2;
matrix.M23 = sq3 * sin2;
matrix.M31 = cos1 * (sq3 * cos2) - sin1 * (sq3 * sin2);
matrix.M32 = sin1 * (sq3 * cos2) + cos1 * (sq3 * sin2);
matrix.M33 = 1.0f - 2.0f * u3;
}
String DynamicDiffuseGlobalIlluminationPass::ToString() const
{
return TEXT("DynamicDiffuseGlobalIlluminationPass");
}
bool DynamicDiffuseGlobalIlluminationPass::Init()
{
// Check platform support
const auto device = GPUDevice::Instance;
_supported = device->GetFeatureLevel() >= FeatureLevel::SM5 && device->Limits.HasCompute && device->Limits.HasTypedUAVLoad;
return false;
}
bool DynamicDiffuseGlobalIlluminationPass::setupResources()
{
if (!_supported)
return true;
// Load shader
if (!_shader)
{
_shader = Content::LoadAsyncInternal<Shader>(TEXT("Shaders/GI/DDGI"));
if (_shader == nullptr)
return true;
#if COMPILE_WITH_DEV_ENV
_shader.Get()->OnReloading.Bind<DynamicDiffuseGlobalIlluminationPass, &DynamicDiffuseGlobalIlluminationPass::OnShaderReloading>(this);
#endif
}
if (!_shader->IsLoaded())
return true;
// Initialize resources
const auto shader = _shader->GetShader();
_cb0 = shader->GetCB(0);
_cb1 = shader->GetCB(1);
if (!_cb0 || !_cb1)
return true;
_csClassify = shader->GetCS("CS_Classify");
_csUpdateProbesInitArgs = shader->GetCS("CS_UpdateProbesInitArgs");
_csTraceRays[0] = shader->GetCS("CS_TraceRays", 0);
_csTraceRays[1] = shader->GetCS("CS_TraceRays", 1);
_csTraceRays[2] = shader->GetCS("CS_TraceRays", 2);
_csTraceRays[3] = shader->GetCS("CS_TraceRays", 3);
_csUpdateProbesIrradiance = shader->GetCS("CS_UpdateProbes", 0);
_csUpdateProbesDistance = shader->GetCS("CS_UpdateProbes", 1);
_csUpdateBordersIrradianceRow = shader->GetCS("CS_UpdateBorders", 0);
_csUpdateBordersIrradianceCollumn = shader->GetCS("CS_UpdateBorders", 1);
_csUpdateBordersDistanceRow = shader->GetCS("CS_UpdateBorders", 2);
_csUpdateBordersDistanceCollumn = shader->GetCS("CS_UpdateBorders", 3);
auto device = GPUDevice::Instance;
auto psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle;
if (!_psIndirectLighting)
{
_psIndirectLighting = device->CreatePipelineState();
psDesc.PS = shader->GetPS("PS_IndirectLighting");
psDesc.BlendMode = BlendingMode::Add;
if (_psIndirectLighting->Init(psDesc))
return true;
}
return false;
}
#if COMPILE_WITH_DEV_ENV
void DynamicDiffuseGlobalIlluminationPass::OnShaderReloading(Asset* obj)
{
LastFrameShaderReload = Engine::FrameCount;
_csClassify = nullptr;
_csUpdateProbesInitArgs = nullptr;
_csTraceRays[0] = nullptr;
_csTraceRays[1] = nullptr;
_csTraceRays[2] = nullptr;
_csTraceRays[3] = nullptr;
_csUpdateProbesIrradiance = nullptr;
_csUpdateProbesDistance = nullptr;
_csUpdateBordersIrradianceRow = nullptr;
_csUpdateBordersIrradianceCollumn = nullptr;
_csUpdateBordersDistanceRow = nullptr;
_csUpdateBordersDistanceCollumn = nullptr;
SAFE_DELETE_GPU_RESOURCE(_psIndirectLighting);
invalidateResources();
}
#endif
void DynamicDiffuseGlobalIlluminationPass::Dispose()
{
RendererPass::Dispose();
// Cleanup
_cb0 = nullptr;
_cb1 = nullptr;
_shader = nullptr;
SAFE_DELETE_GPU_RESOURCE(_psIndirectLighting);
#if USE_EDITOR
_debugModel = nullptr;
_debugMaterial = nullptr;
#endif
}
bool DynamicDiffuseGlobalIlluminationPass::Get(const RenderBuffers* buffers, BindingData& result)
{
auto* ddgiData = buffers ? buffers->FindCustomBuffer<DDGICustomBuffer>(TEXT("DDGI")) : nullptr;
if (ddgiData && ddgiData->LastFrameUsed + 1 >= Engine::FrameCount) // Allow to use data from the previous frame (eg. particles in Editor using the Editor viewport in Game viewport - Game render task runs first)
{
result = ddgiData->Result;
return false;
}
return true;
}
bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderContext, GPUContext* context, DDGICustomBuffer& ddgiData)
{
// Render Global SDF and Global Surface Atlas for software raytracing
GlobalSignDistanceFieldPass::BindingData bindingDataSDF;
if (GlobalSignDistanceFieldPass::Instance()->Render(renderContext, context, bindingDataSDF))
return true;
GlobalSurfaceAtlasPass::BindingData bindingDataSurfaceAtlas;
if (GlobalSurfaceAtlasPass::Instance()->Render(renderContext, context, bindingDataSurfaceAtlas))
return true;
GPUTextureView* skybox = GBufferPass::Instance()->RenderSkybox(renderContext, context);
// Setup options
auto& settings = renderContext.List->Settings.GlobalIllumination;
auto* graphicsSettings = GraphicsSettings::Get();
const float probesSpacing = Math::Clamp(graphicsSettings->GIProbesSpacing, 10.0f, 1000.0f); // GI probes placement spacing nearby camera (for closest cascade; gets automatically reduced for further cascades)
int32 probeRaysCount; // Amount of rays to trace randomly around each probe
switch (Graphics::GIQuality) // Ensure to match CS_TraceRays permutations
{
case Quality::Low:
probeRaysCount = 96;
break;
case Quality::Medium:
probeRaysCount = 128;
break;
case Quality::High:
probeRaysCount = 192;
break;
case Quality::Ultra:
probeRaysCount = 256;
break;
default:
return true;
}
ASSERT_LOW_LAYER(probeRaysCount <= DDGI_TRACE_RAYS_LIMIT);
const float indirectLightingIntensity = settings.Intensity;
const float probeHistoryWeight = Math::Clamp(settings.TemporalResponse, 0.0f, 0.98f);
const float distance = settings.Distance;
const Color fallbackIrradiance = settings.FallbackIrradiance;
// Automatically calculate amount of cascades to cover the GI distance at the current probes spacing
const int32 idealProbesCount = 20; // Ideal amount of probes per-cascade to try to fit in order to cover whole distance
int32 cascadesCount = 1;
float idealDistance = idealProbesCount * probesSpacing;
while (cascadesCount < 4 && idealDistance < distance)
{
idealDistance *= 2;
cascadesCount++;
}
// Calculate the probes count based on the amount of cascades and the distance to cover
const float cascadesDistanceScales[] = { 1.0f, 3.0f, 6.0f, 10.0f }; // Scales each cascade further away from the camera origin
const float distanceExtent = distance / cascadesDistanceScales[cascadesCount - 1];
const float verticalRangeScale = 0.8f; // Scales the probes volume size at Y axis (horizontal aspect ratio makes the DDGI use less probes vertically to cover whole screen)
Int3 probesCounts(Float3::Ceil(Float3(distanceExtent, distanceExtent * verticalRangeScale, distanceExtent) / probesSpacing));
const int32 maxProbeSize = Math::Max(DDGI_PROBE_RESOLUTION_IRRADIANCE, DDGI_PROBE_RESOLUTION_DISTANCE) + 2;
const int32 maxTextureSize = Math::Min(GPUDevice::Instance->Limits.MaximumTexture2DSize, GPU_MAX_TEXTURE_SIZE);
while (probesCounts.X * probesCounts.Y * maxProbeSize > maxTextureSize
|| probesCounts.Z * cascadesCount * maxProbeSize > maxTextureSize)
{
// Decrease quality to ensure the probes texture won't overflow
probesCounts -= 1;
}
// Initialize cascades
float probesSpacings[4];
Float3 viewOrigins[4];
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
{
// Each cascade has higher spacing between probes
float cascadeDistanceScale = cascadesDistanceScales[cascadeIndex];
float cascadeProbesSpacing = probesSpacing * cascadeDistanceScale;
probesSpacings[cascadeIndex] = cascadeProbesSpacing;
// Calculate view origin for cascade by shifting it towards the view direction to account for better view frustum coverage
Float3 viewOrigin = renderContext.View.Position;
Float3 viewDirection = renderContext.View.Direction;
const Float3 probesDistance = Float3(probesCounts) * cascadeProbesSpacing;
const float probesDistanceMax = probesDistance.MaxValue();
const Float3 viewRayHit = CollisionsHelper::LineHitsBox(viewOrigin, viewOrigin + viewDirection * (probesDistanceMax * 2.0f), viewOrigin - probesDistance, viewOrigin + probesDistance);
const float viewOriginOffset = viewRayHit.Y * probesDistanceMax * 0.6f;
viewOrigin += viewDirection * viewOriginOffset;
const float viewOriginSnapping = cascadeProbesSpacing;
viewOrigin = Float3::Floor(viewOrigin / viewOriginSnapping) * viewOriginSnapping;
//viewOrigin = Float3::Zero;
viewOrigins[cascadeIndex] = viewOrigin;
}
// Init buffers
const int32 probesCountCascade = probesCounts.X * probesCounts.Y * probesCounts.Z;
const int32 probesCountTotal = probesCountCascade * cascadesCount;
if (probesCountTotal == 0 || indirectLightingIntensity <= ZeroTolerance)
return true;
int32 probesCountCascadeX = probesCounts.X * probesCounts.Y;
int32 probesCountCascadeY = probesCounts.Z;
int32 probesCountTotalX = probesCountCascadeX;
int32 probesCountTotalY = probesCountCascadeY * cascadesCount;
bool clear = false;
if (ddgiData.CascadesCount != cascadesCount || Math::NotNearEqual(ddgiData.Cascades[0].ProbesSpacing, probesSpacing) || ddgiData.ProbeCounts != probesCounts || ddgiData.ProbeRaysCount != probeRaysCount)
{
PROFILE_CPU_NAMED("Init");
ddgiData.Release();
ddgiData.CascadesCount = cascadesCount;
ddgiData.ProbeRaysCount = probeRaysCount;
ddgiData.ProbesCountTotal = probesCountTotal;
ddgiData.ProbeCounts = probesCounts;
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
{
auto& cascade = ddgiData.Cascades[cascadeIndex];
cascade.Clear();
cascade.ProbesSpacing = probesSpacings[cascadeIndex];
cascade.ProbesOrigin = viewOrigins[cascadeIndex];
}
// Allocate probes textures
uint64 memUsage = 0;
auto desc = GPUTextureDescription::New2D(probesCountTotalX, probesCountTotalY, PixelFormat::Unknown);
#define INIT_TEXTURE(texture, format, width, height) desc.Format = format; desc.Width = width; desc.Height = height; ddgiData.texture = RenderTargetPool::Get(desc); if (!ddgiData.texture) return true; memUsage += ddgiData.texture->GetMemoryUsage(); RENDER_TARGET_POOL_SET_NAME(ddgiData.texture, "DDGI." #texture)
desc.Flags = GPUTextureFlags::ShaderResource | GPUTextureFlags::UnorderedAccess;
INIT_TEXTURE(ProbesTrace, PixelFormat::R16G16B16A16_Float, probeRaysCount, Math::Min(probesCountCascade, DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT));
INIT_TEXTURE(ProbesData, PixelFormat::R8G8B8A8_SNorm, probesCountTotalX, probesCountTotalY);
INIT_TEXTURE(ProbesIrradiance, PixelFormat::R11G11B10_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2));
INIT_TEXTURE(ProbesDistance, PixelFormat::R16G16_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2));
#undef INIT_TEXTURE
#define INIT_BUFFER(buffer, name) ddgiData.buffer = GPUDevice::Instance->CreateBuffer(TEXT(name)); if (!ddgiData.buffer || ddgiData.buffer->Init(desc2)) return true; memUsage += ddgiData.buffer->GetMemoryUsage();
GPUBufferDescription desc2 = GPUBufferDescription::Raw((probesCountCascade + 1) * sizeof(uint32), GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess);
INIT_BUFFER(ActiveProbes, "DDGI.ActiveProbes");
desc2 = GPUBufferDescription::Buffer(sizeof(GPUDispatchIndirectArgs) * Math::DivideAndRoundUp(probesCountCascade, DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT), GPUBufferFlags::Argument | GPUBufferFlags::UnorderedAccess, PixelFormat::R32_UInt, nullptr, sizeof(uint32));
INIT_BUFFER(UpdateProbesInitArgs, "DDGI.UpdateProbesInitArgs");
#undef INIT_BUFFER
LOG(Info, "Dynamic Diffuse Global Illumination memory usage: {0} MB, probes: {1}", memUsage / 1024 / 1024, probesCountTotal);
clear = true;
}
#if COMPILE_WITH_DEV_ENV
clear |= ddgiData.LastFrameUsed <= LastFrameShaderReload;
#endif
if (clear)
{
// Clear probes
PROFILE_GPU("Clear");
context->ClearUA(ddgiData.ProbesData, Float4::Zero);
context->ClearUA(ddgiData.ProbesIrradiance, Float4::Zero);
context->ClearUA(ddgiData.ProbesDistance, Float4::Zero);
}
ddgiData.LastFrameUsed = Engine::FrameCount;
// Calculate which cascades should be updated this frame
const uint64 cascadeFrequencies[] = { 2, 3, 5, 7 };
//const uint64 cascadeFrequencies[] = { 1, 2, 3, 5 };
//const uint64 cascadeFrequencies[] = { 1, 1, 1, 1 };
//const uint64 cascadeFrequencies[] = { 10, 10, 10, 10 };
bool cascadeSkipUpdate[4];
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
{
cascadeSkipUpdate[cascadeIndex] = !clear && (ddgiData.LastFrameUsed % cascadeFrequencies[cascadeIndex]) != 0 && GPU_SPREAD_WORKLOAD;
}
// Compute scrolling (probes are placed around camera but are scrolling to increase stability during movement)
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
{
if (cascadeSkipUpdate[cascadeIndex])
continue;
auto& cascade = ddgiData.Cascades[cascadeIndex];
// Calculate the count of grid cells between the view origin and the scroll anchor
const Float3 volumeOrigin = cascade.ProbesOrigin + Float3(cascade.ProbeScrollOffsets) * cascade.ProbesSpacing;
const Float3 translation = viewOrigins[cascadeIndex] - volumeOrigin;
for (int32 axis = 0; axis < 3; axis++)
{
const float value = translation.Raw[axis] / cascade.ProbesSpacing;
const int32 scroll = value >= 0.0f ? (int32)Math::Floor(value) : (int32)Math::Ceil(value);
cascade.ProbeScrollOffsets.Raw[axis] += scroll;
cascade.ProbeScrollClears.Raw[axis] = scroll;
}
// Shift the volume origin based on scroll offsets for each axis once it overflows
for (int32 axis = 0; axis < 3; axis++)
{
const int32 probeCount = ddgiData.ProbeCounts.Raw[axis];
int32& scrollOffset = cascade.ProbeScrollOffsets.Raw[axis];
while (scrollOffset >= probeCount)
{
cascade.ProbesOrigin.Raw[axis] += cascade.ProbesSpacing * probeCount;
scrollOffset -= probeCount;
}
while (scrollOffset <= -probeCount)
{
cascade.ProbesOrigin.Raw[axis] -= cascade.ProbesSpacing * probeCount;
scrollOffset += probeCount;
}
}
}
// Upload constants
{
ddgiData.Result.Constants.CascadesCount = cascadesCount;
ddgiData.Result.Constants.ProbesCounts[0] = probesCounts.X;
ddgiData.Result.Constants.ProbesCounts[1] = probesCounts.Y;
ddgiData.Result.Constants.ProbesCounts[2] = probesCounts.Z;
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
{
auto& cascade = ddgiData.Cascades[cascadeIndex];
ddgiData.Result.Constants.ProbesOriginAndSpacing[cascadeIndex] = Float4(cascade.ProbesOrigin, cascade.ProbesSpacing);
ddgiData.Result.Constants.ProbesScrollOffsets[cascadeIndex] = Int4(cascade.ProbeScrollOffsets, 0);
}
ddgiData.Result.Constants.RayMaxDistance = distance;
ddgiData.Result.Constants.ViewPos = renderContext.View.Position;
ddgiData.Result.Constants.RaysCount = probeRaysCount;
ddgiData.Result.Constants.ProbeHistoryWeight = probeHistoryWeight;
ddgiData.Result.Constants.IrradianceGamma = 1.5f;
ddgiData.Result.Constants.IndirectLightingIntensity = indirectLightingIntensity;
ddgiData.Result.Constants.FallbackIrradiance = fallbackIrradiance.ToFloat3() * fallbackIrradiance.A;
ddgiData.Result.ProbesData = ddgiData.ProbesData->View();
ddgiData.Result.ProbesDistance = ddgiData.ProbesDistance->View();
ddgiData.Result.ProbesIrradiance = ddgiData.ProbesIrradiance->View();
// Compute random rotation matrix for probe rays orientation (randomized every frame)
Matrix3x3 raysRotationMatrix;
CalculateVolumeRandomRotation(raysRotationMatrix);
Quaternion& raysRotation = *(Quaternion*)&ddgiData.Result.Constants.RaysRotation;
Quaternion::RotationMatrix(raysRotationMatrix, raysRotation);
raysRotation.Conjugate();
Data0 data;
data.DDGI = ddgiData.Result.Constants;
data.GlobalSDF = bindingDataSDF.Constants;
data.GlobalSurfaceAtlas = bindingDataSurfaceAtlas.Constants;
data.ProbesCount = data.DDGI.ProbesCounts[0] * data.DDGI.ProbesCounts[1] * data.DDGI.ProbesCounts[2];
data.ResetBlend = clear ? 1.0f : 0.0f;
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
{
auto& cascade = ddgiData.Cascades[cascadeIndex];
data.ProbeScrollClears[cascadeIndex] = Int4(cascade.ProbeScrollClears, 0);
}
data.TemporalTime = renderContext.List->Setup.UseTemporalAAJitter ? RenderTools::ComputeTemporalTime() : 0.0f;
GBufferPass::SetInputs(renderContext.View, data.GBuffer);
context->UpdateCB(_cb0, &data);
context->BindCB(0, _cb0);
}
// Update probes
{
PROFILE_GPU_CPU_NAMED("Probes Update");
bool anyDirty = false;
uint32 threadGroupsX, threadGroupsY;
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
{
if (cascadeSkipUpdate[cascadeIndex])
continue;
anyDirty = true;
// Classify probes (activation/deactivation and relocation)
{
PROFILE_GPU_CPU_NAMED("Classify Probes");
uint32 activeProbesCount = 0;
context->UpdateBuffer(ddgiData.ActiveProbes, &activeProbesCount, sizeof(uint32), 0);
threadGroupsX = Math::DivideAndRoundUp(probesCountCascade, DDGI_PROBE_CLASSIFY_GROUP_SIZE);
context->BindSR(0, bindingDataSDF.Texture ? bindingDataSDF.Texture->ViewVolume() : nullptr);
context->BindSR(1, bindingDataSDF.TextureMip ? bindingDataSDF.TextureMip->ViewVolume() : nullptr);
context->BindUA(0, ddgiData.Result.ProbesData);
context->BindUA(1, ddgiData.ActiveProbes->View());
Data1 data;
data.CascadeIndex = cascadeIndex;
context->UpdateCB(_cb1, &data);
context->BindCB(1, _cb1);
context->Dispatch(_csClassify, threadGroupsX, 1, 1);
context->ResetUA();
context->ResetSR();
}
// Build indirect args for probes updating (loop over active-only probes)
{
PROFILE_GPU_CPU_NAMED("Init Args");
context->BindSR(0, ddgiData.ActiveProbes->View());
context->BindUA(0, ddgiData.UpdateProbesInitArgs->View());
context->Dispatch(_csUpdateProbesInitArgs, 1, 1, 1);
context->ResetUA();
}
// Update probes in batches so ProbesTrace texture can be smaller
uint32 arg = 0;
for (int32 probesOffset = 0; probesOffset < probesCountCascade; probesOffset += DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT)
{
Data1 data;
data.CascadeIndex = cascadeIndex;
data.ProbeIndexOffset = probesOffset;
context->UpdateCB(_cb1, &data);
context->BindCB(1, _cb1);
// Trace rays from probes
{
PROFILE_GPU_CPU_NAMED("Trace Rays");
// Global SDF with Global Surface Atlas software raytracing (thread X - per probe ray, thread Y - per probe)
context->BindSR(0, bindingDataSDF.Texture ? bindingDataSDF.Texture->ViewVolume() : nullptr);
context->BindSR(1, bindingDataSDF.TextureMip ? bindingDataSDF.TextureMip->ViewVolume() : nullptr);
context->BindSR(2, bindingDataSurfaceAtlas.Chunks ? bindingDataSurfaceAtlas.Chunks->View() : nullptr);
context->BindSR(3, bindingDataSurfaceAtlas.CulledObjects ? bindingDataSurfaceAtlas.CulledObjects->View() : nullptr);
context->BindSR(4, bindingDataSurfaceAtlas.Objects ? bindingDataSurfaceAtlas.Objects->View() : nullptr);
context->BindSR(5, bindingDataSurfaceAtlas.AtlasDepth->View());
context->BindSR(6, bindingDataSurfaceAtlas.AtlasLighting->View());
context->BindSR(7, ddgiData.Result.ProbesData);
context->BindSR(8, skybox);
context->BindSR(9, ddgiData.ActiveProbes->View());
context->BindUA(0, ddgiData.ProbesTrace->View());
context->DispatchIndirect(_csTraceRays[(int32)Graphics::GIQuality], ddgiData.UpdateProbesInitArgs, arg);
context->ResetUA();
context->ResetSR();
}
// Update probes irradiance and distance textures (one thread-group per probe)
{
PROFILE_GPU_CPU_NAMED("Update Probes");
context->BindSR(0, ddgiData.Result.ProbesData);
context->BindSR(1, ddgiData.ProbesTrace->View());
context->BindSR(2, ddgiData.ActiveProbes->View());
context->BindUA(0, ddgiData.Result.ProbesIrradiance);
context->DispatchIndirect(_csUpdateProbesIrradiance, ddgiData.UpdateProbesInitArgs, arg);
context->BindUA(0, ddgiData.Result.ProbesDistance);
context->DispatchIndirect(_csUpdateProbesDistance, ddgiData.UpdateProbesInitArgs, arg);
context->ResetUA();
context->ResetSR();
}
arg += sizeof(GPUDispatchIndirectArgs);
}
}
// Update probes border pixels
if (anyDirty)
{
PROFILE_GPU_CPU_NAMED("Update Borders");
// Irradiance
context->BindUA(0, ddgiData.Result.ProbesIrradiance);
threadGroupsX = Math::DivideAndRoundUp(probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
threadGroupsY = Math::DivideAndRoundUp(probesCountTotalY, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
context->Dispatch(_csUpdateBordersIrradianceRow, threadGroupsX, threadGroupsY, 1);
threadGroupsX = Math::DivideAndRoundUp(probesCountTotalX, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
threadGroupsY = Math::DivideAndRoundUp(probesCountTotalY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
context->Dispatch(_csUpdateBordersIrradianceCollumn, threadGroupsX, threadGroupsY, 1);
// Distance
context->BindUA(0, ddgiData.Result.ProbesDistance);
threadGroupsX = Math::DivideAndRoundUp(probesCountTotalX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
threadGroupsY = Math::DivideAndRoundUp(probesCountTotalY, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
context->Dispatch(_csUpdateBordersDistanceRow, threadGroupsX, threadGroupsY, 1);
threadGroupsX = Math::DivideAndRoundUp(probesCountTotalX, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
threadGroupsY = Math::DivideAndRoundUp(probesCountTotalY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE);
context->Dispatch(_csUpdateBordersDistanceCollumn, threadGroupsX, threadGroupsY, 1);
context->ResetUA();
context->ResetSR();
}
}
return false;
}
bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext, GPUContext* context, GPUTextureView* lightBuffer)
{
if (checkIfSkipPass())
return true;
if (renderContext.List->Scenes.Count() == 0)
return true;
RenderBuffers* renderBuffers = renderContext.Buffers;
bool render = true;
if (renderContext.View.IsOfflinePass)
{
// During offline pass (eg. probes rendering) we can try reuse main game viewport or editor viewport DDGI probes
// TODO: apply it for transparency too (in DynamicDiffuseGlobalIlluminationPass::Get)
for (auto* task : RenderTask::Tasks)
{
if (auto* sceneTask = ScriptingObject::Cast<SceneRenderTask>(task))
{
auto* sceneTaskDDGI = sceneTask->Buffers ? sceneTask->Buffers->FindCustomBuffer<DDGICustomBuffer>(TEXT("DDGI")) : nullptr;
if (sceneTaskDDGI && sceneTaskDDGI->LastFrameUsed + 1 >= Engine::FrameCount)
{
// Reuse DDGI from this task
renderBuffers = sceneTask->Buffers;
render = false;
break;
}
}
}
}
auto& ddgiData = *renderBuffers->GetCustomBuffer<DDGICustomBuffer>(TEXT("DDGI"));
if (render && ddgiData.LastFrameUsed == Engine::FrameCount)
render = false;
PROFILE_GPU_CPU("Dynamic Diffuse Global Illumination");
if (render)
{
if (RenderInner(renderContext, context, ddgiData))
{
context->ResetRenderTarget();
context->ResetSR();
context->ResetUA();
context->SetViewportAndScissors(renderContext.View.ScreenSize.X, renderContext.View.ScreenSize.Y);
return true;
}
}
// Render indirect lighting
if (lightBuffer)
{
PROFILE_GPU_CPU_NAMED("Indirect Lighting");
#if 0
// DDGI indirect lighting debug preview
context->Clear(lightBuffer, Color::Transparent);
#endif
if (!render)
{
Data0 data;
data.DDGI = ddgiData.Result.Constants;
data.TemporalTime = 0.0f;
GBufferPass::SetInputs(renderContext.View, data.GBuffer);
context->UpdateCB(_cb0, &data);
context->BindCB(0, _cb0);
}
context->BindSR(0, renderContext.Buffers->GBuffer0->View());
context->BindSR(1, renderContext.Buffers->GBuffer1->View());
context->BindSR(2, renderContext.Buffers->GBuffer2->View());
context->BindSR(3, renderContext.Buffers->DepthBuffer->View());
context->BindSR(4, ddgiData.Result.ProbesData);
context->BindSR(5, ddgiData.Result.ProbesDistance);
context->BindSR(6, ddgiData.Result.ProbesIrradiance);
context->SetViewportAndScissors(renderContext.View.ScreenSize.X, renderContext.View.ScreenSize.Y);
context->SetRenderTarget(lightBuffer);
context->SetState(_psIndirectLighting);
context->DrawFullscreenTriangle();
}
#if USE_EDITOR
// Probes debug drawing
if (renderContext.View.Mode == ViewMode::GlobalIllumination && lightBuffer)
{
PROFILE_GPU_CPU_NAMED("Debug Probes");
if (!_debugModel)
_debugModel = Content::LoadAsyncInternal<Model>(TEXT("Editor/Primitives/Sphere"));
if (!_debugMaterial)
_debugMaterial = Content::LoadAsyncInternal<MaterialBase>(TEXT("Editor/DebugMaterials/DDGIDebugProbes"));
if (_debugModel && _debugModel->IsLoaded() && _debugModel->CanBeRendered() && _debugMaterial && _debugMaterial->IsLoaded())
{
RenderContext debugRenderContext(renderContext);
debugRenderContext.List = RenderList::GetFromPool();
debugRenderContext.View.Pass = DrawPass::GBuffer;
debugRenderContext.View.Prepare(debugRenderContext);
Matrix world;
Matrix::Scaling(Float3(0.2f), world);
const Mesh& debugMesh = _debugModel->LODs[0].Meshes[0];
for (int32 probeIndex = 0; probeIndex < ddgiData.ProbesCountTotal; probeIndex++)
debugMesh.Draw(debugRenderContext, _debugMaterial, world, StaticFlags::None, true, DrawPass::GBuffer, (float)probeIndex);
debugRenderContext.List->SortDrawCalls(debugRenderContext, false, DrawCallsListType::GBuffer);
context->SetViewportAndScissors(debugRenderContext.View.ScreenSize.X, debugRenderContext.View.ScreenSize.Y);
GPUTextureView* targetBuffers[5] =
{
lightBuffer,
renderContext.Buffers->GBuffer0->View(),
renderContext.Buffers->GBuffer1->View(),
renderContext.Buffers->GBuffer2->View(),
renderContext.Buffers->GBuffer3->View(),
};
context->SetRenderTarget(*renderContext.Buffers->DepthBuffer, ToSpan(targetBuffers, ARRAY_COUNT(targetBuffers)));
{
// Pass DDGI data to the material
_debugMaterial->SetParameterValue(TEXT("ProbesData"), Variant(ddgiData.ProbesData));
_debugMaterial->SetParameterValue(TEXT("ProbesIrradiance"), Variant(ddgiData.ProbesIrradiance));
_debugMaterial->SetParameterValue(TEXT("ProbesDistance"), Variant(ddgiData.ProbesDistance));
auto cb = _debugMaterial->GetShader()->GetCB(3);
if (cb)
{
context->UpdateCB(cb, &ddgiData.Result.Constants);
context->BindCB(3, cb);
}
}
debugRenderContext.List->ExecuteDrawCalls(debugRenderContext, DrawCallsListType::GBuffer);
RenderList::ReturnToPool(debugRenderContext.List);
}
}
#endif
context->ResetRenderTarget();
context->ResetSR();
context->ResetUA();
context->SetViewportAndScissors(renderContext.View.ScreenSize.X, renderContext.View.ScreenSize.Y);
return false;
}