diff --git a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax index 49c91d5b4..67269d782 100644 --- a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax +++ b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:78c41dd01f6f2740fbdd08c2e0f05875398f38ba8e09bd91495636281a6ec6cc -size 37799 +oid sha256:dc3bbdd9c663f9ba6b21c9f49a645c59ba4ecd340b9d046fe60aff26bab26b3a +size 39880 diff --git a/Content/Shaders/GI/DDGI.flax b/Content/Shaders/GI/DDGI.flax index e240f1dd1..8d8ba1d43 100644 --- a/Content/Shaders/GI/DDGI.flax +++ b/Content/Shaders/GI/DDGI.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6472e0a74ad1b42770b0ae80b05fd1c7ff44de32bf4294e8e4b9f97be40535c5 -size 18517 +oid sha256:6ad0a077984ef2749700d6eb08bf1f6a23ea1fd84e4296128ecbe3d57994b007 +size 19306 diff --git a/Content/Shaders/SSR.flax b/Content/Shaders/SSR.flax index 7e249afb5..e2c77a59c 100644 --- a/Content/Shaders/SSR.flax +++ b/Content/Shaders/SSR.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fd2d0c05638d8e4c6c307d70d57a29f2475c9c0d8dfb43f8b8ff09189e4a62bb -size 9341 +oid sha256:15dce5625a1a074796d700946e02d8cb16f3841eb55b0031ad60c77cac5ad783 +size 9342 diff --git a/Source/Engine/Graphics/PostProcessSettings.h b/Source/Engine/Graphics/PostProcessSettings.h index f896b14d0..f1c9a353f 100644 --- a/Source/Engine/Graphics/PostProcessSettings.h +++ b/Source/Engine/Graphics/PostProcessSettings.h @@ -202,13 +202,13 @@ DECLARE_SCRIPTING_TYPE_NO_SPAWN(AmbientOcclusionSettings); /// /// Ambient occlusion intensity. /// - API_FIELD(Attributes="DefaultValue(0.8f), Limit(0, 5.0f, 0.01f), EditorOrder(1), PostProcessSetting((int)AmbientOcclusionSettingsOverride.Intensity)") + API_FIELD(Attributes="DefaultValue(0.8f), Limit(0, 10.0f, 0.01f), EditorOrder(1), PostProcessSetting((int)AmbientOcclusionSettingsOverride.Intensity)") float Intensity = 0.8f; /// /// Ambient occlusion power. /// - API_FIELD(Attributes="DefaultValue(0.75f), Limit(0, 4.0f, 0.01f), EditorOrder(2), PostProcessSetting((int)AmbientOcclusionSettingsOverride.Power)") + API_FIELD(Attributes="DefaultValue(0.75f), Limit(0, 10.0f, 0.01f), EditorOrder(2), PostProcessSetting((int)AmbientOcclusionSettingsOverride.Power)") float Power = 0.75f; /// diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index d8adfacd0..679df6254 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -12,6 +12,7 @@ #include "Engine/Engine/Engine.h" #include "Engine/Content/Content.h" #include "Engine/Debug/DebugDraw.h" +#include "Engine/Engine/Time.h" #include "Engine/Graphics/GPUDevice.h" #include "Engine/Graphics/RenderTask.h" #include "Engine/Graphics/RenderBuffers.h" @@ -43,35 +44,51 @@ PACK_STRUCT(struct Data0 GlobalSignDistanceFieldPass::ConstantsData GlobalSDF; GlobalSurfaceAtlasPass::ConstantsData GlobalSurfaceAtlas; GBufferData GBuffer; - Vector2 Padding0; float ResetBlend; + float TemporalTime; float IndirectLightingIntensity; + float Padding0; + }); + +PACK_STRUCT(struct Data1 + { + Vector3 Padding1; + uint32 CascadeIndex; // TODO: use push constants on Vulkan or root signature data on DX12 to reduce overhead of changing single DWORD }); class DDGICustomBuffer : public RenderBuffers::CustomBuffer { public: + struct + { + Vector3 ProbesOrigin; + float ProbesSpacing = 0.0f; + Int3 ProbeScrollOffsets; + Int3 ProbeScrollDirections; + bool ProbeScrollClear[3]; + + void Clear() + { + ProbesOrigin = Vector3::Zero; + ProbeScrollOffsets = Int3::Zero; + ProbeScrollDirections = Int3::Zero; + ProbeScrollClear[0] = false; + ProbeScrollClear[1] = false; + ProbeScrollClear[2] = false; + } + } Cascades[4]; + + int32 CascadesCount = 0; int32 ProbeRaysCount = 0; - float ProbesSpacing = 0.0f; Int3 ProbeCounts = Int3::Zero; - Vector3 ProbesOrigin; - Int3 ProbeScrollOffsets; - Int3 ProbeScrollDirections; - bool ProbeScrollClear[3]; GPUTexture* ProbesTrace = nullptr; // Probes ray tracing: (RGB: hit radiance, A: hit distance) GPUTexture* ProbesState = nullptr; // Probes state: (RGB: world-space offset, A: state) GPUTexture* ProbesIrradiance = nullptr; // Probes irradiance (RGB: sRGB color) GPUTexture* ProbesDistance = nullptr; // Probes distance (R: mean distance, G: mean distance^2) DynamicDiffuseGlobalIlluminationPass::BindingData Result; - FORCE_INLINE void Clear() + FORCE_INLINE void Release() { - ProbesOrigin = Vector3::Zero; - ProbeScrollOffsets = Int3::Zero; - ProbeScrollDirections = Int3::Zero; - ProbeScrollClear[0] = false; - ProbeScrollClear[1] = false; - ProbeScrollClear[2] = false; RenderTargetPool::Release(ProbesTrace); RenderTargetPool::Release(ProbesState); RenderTargetPool::Release(ProbesIrradiance); @@ -80,7 +97,7 @@ public: ~DDGICustomBuffer() { - Clear(); + Release(); } }; @@ -150,7 +167,8 @@ bool DynamicDiffuseGlobalIlluminationPass::setupResources() // Initialize resources const auto shader = _shader->GetShader(); _cb0 = shader->GetCB(0); - if (!_cb0) + _cb1 = shader->GetCB(1); + if (!_cb0 || !_cb1) return true; _csClassify = shader->GetCS("CS_Classify"); _csTraceRays = shader->GetCS("CS_TraceRays"); @@ -199,6 +217,7 @@ void DynamicDiffuseGlobalIlluminationPass::Dispose() // Cleanup _cb0 = nullptr; + _cb1 = nullptr; _csTraceRays = nullptr; _shader = nullptr; SAFE_DELETE_GPU_RESOURCE(_psIndirectLighting); @@ -250,51 +269,77 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext, // TODO: configurable via postFx settings (maybe use Global SDF distance?) const float indirectLightingIntensity = 1.0f; const float probeHistoryWeight = 0.8f; - const Vector3 giDistance(2000, 2000, 2000); // GI distance around the view (in each direction) - const float giResolution = 100.0f; // GI probes placement spacing - const Int3 probesCounts(Vector3::Ceil(giDistance / giResolution)); - const Vector3 probesDistance = Vector3(probesCounts) * giResolution; + const int32 cascadesCount = 4; // in range 1-4 + // TODO: use GI.Distance as a easier to adjust total distance and automatically calculate distanceExtent from it + const float distance = 20000.0f; // GI distance around the view (in each direction) + const float cascadesDistanceScales[] = { 1.0f, 3.0f, 6.0f, 10.0f }; // Scales each cascade further away from the camera origin + const float distanceExtent = distance / cascadesDistanceScales[cascadesCount - 1]; + const float verticalRangeScale = 0.8f; // Scales the probes volume size at Y axis (horizontal aspect ratio makes the DDGI use less probes vertically to cover whole screen) + const float probesSpacing = 200.0f; // GI probes placement spacing nearby camera (for closest cascade; gets automatically reduced for further cascades) + const Int3 probesCounts(Vector3::Ceil(Vector3(distanceExtent, distanceExtent * verticalRangeScale, distanceExtent) / probesSpacing)); const int32 probeRaysCount = Math::Min(Math::AlignUp(256, DDGI_TRACE_RAYS_GROUP_SIZE_X), DDGI_TRACE_RAYS_LIMIT); // TODO: make it based on the GI Quality - // Calculate view origin - Vector3 viewOrigin = renderContext.View.Position; - Vector3 viewDirection = renderContext.View.Direction; - const float probesDistanceMax = probesDistance.MaxValue(); - const Vector2 viewRayHit = CollisionsHelper::LineHitsBox(viewOrigin, viewOrigin + viewDirection * (probesDistanceMax * 2.0f), viewOrigin - probesDistance, viewOrigin + probesDistance); - const float viewOriginOffset = viewRayHit.Y * probesDistanceMax * 0.8f; - viewOrigin += viewDirection * viewOriginOffset; - const float viewOriginSnapping = giResolution; - viewOrigin = Vector3::Floor(viewOrigin / viewOriginSnapping) * viewOriginSnapping; - //viewOrigin = Vector3::Zero; + // Initialize cascades + float probesSpacings[4]; + Vector3 viewOrigins[4]; + for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) + { + // Each cascade has higher spacing between probes + float cascadeDistanceScale = cascadesDistanceScales[cascadeIndex]; + float cascadeProbesSpacing = probesSpacing * cascadeDistanceScale; + probesSpacings[cascadeIndex] = cascadeProbesSpacing; + + // Calculate view origin for cascade by shifting it towards the view direction to account for better view frustum coverage + Vector3 viewOrigin = renderContext.View.Position; + Vector3 viewDirection = renderContext.View.Direction; + const Vector3 probesDistance = Vector3(probesCounts) * cascadeProbesSpacing; + const float probesDistanceMax = probesDistance.MaxValue(); + const Vector2 viewRayHit = CollisionsHelper::LineHitsBox(viewOrigin, viewOrigin + viewDirection * (probesDistanceMax * 2.0f), viewOrigin - probesDistance, viewOrigin + probesDistance); + const float viewOriginOffset = viewRayHit.Y * probesDistanceMax * 0.6f; + viewOrigin += viewDirection * viewOriginOffset; + const float viewOriginSnapping = cascadeProbesSpacing; + viewOrigin = Vector3::Floor(viewOrigin / viewOriginSnapping) * viewOriginSnapping; + //viewOrigin = Vector3::Zero; + viewOrigins[cascadeIndex] = viewOrigin; + } // Init buffers - const int32 probesCount = probesCounts.X * probesCounts.Y * probesCounts.Z; - if (probesCount == 0 || indirectLightingIntensity <= ZeroTolerance) + const int32 probesCountCascade = probesCounts.X * probesCounts.Y * probesCounts.Z; + const int32 probesCountTotal = probesCountCascade * cascadesCount; + if (probesCountTotal == 0 || indirectLightingIntensity <= ZeroTolerance) return true; - int32 probesCountX = probesCounts.X * probesCounts.Y; - int32 probesCountY = probesCounts.Z; + int32 probesCountCascadeX = probesCounts.X * probesCounts.Y; + int32 probesCountCascadeY = probesCounts.Z; + int32 probesCountTotalX = probesCountCascadeX; + int32 probesCountTotalY = probesCountCascadeY * cascadesCount; bool clear = false; - if (Math::NotNearEqual(ddgiData.ProbesSpacing, giResolution) || ddgiData.ProbeCounts != probesCounts || ddgiData.ProbeRaysCount != probeRaysCount) + if (ddgiData.CascadesCount != cascadesCount || Math::NotNearEqual(ddgiData.Cascades[0].ProbesSpacing, probesSpacing) || ddgiData.ProbeCounts != probesCounts || ddgiData.ProbeRaysCount != probeRaysCount) { PROFILE_CPU_NAMED("Init"); - ddgiData.Clear(); + ddgiData.Release(); + ddgiData.CascadesCount = cascadesCount; ddgiData.ProbeRaysCount = probeRaysCount; - ddgiData.ProbesSpacing = giResolution; ddgiData.ProbeCounts = probesCounts; - ddgiData.ProbesOrigin = viewOrigin; + for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) + { + auto& cascade = ddgiData.Cascades[cascadeIndex]; + cascade.Clear(); + cascade.ProbesSpacing = probesSpacings[cascadeIndex]; + cascade.ProbesOrigin = viewOrigins[cascadeIndex]; + } // Allocate probes textures uint64 memUsage = 0; - auto desc = GPUTextureDescription::New2D(probesCountX, probesCountY, PixelFormat::Unknown); + auto desc = GPUTextureDescription::New2D(probesCountTotalX, probesCountTotalY, PixelFormat::Unknown); // TODO rethink probes data placement in memory -> what if we get [50x50x30] resolution? That's 75000 probes! Use sparse storage with active-only probes #define INIT_TEXTURE(texture, format, width, height) desc.Format = format; desc.Width = width; desc.Height = height; ddgiData.texture = RenderTargetPool::Get(desc); if (!ddgiData.texture) return true; memUsage += ddgiData.texture->GetMemoryUsage() desc.Flags = GPUTextureFlags::ShaderResource | GPUTextureFlags::UnorderedAccess; - INIT_TEXTURE(ProbesTrace, PixelFormat::R16G16B16A16_Float, probeRaysCount, probesCount); - INIT_TEXTURE(ProbesState, PixelFormat::R16G16B16A16_Float, probesCountX, probesCountY); // TODO: optimize to a RGBA32 (pos offset can be normalized to [0-0.5] range of ProbesSpacing and packed with state flag) - INIT_TEXTURE(ProbesIrradiance, PixelFormat::R11G11B10_Float, probesCountX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), probesCountY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2)); - INIT_TEXTURE(ProbesDistance, PixelFormat::R16G16_Float, probesCountX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), probesCountY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2)); + INIT_TEXTURE(ProbesTrace, PixelFormat::R16G16B16A16_Float, probeRaysCount, probesCountTotal); // TODO: limit to 4k probes for a single batch to trace + INIT_TEXTURE(ProbesState, PixelFormat::R16G16B16A16_Float, probesCountTotalX, probesCountTotalY); // TODO: optimize to a RGBA32 (pos offset can be normalized to [0-0.5] range of ProbesSpacing and packed with state flag) + INIT_TEXTURE(ProbesIrradiance, PixelFormat::R11G11B10_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2)); + INIT_TEXTURE(ProbesDistance, PixelFormat::R16G16_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2)); #undef INIT_TEXTURE - LOG(Info, "Dynamic Diffuse Global Illumination memory usage: {0} MB, probes: {1}", memUsage / 1024 / 1024, probesCount); + LOG(Info, "Dynamic Diffuse Global Illumination memory usage: {0} MB, probes: {1}", memUsage / 1024 / 1024, probesCountTotal); clear = true; } #if USE_EDITOR @@ -309,46 +354,62 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext, context->ClearUA(ddgiData.ProbesDistance, Vector4::Zero); } - // Compute scrolling (probes are placed around camera but are scrolling to increase stability during movement) + // Calculate which cascades should be updated this frame + //const uint64 cascadeFrequencies[] = { 1, 2, 3, 5 }; + // TODO: prevent updating 2 cascades at once on Low quality + const uint64 cascadeFrequencies[] = { 1, 1, 1, 1 }; + bool cascadeSkipUpdate[4]; + for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) { - + cascadeSkipUpdate[cascadeIndex] = !clear && (currentFrame % cascadeFrequencies[cascadeIndex]) != 0; + } + + // Compute scrolling (probes are placed around camera but are scrolling to increase stability during movement) + for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) + { + if (cascadeSkipUpdate[cascadeIndex]) + continue; + auto& cascade = ddgiData.Cascades[cascadeIndex]; + // Reset the volume origin and scroll offsets for each axis for (int32 axis = 0; axis < 3; axis++) { - if (ddgiData.ProbeScrollOffsets.Raw[axis] != 0 && (ddgiData.ProbeScrollOffsets.Raw[axis] % ddgiData.ProbeCounts.Raw[axis] == 0)) + if (cascade.ProbeScrollOffsets.Raw[axis] != 0 && (cascade.ProbeScrollOffsets.Raw[axis] % ddgiData.ProbeCounts.Raw[axis] == 0)) { - ddgiData.ProbesOrigin.Raw[axis] += (float)ddgiData.ProbeCounts.Raw[axis] * ddgiData.ProbesSpacing * (float)ddgiData.ProbeScrollDirections.Raw[axis]; - ddgiData.ProbeScrollOffsets.Raw[axis] = 0; + cascade.ProbesOrigin.Raw[axis] += (float)ddgiData.ProbeCounts.Raw[axis] * cascade.ProbesSpacing * (float)cascade.ProbeScrollDirections.Raw[axis]; + cascade.ProbeScrollOffsets.Raw[axis] = 0; } } // Calculate the count of grid cells between the view origin and the scroll anchor - const Vector3 volumeOrigin = ddgiData.ProbesOrigin + Vector3(ddgiData.ProbeScrollOffsets) * ddgiData.ProbesSpacing; - const Vector3 translation = viewOrigin - volumeOrigin; + const Vector3 volumeOrigin = cascade.ProbesOrigin + Vector3(cascade.ProbeScrollOffsets) * cascade.ProbesSpacing; + const Vector3 translation = viewOrigins[cascadeIndex] - volumeOrigin; for (int32 axis = 0; axis < 3; axis++) { - const float value = translation.Raw[axis] / ddgiData.ProbesSpacing; + const float value = translation.Raw[axis] / cascade.ProbesSpacing; const int32 scroll = value >= 0.0f ? (int32)Math::Floor(value) : (int32)Math::Ceil(value); - ddgiData.ProbeScrollOffsets.Raw[axis] += scroll; - ddgiData.ProbeScrollClear[axis] = scroll != 0; - ddgiData.ProbeScrollDirections.Raw[axis] = translation.Raw[axis] >= 0.0f ? 1 : -1; + cascade.ProbeScrollOffsets.Raw[axis] += scroll; + cascade.ProbeScrollClear[axis] = scroll != 0; + cascade.ProbeScrollDirections.Raw[axis] = translation.Raw[axis] >= 0.0f ? 1 : -1; } } // Upload constants { - ddgiData.Result.Constants.ProbesOrigin = ddgiData.ProbesOrigin; - ddgiData.Result.Constants.ProbesSpacing = ddgiData.ProbesSpacing; + ddgiData.Result.Constants.CascadesCount = cascadesCount; ddgiData.Result.Constants.ProbesCounts[0] = probesCounts.X; ddgiData.Result.Constants.ProbesCounts[1] = probesCounts.Y; ddgiData.Result.Constants.ProbesCounts[2] = probesCounts.Z; - ddgiData.Result.Constants.ProbesScrollOffsets = ddgiData.ProbeScrollOffsets; - ddgiData.Result.Constants.ProbeScrollDirections = ddgiData.ProbeScrollDirections; - ddgiData.Result.Constants.ProbeScrollClear[0] = ddgiData.ProbeScrollClear[0] != 0; - ddgiData.Result.Constants.ProbeScrollClear[1] = ddgiData.ProbeScrollClear[1] != 0; - ddgiData.Result.Constants.ProbeScrollClear[2] = ddgiData.ProbeScrollClear[2] != 0; + for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) + { + auto& cascade = ddgiData.Cascades[cascadeIndex]; + int32 probeScrollClear = cascade.ProbeScrollClear[0] + cascade.ProbeScrollClear[1] * 2 + cascade.ProbeScrollClear[2] * 4; // Pack clear flags into bits + ddgiData.Result.Constants.ProbesOriginAndSpacing[cascadeIndex] = Vector4(cascade.ProbesOrigin, cascade.ProbesSpacing); + ddgiData.Result.Constants.ProbesScrollOffsets[cascadeIndex] = Int4(cascade.ProbeScrollOffsets, probeScrollClear); + ddgiData.Result.Constants.ProbeScrollDirections[cascadeIndex] = Int4(cascade.ProbeScrollDirections, 0); + } ddgiData.Result.Constants.RayMaxDistance = 10000.0f; // TODO: adjust to match perf/quality ratio (make it based on Global SDF and Global Surface Atlas distance) - ddgiData.Result.Constants.ViewDir = viewDirection; + ddgiData.Result.Constants.ViewDir = renderContext.View.Direction; ddgiData.Result.Constants.RaysCount = probeRaysCount; ddgiData.Result.Constants.ProbeHistoryWeight = probeHistoryWeight; ddgiData.Result.Constants.IrradianceGamma = 5.0f; @@ -368,6 +429,18 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext, data.GlobalSDF = bindingDataSDF.Constants; data.GlobalSurfaceAtlas = bindingDataSurfaceAtlas.Constants; data.ResetBlend = clear ? 1.0f : 0.0f; + if (renderContext.List->Settings.AntiAliasing.Mode == AntialiasingMode::TemporalAntialiasing) + { + // Use temporal offset in the dithering factor (gets cleaned out by TAA) + const float time = Time::Draw.UnscaledTime.GetTotalSeconds(); + const float scale = 10; + const float integral = roundf(time / scale) * scale; + data.TemporalTime = time - integral; + } + else + { + data.TemporalTime = 0.0f; + } data.IndirectLightingIntensity = indirectLightingIntensity; GBufferPass::SetInputs(renderContext.View, data.GBuffer); context->UpdateCB(_cb0, &data); @@ -377,72 +450,117 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext, // Classify probes (activation/deactivation and relocation) { PROFILE_GPU_CPU("Probes Classification"); - uint32 threadGroups = Math::DivideAndRoundUp(probesCount, DDGI_PROBE_CLASSIFY_GROUP_SIZE); + uint32 threadGroups = Math::DivideAndRoundUp(probesCountCascade, DDGI_PROBE_CLASSIFY_GROUP_SIZE); for (int32 i = 0; i < 4; i++) { context->BindSR(i, bindingDataSDF.Cascades[i]->ViewVolume()); } context->BindUA(0, ddgiData.Result.ProbesState); - context->Dispatch(_csClassify, threadGroups, 1, 1); - context->ResetUA(); - } - - // Trace rays from probes - { - PROFILE_GPU_CPU("Trace Rays"); - - // Global SDF with Global Surface Atlas software raytracing (X - per probe ray, Y - per probe) - ASSERT_LOW_LAYER((probeRaysCount % DDGI_TRACE_RAYS_GROUP_SIZE_X) == 0); - for (int32 i = 0; i < 4; i++) + for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) { - context->BindSR(i, bindingDataSDF.Cascades[i]->ViewVolume()); - context->BindSR(i + 4, bindingDataSDF.CascadeMips[i]->ViewVolume()); + if (cascadeSkipUpdate[cascadeIndex]) + continue; + Data1 data; + data.CascadeIndex = cascadeIndex; + context->UpdateCB(_cb1, &data); + context->BindCB(1, _cb1); + context->Dispatch(_csClassify, threadGroups, 1, 1); } - context->BindSR(8, bindingDataSurfaceAtlas.Chunks ? bindingDataSurfaceAtlas.Chunks->View() : nullptr); - context->BindSR(9, bindingDataSurfaceAtlas.CulledObjects ? bindingDataSurfaceAtlas.CulledObjects->View() : nullptr); - context->BindSR(10, bindingDataSurfaceAtlas.AtlasDepth->View()); - context->BindSR(11, bindingDataSurfaceAtlas.AtlasLighting->View()); - context->BindSR(12, ddgiData.Result.ProbesState); - context->BindSR(13, skybox); - context->BindUA(0, ddgiData.ProbesTrace->View()); - context->Dispatch(_csTraceRays, probeRaysCount / DDGI_TRACE_RAYS_GROUP_SIZE_X, probesCount, 1); context->ResetUA(); - context->ResetSR(); - -#if 0 - // Probes trace debug preview - context->SetViewportAndScissors(renderContext.View.ScreenSize.X, renderContext.View.ScreenSize.Y); - context->SetRenderTarget(lightBuffer); - context->Draw(ddgiData.ProbesTrace); - return false; -#endif } // Update probes { - PROFILE_GPU_CPU("Update Probes"); - context->BindSR(0, ddgiData.Result.ProbesState); - context->BindSR(1, ddgiData.ProbesTrace->View()); + PROFILE_GPU_CPU("Probes Update"); + bool anyDirty = false; + uint32 threadGroupsX, threadGroupsY; + for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) + { + if (cascadeSkipUpdate[cascadeIndex]) + continue; + anyDirty = true; + Data1 data; + data.CascadeIndex = cascadeIndex; + context->UpdateCB(_cb1, &data); + context->BindCB(1, _cb1); - // Update irradiance - context->BindUA(0, ddgiData.Result.ProbesIrradiance); - context->Dispatch(_csUpdateProbesIrradiance, probesCountX, probesCountY, 1); - uint32 threadGroupsX = Math::DivideAndRoundUp(probesCountX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - uint32 threadGroupsY = Math::DivideAndRoundUp(probesCountY, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - context->Dispatch(_csUpdateBordersIrradianceRow, threadGroupsX, threadGroupsY, 1); - threadGroupsX = Math::DivideAndRoundUp(probesCountX, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - threadGroupsY = Math::DivideAndRoundUp(probesCountY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - context->Dispatch(_csUpdateBordersIrradianceCollumn, threadGroupsX, threadGroupsY, 1); + // TODO: run probes tracing+update in 4k batches - // Update distance - context->BindUA(0, ddgiData.Result.ProbesDistance); - context->Dispatch(_csUpdateProbesDistance, probesCountX, probesCountY, 1); - threadGroupsX = Math::DivideAndRoundUp(probesCountX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - threadGroupsY = Math::DivideAndRoundUp(probesCountY, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - context->Dispatch(_csUpdateBordersDistanceRow, threadGroupsX, threadGroupsY, 1); - threadGroupsX = Math::DivideAndRoundUp(probesCountX, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - threadGroupsY = Math::DivideAndRoundUp(probesCountY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - context->Dispatch(_csUpdateBordersDistanceCollumn, threadGroupsX, threadGroupsY, 1); + // Trace rays from probes + { + PROFILE_GPU_CPU("Trace Rays"); + + // Global SDF with Global Surface Atlas software raytracing (thread X - per probe ray, thread Y - per probe) + ASSERT_LOW_LAYER((probeRaysCount % DDGI_TRACE_RAYS_GROUP_SIZE_X) == 0); + for (int32 i = 0; i < 4; i++) + { + context->BindSR(i, bindingDataSDF.Cascades[i]->ViewVolume()); + context->BindSR(i + 4, bindingDataSDF.CascadeMips[i]->ViewVolume()); + } + context->BindSR(8, bindingDataSurfaceAtlas.Chunks ? bindingDataSurfaceAtlas.Chunks->View() : nullptr); + context->BindSR(9, bindingDataSurfaceAtlas.CulledObjects ? bindingDataSurfaceAtlas.CulledObjects->View() : nullptr); + context->BindSR(10, bindingDataSurfaceAtlas.AtlasDepth->View()); + context->BindSR(11, bindingDataSurfaceAtlas.AtlasLighting->View()); + context->BindSR(12, ddgiData.Result.ProbesState); + context->BindSR(13, skybox); + context->BindUA(0, ddgiData.ProbesTrace->View()); + context->Dispatch(_csTraceRays, probeRaysCount / DDGI_TRACE_RAYS_GROUP_SIZE_X, probesCountCascade, 1); + context->ResetUA(); + context->ResetSR(); + +#if 0 + // Probes trace debug preview + context->SetViewportAndScissors(renderContext.View.ScreenSize.X, renderContext.View.ScreenSize.Y); + context->SetRenderTarget(lightBuffer); + context->Draw(ddgiData.ProbesTrace); + return false; +#endif + } + + context->BindSR(0, ddgiData.Result.ProbesState); + context->BindSR(1, ddgiData.ProbesTrace->View()); + + // Update probes irradiance texture + { + PROFILE_GPU_CPU("Update Irradiance"); + context->BindUA(0, ddgiData.Result.ProbesIrradiance); + context->Dispatch(_csUpdateProbesIrradiance, probesCountCascadeX, probesCountCascadeY, 1); + } + + // Update probes distance texture + { + PROFILE_GPU_CPU("Update Distance"); + context->BindUA(0, ddgiData.Result.ProbesDistance); + context->Dispatch(_csUpdateProbesDistance, probesCountCascadeX, probesCountCascadeY, 1); + } + } + + // Update probes border pixels + if (anyDirty) + { + PROFILE_GPU_CPU("Update Borders"); + + // Irradiance + context->BindUA(0, ddgiData.Result.ProbesIrradiance); + threadGroupsX = Math::DivideAndRoundUp(probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); + threadGroupsY = Math::DivideAndRoundUp(probesCountTotalY, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); + context->Dispatch(_csUpdateBordersIrradianceRow, threadGroupsX, threadGroupsY, 1); + threadGroupsX = Math::DivideAndRoundUp(probesCountTotalX, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); + threadGroupsY = Math::DivideAndRoundUp(probesCountTotalY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); + context->Dispatch(_csUpdateBordersIrradianceCollumn, threadGroupsX, threadGroupsY, 1); + + // Distance + context->BindUA(0, ddgiData.Result.ProbesDistance); + threadGroupsX = Math::DivideAndRoundUp(probesCountTotalX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); + threadGroupsY = Math::DivideAndRoundUp(probesCountTotalY, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); + context->Dispatch(_csUpdateBordersDistanceRow, threadGroupsX, threadGroupsY, 1); + threadGroupsX = Math::DivideAndRoundUp(probesCountTotalX, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); + threadGroupsY = Math::DivideAndRoundUp(probesCountTotalY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); + context->Dispatch(_csUpdateBordersDistanceCollumn, threadGroupsX, threadGroupsY, 1); + + context->ResetUA(); + context->ResetSR(); + } } // Render indirect lighting @@ -453,7 +571,6 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext, // DDGI indirect lighting debug preview context->Clear(lightBuffer, Color::Transparent); #endif - context->ResetUA(); context->BindSR(0, renderContext.Buffers->GBuffer0->View()); context->BindSR(1, renderContext.Buffers->GBuffer1->View()); context->BindSR(2, renderContext.Buffers->GBuffer2->View()); @@ -485,7 +602,7 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext, Matrix world; Matrix::Scaling(Vector3(0.2f), world); const Mesh& debugMesh = _debugModel->LODs[0].Meshes[0]; - for (int32 probeIndex = 0; probeIndex < probesCount; probeIndex++) + for (int32 probeIndex = 0; probeIndex < probesCountTotal; probeIndex++) debugMesh.Draw(debugRenderContext, _debugMaterial, world, StaticFlags::None, true, DrawPass::GBuffer, (float)probeIndex); debugRenderContext.List->SortDrawCalls(debugRenderContext, false, DrawCallsListType::GBuffer); context->SetViewportAndScissors(debugRenderContext.View.ScreenSize.X, debugRenderContext.View.ScreenSize.Y); diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h index 5440cf7be..64bc0b3ca 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h @@ -3,7 +3,7 @@ #pragma once #include "../RendererPass.h" -#include "Engine/Core/Math/Int3.h" +#include "Engine/Core/Math/Int4.h" #include "Engine/Graphics/Textures/GPUTexture.h" /// @@ -15,19 +15,18 @@ public: // Constant buffer data for DDGI access on a GPU. PACK_STRUCT(struct ConstantsData { - Vector3 ProbesOrigin; - float ProbesSpacing; - Vector4 RaysRotation; + Vector4 ProbesOriginAndSpacing[4]; + Int4 ProbesScrollOffsets[4]; + Int4 ProbeScrollDirections[4]; uint32 ProbesCounts[3]; + uint32 CascadesCount; float IrradianceGamma; - Int3 ProbesScrollOffsets; float ProbeHistoryWeight; + float RayMaxDistance; + float Padding0; + Vector4 RaysRotation; Vector3 ViewDir; uint32 RaysCount; - Int3 ProbeScrollDirections; - float RayMaxDistance; - uint32 ProbeScrollClear[3]; - uint32 Padding0; }); // Binding data for the GPU. @@ -43,6 +42,7 @@ private: bool _supported = false; AssetReference _shader; GPUConstantBuffer* _cb0 = nullptr; + GPUConstantBuffer* _cb1 = nullptr; GPUShaderProgramCS* _csClassify; GPUShaderProgramCS* _csTraceRays; GPUShaderProgramCS* _csUpdateProbesIrradiance; diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index c67709a6f..eb89f0372 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -346,8 +346,8 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co // TODO: configurable via graphics settings const int32 resolution = 2048; const float resolutionInv = 1.0f / resolution; - // TODO: configurable via postFx settings (maybe use Global SDF distance?) - const float distance = 20000; + // TODO: configurable via postFx settings (use GI distance) + const float distance = 20000.0f; // Initialize buffers bool noCache = surfaceAtlasData.Resolution != resolution; diff --git a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp index c94b41b6f..d9682867b 100644 --- a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp +++ b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp @@ -383,8 +383,10 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex const int32 resolution = 256; const int32 resolutionMip = Math::DivideAndRoundUp(resolution, GLOBAL_SDF_RASTERIZE_MIP_FACTOR); // TODO: configurable via postFx settings - const float distanceExtent = 2000.0f; - const float cascadesDistances[] = { distanceExtent, distanceExtent * 2.0f, distanceExtent * 4.0f, distanceExtent * 8.0f }; + const int32 cascadesCount = 4; // in range 1-4 + const float distance = true ? 20000.0f : 16000.0f; // TODO: switch based if using GI, then use GI range + const float cascadesDistanceScales[] = { 1.0f, 2.0f, 4.0f, 8.0f }; + const float distanceExtent = distance / cascadesDistanceScales[cascadesCount - 1]; // Initialize buffers auto desc = GPUTextureDescription::New3D(resolution, resolution, resolution, GLOBAL_SDF_FORMAT, GPUTextureFlags::ShaderResource | GPUTextureFlags::UnorderedAccess, 1); @@ -449,23 +451,22 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex bool anyDraw = false; const uint64 cascadeFrequencies[] = { 2, 3, 5, 11 }; //const uint64 cascadeFrequencies[] = { 1, 1, 1, 1 }; - for (int32 cascade = 0; cascade < 4; cascade++) for (int32 cascadeIndex = 0; cascadeIndex < 4; cascadeIndex++) { // Reduce frequency of the updates if (useCache && (Engine::FrameCount % cascadeFrequencies[cascadeIndex]) != 0) continue; auto& cascade = sdfData.Cascades[cascadeIndex]; - const float distance = cascadesDistances[cascadeIndex]; - const float maxDistance = distance * 2; - const float voxelSize = maxDistance / resolution; - const float chunkSize = voxelSize * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE; + const float cascadeDistance = distanceExtent * cascadesDistanceScales[cascadeIndex]; + const float cascadeMaxDistance = cascadeDistance * 2; + const float cascadeVoxelSize = cascadeMaxDistance / resolution; + const float cascadeChunkSize = cascadeVoxelSize * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE; static_assert(GLOBAL_SDF_RASTERIZE_CHUNK_SIZE % GLOBAL_SDF_RASTERIZE_MIP_FACTOR == 0, "Adjust chunk size to match the mip factor scale."); - const Vector3 center = Vector3::Floor(renderContext.View.Position / chunkSize) * chunkSize; + const Vector3 center = Vector3::Floor(renderContext.View.Position / cascadeChunkSize) * cascadeChunkSize; //const Vector3 center = Vector3::Zero; - BoundingBox cascadeBounds(center - distance, center + distance); + BoundingBox cascadeBounds(center - cascadeDistance, center + cascadeDistance); // TODO: add scene detail scale factor to PostFx settings (eg. to increase or decrease scene details and quality) - const float minObjectRadius = Math::Max(20.0f, voxelSize * 0.5f); // Skip too small objects for this cascade + const float minObjectRadius = Math::Max(20.0f, cascadeVoxelSize * 0.5f); // Skip too small objects for this cascade GPUTextureView* cascadeView = cascade.Texture->ViewVolume(); GPUTextureView* cascadeMipView = cascade.Mip->ViewVolume(); @@ -478,18 +479,18 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex } // Check if cascade center has been moved - if (!(useCache && Vector3::NearEqual(cascade.Position, center, voxelSize))) + if (!(useCache && Vector3::NearEqual(cascade.Position, center, cascadeVoxelSize))) { // TODO: optimize for moving camera (copy sdf for cached chunks) cascade.StaticChunks.Clear(); } cascade.Position = center; - cascade.VoxelSize = voxelSize; + cascade.VoxelSize = cascadeVoxelSize; cascade.Bounds = cascadeBounds; // Draw all objects from all scenes into the cascade _objectsBufferCount = 0; - _voxelSize = voxelSize; + _voxelSize = cascadeVoxelSize; _cascadeBounds = cascadeBounds; _cascadeIndex = cascadeIndex; _sdfData = &sdfData; @@ -518,12 +519,12 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex } ModelsRasterizeData data; data.CascadeCoordToPosMul = cascadeBounds.GetSize() / resolution; - data.CascadeCoordToPosAdd = cascadeBounds.Minimum + voxelSize * 0.5f; - data.MaxDistance = maxDistance; + data.CascadeCoordToPosAdd = cascadeBounds.Minimum + cascadeVoxelSize * 0.5f; + data.MaxDistance = cascadeMaxDistance; data.CascadeResolution = resolution; data.CascadeMipResolution = resolutionMip; data.CascadeMipFactor = GLOBAL_SDF_RASTERIZE_MIP_FACTOR; - data.CascadeVoxelSize = voxelSize; + data.CascadeVoxelSize = cascadeVoxelSize; context->BindUA(0, cascadeView); context->BindCB(1, _cb1); const int32 chunkDispatchGroups = GLOBAL_SDF_RASTERIZE_CHUNK_SIZE / GLOBAL_SDF_RASTERIZE_GROUP_SIZE; @@ -728,12 +729,12 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex for (int32 cascadeIndex = 0; cascadeIndex < 4; cascadeIndex++) { auto& cascade = sdfData.Cascades[cascadeIndex]; - const float distance = cascadesDistances[cascadeIndex]; - const float maxDistance = distance * 2; - const float voxelSize = maxDistance / resolution; + const float cascadeDistance = distanceExtent * cascadesDistanceScales[cascadeIndex]; + const float cascadeMaxDistance = cascadeDistance * 2; + const float cascadeVoxelSize = cascadeMaxDistance / resolution; const Vector3 center = cascade.Position; - result.Constants.CascadePosDistance[cascadeIndex] = Vector4(center, distance); - result.Constants.CascadeVoxelSize.Raw[cascadeIndex] = voxelSize; + result.Constants.CascadePosDistance[cascadeIndex] = Vector4(center, cascadeDistance); + result.Constants.CascadeVoxelSize.Raw[cascadeIndex] = cascadeVoxelSize; result.Cascades[cascadeIndex] = cascade.Texture; result.CascadeMips[cascadeIndex] = cascade.Mip; } diff --git a/Source/Shaders/GI/DDGI.hlsl b/Source/Shaders/GI/DDGI.hlsl index 3017a6c8a..e09b364f4 100644 --- a/Source/Shaders/GI/DDGI.hlsl +++ b/Source/Shaders/GI/DDGI.hlsl @@ -22,19 +22,18 @@ // DDGI data for a constant buffer struct DDGIData { - float3 ProbesOrigin; - float ProbesSpacing; - float4 RaysRotation; + float4 ProbesOriginAndSpacing[4]; + int4 ProbesScrollOffsets[4]; + int4 ProbeScrollDirections[4]; uint3 ProbesCounts; + uint CascadesCount; float IrradianceGamma; - int3 ProbesScrollOffsets; float ProbeHistoryWeight; + float RayMaxDistance; + float Padding0; + float4 RaysRotation; float3 ViewDir; uint RaysCount; - int3 ProbeScrollDirections; - float RayMaxDistance; - uint3 ProbeScrollClear; // TODO: pack into bits - uint Padding0; }; uint GetDDGIProbeIndex(DDGIData data, uint3 probeCoords) @@ -62,88 +61,106 @@ uint3 GetDDGIProbeCoords(DDGIData data, uint probeIndex) return probeCoords; } -uint2 GetDDGIProbeTexelCoords(DDGIData data, uint probeIndex) +uint2 GetDDGIProbeTexelCoords(DDGIData data, uint cascadeIndex, uint probeIndex) { uint probesPerPlane = data.ProbesCounts.x * data.ProbesCounts.z; uint planeIndex = probeIndex / probesPerPlane; uint gridSpaceX = probeIndex % data.ProbesCounts.x; uint gridSpaceY = probeIndex / data.ProbesCounts.x; uint x = gridSpaceX + (planeIndex * data.ProbesCounts.x); - uint y = gridSpaceY % data.ProbesCounts.z; + uint y = gridSpaceY % data.ProbesCounts.z + cascadeIndex * data.ProbesCounts.z; return uint2(x, y); } -uint GetDDGIScrollingProbeIndex(DDGIData data, uint3 probeCoords) +uint GetDDGIScrollingProbeIndex(DDGIData data, uint cascadeIndex, uint3 probeCoords) { // Probes are scrolled on edges to stabilize GI when camera moves - return GetDDGIProbeIndex(data, (probeCoords + data.ProbesScrollOffsets + data.ProbesCounts) % data.ProbesCounts); + return GetDDGIProbeIndex(data, (probeCoords + data.ProbesScrollOffsets[cascadeIndex].xyz + data.ProbesCounts) % data.ProbesCounts); } -float3 GetDDGIProbeWorldPosition(DDGIData data, uint3 probeCoords) +float3 GetDDGIProbeWorldPosition(DDGIData data, uint cascadeIndex, uint3 probeCoords) { - float3 probePosition = probeCoords * data.ProbesSpacing; - float3 probeGridOffset = (data.ProbesSpacing * (data.ProbesCounts - 1)) * 0.5f; - return data.ProbesOrigin + probePosition - probeGridOffset + (data.ProbesScrollOffsets * data.ProbesSpacing); + float3 probesOrigin = data.ProbesOriginAndSpacing[cascadeIndex].xyz; + float probesSpacing = data.ProbesOriginAndSpacing[cascadeIndex].w; + float3 probePosition = probeCoords * probesSpacing; + float3 probeGridOffset = (probesSpacing * (data.ProbesCounts - 1)) * 0.5f; + return probesOrigin + probePosition - probeGridOffset + (data.ProbesScrollOffsets[cascadeIndex].xyz * probesSpacing); } // Loads probe probe state -float LoadDDGIProbeState(DDGIData data, Texture2D probesState, uint probeIndex) +float LoadDDGIProbeState(DDGIData data, Texture2D probesState, uint cascadeIndex, uint probeIndex) { - int2 probeDataCoords = GetDDGIProbeTexelCoords(data, probeIndex); + int2 probeDataCoords = GetDDGIProbeTexelCoords(data, cascadeIndex, probeIndex); float4 probeState = probesState.Load(int3(probeDataCoords, 0)); return probeState.w; } // Loads probe world-space position (XYZ) and probe state (W) -float4 LoadDDGIProbePositionAndState(DDGIData data, Texture2D probesState, uint probeIndex, uint3 probeCoords) +float4 LoadDDGIProbePositionAndState(DDGIData data, Texture2D probesState, uint cascadeIndex, uint probeIndex, uint3 probeCoords) { - int2 probeDataCoords = GetDDGIProbeTexelCoords(data, probeIndex); + int2 probeDataCoords = GetDDGIProbeTexelCoords(data, cascadeIndex, probeIndex); float4 probeState = probesState.Load(int3(probeDataCoords, 0)); - probeState.xyz += GetDDGIProbeWorldPosition(data, probeCoords); + probeState.xyz += GetDDGIProbeWorldPosition(data, cascadeIndex, probeCoords); return probeState; } // Calculates texture UVs for sampling probes atlas texture (irradiance or distance) -float2 GetDDGIProbeUV(DDGIData data, uint probeIndex, float2 octahedralCoords, uint resolution) +float2 GetDDGIProbeUV(DDGIData data, uint cascadeIndex, uint probeIndex, float2 octahedralCoords, uint resolution) { - uint2 coords = GetDDGIProbeTexelCoords(data, probeIndex); + uint2 coords = GetDDGIProbeTexelCoords(data, cascadeIndex, probeIndex); float probeTexelSize = resolution + 2.0f; - float textureWidth = probeTexelSize * (data.ProbesCounts.x * data.ProbesCounts.y); - float textureHeight = probeTexelSize * data.ProbesCounts.z; + float2 textureSize = float2(data.ProbesCounts.x * data.ProbesCounts.y, data.ProbesCounts.z * data.CascadesCount) * probeTexelSize; float2 uv = float2(coords.x * probeTexelSize, coords.y * probeTexelSize) + (probeTexelSize * 0.5f); uv += octahedralCoords.xy * (resolution * 0.5f); - uv /= float2(textureWidth, textureHeight); + uv /= textureSize; return uv; } // Samples DDGI probes volume at the given world-space position and returns the irradiance. -float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesState, Texture2D probesDistance, Texture2D probesIrradiance, float3 worldPosition, float3 worldNormal, float bias) +// rand - randomized per-pixel value in range 0-1, used to smooth dithering for cascades blending +float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesState, Texture2D probesDistance, Texture2D probesIrradiance, float3 worldPosition, float3 worldNormal, float bias, float dither = 0.0f) { - float4 irradiance = float4(0, 0, 0, 0); - float3 probesOrigin = data.ProbesScrollOffsets * data.ProbesSpacing + data.ProbesOrigin; - float3 probesExtent = (data.ProbesCounts - 1) * (data.ProbesSpacing * 0.5f); + // Select the highest cascade that contains the sample location + uint cascadeIndex = 0; + for (; cascadeIndex < data.CascadesCount; cascadeIndex++) + { + float probesSpacing = data.ProbesOriginAndSpacing[cascadeIndex].w; + float3 probesOrigin = data.ProbesScrollOffsets[cascadeIndex].xyz * probesSpacing + data.ProbesOriginAndSpacing[cascadeIndex].xyz; + float3 probesExtent = (data.ProbesCounts - 1) * (probesSpacing * 0.5f); + float fadeDistance = probesSpacing * 0.5f; + float cascadeWeight = saturate(Min3(probesExtent - abs(worldPosition - probesOrigin)) / fadeDistance); + if (cascadeWeight > dither) // Use dither to make transition smoother + break; + } + if (cascadeIndex == data.CascadesCount) + return float3(0, 0, 0); + + float probesSpacing = data.ProbesOriginAndSpacing[cascadeIndex].w; + float3 probesOrigin = data.ProbesScrollOffsets[cascadeIndex].xyz * probesSpacing + data.ProbesOriginAndSpacing[cascadeIndex].xyz; + float3 probesExtent = (data.ProbesCounts - 1) * (probesSpacing * 0.5f); // Bias the world-space position to reduce artifacts float3 surfaceBias = (worldNormal * bias) + (data.ViewDir * (bias * -4.0f)); float3 biasedWorldPosition = worldPosition + surfaceBias; // Get the grid coordinates of the probe nearest the biased world position - uint3 baseProbeCoords = clamp(uint3((worldPosition - probesOrigin + probesExtent) / data.ProbesSpacing), 0, data.ProbesCounts - 1); - float3 baseProbeWorldPosition = GetDDGIProbeWorldPosition(data, baseProbeCoords); - float3 biasAlpha = saturate((biasedWorldPosition - baseProbeWorldPosition) / data.ProbesSpacing); + uint3 baseProbeCoords = clamp(uint3((worldPosition - probesOrigin + probesExtent) / probesSpacing), 0, data.ProbesCounts - 1); + float3 baseProbeWorldPosition = GetDDGIProbeWorldPosition(data, cascadeIndex, baseProbeCoords); + float3 biasAlpha = saturate((biasedWorldPosition - baseProbeWorldPosition) / probesSpacing); // Loop over the closest probes to accumulate their contributions + float4 irradiance = float4(0, 0, 0, 0); for (uint i = 0; i < 8; i++) { uint3 probeCoordsOffset = uint3(i, i >> 1, i >> 2) & 1; uint3 probeCoords = clamp(baseProbeCoords + probeCoordsOffset, 0, data.ProbesCounts - 1); - uint probeIndex = GetDDGIScrollingProbeIndex(data, probeCoords); + uint probeIndex = GetDDGIScrollingProbeIndex(data, cascadeIndex, probeCoords); // Load probe position and state - float4 probeState = probesState.Load(int3(GetDDGIProbeTexelCoords(data, probeIndex), 0)); + float4 probeState = probesState.Load(int3(GetDDGIProbeTexelCoords(data, cascadeIndex, probeIndex), 0)); if (probeState.w == DDGI_PROBE_STATE_INACTIVE) continue; - float3 probeBasePosition = baseProbeWorldPosition + ((probeCoords - baseProbeCoords) * data.ProbesSpacing); + float3 probeBasePosition = baseProbeWorldPosition + ((probeCoords - baseProbeCoords) * probesSpacing); float3 probePosition = probeBasePosition + probeState.xyz; // Calculate the distance and direction from the (biased and non-biased) shading point and the probe @@ -156,7 +173,7 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesState, Textur // Sample distance texture float2 octahedralCoords = GetOctahedralCoords(-biasedPosToProbe); - float2 uv = GetDDGIProbeUV(data, probeIndex, octahedralCoords, DDGI_PROBE_RESOLUTION_DISTANCE); + float2 uv = GetDDGIProbeUV(data, cascadeIndex, probeIndex, octahedralCoords, DDGI_PROBE_RESOLUTION_DISTANCE); float2 probeDistance = probesDistance.SampleLevel(SamplerLinearClamp, uv, 0).rg * 2.0f; float probeDistanceMean = probeDistance.x; float probeDistanceMean2 = probeDistance.y; @@ -183,7 +200,7 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesState, Textur // Sample irradiance texture octahedralCoords = GetOctahedralCoords(worldNormal); - uv = GetDDGIProbeUV(data, probeIndex, octahedralCoords, DDGI_PROBE_RESOLUTION_IRRADIANCE); + uv = GetDDGIProbeUV(data, cascadeIndex, probeIndex, octahedralCoords, DDGI_PROBE_RESOLUTION_IRRADIANCE); float3 probeIrradiance = probesIrradiance.SampleLevel(SamplerLinearClamp, uv, 0).rgb; #if DDGI_SRGB_BLENDING probeIrradiance = pow(probeIrradiance, data.IrradianceGamma * 0.5f); @@ -196,6 +213,18 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesState, Textur irradiance += float4(probeIrradiance * weight, weight); } +#if 0 + // Debug DDGI cascades with colors + if (cascadeIndex == 0) + irradiance = float4(1, 0, 0, 1); + else if (cascadeIndex == 1) + irradiance = float4(0, 1, 0, 1); + else if (cascadeIndex == 2) + irradiance = float4(0, 0, 1, 1); + else + irradiance = float4(1, 0, 1, 1); +#endif + if (irradiance.a > 0.0f) { // Normalize irradiance @@ -204,10 +233,6 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesState, Textur irradiance.rgb *= irradiance.rgb; #endif irradiance.rgb *= 2.0f * PI; - - // Fade-out outside the probes volume - float fadeDistance = data.ProbesSpacing * 0.5f; - irradiance.rgb *= saturate(Min3(probesExtent - abs(worldPosition - probesOrigin)) / fadeDistance); } return irradiance.rgb; } diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index e13ae04fe..ec576ede2 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -22,14 +22,20 @@ #define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8 #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 -META_CB_BEGIN(0, Data) +META_CB_BEGIN(0, Data0) DDGIData DDGI; GlobalSDFData GlobalSDF; GlobalSurfaceAtlasData GlobalSurfaceAtlas; GBufferData GBuffer; -float2 Padding0; float ResetBlend; +float TemporalTime; float IndirectLightingIntensity; +float2 Padding0; +META_CB_END + +META_CB_BEGIN(1, Data1) +float3 Padding1; +uint CascadeIndex; META_CB_END // Calculates the evenly distributed direction ray on a sphere (Spherical Fibonacci lattice) @@ -66,22 +72,24 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) if (probeIndex >= probesCount) return; uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex); - probeIndex = GetDDGIScrollingProbeIndex(DDGI, probeCoords); - int2 probeDataCoords = GetDDGIProbeTexelCoords(DDGI, probeIndex); + probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords); + int2 probeDataCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex); + float probesSpacing = DDGI.ProbesOriginAndSpacing[CascadeIndex].w; // Load probe state and position float4 probeState = RWProbesState[probeDataCoords]; - float3 probeBasePosition = GetDDGIProbeWorldPosition(DDGI, probeCoords); + float3 probeBasePosition = GetDDGIProbeWorldPosition(DDGI, CascadeIndex, probeCoords); float3 probePosition = probeBasePosition + probeState.xyz; probeState.w = DDGI_PROBE_STATE_ACTIVE; // Use Global SDF to quickly get distance and direction to the scene geometry float sdf; float3 sdfNormal = normalize(SampleGlobalSDFGradient(GlobalSDF, GlobalSDFTex, probePosition.xyz, sdf)); - float threshold = GlobalSDF.CascadeVoxelSize[0] * 0.5f; - float distanceLimit = length(DDGI.ProbesSpacing) * 2.0f; - float relocateLimit = length(DDGI.ProbesSpacing) * 0.6f; - if (abs(sdf) > distanceLimit) // Probe is too far from geometry + float sdfDst = abs(sdf); + float threshold = GlobalSDF.CascadeVoxelSize[CascadeIndex] * 0.5f; + float distanceLimit = length(probesSpacing) * 2.0f; + float relocateLimit = length(probesSpacing) * 0.6f; + if (sdfDst > distanceLimit) // Probe is too far from geometry { // Disable it probeState = float4(0, 0, 0, DDGI_PROBE_STATE_INACTIVE); @@ -90,9 +98,9 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) { if (sdf < threshold) // Probe is inside geometry { - if (abs(sdf) < relocateLimit) + if (sdfDst < relocateLimit) { - float3 offsetToAdd = sdfNormal * sdf; + float3 offsetToAdd = sdfNormal * (sdf + threshold); if (distance(probeState.xyz, offsetToAdd) < relocateLimit) { // Relocate it @@ -105,7 +113,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) probeState.xyz = float3(0, 0, 0); } } - else if (sdf > threshold * 2.0f) // Probe is far enough any geometry + else if (sdf > threshold * 4.0f) // Probe is far enough any geometry { // Reset relocation probeState.xyz = float3(0, 0, 0); @@ -146,10 +154,10 @@ void CS_TraceRays(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_Dispat uint rayIndex = DispatchThreadId.x; uint probeIndex = DispatchThreadId.y; uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex); - probeIndex = GetDDGIScrollingProbeIndex(DDGI, probeCoords); + probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords); // Load current probe state and position - float4 probePositionAndState = LoadDDGIProbePositionAndState(DDGI, ProbesState, probeIndex, probeCoords); + float4 probePositionAndState = LoadDDGIProbePositionAndState(DDGI, ProbesState, CascadeIndex, probeIndex, probeCoords); if (probePositionAndState.w == DDGI_PROBE_STATE_INACTIVE) return; // Skip disabled probes float3 probeRayDirection = GetProbeRayDirection(DDGI, rayIndex); @@ -222,16 +230,20 @@ void CS_UpdateProbes(uint3 DispatchThreadId : SV_DispatchThreadID, uint GroupInd uint probesCount = DDGI.ProbesCounts.x * DDGI.ProbesCounts.y * DDGI.ProbesCounts.z; bool skip = probeIndex >= probesCount; uint2 outputCoords = uint2(1, 1) + DispatchThreadId.xy + (DispatchThreadId.xy / DDGI_PROBE_RESOLUTION) * 2; - + outputCoords.y += CascadeIndex * DDGI.ProbesCounts.z * (DDGI_PROBE_RESOLUTION + 2); + // Clear probes that have been scrolled to a new positions (blending with current irradiance will happen the next frame) uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex); + int3 probesScrollOffsets = DDGI.ProbesScrollOffsets[CascadeIndex].xyz; + int probeScrollClear = DDGI.ProbesScrollOffsets[CascadeIndex].w; + int3 probeScrollDirections = DDGI.ProbeScrollDirections[CascadeIndex].xyz; UNROLL for (uint planeIndex = 0; planeIndex < 3; planeIndex++) { - if (DDGI.ProbeScrollClear[planeIndex]) + if (probeScrollClear & (1 << planeIndex) && !skip) { - int scrollOffset = DDGI.ProbesScrollOffsets[planeIndex]; - int scrollDirection = DDGI.ProbeScrollDirections[planeIndex]; + int scrollOffset = probesScrollOffsets[planeIndex]; + int scrollDirection = probeScrollDirections[planeIndex]; uint probeCount = DDGI.ProbesCounts[planeIndex]; uint coord = (probeCount + (scrollDirection ? (scrollOffset - 1) : (scrollOffset % probeCount))) % probeCount; if (probeCoords[planeIndex] == coord) @@ -244,7 +256,7 @@ void CS_UpdateProbes(uint3 DispatchThreadId : SV_DispatchThreadID, uint GroupInd } // Skip disabled probes - float probeState = LoadDDGIProbeState(DDGI, ProbesState, probeIndex); + float probeState = LoadDDGIProbeState(DDGI, ProbesState, CascadeIndex, probeIndex); if (probeState == DDGI_PROBE_STATE_INACTIVE) skip = true; @@ -275,7 +287,8 @@ void CS_UpdateProbes(uint3 DispatchThreadId : SV_DispatchThreadID, uint GroupInd uint backfacesCount = 0; uint backfacesLimit = uint(DDGI.RaysCount * 0.1f); #else - float distanceLimit = length(DDGI.ProbesSpacing) * 1.5f; + float probesSpacing = DDGI.ProbesOriginAndSpacing[CascadeIndex].w; + float distanceLimit = length(probesSpacing) * 1.5f; #endif LOOP for (uint rayIndex = 0; rayIndex < DDGI.RaysCount; rayIndex++) @@ -420,6 +433,7 @@ void CS_UpdateBorders(uint3 DispatchThreadId : SV_DispatchThreadID) #ifdef _PS_IndirectLighting #include "./Flax/GBuffer.hlsl" +#include "./Flax/Random.hlsl" #include "./Flax/LightingCommon.hlsl" Texture2D ProbesState : register(t4); @@ -445,8 +459,9 @@ void PS_IndirectLighting(Quad_VS2PS input, out float4 output : SV_Target0) // Sample irradiance float bias = 1.0f; - float3 irradiance = SampleDDGIIrradiance(DDGI, ProbesState, ProbesDistance, ProbesIrradiance, gBuffer.WorldPos, gBuffer.Normal, bias); - + float dither = RandN2(input.TexCoord + TemporalTime).x; + float3 irradiance = SampleDDGIIrradiance(DDGI, ProbesState, ProbesDistance, ProbesIrradiance, gBuffer.WorldPos, gBuffer.Normal, bias, dither); + // Calculate lighting float3 diffuseColor = GetDiffuseColor(gBuffer); float3 diffuse = Diffuse_Lambert(diffuseColor); diff --git a/Source/Shaders/GlobalSignDistanceField.hlsl b/Source/Shaders/GlobalSignDistanceField.hlsl index 0d561db69..6ee0efc75 100644 --- a/Source/Shaders/GlobalSignDistanceField.hlsl +++ b/Source/Shaders/GlobalSignDistanceField.hlsl @@ -209,5 +209,5 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D tex[4] float GetGlobalSurfaceAtlasThreshold(GlobalSDFHit hit) { // Scale the threshold based on the hit cascade (less precision) - return hit.HitCascade * 10.0f + 20.0f; + return hit.HitCascade * 20.0f + 25.0f; } diff --git a/Source/Shaders/Random.hlsl b/Source/Shaders/Random.hlsl index 98f6009bd..33aa3cbb4 100644 --- a/Source/Shaders/Random.hlsl +++ b/Source/Shaders/Random.hlsl @@ -9,6 +9,18 @@ float PseudoRandom(float2 xy) return frac(dot(p.xyx * p.xyy, float3(20.390625f, 60.703125f, 2.4281209f))); } +// Generic noise (1-component) +float RandN1(float n) +{ + return frac(sin(n) * 43758.5453123); +} + +// Generic noise (2-components) +float2 RandN2(float2 n) +{ + return frac(sin(dot(n, float2(12.9898, 78.233))) * float2(43758.5453123, 28001.8384)); +} + void FindBestAxisVectors(float3 input, out float3 axis1, out float3 axis2) { const float3 a = abs(input); diff --git a/Source/Shaders/SSR.hlsl b/Source/Shaders/SSR.hlsl index 86d8c5f6b..ccb2b1e89 100644 --- a/Source/Shaders/SSR.hlsl +++ b/Source/Shaders/SSR.hlsl @@ -6,16 +6,6 @@ #include "./Flax/MonteCarlo.hlsl" #include "./Flax/GBufferCommon.hlsl" -float max2(float2 v) -{ - return max(v.x, v.y); -} - -float2 RandN2(float2 pos, float2 random) -{ - return frac(sin(dot(pos.xy + random, float2(12.9898, 78.233))) * float2(43758.5453, 28001.8384)); -} - // 1:-1 to 0:1 float2 ClipToUv(float2 clipPos) { @@ -62,7 +52,7 @@ float3 TraceSceenSpaceReflection(float2 uv, GBufferSample gBuffer, Texture2D dep float3 normalVS = mul(gBuffer.Normal, (float3x3)viewMatrix); // Randomize it a little - float2 jitter = RandN2(uv, temporalTime); + float2 jitter = RandN2(uv + temporalTime); float2 Xi = jitter; Xi.y = lerp(Xi.y, 0.0, brdfBias); float3 H = temporal ? TangentToWorld(gBuffer.Normal, ImportanceSampleGGX(Xi, gBuffer.Roughness)) : gBuffer.Normal; @@ -80,7 +70,8 @@ float3 TraceSceenSpaceReflection(float2 uv, GBufferSample gBuffer, Texture2D dep float3 endUV = ProjectWorldToUv(startWS + reflectWS, viewProjectionMatrix); float3 rayUV = endUV - startUV; - rayUV *= stepSize / max2(abs(rayUV.xy)); + float2 rayUVAbs = abs(rayUV.xy); + rayUV *= stepSize / max(rayUVAbs.x, rayUVAbs.y); float3 startUv = startUV + rayUV * 2; float3 currOffset = startUv; diff --git a/Source/Shaders/SSR.shader b/Source/Shaders/SSR.shader index 5893a6ad1..15d1975e0 100644 --- a/Source/Shaders/SSR.shader +++ b/Source/Shaders/SSR.shader @@ -138,7 +138,7 @@ float4 PS_ResolvePass(Quad_VS2PS input) : SV_Target0 float3 viewVector = normalize(gBufferData.ViewPos - gBuffer.WorldPos); // Randomize it a little - float2 random = RandN2(uv, TemporalTime); + float2 random = RandN2(uv + TemporalTime); float2 blueNoise = random.xy * 2.0 - 1.0; float2x2 offsetRotationMatrix = float2x2(blueNoise.x, blueNoise.y, -blueNoise.y, blueNoise.x);