diff --git a/Content/Editor/MaterialTemplates/Features/ForwardShading.hlsl b/Content/Editor/MaterialTemplates/Features/ForwardShading.hlsl index 331981acb..3692046df 100644 --- a/Content/Editor/MaterialTemplates/Features/ForwardShading.hlsl +++ b/Content/Editor/MaterialTemplates/Features/ForwardShading.hlsl @@ -116,7 +116,7 @@ void PS_Forward( Texture2D sceneColorTexture = MATERIAL_REFLECTIONS_SSR_COLOR; float2 screenUV = materialInput.SvPosition.xy * ScreenSize.zw; float stepSize = ScreenSize.z; // 1 / screenWidth - float maxSamples = 48; + float maxSamples = 50; float worldAntiSelfOcclusionBias = 0.1f; float brdfBias = 0.82f; float drawDistance = 5000.0f; diff --git a/Source/Engine/Graphics/RenderBuffers.cpp b/Source/Engine/Graphics/RenderBuffers.cpp index a66eadf43..df79cd50d 100644 --- a/Source/Engine/Graphics/RenderBuffers.cpp +++ b/Source/Engine/Graphics/RenderBuffers.cpp @@ -64,6 +64,7 @@ void RenderBuffers::ReleaseUnusedMemory() UPDATE_LAZY_KEEP_RT(TemporalSSR); UPDATE_LAZY_KEEP_RT(TemporalAA); UPDATE_LAZY_KEEP_RT(HalfResDepth); + UPDATE_LAZY_KEEP_RT(HiZ); UPDATE_LAZY_KEEP_RT(LuminanceMap); #undef UPDATE_LAZY_KEEP_RT for (int32 i = CustomBuffers.Count() - 1; i >= 0; i--) @@ -112,6 +113,42 @@ GPUTexture* RenderBuffers::RequestHalfResDepth(GPUContext* context) return HalfResDepth; } +GPUTexture* RenderBuffers::RequestHiZ(GPUContext* context, bool fullRes, int32 mipLevels) +{ + // Skip if already done in the current frame + const auto currentFrame = Engine::FrameCount; + if (LastFrameHiZ == currentFrame) + return HiZ; + LastFrameHiZ = currentFrame; + + // Allocate or resize buffer (with full mip-chain) + // TODO: migrate to inverse depth and try using r16 again as default (should have no artifacts anymore) + auto format = PLATFORM_ANDROID || PLATFORM_IOS || PLATFORM_SWITCH ? PixelFormat::R16_UInt : PixelFormat::R32_Float; + auto width = fullRes ? _width : Math::Max(_width >> 1, 1); + auto height = fullRes ? 
_height : Math::Max(_height >> 1, 1); + auto desc = GPUTextureDescription::New2D(width, height, mipLevels, format, GPUTextureFlags::ShaderResource); + bool useCompute = false; // TODO: impl Compute Shader for downscaling depth to HiZ with a single dispatch (eg. FidelityFX Single Pass Downsampler) + if (useCompute) + desc.Flags |= GPUTextureFlags::UnorderedAccess; + else + desc.Flags |= GPUTextureFlags::RenderTarget | GPUTextureFlags::PerMipViews; + if (HiZ && HiZ->GetDescription() != desc) + { + RenderTargetPool::Release(HiZ); + HiZ = nullptr; + } + if (HiZ == nullptr) + { + HiZ = RenderTargetPool::Get(desc); + RENDER_TARGET_POOL_SET_NAME(HiZ, "HiZ"); + } + + // Downscale + MultiScaler::Instance()->BuildHiZ(context, DepthBuffer, HiZ); + + return HiZ; +} + PixelFormat RenderBuffers::GetOutputFormat() const { auto colorFormat = GraphicsSettings::Get()->RenderColorFormat; @@ -244,6 +281,7 @@ void RenderBuffers::Release() UPDATE_LAZY_KEEP_RT(TemporalSSR); UPDATE_LAZY_KEEP_RT(TemporalAA); UPDATE_LAZY_KEEP_RT(HalfResDepth); + UPDATE_LAZY_KEEP_RT(HiZ); UPDATE_LAZY_KEEP_RT(LuminanceMap); #undef UPDATE_LAZY_KEEP_RT CustomBuffers.ClearDelete(); diff --git a/Source/Engine/Graphics/RenderBuffers.h b/Source/Engine/Graphics/RenderBuffers.h index dadd06175..2ae253de8 100644 --- a/Source/Engine/Graphics/RenderBuffers.h +++ b/Source/Engine/Graphics/RenderBuffers.h @@ -43,6 +43,12 @@ API_CLASS() class FLAXENGINE_API RenderBuffers : public ScriptingObject String ToString() const override; }; +private: + GPUTexture* HalfResDepth = nullptr; + GPUTexture* HiZ = nullptr; + uint64 LastFrameHalfResDepth = 0; + uint64 LastFrameHiZ = 0; + protected: int32 _width = 0; int32 _height = 0; @@ -85,11 +91,6 @@ public: float MaxDistance; } VolumetricFogData; - // Helper buffer with half-resolution depth buffer shared by effects (eg. SSR, Motion Blur). Valid only during frame rendering and on request (see RequestHalfResDepth). - // Should be released if not used for a few frames. 
- GPUTexture* HalfResDepth = nullptr; - uint64 LastFrameHalfResDepth = 0; - // Helper target for the temporal SSR. // Should be released if not used for a few frames. GPUTexture* TemporalSSR = nullptr; @@ -122,6 +123,15 @@ public: /// The half-res depth buffer. GPUTexture* RequestHalfResDepth(GPUContext* context); + /// + /// Requests the Hierarchical Z-Buffer (closest) to be prepared for the current frame. + /// + /// The context. + /// Generates the full-resolution buffer, otherwise HiZ starts at half-res of the original Depth Buffer. + /// Maximum amount of mip levels to generate. Value 0 generates a full mip chain down to 1x1. + /// The HiZ depth buffer. + GPUTexture* RequestHiZ(GPUContext* context, bool fullRes = false, int32 mipLevels = 0); + public: /// /// Gets the buffers width (in pixels). diff --git a/Source/Engine/Renderer/ReflectionsPass.cpp b/Source/Engine/Renderer/ReflectionsPass.cpp index 508d361ae..18efc3583 100644 --- a/Source/Engine/Renderer/ReflectionsPass.cpp +++ b/Source/Engine/Renderer/ReflectionsPass.cpp @@ -411,6 +411,7 @@ void ReflectionsPass::Render(RenderContext& renderContext, GPUTextureView* light else { // Combine reflections and light buffer (additive mode) + PROFILE_GPU("Combine"); if (_depthBounds) { context->SetRenderTarget(depthBufferRTV, lightBuffer); diff --git a/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp b/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp index 52b1412fe..e992b7c5a 100644 --- a/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp +++ b/Source/Engine/Renderer/ScreenSpaceReflectionsPass.cpp @@ -22,19 +22,21 @@ #define TEXTURE1 5 #define TEXTURE2 6 +#define SSR_USE_HZB 1 + GPU_CB_STRUCT(Data { ShaderGBufferData GBuffer; float MaxColorMiplevel; float TraceSizeMax; float MaxTraceSamples; float RoughnessFade; - Float2 SSRtexelSize; + Float2 SSRTexelSize; float TemporalTime; float BRDFBias; float WorldAntiSelfOcclusionBias; float EdgeFadeFactor; float TemporalResponse; - float Dummy0; + uint32 
DepthMips; float RayTraceStep; float TemporalEffect; float Intensity; @@ -157,10 +159,10 @@ GPUTexture* ScreenSpaceReflectionsPass::Render(RenderContext& renderContext, GPU const auto colorBufferMips = MipLevelsCount(colorBufferWidth, colorBufferHeight); // Prepare buffers - auto tempDesc = GPUTextureDescription::New2D(colorBufferWidth, colorBufferHeight, 0, PixelFormat::R11G11B10_Float, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::PerMipViews); GPUTexture* colorBuffer0, *colorBuffer1; if (settings.UseColorBufferMips) { + auto tempDesc = GPUTextureDescription::New2D(colorBufferWidth, colorBufferHeight, 0, PixelFormat::R11G11B10_Float, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::PerMipViews); colorBuffer0 = RenderTargetPool::Get(tempDesc); RENDER_TARGET_POOL_SET_NAME(colorBuffer0, "SSR.ColorBuffer0"); // TODO: maybe allocate colorBuffer1 smaller because mip0 is not used (the same as PostProcessingPass for Bloom), keep in sync to use the same buffer in frame @@ -170,27 +172,33 @@ GPUTexture* ScreenSpaceReflectionsPass::Render(RenderContext& renderContext, GPU else { // Single mip - tempDesc.MipLevels = 1; - tempDesc.Flags &= ~GPUTextureFlags::PerMipViews; + auto tempDesc = GPUTextureDescription::New2D(colorBufferWidth, colorBufferHeight, 1, PixelFormat::R11G11B10_Float); colorBuffer0 = RenderTargetPool::Get(tempDesc); colorBuffer1 = nullptr; } - tempDesc = GPUTextureDescription::New2D(traceWidth, traceHeight, PixelFormat::R16G16B16A16_Float); - auto traceBuffer = RenderTargetPool::Get(tempDesc); - RENDER_TARGET_POOL_SET_NAME(traceBuffer, "SSR.TraceBuffer"); - tempDesc = GPUTextureDescription::New2D(resolveWidth, resolveHeight, PixelFormat::R16G16B16A16_Float); - auto resolveBuffer = RenderTargetPool::Get(tempDesc); - RENDER_TARGET_POOL_SET_NAME(resolveBuffer, "SSR.ResolveBuffer"); + GPUTexture* traceBuffer, *resolveBuffer; + { + auto tempDesc = GPUTextureDescription::New2D(traceWidth, 
traceHeight, PixelFormat::R16G16B16A16_Float); + traceBuffer = RenderTargetPool::Get(tempDesc); + RENDER_TARGET_POOL_SET_NAME(traceBuffer, "SSR.TraceBuffer"); + tempDesc.Width = resolveWidth; + tempDesc.Height = resolveHeight; + resolveBuffer = RenderTargetPool::Get(tempDesc); + RENDER_TARGET_POOL_SET_NAME(resolveBuffer, "SSR.ResolveBuffer"); + } // Pick effect settings int32 maxTraceSamples = 60; + int32 resolveSamples = settings.ResolveSamples; switch (Graphics::SSRQuality) { case Quality::Low: - maxTraceSamples = 20; + maxTraceSamples = 40; + resolveSamples = Math::Min(resolveSamples, 2); break; case Quality::Medium: maxTraceSamples = 55; + resolveSamples = Math::Min(resolveSamples, 4); break; case Quality::High: maxTraceSamples = 70; @@ -199,7 +207,6 @@ GPUTexture* ScreenSpaceReflectionsPass::Render(RenderContext& renderContext, GPU maxTraceSamples = 120; break; } - const int32 resolveSamples = settings.ResolveSamples; int32 resolvePassIndex = 0; if (resolveSamples >= 8) resolvePassIndex = 3; @@ -214,12 +221,12 @@ GPUTexture* ScreenSpaceReflectionsPass::Render(RenderContext& renderContext, GPU data.RoughnessFade = Math::Saturate(settings.RoughnessThreshold); data.MaxTraceSamples = static_cast(maxTraceSamples); data.BRDFBias = settings.BRDFBias; - data.WorldAntiSelfOcclusionBias = settings.WorldAntiSelfOcclusionBias; + data.WorldAntiSelfOcclusionBias = settings.WorldAntiSelfOcclusionBias * (int32)settings.DepthResolution; data.EdgeFadeFactor = settings.EdgeFadeFactor; - data.SSRtexelSize = Float2(1.0f / (float)traceWidth, 1.0f / (float)traceHeight); + data.SSRTexelSize = Float2(1.0f / (float)traceWidth, 1.0f / (float)traceHeight); data.TraceSizeMax = (float)Math::Max(traceWidth, traceHeight); - data.MaxColorMiplevel = settings.UseColorBufferMips ? (float)colorBufferMips - 2.0f : 0.0f; - data.RayTraceStep = static_cast(settings.DepthResolution) / (float)width; + data.MaxColorMiplevel = settings.UseColorBufferMips ? 
(float)(colorBufferMips - 2) : 0.0f; + data.RayTraceStep = (float)settings.DepthResolution / (float)width; data.Intensity = settings.Intensity; data.FadeOutDistance = Math::Max(settings.FadeOutDistance, 100.0f); data.TemporalResponse = settings.TemporalResponse; @@ -245,8 +252,16 @@ GPUTexture* ScreenSpaceReflectionsPass::Render(RenderContext& renderContext, GPU } } - // Check if resize depth + // Prepare depth buffer +#if SSR_USE_HZB + int32 hzbMips = settings.DepthResolution == ResolutionMode::Full ? 5 : 4; // Using lower mips in tracing introduces blocky artifacts + bool hzbFullRes = settings.DepthResolution == ResolutionMode::Full; + GPUTexture* depthBufferTrace = buffers->RequestHiZ(context, hzbFullRes, hzbMips); + data.DepthMips = hzbMips - 1; // Offset to improve SSR range +#else GPUTexture* depthBufferTrace = settings.DepthResolution == ResolutionMode::Half ? buffers->RequestHalfResDepth(context) : buffers->DepthBuffer; + data.DepthMips = 1; +#endif // Prepare constants context->UpdateCB(cb, &data); @@ -259,16 +274,19 @@ GPUTexture* ScreenSpaceReflectionsPass::Render(RenderContext& renderContext, GPU context->BindSR(3, depthBufferTrace); // Combine pass - context->BindSR(TEXTURE0, lightBuffer); - context->BindSR(TEXTURE1, reflectionsRT); - context->BindSR(TEXTURE2, _preIntegratedGF->GetTexture()); - context->SetViewportAndScissors((float)colorBufferWidth, (float)colorBufferHeight); - context->SetRenderTarget(colorBuffer0->View(0)); - context->SetState(_psCombinePass); - context->DrawFullscreenTriangle(); - context->UnBindSR(TEXTURE1); - context->UnBindSR(TEXTURE2); - context->ResetRenderTarget(); + { + PROFILE_GPU("Combine"); + context->BindSR(TEXTURE0, lightBuffer); + context->BindSR(TEXTURE1, reflectionsRT); + context->BindSR(TEXTURE2, _preIntegratedGF->GetTexture()); + context->SetViewportAndScissors((float)colorBufferWidth, (float)colorBufferHeight); + context->SetRenderTarget(colorBuffer0->View(0)); + context->SetState(_psCombinePass); + 
context->DrawFullscreenTriangle(); + context->UnBindSR(TEXTURE1); + context->UnBindSR(TEXTURE2); + context->ResetRenderTarget(); + } // Blur Pass if (settings.UseColorBufferMips) @@ -298,37 +316,44 @@ GPUTexture* ScreenSpaceReflectionsPass::Render(RenderContext& renderContext, GPU RenderTargetPool::Release(colorBuffer1); // Ray Trace Pass - context->SetViewportAndScissors((float)traceWidth, (float)traceHeight); - context->SetRenderTarget(*traceBuffer); - context->BindSR(TEXTURE0, colorBuffer0->View()); - if (useGlobalSurfaceAtlas) { - context->BindSR(7, bindingDataSDF.Texture ? bindingDataSDF.Texture->ViewVolume() : nullptr); - context->BindSR(8, bindingDataSDF.TextureMip ? bindingDataSDF.TextureMip->ViewVolume() : nullptr); - context->BindSR(9, bindingDataSurfaceAtlas.Chunks ? bindingDataSurfaceAtlas.Chunks->View() : nullptr); - context->BindSR(10, bindingDataSurfaceAtlas.CulledObjects ? bindingDataSurfaceAtlas.CulledObjects->View() : nullptr); - context->BindSR(11, bindingDataSurfaceAtlas.Objects ? bindingDataSurfaceAtlas.Objects->View() : nullptr); - context->BindSR(12, bindingDataSurfaceAtlas.AtlasDepth->View()); - context->BindSR(13, bindingDataSurfaceAtlas.AtlasLighting->View()); + PROFILE_GPU("RayTrace"); + context->SetViewportAndScissors((float)traceWidth, (float)traceHeight); + context->SetRenderTarget(*traceBuffer); + context->BindSR(TEXTURE0, colorBuffer0->View()); + if (useGlobalSurfaceAtlas) + { + context->BindSR(7, bindingDataSDF.Texture ? bindingDataSDF.Texture->ViewVolume() : nullptr); + context->BindSR(8, bindingDataSDF.TextureMip ? bindingDataSDF.TextureMip->ViewVolume() : nullptr); + context->BindSR(9, bindingDataSurfaceAtlas.Chunks ? bindingDataSurfaceAtlas.Chunks->View() : nullptr); + context->BindSR(10, bindingDataSurfaceAtlas.CulledObjects ? bindingDataSurfaceAtlas.CulledObjects->View() : nullptr); + context->BindSR(11, bindingDataSurfaceAtlas.Objects ? 
bindingDataSurfaceAtlas.Objects->View() : nullptr); + context->BindSR(12, bindingDataSurfaceAtlas.AtlasDepth->View()); + context->BindSR(13, bindingDataSurfaceAtlas.AtlasLighting->View()); + } + context->SetState(_psRayTracePass.Get(useGlobalSurfaceAtlas ? 1 : 0)); + context->DrawFullscreenTriangle(); + context->ResetRenderTarget(); + RenderTargetPool::Release(colorBuffer0); } - context->SetState(_psRayTracePass.Get(useGlobalSurfaceAtlas ? 1 : 0)); - context->DrawFullscreenTriangle(); - context->ResetRenderTarget(); - RenderTargetPool::Release(colorBuffer0); // Resolve Pass - context->SetViewportAndScissors((float)resolveWidth, (float)resolveHeight); - context->SetRenderTarget(resolveBuffer->View()); - context->BindSR(TEXTURE0, traceBuffer->View()); - context->SetState(_psResolvePass.Get(resolvePassIndex)); - context->DrawFullscreenTriangle(); - context->ResetRenderTarget(); - RenderTargetPool::Release(traceBuffer); + { + PROFILE_GPU("Resolve"); + context->SetViewportAndScissors((float)resolveWidth, (float)resolveHeight); + context->SetRenderTarget(resolveBuffer->View()); + context->BindSR(TEXTURE0, traceBuffer->View()); + context->SetState(_psResolvePass.Get(resolvePassIndex)); + context->DrawFullscreenTriangle(); + context->ResetRenderTarget(); + RenderTargetPool::Release(traceBuffer); + } // Temporal Pass GPUTexture* reflectionsBuffer = resolveBuffer; if (useTemporal) { + PROFILE_GPU("Temporal"); buffers->LastFrameTemporalSSR = Engine::FrameCount; bool resetHistory = false; if (!buffers->TemporalSSR || buffers->TemporalSSR->Width() != resolveWidth || buffers->TemporalSSR->Height() != resolveHeight) @@ -336,7 +361,7 @@ GPUTexture* ScreenSpaceReflectionsPass::Render(RenderContext& renderContext, GPU resetHistory = true; if (buffers->TemporalSSR) RenderTargetPool::Release(buffers->TemporalSSR); - tempDesc = GPUTextureDescription::New2D(resolveWidth, resolveHeight, PixelFormat::R16G16B16A16_Float); + auto tempDesc = GPUTextureDescription::New2D(resolveWidth, 
resolveHeight, PixelFormat::R16G16B16A16_Float); buffers->TemporalSSR = RenderTargetPool::Get(tempDesc); RENDER_TARGET_POOL_SET_NAME(buffers->TemporalSSR, "SSR.TemporalSSR"); } diff --git a/Source/Engine/Renderer/Utils/MultiScaler.cpp b/Source/Engine/Renderer/Utils/MultiScaler.cpp index ae5633834..c35aac08e 100644 --- a/Source/Engine/Renderer/Utils/MultiScaler.cpp +++ b/Source/Engine/Renderer/Utils/MultiScaler.cpp @@ -2,8 +2,8 @@ #include "MultiScaler.h" #include "Engine/Graphics/Textures/GPUTexture.h" -#include "Engine/Content/Content.h" #include "Engine/Graphics/GPUContext.h" +#include "Engine/Content/Content.h" GPU_CB_STRUCT(Data { Float2 TexelSize; @@ -18,10 +18,10 @@ String MultiScaler::ToString() const bool MultiScaler::Init() { // Create pipeline states - _psHalfDepth = GPUDevice::Instance->CreatePipelineState(); _psBlur5.CreatePipelineStates(); _psBlur9.CreatePipelineStates(); _psBlur13.CreatePipelineStates(); + _psHalfDepth.CreatePipelineStates(); _psUpscale = GPUDevice::Instance->CreatePipelineState(); // Load asset @@ -66,13 +66,20 @@ bool MultiScaler::setupResources() if (_psUpscale->Init(psDesc)) return true; } - if (!_psHalfDepth->IsValid()) + if (!_psHalfDepth.IsValid()) { - psDesc.PS = shader->GetPS("PS_HalfDepth"); + psDesc.PS = shader->GetPS("PS_HalfDepth", 0); + if (_psHalfDepth[0]->Init(psDesc)) + return true; + psDesc.PS = shader->GetPS("PS_HalfDepth", 2); + psDesc.BlendMode.RenderTargetWriteMask = BlendingMode::ColorWrite::Red; + if (_psHalfDepth[2]->Init(psDesc)) + return true; + psDesc.PS = shader->GetPS("PS_HalfDepth", 1); psDesc.DepthWriteEnable = true; psDesc.DepthEnable = true; psDesc.DepthFunc = ComparisonFunc::Always; - if (_psHalfDepth->Init(psDesc)) + if (_psHalfDepth[1]->Init(psDesc)) return true; } @@ -85,15 +92,15 @@ void MultiScaler::Dispose() RendererPass::Dispose(); // Cleanup - SAFE_DELETE_GPU_RESOURCE(_psHalfDepth); SAFE_DELETE_GPU_RESOURCE(_psUpscale); _psBlur5.Delete(); _psBlur9.Delete(); _psBlur13.Delete(); + 
_psHalfDepth.Delete(); _shader = nullptr; } -void MultiScaler::Filter(const FilterMode mode, GPUContext* context, const int32 width, const int32 height, GPUTextureView* src, GPUTextureView* dst, GPUTextureView* tmp) +void MultiScaler::Filter(FilterMode mode, GPUContext* context, int32 width, int32 height, GPUTextureView* src, GPUTextureView* dst, GPUTextureView* tmp) { PROFILE_GPU_CPU("MultiScaler Filter"); @@ -152,18 +159,14 @@ void MultiScaler::Filter(const FilterMode mode, GPUContext* context, const int32 context->ResetRenderTarget(); } -void MultiScaler::Filter(const FilterMode mode, GPUContext* context, const int32 width, const int32 height, GPUTextureView* srcDst, GPUTextureView* tmp) +void MultiScaler::Filter(FilterMode mode, GPUContext* context, int32 width, int32 height, GPUTextureView* srcDst, GPUTextureView* tmp) { PROFILE_GPU_CPU("MultiScaler Filter"); context->SetViewportAndScissors((float)width, (float)height); - // Check if has missing resources if (checkIfSkipPass()) - { - // Skip return; - } // Select filter GPUPipelineStatePermutationsPs<2>* ps; @@ -211,11 +214,8 @@ void MultiScaler::Filter(const FilterMode mode, GPUContext* context, const int32 void MultiScaler::DownscaleDepth(GPUContext* context, int32 dstWidth, int32 dstHeight, GPUTexture* src, GPUTextureView* dst) { PROFILE_GPU_CPU("Downscale Depth"); - - // Check if has missing resources if (checkIfSkipPass()) { - // Clear the output context->ClearDepth(dst); return; } @@ -224,6 +224,7 @@ void MultiScaler::DownscaleDepth(GPUContext* context, int32 dstWidth, int32 dstH Data data; data.TexelSize.X = 1.0f / (float)src->Width(); data.TexelSize.Y = 1.0f / (float)src->Height(); + bool outputDepth = ((GPUTexture*)dst->GetParent())->IsDepthStencil(); auto cb = _shader->GetShader()->GetCB(0); context->UpdateCB(cb, &data); context->BindCB(0, cb); @@ -232,7 +233,7 @@ void MultiScaler::DownscaleDepth(GPUContext* context, int32 dstWidth, int32 dstH context->SetViewportAndScissors((float)dstWidth, 
(float)dstHeight); context->SetRenderTarget(dst, (GPUTextureView*)nullptr); context->BindSR(0, src); - context->SetState(_psHalfDepth); + context->SetState(_psHalfDepth[outputDepth ? 1 : 0]); context->DrawFullscreenTriangle(); // Cleanup @@ -240,6 +241,49 @@ void MultiScaler::DownscaleDepth(GPUContext* context, int32 dstWidth, int32 dstH context->UnBindCB(0); } +void MultiScaler::BuildHiZ(GPUContext* context, GPUTexture* srcDepth, GPUTexture* dstHiZ) +{ + PROFILE_GPU_CPU("Build HiZ"); + + int32 dstWidth = dstHiZ->Width(); + int32 dstHeight = dstHiZ->Height(); + + // Copy mip0 + if (srcDepth->Size() == dstHiZ->Size() && srcDepth->Format() == dstHiZ->Format()) + { + context->CopySubresource(dstHiZ, 0, srcDepth, 0); + } + else if (srcDepth->Size() == dstHiZ->Size()) + { + context->Draw(dstHiZ, srcDepth); + } + else + { + context->SetViewportAndScissors((float)dstWidth, (float)dstHeight); + context->SetRenderTarget(dstHiZ->View()); + context->BindSR(0, srcDepth); + context->SetState(_psHalfDepth[2]); + context->DrawFullscreenTriangle(); + } + + // Build mip chain + for (int32 mip = 1; mip < dstHiZ->MipLevels(); mip++) + { + const int32 mipWidth = Math::Max(dstWidth >> mip, 1); + const int32 mipHeight = Math::Max(dstHeight >> mip, 1); + context->ResetRenderTarget(); + + context->SetViewportAndScissors((float)mipWidth, (float)mipHeight); + context->SetRenderTarget(dstHiZ->View(0, mip)); + context->BindSR(0, dstHiZ->View(0, mip - 1)); + context->SetState(_psHalfDepth[2]); + context->DrawFullscreenTriangle(); + } + + context->ResetRenderTarget(); + context->UnBindCB(0); +} + void MultiScaler::Upscale(GPUContext* context, const Viewport& viewport, GPUTexture* src, GPUTextureView* dst) { PROFILE_GPU_CPU("Upscale"); diff --git a/Source/Engine/Renderer/Utils/MultiScaler.h b/Source/Engine/Renderer/Utils/MultiScaler.h index ecbb4c248..67d8881c3 100644 --- a/Source/Engine/Renderer/Utils/MultiScaler.h +++ b/Source/Engine/Renderer/Utils/MultiScaler.h @@ -12,16 +12,14 @@ class 
MultiScaler : public RendererPass { private: - AssetReference _shader; - GPUPipelineState* _psHalfDepth = nullptr; GPUPipelineStatePermutationsPs<2> _psBlur5; GPUPipelineStatePermutationsPs<2> _psBlur9; GPUPipelineStatePermutationsPs<2> _psBlur13; + GPUPipelineStatePermutationsPs<3> _psHalfDepth; GPUPipelineState* _psUpscale = nullptr; public: - /// /// Filter mode /// @@ -53,7 +51,7 @@ public: /// The source texture. /// The destination texture. /// The temporary texture (should have the same size as destination texture). - void Filter(const FilterMode mode, GPUContext* context, const int32 width, const int32 height, GPUTextureView* src, GPUTextureView* dst, GPUTextureView* tmp); + void Filter(FilterMode mode, GPUContext* context, int32 width, int32 height, GPUTextureView* src, GPUTextureView* dst, GPUTextureView* tmp); /// /// Performs texture filtering. @@ -64,18 +62,26 @@ public: /// The output height. /// The source and destination texture. /// The temporary texture (should have the same size as destination texture). - void Filter(const FilterMode mode, GPUContext* context, const int32 width, const int32 height, GPUTextureView* srcDst, GPUTextureView* tmp); + void Filter(FilterMode mode, GPUContext* context, int32 width, int32 height, GPUTextureView* srcDst, GPUTextureView* tmp); /// - /// Downscales the depth buffer (to half resolution). + /// Downscales the depth buffer (to half resolution). Uses `max` operator (`min` for inverted depth) to output the furthest depths for conservative usage. /// /// The context. /// The width of the destination texture (in pixels). /// The height of the destination texture (in pixels). - /// The source texture. - /// The destination texture. + /// The source texture (has to have ShaderResource flag). + /// The destination texture (has to have DepthStencil or RenderTarget flag). 
void DownscaleDepth(GPUContext* context, int32 dstWidth, int32 dstHeight, GPUTexture* src, GPUTextureView* dst); + /// + /// Generates the Hierarchical Z-Buffer (HiZ). Uses `min` operator (`max` for inverted depth) to output the closest depths for conservative usage. + /// + /// The context. + /// The source depth buffer texture (has to have ShaderResource flag). + /// The destination HiZ texture (has to have ShaderResource and RenderTarget flags with per-mip views). + void BuildHiZ(GPUContext* context, GPUTexture* srcDepth, GPUTexture* dstHiZ); + /// /// Upscales the texture. /// @@ -86,7 +92,6 @@ public: void Upscale(GPUContext* context, const Viewport& viewport, GPUTexture* src, GPUTextureView* dst); public: - // [RendererPass] String ToString() const override; bool Init() override; @@ -94,17 +99,16 @@ public: #if COMPILE_WITH_DEV_ENV void OnShaderReloading(Asset* obj) { - _psHalfDepth->ReleaseGPU(); _psUpscale->ReleaseGPU(); _psBlur5.Release(); _psBlur9.Release(); _psBlur13.Release(); + _psHalfDepth.Release(); invalidateResources(); } #endif protected: - // [RendererPass] bool setupResources() override; }; diff --git a/Source/Engine/ShadersCompilation/ShadersCompilation.cpp b/Source/Engine/ShadersCompilation/ShadersCompilation.cpp index f7d1367ba..cb11d2524 100644 --- a/Source/Engine/ShadersCompilation/ShadersCompilation.cpp +++ b/Source/Engine/ShadersCompilation/ShadersCompilation.cpp @@ -443,6 +443,11 @@ String ShadersCompilation::ResolveShaderPath(StringView path) // Hard-coded redirect to platform-specific includes result = Globals::StartupFolder / TEXT("Source/Platforms"); } + else if (projectName.StartsWith(StringView(TEXT("FlaxThirdParty")))) + { + // Hard-coded redirect to third-party-specific includes + result = Globals::StartupFolder / TEXT("Source/ThirdParty"); + } else { HashSet projects; diff --git a/Source/Shaders/MultiScaler.shader b/Source/Shaders/MultiScaler.shader index f46295886..ed3138567 100644 --- a/Source/Shaders/MultiScaler.shader +++ 
b/Source/Shaders/MultiScaler.shader @@ -1,6 +1,7 @@ // Copyright (c) Wojciech Figat. All rights reserved. #include "./Flax/Common.hlsl" +#include "./Flax/Gather.hlsl" META_CB_BEGIN(0, Data) float2 TexelSize; @@ -14,19 +15,24 @@ Texture2D Input : register(t0); // Pixel Shader for depth buffer downscale (to half res) META_PS(true, FEATURE_LEVEL_ES2) -float PS_HalfDepth(Quad_VS2PS input) : SV_Depth -{ -#if CAN_USE_GATHER - float4 depths = Input.GatherRed(SamplerPointClamp, input.TexCoord); +META_PERMUTATION_1(OUTPUT_DEPTH=0) +META_PERMUTATION_1(OUTPUT_DEPTH=1) +META_PERMUTATION_1(HZB_CLOSEST=2) +float PS_HalfDepth(Quad_VS2PS input) +#if OUTPUT_DEPTH + : SV_Depth #else - float4 depths; - depths.x = Input.SampleLevel(SamplerPointClamp, input.TexCoord + float2(0, 1) * TexelSize, 0).r; - depths.y = Input.SampleLevel(SamplerPointClamp, input.TexCoord + float2(1, 1) * TexelSize, 0).r; - depths.z = Input.SampleLevel(SamplerPointClamp, input.TexCoord + float2(1, 0) * TexelSize, 0).r; - depths.w = Input.SampleLevel(SamplerPointClamp, input.TexCoord + float2(0, 0) * TexelSize, 0).r; + : SV_Target0 #endif +{ + // Load 4 depth values (2x2 quad) + float4 depths = TextureGatherRed(Input, SamplerPointClamp, input.TexCoord); - return max(depths.x, max(depths.y, max(depths.z, depths.w))) + 0.0001f; +#if HZB_CLOSEST + return min(depths.x, min(depths.y, min(depths.z, depths.w))); +#else + return max(depths.x, max(depths.y, max(depths.z, depths.w))); +#endif } // Pixel Shader for 5-tap gaussian blur diff --git a/Source/Shaders/ReflectionsCommon.hlsl b/Source/Shaders/ReflectionsCommon.hlsl index d17993fca..eed24de36 100644 --- a/Source/Shaders/ReflectionsCommon.hlsl +++ b/Source/Shaders/ReflectionsCommon.hlsl @@ -6,9 +6,6 @@ #include "./Flax/GBufferCommon.hlsl" #include "./Flax/Quaternion.hlsl" -// Hit depth (view space) threshold to detect if sky was hit (value above it where 1.0f is default) -#define REFLECTIONS_HIT_THRESHOLD 0.9f - // Packed env probe data struct EnvProbeData { diff 
--git a/Source/Shaders/SSR.hlsl b/Source/Shaders/SSR.hlsl index 537a7099d..52fc4e7fa 100644 --- a/Source/Shaders/SSR.hlsl +++ b/Source/Shaders/SSR.hlsl @@ -5,6 +5,9 @@ #include "./Flax/Random.hlsl" #include "./Flax/MonteCarlo.hlsl" #include "./Flax/GBufferCommon.hlsl" +#if SSR_USE_HZB +#include "./FlaxThirdParty/FidelityFX/ffx_sssr.h" +#endif // 1:-1 to 0:1 float2 ClipToUv(float2 clipPos) @@ -44,19 +47,22 @@ float RayAttenBorder(float2 pos, float value) // Returns: xy: hitUV, z: hitMask, where hitUV is the result UV of hit pixel, hitMask is the normalized sample weight (0 if no hit). float3 ScreenSpaceReflectionDirection(float2 uv, GBufferSample gBuffer, float3 viewPos, bool temporal = false, float temporalTime = 0.0f, float brdfBias = 0.82f) { - // Randomize it a little float2 jitter = RandN2(uv + temporalTime); float2 Xi = jitter; Xi.y = lerp(Xi.y, 0.0, brdfBias); float3 H = temporal ? TangentToWorld(gBuffer.Normal, ImportanceSampleGGX(Xi, gBuffer.Roughness)) : gBuffer.Normal; - float3 viewWS = normalize(gBuffer.WorldPos - viewPos); return reflect(viewWS, H.xyz); } // Screen Space Reflection ray tracing utility. +// If SSR_USE_HZB is defined, it uses Hierarchical Z-Buffer for tracing against screen (assumes that depthBuffer is a HiZ with full mip-chain). // Returns: xy: hitUV, z: hitMask, where hitUV is the result UV of hit pixel, hitMask is the normalized sample weight (0 if no hit). 
-float3 TraceScreenSpaceReflection(float2 uv, GBufferSample gBuffer, Texture2D depthBuffer, float3 viewPos, float4x4 viewMatrix, float4x4 viewProjectionMatrix, float stepSize, float maxSamples = 20, bool temporal = false, float temporalTime = 0.0f, float worldAntiSelfOcclusionBias = 0.1f, float brdfBias = 0.82f, float drawDistance = 5000.0f, float roughnessThreshold = 0.4f, float edgeFade = 0.1f) +float3 TraceScreenSpaceReflection( +#if SSR_USE_HZB + out bool uncertainHit, uint hzbMips, +#endif + float2 uv, GBufferSample gBuffer, Texture2D depthBuffer, float3 viewPos, float4x4 viewMatrix, float4x4 viewProjectionMatrix, float stepSize, float maxSamples = 50, bool temporal = false, float temporalTime = 0.0f, float worldAntiSelfOcclusionBias = 0.1f, float brdfBias = 0.82f, float drawDistance = 5000.0f, float roughnessThreshold = 0.4f, float edgeFade = 0.1f) { #ifndef SSR_SKIP_INVALID_CHECK // Reject invalid pixels @@ -69,17 +75,19 @@ float3 TraceScreenSpaceReflection(float2 uv, GBufferSample gBuffer, Texture2D de float3 reflectVS = normalize(reflect(gBuffer.ViewPos, normalVS)); if (gBuffer.ViewPos.z < 1.0 && reflectVS.z < 0.4) return 0; - + + // Calculate ray path in UV space (z is raw depth) float3 reflectWS = ScreenSpaceReflectionDirection(uv, gBuffer, viewPos, temporal, temporalTime, brdfBias); +#if SSR_USE_HZB + worldAntiSelfOcclusionBias *= 10.0f; // Higher bias for HZB trace to reduce artifacts +#endif float3 startWS = gBuffer.WorldPos + gBuffer.Normal * worldAntiSelfOcclusionBias; float3 startUV = ProjectWorldToUv(startWS, viewProjectionMatrix); float3 endUV = ProjectWorldToUv(startWS + reflectWS, viewProjectionMatrix); - float3 rayUV = endUV - startUV; float2 rayUVAbs = abs(rayUV.xy); rayUV *= stepSize / max(rayUVAbs.x, rayUVAbs.y); float3 startUv = startUV + rayUV * 2; - float3 currOffset = startUv; float3 rayStep = rayUV * 2; @@ -89,26 +97,30 @@ float3 TraceScreenSpaceReflection(float2 uv, GBufferSample gBuffer, Texture2D de float numSamples = 
min(maxSamples, samplesToEdge.x); rayStep *= samplesToEdge.x / numSamples; - // Calculate depth difference error - float depthDiffError = 1.3f * abs(rayStep.z); - // Ray trace + float depthDiffError = 1.3f * abs(rayStep.z); +#if SSR_USE_HZB + bool validHit = false; + uint2 depthBufferSize; + depthBuffer.GetDimensions(depthBufferSize.x, depthBufferSize.y); + float3 hit = FFX_SSSR_HierarchicalRaymarch(depthBuffer, hzbMips, depthDiffError, uncertainHit, startUV, rayUV, depthBufferSize, 0, numSamples, validHit); + if (!validHit) + return 0; + currOffset = hit; +#else float currSampleIndex = 0; - float currSample, depthDiff; LOOP while (currSampleIndex < numSamples) { // Sample depth buffer and calculate depth difference - currSample = SAMPLE_RT(depthBuffer, currOffset.xy).r; - depthDiff = currOffset.z - currSample; + float currSample = SAMPLE_RT(depthBuffer, currOffset.xy).r; + float depthDiff = currOffset.z - currSample; // Check intersection if (depthDiff >= 0) { if (depthDiff < depthDiffError) - { break; - } currOffset -= rayStep; rayStep *= 0.5; } @@ -117,25 +129,19 @@ float3 TraceScreenSpaceReflection(float2 uv, GBufferSample gBuffer, Texture2D de currOffset += rayStep; currSampleIndex++; } - - // Check if has valid result after ray tracing if (currSampleIndex >= numSamples) - { - // All samples done but no result - return 0; - } - - float2 hitUV = currOffset.xy; + return 0; // All samples done but no result +#endif // Fade rays close to screen edge const float fadeStart = 0.9f; const float fadeEnd = 1.0f; const float fadeDiffRcp = 1.0f / (fadeEnd - fadeStart); - float2 boundary = abs(hitUV - float2(0.5f, 0.5f)) * 2.0f; + float2 boundary = abs(currOffset.xy - float2(0.5f, 0.5f)) * 2.0f; float fadeOnBorder = 1.0f - saturate((boundary.x - fadeStart) * fadeDiffRcp); fadeOnBorder *= 1.0f - saturate((boundary.y - fadeStart) * fadeDiffRcp); fadeOnBorder = smoothstep(0.0f, 1.0f, fadeOnBorder); - fadeOnBorder *= RayAttenBorder(hitUV, edgeFade); + fadeOnBorder *= 
RayAttenBorder(currOffset.xy, edgeFade); // Fade rays on high roughness float roughnessFade = saturate((roughnessThreshold - gBuffer.Roughness) * 20); @@ -144,5 +150,5 @@ float3 TraceScreenSpaceReflection(float2 uv, GBufferSample gBuffer, Texture2D de float distanceFade = saturate((drawDistance - gBuffer.ViewPos.z) / drawDistance); // Output: xy: hitUV, z: hitMask - return float3(hitUV, fadeOnBorder * roughnessFade * distanceFade); + return float3(currOffset.xy, fadeOnBorder * roughnessFade * distanceFade); } diff --git a/Source/Shaders/SSR.shader b/Source/Shaders/SSR.shader index 92edbf1b3..79011e37f 100644 --- a/Source/Shaders/SSR.shader +++ b/Source/Shaders/SSR.shader @@ -1,7 +1,14 @@ // Copyright (c) Wojciech Figat. All rights reserved. +// Skips additional check in TraceScreenSpaceReflection for material that is already done by PS_RayTracePass #define SSR_SKIP_INVALID_CHECK 1 +// Uses more-optimized Hierarchical Z-Buffer tracing rather than naive Depth Buffer tracing +#define SSR_USE_HZB 1 + +// Enable/disable luminance filter to reduce reflections highlights +#define SSR_REDUCE_HIGHLIGHTS 1 + #include "./Flax/Common.hlsl" #include "./Flax/LightingCommon.hlsl" #include "./Flax/ReflectionsCommon.hlsl" @@ -11,8 +18,7 @@ #include "./Flax/GlobalSignDistanceField.hlsl" #include "./Flax/GI/GlobalSurfaceAtlas.hlsl" -// Enable/disable luminance filter to reduce reflections highlights -#define SSR_REDUCE_HIGHLIGHTS 1 +#define SSR_USE_SDF (USE_GLOBAL_SURFACE_ATLAS && CAN_USE_GLOBAL_SURFACE_ATLAS) META_CB_BEGIN(0, Data) GBufferData GBuffer; @@ -20,13 +26,13 @@ float MaxColorMiplevel; float TraceSizeMax; float MaxTraceSamples; float RoughnessFade; -float2 SSRtexelSize; +float2 SSRTexelSize; float TemporalTime; float BRDFBias; float WorldAntiSelfOcclusionBias; float EdgeFadeFactor; float TemporalResponse; -float Dummy0; +uint DepthMips; float RayTraceStep; float TemporalEffect; float Intensity; @@ -104,14 +110,31 @@ float4 PS_RayTracePass(Quad_VS2PS input) : SV_Target0 
GBufferSample gBuffer = SampleGBuffer(gBufferData, input.TexCoord); // Reject invalid pixels + BRANCH if (gBuffer.ShadingModel == SHADING_MODEL_UNLIT || gBuffer.Roughness > RoughnessFade || gBuffer.ViewPos.z > FadeOutDistance) return base; // Trace depth buffer to find intersection - float3 screenHit = TraceScreenSpaceReflection(input.TexCoord, gBuffer, Depth, gBufferData.ViewPos, ViewMatrix, ViewProjectionMatrix, RayTraceStep, MaxTraceSamples, TemporalEffect, TemporalTime, WorldAntiSelfOcclusionBias, BRDFBias, FadeOutDistance, RoughnessFade, EdgeFadeFactor); - float4 result = base; + bool uncertainHit = false; + float3 screenHit = TraceScreenSpaceReflection( +#if SSR_USE_HZB + uncertainHit, DepthMips, +#endif + input.TexCoord, gBuffer, Depth, gBufferData.ViewPos, ViewMatrix, ViewProjectionMatrix, RayTraceStep, MaxTraceSamples, TemporalEffect, TemporalTime, WorldAntiSelfOcclusionBias, BRDFBias, FadeOutDistance, RoughnessFade, EdgeFadeFactor); + float4 result = base; +#if SSR_USE_SDF + if (screenHit.z > 0 && !uncertainHit) // Only use certain SSR hits when SDF tracing is enabled +#else if (screenHit.z > 0) +#endif { + if (uncertainHit) + { + // Jitter edges of uncertain hits (when ray goes behind the object) + screenHit.xy += RandN2(input.TexCoord + TemporalTime) * SSRTexelSize; + } + + // Sample color buffer mip that matches roughness of the surface to get blurred reflections float3 viewVector = normalize(gBufferData.ViewPos - gBuffer.WorldPos); float NdotV = saturate(dot(gBuffer.Normal, viewVector)); float coneTangent = lerp(0.0, gBuffer.Roughness * 5 * (1.0 - BRDFBias), pow(NdotV, 1.5) * sqrt(gBuffer.Roughness)); @@ -119,21 +142,28 @@ float4 PS_RayTracePass(Quad_VS2PS input) : SV_Target0 float mip = clamp(log2(intersectionCircleRadius * TraceSizeMax), 0.0, MaxColorMiplevel); float3 sampleColor = Texture0.SampleLevel(SamplerLinearClamp, screenHit.xy, mip).rgb; result = float4(sampleColor, screenHit.z); - if (screenHit.z >= REFLECTIONS_HIT_THRESHOLD) + +#if 
SSR_USE_SDF + // Skip SDF tracing if SSR hit is very certain + BRANCH + if (result.a > 0.95f) return result; +#endif } // Fallback to Global SDF and Global Surface Atlas tracing -#if USE_GLOBAL_SURFACE_ATLAS && CAN_USE_GLOBAL_SURFACE_ATLAS +#if SSR_USE_SDF // Calculate reflection direction (the same TraceScreenSpaceReflection) float3 reflectWS = ScreenSpaceReflectionDirection(input.TexCoord, gBuffer, gBufferData.ViewPos, TemporalEffect, TemporalTime, BRDFBias); + // Raytrace Global SDF GlobalSDFTrace sdfTrace; float maxDistance = GLOBAL_SDF_WORLD_SIZE; sdfTrace.Init(gBuffer.WorldPos, reflectWS, 0.0f, maxDistance); GlobalSDFHit sdfHit = RayTraceGlobalSDF(GlobalSDF, GlobalSDFTex, GlobalSDFMip, sdfTrace, 2.0f); if (sdfHit.IsHit()) { + // Sample Global Surface Atlas float3 hitPosition = sdfHit.GetHitPosition(sdfTrace); float surfaceThreshold = GetGlobalSurfaceAtlasThreshold(GlobalSDF, sdfHit); float4 surfaceAtlas = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, RWGlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hitPosition, -reflectWS, surfaceThreshold); @@ -159,28 +189,27 @@ float4 PS_ResolvePass(Quad_VS2PS input) : SV_Target0 static const float2 Offsets[8] = { float2( 0, 0), - float2( 2, -2), - float2(-2, -2), - float2( 0, 2), - float2(-2, 0), - float2( 0, -2), - float2( 2, 0), - float2( 2, 2), + float2( 1, -1), + float2(-1, -1), + float2( 0, 1), + float2(-1, 0), + float2( 0, -1), + float2( 1, 0), + float2( 1, 1), }; - float2 uv = input.TexCoord; - // Inputs: // Texture0 - ray trace buffer (xy: HDR color, z: weight) // Sample GBuffer GBufferData gBufferData = GetGBufferData(); - GBufferSample gBuffer = SampleGBuffer(gBufferData, uv); + GBufferSample gBuffer = SampleGBuffer(gBufferData, input.TexCoord); + BRANCH if (gBuffer.ShadingModel == SHADING_MODEL_UNLIT) return 0; // Randomize it a little - float2 random = RandN2(uv + TemporalTime); + float2 random = RandN2(input.TexCoord + 
TemporalTime); float2 blueNoise = random.xy * 2.0 - 1.0; float2x2 offsetRotationMatrix = float2x2(blueNoise.x, blueNoise.y, -blueNoise.y, blueNoise.x); @@ -189,9 +218,9 @@ float4 PS_ResolvePass(Quad_VS2PS input) : SV_Target0 UNROLL for (int i = 0; i < RESOLVE_SAMPLES; i++) { - float2 offsetUV = Offsets[i] * SSRtexelSize; + float2 offsetUV = Offsets[i] * SSRTexelSize; offsetUV = mul(offsetRotationMatrix, offsetUV); - float4 value = Texture0.SampleLevel(SamplerLinearClamp, uv + offsetUV, 0); + float4 value = Texture0.SampleLevel(SamplerLinearClamp, input.TexCoord + offsetUV, 0); #if SSR_REDUCE_HIGHLIGHTS value.rgb /= 1 + Luminance(value.rgb); #endif @@ -224,8 +253,8 @@ float4 PS_TemporalPass(Quad_VS2PS input) : SV_Target0 float2 velocity = Texture2.SampleLevel(SamplerLinearClamp, uv, 0).xy; float2 prevUV = uv - velocity; float4 current = Texture0.SampleLevel(SamplerLinearClamp, uv, 0); - float2 du = float2(SSRtexelSize.x, 0.0); - float2 dv = float2(0.0, SSRtexelSize.y); + float2 du = float2(SSRTexelSize.x, 0.0); + float2 dv = float2(0.0, SSRTexelSize.y); // Sample pixels around float4 currentTopLeft = Texture0.SampleLevel(SamplerLinearClamp, uv.xy - dv - du, 0); diff --git a/Source/ThirdParty/FidelityFX/FidelityFX.Build.cs b/Source/ThirdParty/FidelityFX/FidelityFX.Build.cs new file mode 100644 index 000000000..2057aad9b --- /dev/null +++ b/Source/ThirdParty/FidelityFX/FidelityFX.Build.cs @@ -0,0 +1,22 @@ +// Copyright (c) Wojciech Figat. All rights reserved. 
+ +using Flax.Build; +using Flax.Build.NativeCpp; + +/// +/// https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK +/// +public class FidelityFX : HeaderOnlyModule +{ + /// + public override void Init() + { + base.Init(); + + LicenseType = LicenseTypes.MIT; + LicenseFilePath = "license.txt"; + + // Merge third-party modules into engine binary + BinaryModuleName = "FlaxEngine"; + } +} diff --git a/Source/ThirdParty/FidelityFX/ffx_sssr.h b/Source/ThirdParty/FidelityFX/ffx_sssr.h new file mode 100644 index 000000000..3e9eeab85 --- /dev/null +++ b/Source/ThirdParty/FidelityFX/ffx_sssr.h @@ -0,0 +1,131 @@ +/********************************************************************** +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/ + +#ifndef FFX_SSSR +#define FFX_SSSR +#define FFX_SSSR_FLOAT_MAX 3.402823466e+38 + +void FFX_SSSR_InitialAdvanceRay(float3 origin, float3 direction, float3 inv_direction, float2 current_mip_resolution, float2 current_mip_resolution_inv, float2 floor_offset, float2 uv_offset, out float3 position, out float current_t) { + float2 current_mip_position = current_mip_resolution * origin.xy; + + // Intersect ray with the half box that is pointing away from the ray origin. + float2 xy_plane = floor(current_mip_position) + floor_offset; + xy_plane = xy_plane * current_mip_resolution_inv + uv_offset; + + // o + d * t = p' => t = (p' - o) / d + float2 t = xy_plane * inv_direction.xy - origin.xy * inv_direction.xy; + current_t = min(t.x, t.y); + position = origin + current_t * direction; +} + +bool FFX_SSSR_AdvanceRay(float3 origin, float3 direction, float3 inv_direction, float2 current_mip_position, float2 current_mip_resolution_inv, float2 floor_offset, float2 uv_offset, float surface_z, inout float3 position, inout float current_t) { + // Create boundary planes + float2 xy_plane = floor(current_mip_position) + floor_offset; + xy_plane = xy_plane * current_mip_resolution_inv + uv_offset; + float3 boundary_planes = float3(xy_plane, surface_z); + + // Intersect ray with the half box that is pointing away from the ray origin. + // o + d * t = p' => t = (p' - o) / d + float3 t = boundary_planes * inv_direction - origin * inv_direction; + + // Prevent using z plane when shooting out of the depth buffer. +#ifdef FFX_SSSR_INVERTED_DEPTH_RANGE + t.z = direction.z < 0 ? t.z : FFX_SSSR_FLOAT_MAX; +#else + t.z = direction.z > 0 ? t.z : FFX_SSSR_FLOAT_MAX; +#endif + + // Choose nearest intersection with a boundary. + float t_min = min(min(t.x, t.y), t.z); + +#ifdef FFX_SSSR_INVERTED_DEPTH_RANGE + // Larger z means closer to the camera. 
+ bool above_surface = surface_z < position.z; +#else + // Smaller z means closer to the camera. + bool above_surface = surface_z > position.z; +#endif + + // Decide whether we are able to advance the ray until we hit the xy boundaries or if we had to clamp it at the surface. + // We use the asuint comparison to avoid NaN / Inf logic, also we actually care about bitwise equality here to see if t_min is the t.z we fed into the min3 above. + bool skipped_tile = asuint(t_min) != asuint(t.z) && above_surface; + + // Make sure to only advance the ray if we're still above the surface. + current_t = above_surface ? t_min : current_t; + + // Advance ray + position = origin + current_t * direction; + + return skipped_tile; +} + +float2 FFX_SSSR_GetMipResolution(float2 screen_dimensions, int mip_level) { + return screen_dimensions * pow(0.5, mip_level); +} + +// Requires origin and direction of the ray to be in screen space [0, 1] x [0, 1] +float3 FFX_SSSR_HierarchicalRaymarch(Texture2D depthBuffer, uint hzbMips, float depthDiffError, out bool uncertainHit, float3 origin, float3 direction, float2 screen_size, int most_detailed_mip, uint max_traversal_intersections, out bool valid_hit) { + const float3 inv_direction = select(direction != 0, 1.0 / direction, FFX_SSSR_FLOAT_MAX); + + // Start on mip with highest detail. + int current_mip = most_detailed_mip; + + // Could recompute these every iteration, but it's faster to hoist them out and update them. + float2 current_mip_resolution = FFX_SSSR_GetMipResolution(screen_size, current_mip); + float2 current_mip_resolution_inv = rcp(current_mip_resolution); + + // Offset to the bounding boxes uv space to intersect the ray with the center of the next pixel. + // This means we ever so slightly over shoot into the next region. 
+ float2 uv_offset = 0.005 * exp2(most_detailed_mip) / screen_size; + uv_offset = select(direction.xy < 0, -uv_offset, uv_offset); + + // Offset applied depending on current mip resolution to move the boundary to the left/right upper/lower border depending on ray direction. + float2 floor_offset = select(direction.xy < 0, 0, 1); + + // Initially advance ray to avoid immediate self intersections. + float current_t; + float3 position; + FFX_SSSR_InitialAdvanceRay(origin, direction, inv_direction, current_mip_resolution, current_mip_resolution_inv, floor_offset, uv_offset, position, current_t); + + uint overDiffError = 0; + int i = 0; + while (i < max_traversal_intersections && current_mip >= most_detailed_mip) { + float2 current_mip_position = current_mip_resolution * position.xy; + float surface_z = depthBuffer.Load(int3(current_mip_position, current_mip)).x; + if (position.z - surface_z > depthDiffError) overDiffError++; // Count number of times we were under the depth by more than the allowed error + bool skipped_tile = FFX_SSSR_AdvanceRay(origin, direction, inv_direction, current_mip_position, current_mip_resolution_inv, floor_offset, uv_offset, surface_z, position, current_t); + ++i; + if (!skipped_tile || current_mip < hzbMips) // Never go too low depth resolution to avoid blocky artifacts + { + current_mip += skipped_tile ? 1 : -1; + current_mip_resolution *= skipped_tile ? 0.5 : 2; + current_mip_resolution_inv *= skipped_tile ? 2 : 0.5; + } + } + + valid_hit = (i <= max_traversal_intersections); + uncertainHit = valid_hit && overDiffError > 3; // If we went over under the surface to detect uncertain hits + + return position; +} + +#endif //FFX_SSSR diff --git a/Source/ThirdParty/FidelityFX/license.txt b/Source/ThirdParty/FidelityFX/license.txt new file mode 100644 index 000000000..324cba594 --- /dev/null +++ b/Source/ThirdParty/FidelityFX/license.txt @@ -0,0 +1,19 @@ +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE.