diff --git a/Content/Shaders/PostProcessing.flax b/Content/Shaders/PostProcessing.flax index 8747c4eb4..9efa7fc98 100644 --- a/Content/Shaders/PostProcessing.flax +++ b/Content/Shaders/PostProcessing.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:05a68ff4987dbd550865098e85e5aad29e12589d541b07826784a9cb2eefbb78 -size 16522 +oid sha256:4b7c6f13b504eaa8f8ef6ba9e74c3210effd12d93aa519a63c19b365058f184d +size 22577 diff --git a/Source/Engine/Graphics/PostProcessSettings.cpp b/Source/Engine/Graphics/PostProcessSettings.cpp index 738cc4505..5e7278638 100644 --- a/Source/Engine/Graphics/PostProcessSettings.cpp +++ b/Source/Engine/Graphics/PostProcessSettings.cpp @@ -42,8 +42,10 @@ void BloomSettings::BlendWith(BloomSettings& other, float weight) BLEND_BOOL(Enabled); BLEND_FLOAT(Intensity); BLEND_FLOAT(Threshold); - BLEND_FLOAT(BlurSigma); - BLEND_FLOAT(Limit); + BLEND_FLOAT(ThresholdKnee); + BLEND_FLOAT(Clamp); + BLEND_FLOAT(BaseMix); + BLEND_FLOAT(HighMix); } void ToneMappingSettings::BlendWith(ToneMappingSettings& other, float weight) diff --git a/Source/Engine/Graphics/PostProcessSettings.h b/Source/Engine/Graphics/PostProcessSettings.h index dc3895954..f3aa5c4a8 100644 --- a/Source/Engine/Graphics/PostProcessSettings.h +++ b/Source/Engine/Graphics/PostProcessSettings.h @@ -415,19 +415,29 @@ API_ENUM(Attributes="Flags") enum class BloomSettingsOverride : int32 Threshold = 1 << 2, /// - /// Overrides property. + /// Overrides property. /// - BlurSigma = 1 << 3, + ThresholdKnee = 1 << 3, /// - /// Overrides property. + /// Overrides property. /// - Limit = 1 << 4, + Clamp = 1 << 4, + + /// + /// Overrides property. + /// + BaseMix = 1 << 5, + + /// + /// Overrides property. + /// + HighMix = 1 << 6, /// /// All properties. 
/// - All = Enabled | Intensity | Threshold | BlurSigma | Limit, + All = Enabled | Intensity | Threshold | ThresholdKnee | Clamp | BaseMix | HighMix, }; /// @@ -452,28 +462,40 @@ API_STRUCT() struct FLAXENGINE_API BloomSettings : ISerializable bool Enabled = true; /// - /// Bloom effect strength. Set a value of 0 to disabled it, while higher values increase the effect. + /// Overall bloom effect strength. Higher values create a stronger glow effect. /// - API_FIELD(Attributes="Limit(0, 20.0f, 0.01f), EditorOrder(1), PostProcessSetting((int)BloomSettingsOverride.Intensity)") + API_FIELD(Attributes="Limit(0, 100.0f, 0.001f), EditorOrder(1), PostProcessSetting((int)BloomSettingsOverride.Intensity)") float Intensity = 1.0f; /// - /// Minimum pixel brightness value to start blooming. Values below this threshold are skipped. + /// Luminance threshold where bloom begins. /// - API_FIELD(Attributes="Limit(0, 15.0f, 0.01f), EditorOrder(2), PostProcessSetting((int)BloomSettingsOverride.Threshold)") - float Threshold = 3.0f; + API_FIELD(Attributes="Limit(0, 100.0f, 0.1f), EditorOrder(2), PostProcessSetting((int)BloomSettingsOverride.Threshold)") + float Threshold = 1.0f; /// - /// This affects the fall-off of the bloom. It's the standard deviation (sigma) used in the Gaussian blur formula when calculating the kernel of the bloom. + /// Controls the threshold rolloff curve. Higher values create a softer transition. /// - API_FIELD(Attributes="Limit(0, 20.0f, 0.01f), EditorOrder(3), PostProcessSetting((int)BloomSettingsOverride.BlurSigma)") - float BlurSigma = 4.0f; + API_FIELD(Attributes="Limit(0, 100.0f, 0.01f), EditorOrder(3), PostProcessSetting((int)BloomSettingsOverride.ThresholdKnee)") + float ThresholdKnee = 0.5f; /// - /// Bloom effect brightness limit. Pixels with higher luminance will be capped to this brightness level. + /// Maximum brightness limit for bloom highlights. 
/// - API_FIELD(Attributes="Limit(0, 100.0f, 0.01f), EditorOrder(4), PostProcessSetting((int)BloomSettingsOverride.Limit)") - float Limit = 10.0f; + API_FIELD(Attributes="Limit(0, 100.0f, 0.1f), EditorOrder(4), PostProcessSetting((int)BloomSettingsOverride.Clamp)") + float Clamp = 3.0f; + + /// + /// Base mip contribution for wider, softer bloom. + /// + API_FIELD(Attributes="Limit(0, 1.0f, 0.01f), EditorOrder(5), PostProcessSetting((int)BloomSettingsOverride.BaseMix)") + float BaseMix = 0.6f; + + /// + /// High mip contribution for tighter, core bloom. + /// + API_FIELD(Attributes="Limit(0, 1.0f, 0.01f), EditorOrder(6), PostProcessSetting((int)BloomSettingsOverride.HighMix)") + float HighMix = 1.0f; public: /// @@ -487,7 +509,7 @@ public: /// /// The structure members override flags. /// -API_ENUM(Attributes="Flags") enum class ToneMappingSettingsOverride : int32 +API_ENUM(Attributes ="Flags") enum class ToneMappingSettingsOverride : int32 { /// /// None properties. @@ -1937,25 +1959,25 @@ API_STRUCT() struct FLAXENGINE_API AntiAliasingSettings : ISerializable /// /// The sharpening strength for the Contrast Adaptive Sharpening (CAS) pass. Ignored when using TAA that contains own contrast filter. /// - API_FIELD(Attributes = "Limit(0, 10f, 0.001f), EditorOrder(10), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_SharpeningAmount), EditorDisplay(null, \"CAS Sharpening Amount\"), VisibleIf(nameof(ShowTAASettings), true)") + API_FIELD(Attributes="Limit(0, 10f, 0.001f), EditorOrder(10), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_SharpeningAmount), EditorDisplay(null, \"CAS Sharpening Amount\"), VisibleIf(nameof(ShowTAASettings), true)") float CAS_SharpeningAmount = 0.0f; /// /// The edge sharpening strength for the Contrast Adaptive Sharpening (CAS) pass. Ignored when using TAA that contains own contrast filter. 
/// - API_FIELD(Attributes = "Limit(0, 10f, 0.001f), EditorOrder(11), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_EdgeSharpening), EditorDisplay(null, \"CAS Edge Sharpening\"), VisibleIf(nameof(ShowTAASettings), true)") + API_FIELD(Attributes="Limit(0, 10f, 0.001f), EditorOrder(11), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_EdgeSharpening), EditorDisplay(null, \"CAS Edge Sharpening\"), VisibleIf(nameof(ShowTAASettings), true)") float CAS_EdgeSharpening = 0.5f; /// /// The minimum edge threshold for the Contrast Adaptive Sharpening (CAS) pass. Ignored when using TAA that contains own contrast filter. /// - API_FIELD(Attributes = "Limit(0, 10f, 0.001f), EditorOrder(12), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_MinEdgeThreshold), EditorDisplay(null, \"CAS Min Edge Threshold\"), VisibleIf(nameof(ShowTAASettings), true)") + API_FIELD(Attributes="Limit(0, 10f, 0.001f), EditorOrder(12), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_MinEdgeThreshold), EditorDisplay(null, \"CAS Min Edge Threshold\"), VisibleIf(nameof(ShowTAASettings), true)") float CAS_MinEdgeThreshold = 0.03f; /// /// The over-blur limit for the Contrast Adaptive Sharpening (CAS) pass. Ignored when using TAA that contains own contrast filter. 
/// - API_FIELD(Attributes = "Limit(0, 100f, 0.001f), EditorOrder(13), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_OverBlurLimit), EditorDisplay(null, \"CAS Over-blur Limit\"), VisibleIf(nameof(ShowTAASettings), true)") + API_FIELD(Attributes="Limit(0, 100f, 0.001f), EditorOrder(13), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_OverBlurLimit), EditorDisplay(null, \"CAS Over-blur Limit\"), VisibleIf(nameof(ShowTAASettings), true)") float CAS_OverBlurLimit = 1.0f; public: diff --git a/Source/Engine/Renderer/PostProcessingPass.cpp b/Source/Engine/Renderer/PostProcessingPass.cpp index 99831b430..3122e847d 100644 --- a/Source/Engine/Renderer/PostProcessingPass.cpp +++ b/Source/Engine/Renderer/PostProcessingPass.cpp @@ -9,18 +9,61 @@ #include "Engine/Graphics/RenderTargetPool.h" #include "Engine/Engine/Time.h" -PostProcessingPass::PostProcessingPass() - : _shader(nullptr) - , _psThreshold(nullptr) - , _psScale(nullptr) - , _psBlurH(nullptr) - , _psBlurV(nullptr) - , _psGenGhosts(nullptr) - , _defaultLensColor(nullptr) - , _defaultLensStar(nullptr) - , _defaultLensDirt(nullptr) -{ -} +#define GB_RADIUS 6 +#define GB_KERNEL_SIZE (GB_RADIUS * 2 + 1) + +GPU_CB_STRUCT(Data{ + float BloomIntensity; // Overall bloom strength multiplier + float BloomClamp; // Maximum brightness limit for bloom + float BloomThreshold; // Luminance threshold where bloom begins + float BloomThresholdKnee; // Controls the threshold rolloff curve + + float BloomBaseMix; // Base mip contribution + float BloomHighMix; // High mip contribution + float BloomMipCount; + float BloomLayer; + + Float3 VignetteColor; + float VignetteShapeFactor; + + Float2 InputSize; + float InputAspect; + float GrainAmount; + + float GrainTime; + float GrainParticleSize; + int32 Ghosts; + float HaloWidth; + + float HaloIntensity; + float Distortion; + float GhostDispersal; + float LensFlareIntensity; + + Float2 LensInputDistortion; + float LensScale; + float LensBias; + + Float2 InvInputSize; + float 
ChromaticDistortion; + float Time; + + float Dummy1; + float PostExposure; + float VignetteIntensity; + float LensDirtIntensity; + + Color ScreenFadeColor; + + Matrix LensFlareStarMat; + }); + +GPU_CB_STRUCT(GaussianBlurData{ + Float2 Size; + float Dummy3; + float Dummy4; + Float4 GaussianBlurCache[GB_KERNEL_SIZE]; // x-weight, y-offset + }); String PostProcessingPass::ToString() const { @@ -30,8 +73,9 @@ String PostProcessingPass::ToString() const bool PostProcessingPass::Init() { // Create pipeline states - _psThreshold = GPUDevice::Instance->CreatePipelineState(); - _psScale = GPUDevice::Instance->CreatePipelineState(); + _psBloomBrightPass = GPUDevice::Instance->CreatePipelineState(); + _psBloomDownsample = GPUDevice::Instance->CreatePipelineState(); + _psBloomDualFilterUpsample = GPUDevice::Instance->CreatePipelineState(); _psBlurH = GPUDevice::Instance->CreatePipelineState(); _psBlurV = GPUDevice::Instance->CreatePipelineState(); _psGenGhosts = GPUDevice::Instance->CreatePipelineState(); @@ -69,16 +113,22 @@ bool PostProcessingPass::setupResources() // Create pipeline stages GPUPipelineState::Description psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; - if (!_psThreshold->IsValid()) + if (!_psBloomBrightPass->IsValid()) { - psDesc.PS = shader->GetPS("PS_Threshold"); - if (_psThreshold->Init(psDesc)) + psDesc.PS = shader->GetPS("PS_BloomBrightPass"); + if (_psBloomBrightPass->Init(psDesc)) return true; } - if (!_psScale->IsValid()) + if (!_psBloomDownsample->IsValid()) { - psDesc.PS = shader->GetPS("PS_Scale"); - if (_psScale->Init(psDesc)) + psDesc.PS = shader->GetPS("PS_BloomDownsample"); + if (_psBloomDownsample->Init(psDesc)) + return true; + } + if (!_psBloomDualFilterUpsample->IsValid()) + { + psDesc.PS = shader->GetPS("PS_BloomDualFilterUpsample"); + if (_psBloomDualFilterUpsample->Init(psDesc)) return true; } if (!_psBlurH->IsValid()) @@ -108,7 +158,7 @@ bool PostProcessingPass::setupResources() return false; } -GPUTexture* 
PostProcessingPass::getCustomOrDefault(Texture* customTexture, AssetReference& defaultTexture, const Char* defaultName) +GPUTexture* GetCustomOrDefault(Texture* customTexture, AssetReference& defaultTexture, const Char* defaultName) { // Check if use custom texture if (customTexture) @@ -125,7 +175,15 @@ GPUTexture* PostProcessingPass::getCustomOrDefault(Texture* customTexture, Asset return defaultTexture ? defaultTexture->GetTexture() : nullptr; } -void PostProcessingPass::GB_ComputeKernel(float sigma, float width, float height) +/// +/// Calculates the Gaussian blur filter kernel. This implementation is +/// ported from the original Java code appearing in chapter 16 of +/// "Filthy Rich Clients: Developing Animated and Graphical Effects for Desktop Java". +/// +/// Gaussian Blur sigma parameter +/// Texture to blur width in pixels +/// Texture to blur height in pixels +void GB_ComputeKernel(float sigma, float width, float height, Float4 gaussianBlurCacheH[GB_KERNEL_SIZE], Float4 gaussianBlurCacheV[GB_KERNEL_SIZE]) { float total = 0.0f; float twoSigmaSquare = 2.0f * sigma * sigma; @@ -146,19 +204,16 @@ void PostProcessingPass::GB_ComputeKernel(float sigma, float width, float height // Calculate total weights sum total += weight; - GaussianBlurCacheH[index] = Float4(weight, i * xOffset, 0, 0); - GaussianBlurCacheV[index] = Float4(weight, i * yOffset, 0, 0); + gaussianBlurCacheH[index] = Float4(weight, i * xOffset, 0, 0); + gaussianBlurCacheV[index] = Float4(weight, i * yOffset, 0, 0); } // Normalize weights for (int32 i = 0; i < GB_KERNEL_SIZE; i++) { - GaussianBlurCacheH[i].X /= total; - GaussianBlurCacheV[i].X /= total; + gaussianBlurCacheH[i].X /= total; + gaussianBlurCacheV[i].X /= total; } - - // Assign size - _gbData.Size = Float2(width, height); } void PostProcessingPass::Dispose() @@ -167,8 +222,9 @@ void PostProcessingPass::Dispose() RendererPass::Dispose(); // Cleanup - SAFE_DELETE_GPU_RESOURCE(_psThreshold); - SAFE_DELETE_GPU_RESOURCE(_psScale); + 
SAFE_DELETE_GPU_RESOURCE(_psBloomBrightPass); + SAFE_DELETE_GPU_RESOURCE(_psBloomDownsample); + SAFE_DELETE_GPU_RESOURCE(_psBloomDualFilterUpsample); SAFE_DELETE_GPU_RESOURCE(_psBlurH); SAFE_DELETE_GPU_RESOURCE(_psBlurV); SAFE_DELETE_GPU_RESOURCE(_psGenGhosts); @@ -179,13 +235,30 @@ void PostProcessingPass::Dispose() _defaultLensStar = nullptr; } +int32 CalculateBloomMipCount(int32 width, int32 height) +{ + // Calculate the smallest dimension + int32 minDimension = Math::Min(width, height); + + // Calculate how many times we can half the dimension until we hit a minimum size + // (e.g., 16x16 pixels as the smallest mip) + const int32 MIN_MIP_SIZE = 16; + int32 mipCount = 1; + while (minDimension > MIN_MIP_SIZE) + { + minDimension /= 2; + mipCount++; + } + return mipCount; +} + void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input, GPUTexture* output, GPUTexture* colorGradingLUT) { PROFILE_GPU_CPU("Post Processing"); auto device = GPUDevice::Instance; auto context = device->GetMainContext(); auto& view = renderContext.View; - + context->ResetRenderTarget(); PostProcessSettings& settings = renderContext.List->Settings; @@ -204,8 +277,10 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input, int32 h4 = h2 >> 1; int32 h8 = h4 >> 1; + int32 bloomMipCount = CalculateBloomMipCount(w1, h1); + // Ensure to have valid data and if at least one effect should be applied - if (!(useBloom || useToneMapping || useCameraArtifacts) || checkIfSkipPass() || w8 == 0 || h8 ==0) + if (!(useBloom || useToneMapping || useCameraArtifacts) || checkIfSkipPass() || w8 == 0 || h8 == 0) { // Resources are missing. Do not perform rendering. 
Just copy raw frame context->SetViewportAndScissors((float)output->Width(), (float)output->Height()); @@ -245,14 +320,18 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input, } if (useBloom) { - data.BloomMagnitude = settings.Bloom.Intensity; + data.BloomIntensity = settings.Bloom.Intensity; + data.BloomClamp = settings.Bloom.Clamp; data.BloomThreshold = settings.Bloom.Threshold; - data.BloomBlurSigma = Math::Max(settings.Bloom.BlurSigma, 0.0001f); - data.BloomLimit = settings.Bloom.Limit; + data.BloomThresholdKnee = settings.Bloom.ThresholdKnee; + data.BloomBaseMix = settings.Bloom.BaseMix; + data.BloomHighMix = settings.Bloom.HighMix; + data.BloomMipCount = (float)bloomMipCount; + data.BloomLayer = 0.0f; } else { - data.BloomMagnitude = 0; + data.BloomIntensity = 0; } if (useLensFlares) { @@ -298,94 +377,71 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input, //////////////////////////////////////////////////////////////////////////////////// // Bloom - auto tempDesc = GPUTextureDescription::New2D(w2, h2, 0, output->Format(), GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::PerMipViews); - auto bloomTmp1 = RenderTargetPool::Get(tempDesc); - RENDER_TARGET_POOL_SET_NAME(bloomTmp1, "PostProcessing.Bloom"); - // TODO: bloomTmp2 could be quarter res because we don't use it's first mip - auto bloomTmp2 = RenderTargetPool::Get(tempDesc); - RENDER_TARGET_POOL_SET_NAME(bloomTmp2, "PostProcessing.Bloom"); + auto tempDesc = GPUTextureDescription::New2D(w2, h2, bloomMipCount, output->Format(), GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::PerMipViews); + auto bloomBuffer1 = RenderTargetPool::Get(tempDesc); + RENDER_TARGET_POOL_SET_NAME(bloomBuffer1, "PostProcessing.Bloom"); + auto bloomBuffer2 = RenderTargetPool::Get(tempDesc); + RENDER_TARGET_POOL_SET_NAME(bloomBuffer2, "PostProcessing.Bloom"); + + for (int32 mip = 0; mip < bloomMipCount; 
mip++) + { + context->Clear(bloomBuffer1->View(0, mip), Color::Transparent); + context->Clear(bloomBuffer2->View(0, mip), Color::Transparent); + } - // Check if use bloom if (useBloom) { - // Bloom Threshold and downscale to 1/2 - context->SetRenderTarget(bloomTmp1->View(0, 0)); + context->SetRenderTarget(bloomBuffer1->View(0, 0)); context->SetViewportAndScissors((float)w2, (float)h2); context->BindSR(0, input->View()); - context->SetState(_psThreshold); + context->SetState(_psBloomBrightPass); context->DrawFullscreenTriangle(); context->ResetRenderTarget(); - // Downscale to 1/4 - context->SetRenderTarget(bloomTmp1->View(0, 1)); - context->SetViewportAndScissors((float)w4, (float)h4); - context->BindSR(0, bloomTmp1->View(0, 0)); - context->SetState(_psScale); - context->DrawFullscreenTriangle(); - context->ResetRenderTarget(); - - // Downscale to 1/8 - context->SetRenderTarget(bloomTmp1->View(0, 2)); - context->SetViewportAndScissors((float)w8, (float)h8); - context->BindSR(0, bloomTmp1->View(0, 1)); - context->SetState(_psScale); - context->DrawFullscreenTriangle(); - context->ResetRenderTarget(); - - // TODO: perform blur when downscaling (13 tap) and when upscaling? 
(9 tap) - - // Gaussian Blur - GB_ComputeKernel(data.BloomBlurSigma, static_cast(w8), static_cast(h8)); - //int32 blurStages = (int)Rendering.Quality + 1; - int32 blurStages = 2; - for (int32 i = 0; i < blurStages; i++) + // Progressive downsamples + for (int32 mip = 1; mip < bloomMipCount; mip++) { - // Horizontal Bloom Blur - Platform::MemoryCopy(_gbData.GaussianBlurCache, GaussianBlurCacheH, sizeof(GaussianBlurCacheH)); - context->UpdateCB(cb1, &_gbData); - context->BindCB(1, cb1); - // - context->SetRenderTarget(bloomTmp2->View(0, 2)); - context->BindSR(0, bloomTmp1->View(0, 2)); - context->SetState(_psBlurH); - context->DrawFullscreenTriangle(); - context->ResetRenderTarget(); + const int32 mipWidth = w2 >> mip; + const int32 mipHeight = h2 >> mip; - // Vertical Bloom Blur - Platform::MemoryCopy(_gbData.GaussianBlurCache, GaussianBlurCacheV, sizeof(GaussianBlurCacheV)); - context->UpdateCB(cb1, &_gbData); - context->BindCB(1, cb1); - // - context->SetRenderTarget(bloomTmp1->View(0, 2)); - context->BindSR(0, bloomTmp2->View(0, 2)); - context->SetState(_psBlurV); + context->SetRenderTarget(bloomBuffer1->View(0, mip)); + context->SetViewportAndScissors((float)mipWidth, (float)mipHeight); + context->BindSR(0, bloomBuffer1->View(0, mip - 1)); + context->SetState(_psBloomDownsample); context->DrawFullscreenTriangle(); context->ResetRenderTarget(); } - // Upscale to 1/4 (use second tmp target to cache that downscale thress data for lens flares) - context->SetRenderTarget(bloomTmp2->View(0, 1)); - context->SetViewportAndScissors((float)w4, (float)h4); - context->BindSR(0, bloomTmp1->View(0, 2)); - context->SetState(_psScale); - context->DrawFullscreenTriangle(); - context->ResetRenderTarget(); + // Progressive upsamples + for (int32 mip = bloomMipCount - 2; mip >= 0; mip--) + { + auto upscaleBuffer = bloomBuffer2; + if (mip == bloomMipCount - 2) + { + // If it's the first, copy the chain over + upscaleBuffer = bloomBuffer1; + } + const int32 mipWidth = w2 >> mip; + 
const int32 mipHeight = h2 >> mip; - // Upscale to 1/2 - context->SetRenderTarget(bloomTmp1->View(0, 0)); - context->SetViewportAndScissors((float)w2, (float)h2); - context->BindSR(0, bloomTmp2->View(0, 1)); - context->SetState(_psScale); - context->DrawFullscreenTriangle(); - context->ResetRenderTarget(); + data.BloomLayer = static_cast(mip); + context->UpdateCB(cb0, &data); + context->SetRenderTarget(bloomBuffer2->View(0, mip)); + context->SetViewportAndScissors((float)mipWidth, (float)mipHeight); + context->BindSR(0, upscaleBuffer->View(0, mip + 1)); + context->BindSR(1, bloomBuffer1->View(0, mip + 1)); + context->SetState(_psBloomDualFilterUpsample); + context->DrawFullscreenTriangle(); + context->ResetRenderTarget(); + } - // Set bloom + // Set bloom output context->UnBindSR(0); - context->BindSR(2, bloomTmp1->View(0, 0)); + context->UnBindSR(1); + context->BindSR(2, bloomBuffer2->View(0, 0)); } else { - // No bloom texture context->UnBindSR(2); } @@ -396,43 +452,47 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input, if (useLensFlares) { // Prepare lens flares helper textures - context->BindSR(5, getCustomOrDefault(settings.LensFlares.LensStar, _defaultLensStar, TEXT("Engine/Textures/DefaultLensStarburst"))); - context->BindSR(6, getCustomOrDefault(settings.LensFlares.LensColor, _defaultLensColor, TEXT("Engine/Textures/DefaultLensColor"))); + context->BindSR(5, GetCustomOrDefault(settings.LensFlares.LensStar, _defaultLensStar, TEXT("Engine/Textures/DefaultLensStarburst"))); + context->BindSR(6, GetCustomOrDefault(settings.LensFlares.LensColor, _defaultLensColor, TEXT("Engine/Textures/DefaultLensColor"))); // Render lens flares - context->SetRenderTarget(bloomTmp2->View(0, 1)); + context->SetRenderTarget(bloomBuffer2->View(0, 1)); context->SetViewportAndScissors((float)w4, (float)h4); - context->BindSR(3, bloomTmp1->View(0, 1)); + context->BindSR(3, bloomBuffer1->View(0, 1)); // Use mip 1 of bloomBuffer1 as source 
context->SetState(_psGenGhosts); context->DrawFullscreenTriangle(); context->ResetRenderTarget(); context->UnBindSR(3); // Gaussian blur kernel - GB_ComputeKernel(2.0f, static_cast(w4), static_cast(h4)); + GaussianBlurData gbData; + Float4 GaussianBlurCacheH[GB_KERNEL_SIZE]; + Float4 GaussianBlurCacheV[GB_KERNEL_SIZE]; + gbData.Size = Float2(static_cast(w4), static_cast(h4)); + GB_ComputeKernel(2.0f, gbData.Size.X, gbData.Size.Y, GaussianBlurCacheH, GaussianBlurCacheV); // Gaussian blur H - Platform::MemoryCopy(_gbData.GaussianBlurCache, GaussianBlurCacheH, sizeof(GaussianBlurCacheH)); - context->UpdateCB(cb1, &_gbData); + Platform::MemoryCopy(gbData.GaussianBlurCache, GaussianBlurCacheH, sizeof(GaussianBlurCacheH)); + context->UpdateCB(cb1, &gbData); context->BindCB(1, cb1); - context->SetRenderTarget(bloomTmp1->View(0, 1)); - context->BindSR(0, bloomTmp2->View(0, 1)); + context->SetRenderTarget(bloomBuffer1->View(0, 1)); + context->BindSR(0, bloomBuffer2->View(0, 1)); context->SetState(_psBlurH); context->DrawFullscreenTriangle(); context->ResetRenderTarget(); // Gaussian blur V - Platform::MemoryCopy(_gbData.GaussianBlurCache, GaussianBlurCacheV, sizeof(GaussianBlurCacheV)); - context->UpdateCB(cb1, &_gbData); + Platform::MemoryCopy(gbData.GaussianBlurCache, GaussianBlurCacheV, sizeof(GaussianBlurCacheV)); + context->UpdateCB(cb1, &gbData); context->BindCB(1, cb1); - context->SetRenderTarget(bloomTmp2->View(0, 1)); - context->BindSR(0, bloomTmp1->View(0, 1)); + context->SetRenderTarget(bloomBuffer2->View(0, 1)); + context->BindSR(0, bloomBuffer1->View(0, 1)); context->SetState(_psBlurV); context->DrawFullscreenTriangle(); context->ResetRenderTarget(); // Set lens flares output - context->BindSR(3, bloomTmp2->View(0, 1)); + context->BindSR(3, bloomBuffer2->View(0, 1)); } else { @@ -472,7 +532,7 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input, // - 5 - LensStar - lens star texture // - 7 - ColorGradingLUT context->BindSR(0, 
input->View()); - context->BindSR(4, getCustomOrDefault(settings.LensFlares.LensDirt, _defaultLensDirt, TEXT("Engine/Textures/DefaultLensDirt"))); + context->BindSR(4, GetCustomOrDefault(settings.LensFlares.LensDirt, _defaultLensDirt, TEXT("Engine/Textures/DefaultLensDirt"))); context->BindSR(7, colorGradingLutView); // Composite final frame during single pass (done in full resolution) @@ -482,6 +542,6 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input, context->DrawFullscreenTriangle(); // Cleanup - RenderTargetPool::Release(bloomTmp1); - RenderTargetPool::Release(bloomTmp2); + RenderTargetPool::Release(bloomBuffer1); + RenderTargetPool::Release(bloomBuffer2); } diff --git a/Source/Engine/Renderer/PostProcessingPass.h b/Source/Engine/Renderer/PostProcessingPass.h index 22ef5eafc..ed45be1ea 100644 --- a/Source/Engine/Renderer/PostProcessingPass.h +++ b/Source/Engine/Renderer/PostProcessingPass.h @@ -5,90 +5,25 @@ #include "RendererPass.h" #include "Engine/Graphics/GPUPipelineStatePermutations.h" -#define GB_RADIUS 6 -#define GB_KERNEL_SIZE (GB_RADIUS * 2 + 1) - /// -/// Post processing rendering service +/// Post-processing rendering service. 
/// class PostProcessingPass : public RendererPass { private: - - GPU_CB_STRUCT(Data { - float BloomLimit; - float BloomThreshold; - float BloomMagnitude; - float BloomBlurSigma; - - Float3 VignetteColor; - float VignetteShapeFactor; - - Float2 InputSize; - float InputAspect; - float GrainAmount; - - float GrainTime; - float GrainParticleSize; - int32 Ghosts; - float HaloWidth; - - float HaloIntensity; - float Distortion; - float GhostDispersal; - float LensFlareIntensity; - - Float2 LensInputDistortion; - float LensScale; - float LensBias; - - Float2 InvInputSize; - float ChromaticDistortion; - float Time; - - float Dummy1; - float PostExposure; - float VignetteIntensity; - float LensDirtIntensity; - - Color ScreenFadeColor; - - Matrix LensFlareStarMat; - }); - - GPU_CB_STRUCT(GaussianBlurData { - Float2 Size; - float Dummy3; - float Dummy4; - Float4 GaussianBlurCache[GB_KERNEL_SIZE]; // x-weight, y-offset - }); - - // Post Processing AssetReference _shader; - GPUPipelineState* _psThreshold; - GPUPipelineState* _psScale; - GPUPipelineState* _psBlurH; - GPUPipelineState* _psBlurV; - GPUPipelineState* _psGenGhosts; + GPUPipelineState* _psBloomBrightPass = nullptr; + GPUPipelineState* _psBloomDownsample = nullptr; + GPUPipelineState* _psBloomDualFilterUpsample = nullptr; + GPUPipelineState* _psBlurH = nullptr; + GPUPipelineState* _psBlurV = nullptr; + GPUPipelineState* _psGenGhosts = nullptr; GPUPipelineStatePermutationsPs<3> _psComposite; - - GaussianBlurData _gbData; - Float4 GaussianBlurCacheH[GB_KERNEL_SIZE]; - Float4 GaussianBlurCacheV[GB_KERNEL_SIZE]; - AssetReference _defaultLensColor; AssetReference _defaultLensStar; AssetReference _defaultLensDirt; public: - - /// - /// Init - /// - PostProcessingPass(); - -public: - /// /// Perform postFx rendering for the input task /// @@ -99,24 +34,12 @@ public: void Render(RenderContext& renderContext, GPUTexture* input, GPUTexture* output, GPUTexture* colorGradingLUT); private: - - GPUTexture* 
getCustomOrDefault(Texture* customTexture, AssetReference& defaultTexture, const Char* defaultName); - - /// - /// Calculates the Gaussian blur filter kernel. This implementation is - /// ported from the original Java code appearing in chapter 16 of - /// "Filthy Rich Clients: Developing Animated and Graphical Effects for Desktop Java". - /// - /// Gaussian Blur sigma parameter - /// Texture to blur width in pixels - /// Texture to blur height in pixels - void GB_ComputeKernel(float sigma, float width, float height); - #if COMPILE_WITH_DEV_ENV void OnShaderReloading(Asset* obj) { - _psThreshold->ReleaseGPU(); - _psScale->ReleaseGPU(); + _psBloomBrightPass->ReleaseGPU(); + _psBloomDownsample->ReleaseGPU(); + _psBloomDualFilterUpsample->ReleaseGPU(); _psBlurH->ReleaseGPU(); _psBlurV->ReleaseGPU(); _psGenGhosts->ReleaseGPU(); @@ -126,14 +49,12 @@ private: #endif public: - // [RendererPass] String ToString() const override; bool Init() override; void Dispose() override; protected: - // [RendererPass] bool setupResources() override; }; diff --git a/Source/Shaders/PostProcessing.shader b/Source/Shaders/PostProcessing.shader index c54337b2e..067b0efd3 100644 --- a/Source/Shaders/PostProcessing.shader +++ b/Source/Shaders/PostProcessing.shader @@ -36,10 +36,15 @@ META_CB_BEGIN(0, Data) -float BloomLimit; -float BloomThreshold; -float BloomMagnitude; -float BloomBlurSigma; +float BloomIntensity; +float BloomClamp; +float BloomThreshold; +float BloomThresholdKnee; + +float BloomBaseMix; +float BloomHighMix; +float BloomMipCount; +float BloomLayer; float3 VignetteColor; float VignetteShapeFactor; @@ -254,31 +259,230 @@ float2 coordRot(in float2 tc, in float angle) // Uses a lower exposure to produce a value suitable for a bloom pass META_PS(true, FEATURE_LEVEL_ES2) -float4 PS_Threshold(Quad_VS2PS input) : SV_Target +float4 PS_BloomBrightPass(Quad_VS2PS input) : SV_Target { - float4 color = Input0.SampleLevel(SamplerLinearClamp, input.TexCoord, 0); - return clamp(color - 
BloomThreshold, 0, BloomLimit);
+    // Get dimensions for precise texel calculations
+    uint width, height;
+    Input0.GetDimensions(width, height);
+    float2 texelSize = 1.0 / float2(width, height);
+    // Use fixed 13-tap sample pattern for initial bright pass (1 center + 4 inner + 8 outer)
+    float3 color = 0;
+    float totalWeight = 0;
+
+    // Center sample with high weight for energy preservation
+    float3 center = Input0.Sample(SamplerLinearClamp, input.TexCoord).rgb;
+
+    // Apply Karis average to prevent bright pixels from dominating
+    float centerLuma = max(dot(center, float3(0.2126, 0.7152, 0.0722)), 0.0001);
+    center = center / (1.0 + centerLuma);
+
+    float centerWeight = 4.0;
+    color += center * centerWeight;
+    totalWeight += centerWeight;
+
+    // Inner ring - fixed offset at 1.0 texel distance
+    UNROLL
+    for (int i = 0; i < 4; i++)
+    {
+        float angle = i * (PI / 2.0);
+        float2 offset = float2(cos(angle), sin(angle)) * texelSize;
+        float3 sample = Input0.Sample(SamplerLinearClamp, input.TexCoord + offset).rgb;
+
+        // Apply Karis average
+        float sampleLuma = max(dot(sample, float3(0.2126, 0.7152, 0.0722)), 0.0001);
+        sample = sample / (1.0 + sampleLuma);
+
+        float weight = 2.0;
+        color += sample * weight;
+        totalWeight += weight;
+    }
+
+    // Outer ring - 8 samples, one every 45 degrees, at a fixed 1.4142 texel distance
+    UNROLL
+    for (int j = 0; j < 8; j++)
+    {
+        float angle = j * (PI / 4.0);
+        float2 offset = float2(cos(angle), sin(angle)) * texelSize * 1.4142;
+        float3 sample = Input0.Sample(SamplerLinearClamp, input.TexCoord + offset).rgb;
+
+        // Apply Karis average
+        float sampleLuma = max(dot(sample, float3(0.2126, 0.7152, 0.0722)), 0.0001);
+        sample = sample / (1.0 + sampleLuma);
+
+        float weight = 1.0;
+        color += sample * weight;
+        totalWeight += weight;
+    }
+    color /= totalWeight;
+
+    // Un-apply Karis average to maintain energy
+    float finalLuma = max(dot(color, float3(0.2126, 0.7152, 0.0722)), 0.0001);
+    color = color * (1.0 + finalLuma); // NOTE(review): the exact inverse of c / (1 + L) is c / (1 - L'); this form keeps the result's luminance below 2 - confirm intended
+
+    // Apply threshold with quadratic rolloff for smoother transition
+    float luminance = dot(color, float3(0.2126, 0.7152, 0.0722));
+    float threshold = max(BloomThreshold, 0.2);
+    float knee = threshold * BloomThresholdKnee;
+    float softMax = threshold + knee;
+
+    float contribution = 0;
+    if (luminance > threshold)
+    {
+        if (luminance < softMax)
+        {
+            // Quadratic softening between threshold and (threshold + knee)
+            float x = (luminance - threshold) / knee; // knee > 0 here, because threshold < luminance < threshold + knee
+            contribution = x * x * 0.5; // NOTE(review): reaches 0.5 at softMax while the branch below starts at knee - continuous only when knee == 0.5
+        }
+        else
+        {
+            // Full contribution above softMax
+            contribution = luminance - threshold;
+        }
+    }
+
+    float maxBrightness = BloomClamp;
+    float3 clamped = (color * contribution);
+    clamped.r = min(clamped.r, maxBrightness);
+    clamped.g = min(clamped.g, maxBrightness);
+    clamped.b = min(clamped.b, maxBrightness);
+
+    // Store the pre-threshold luminance in alpha
+    return float4(clamped, luminance);
 }
 
-// Uses hw bilinear filtering for upscaling or downscaling
 META_PS(true, FEATURE_LEVEL_ES2)
-float4 PS_Scale(Quad_VS2PS input) : SV_Target
+float4 PS_BloomDownsample(Quad_VS2PS input) : SV_Target // 9-tap weighted average of Input0; writes alpha = 1
 {
-    // TODO: we could use quality switch for bloom effect
+    uint width, height;
+    Input0.GetDimensions(width, height);
+    float2 texelSize = 1.0 / float2(width, height);
 
-    return Input0.SampleLevel(SamplerLinearClamp, input.TexCoord, 0);
-    /*
-    float3 color;
-    // TODO: use gather for dx11 and dx12??
-    color = Input0.SampleLevel(SamplerLinearClamp, input.TexCoord, 0, int2( 0, 0)).rgb;
-    color += Input0.SampleLevel(SamplerLinearClamp, input.TexCoord, 0, int2( 0, 1)).rgb;
-    color += Input0.SampleLevel(SamplerLinearClamp, input.TexCoord, 0, int2( 0,-1)).rgb;
-    color += Input0.SampleLevel(SamplerLinearClamp, input.TexCoord, 0, int2(-1, 0)).rgb;
-    color += Input0.SampleLevel(SamplerLinearClamp, input.TexCoord, 0, int2( 1, 0)).rgb;
-    color *= (1.0f / 5.0f);
+    // 9-tap tent filter with fixed weights
+    float3 color = 0;
+    float totalWeight = 0;
 
-    return float4(color, 1);
-    */
+    // Sample offsets (fixed), in texels before the 2.0 scale applied in the loop
+    const float2 offsets[9] =
+    {
+        float2( 0,  0), // Center
+        float2(-1, -1), // Corners
+        float2( 1, -1),
+        float2(-1,  1),
+        float2( 1,  1),
+        float2( 0, -1), // Cross
+        float2(-1,  0),
+        float2( 1,  0),
+        float2( 0,  1)
+    };
+
+    // Sample weights (fixed), same order as offsets; they sum to 16
+    const float weights[9] =
+    {
+        4.0, // Center
+        1.0, // Corners
+        1.0,
+        1.0,
+        1.0,
+        2.0, // Cross
+        2.0,
+        2.0,
+        2.0
+    };
+
+    UNROLL
+    for (int i = 0; i < 9; i++)
+    {
+        float2 offset = offsets[i] * texelSize * 2.0; // Fixed scale factor for stability
+        float4 sample = Input0.Sample(SamplerLinearClamp, input.TexCoord + offset);
+        color += sample.rgb * weights[i];
+        totalWeight += weights[i];
+    }
+
+    return float4(color / totalWeight, 1.0);
+}
+
+META_PS(true, FEATURE_LEVEL_ES2)
+float4 PS_BloomDualFilterUpsample(Quad_VS2PS input) : SV_Target // 9-tap filter of Input0, scaled per mip, plus Input1 when it reports a non-zero width
+{
+    float anisotropy = 1.0;
+    uint width, height;
+    Input0.GetDimensions(width, height);
+    float2 texelSize = 1.0 / float2(width, height);
+
+    // Maintain fixed scale through mip chain
+    float baseOffset = 1.0;
+    float offsetScale = (1.0) * baseOffset;
+    float3 color = 0;
+    float totalWeight = 0;
+
+    // Center
+    float4 center = Input0.Sample(SamplerLinearClamp, input.TexCoord);
+    float centerWeight = 4.0;
+    color += center.rgb * centerWeight;
+    totalWeight += centerWeight;
+
+    // Cross - fixed distance samples
+    float2 crossOffsets[4] = {
+        float2(offsetScale * anisotropy, 0),
+        float2(-offsetScale * anisotropy, 0),
+        float2(0, offsetScale),
+        float2(0, -offsetScale)
+    };
+
+    UNROLL
+    for (int i = 0; i < 4; i++)
+    {
+        float4 sample = Input0.Sample(SamplerLinearClamp, input.TexCoord + crossOffsets[i] * texelSize);
+        float weight = 2.0;
+        color += sample.rgb * weight;
+        totalWeight += weight;
+    }
+
+    // Corners - fixed distance samples
+    float2 cornerOffsets[4] =
+    {
+        float2(offsetScale * anisotropy, offsetScale),
+        float2(-offsetScale * anisotropy, offsetScale),
+        float2(offsetScale * anisotropy, -offsetScale),
+        float2(-offsetScale * anisotropy, -offsetScale)
+    };
+
+    UNROLL
+    for (int j = 0; j < 4; j++)
+    {
+        float4 sample = Input0.Sample(SamplerLinearClamp, input.TexCoord + cornerOffsets[j] * texelSize);
+        float weight = 1.0;
+        color += sample.rgb * weight;
+        totalWeight += weight;
+    }
+
+    color /= totalWeight;
+
+    uint width1, height1;
+    Input1.GetDimensions(width1, height1);
+
+    // Calculate mip fade factor (0 = smallest mip, 1 = largest mip)
+    float mipFade = BloomLayer / max(BloomMipCount - 1.0, 1.0); // divisor clamped so a single-mip chain cannot divide by zero and emit NaN
+
+    // Muzz says:
+    // Lerp between your desired intensity values based on mip level
+    // setting both to 0.6 is a decent default, but playing with these numbers will let you dial in the blending between the lowest and highest mips.
+    // you can make some really ugly bloom if you go too far.
+    // note this does change the intensity of the bloom.
+    // This was my own invention
+
+    float mipIntensity = lerp(BloomBaseMix, BloomHighMix, mipFade);
+    color *= mipIntensity;
+
+    BRANCH
+    if (width1 > 0)
+    {
+        float3 previousMip = Input1.Sample(SamplerLinearClamp, input.TexCoord).rgb;
+        color += previousMip;
+    }
+
+    return float4(color, 1.0);
 }
 
 // Horizontal gaussian blur
@@ -286,13 +490,11 @@ META_PS(true, FEATURE_LEVEL_ES2)
 float4 PS_GaussainBlurH(Quad_VS2PS input) : SV_Target
 {
     float4 color = 0;
-
     UNROLL
     for (int i = 0; i < GB_KERNEL_SIZE; i++)
     {
         color += Input0.Sample(SamplerLinearClamp, input.TexCoord + float2(GaussianBlurCache[i].y, 0.0)) * GaussianBlurCache[i].x;
     }
-
     return color;
 }
 
@@ -301,13 +503,11 @@ META_PS(true, FEATURE_LEVEL_ES2)
 float4 PS_GaussainBlurV(Quad_VS2PS input) : SV_Target
 {
     float4 color = 0;
-
     UNROLL
     for (int i = 0; i < GB_KERNEL_SIZE; i++)
     {
         color += Input0.Sample(SamplerLinearClamp, input.TexCoord + float2(0.0, GaussianBlurCache[i].y)) * GaussianBlurCache[i].x;
     }
-
     return color;
 }
 
@@ -471,18 +671,16 @@ float4 PS_Composite(Quad_VS2PS input) : SV_Target
         color.rgb += lensFlares;
     }
 
-    // Bloom
-    BRANCH
-    if (BloomMagnitude > 0)
-    {
-        // Sample the bloom
-        float3 bloom = Input2.SampleLevel(SamplerLinearClamp, uv, 0).rgb;
-        bloom = bloom * BloomMagnitude;
-
-        // Accumulate final bloom lght
-        lensLight += max(0, bloom * 3.0f + (- 1.0f * 3.0f));
-        color.rgb += bloom;
-    }
+    // Bloom
+    BRANCH
+    if (BloomIntensity > 0)
+    {
+        // Sample the final bloom result
+        float3 bloom = Input2.Sample(SamplerLinearClamp, input.TexCoord).rgb;
+        bloom = bloom * BloomIntensity;
+        lensLight += max(0, bloom * 3.0f + (-1.0f * 3.0f));
+        color.rgb += bloom;
+    }
 
     // Lens Dirt
     float3 lensDirt = LensDirt.SampleLevel(SamplerLinearClamp, uv, 0).rgb;