diff --git a/Content/Shaders/PostProcessing.flax b/Content/Shaders/PostProcessing.flax
index 8747c4eb4..9efa7fc98 100644
--- a/Content/Shaders/PostProcessing.flax
+++ b/Content/Shaders/PostProcessing.flax
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:05a68ff4987dbd550865098e85e5aad29e12589d541b07826784a9cb2eefbb78
-size 16522
+oid sha256:4b7c6f13b504eaa8f8ef6ba9e74c3210effd12d93aa519a63c19b365058f184d
+size 22577
diff --git a/Source/Engine/Graphics/PostProcessSettings.cpp b/Source/Engine/Graphics/PostProcessSettings.cpp
index 738cc4505..5e7278638 100644
--- a/Source/Engine/Graphics/PostProcessSettings.cpp
+++ b/Source/Engine/Graphics/PostProcessSettings.cpp
@@ -42,8 +42,10 @@ void BloomSettings::BlendWith(BloomSettings& other, float weight)
BLEND_BOOL(Enabled);
BLEND_FLOAT(Intensity);
BLEND_FLOAT(Threshold);
- BLEND_FLOAT(BlurSigma);
- BLEND_FLOAT(Limit);
+ BLEND_FLOAT(ThresholdKnee);
+ BLEND_FLOAT(Clamp);
+ BLEND_FLOAT(BaseMix);
+ BLEND_FLOAT(HighMix);
}
void ToneMappingSettings::BlendWith(ToneMappingSettings& other, float weight)
diff --git a/Source/Engine/Graphics/PostProcessSettings.h b/Source/Engine/Graphics/PostProcessSettings.h
index dc3895954..f3aa5c4a8 100644
--- a/Source/Engine/Graphics/PostProcessSettings.h
+++ b/Source/Engine/Graphics/PostProcessSettings.h
@@ -415,19 +415,29 @@ API_ENUM(Attributes="Flags") enum class BloomSettingsOverride : int32
Threshold = 1 << 2,
///
- /// Overrides property.
+ /// Overrides <see cref="BloomSettings.ThresholdKnee"/> property.
///
- BlurSigma = 1 << 3,
+ ThresholdKnee = 1 << 3,
///
- /// Overrides property.
+ /// Overrides <see cref="BloomSettings.Clamp"/> property.
///
- Limit = 1 << 4,
+ Clamp = 1 << 4,
+
+ /// <summary>
+ /// Overrides <see cref="BloomSettings.BaseMix"/> property.
+ /// </summary>
+ BaseMix = 1 << 5,
+
+ /// <summary>
+ /// Overrides <see cref="BloomSettings.HighMix"/> property.
+ /// </summary>
+ HighMix = 1 << 6,
///
/// All properties.
///
- All = Enabled | Intensity | Threshold | BlurSigma | Limit,
+ All = Enabled | Intensity | Threshold | ThresholdKnee | Clamp | BaseMix | HighMix,
};
///
@@ -452,28 +462,40 @@ API_STRUCT() struct FLAXENGINE_API BloomSettings : ISerializable
bool Enabled = true;
///
- /// Bloom effect strength. Set a value of 0 to disabled it, while higher values increase the effect.
+ /// Overall bloom effect strength. Higher values create a stronger glow effect.
///
- API_FIELD(Attributes="Limit(0, 20.0f, 0.01f), EditorOrder(1), PostProcessSetting((int)BloomSettingsOverride.Intensity)")
+ API_FIELD(Attributes="Limit(0, 100.0f, 0.001f), EditorOrder(1), PostProcessSetting((int)BloomSettingsOverride.Intensity)")
float Intensity = 1.0f;
///
- /// Minimum pixel brightness value to start blooming. Values below this threshold are skipped.
+ /// Luminance threshold where bloom begins.
///
- API_FIELD(Attributes="Limit(0, 15.0f, 0.01f), EditorOrder(2), PostProcessSetting((int)BloomSettingsOverride.Threshold)")
- float Threshold = 3.0f;
+ API_FIELD(Attributes="Limit(0, 100.0f, 0.1f), EditorOrder(2), PostProcessSetting((int)BloomSettingsOverride.Threshold)")
+ float Threshold = 1.0f;
///
- /// This affects the fall-off of the bloom. It's the standard deviation (sigma) used in the Gaussian blur formula when calculating the kernel of the bloom.
+ /// Controls the threshold rolloff curve. Higher values create a softer transition.
///
- API_FIELD(Attributes="Limit(0, 20.0f, 0.01f), EditorOrder(3), PostProcessSetting((int)BloomSettingsOverride.BlurSigma)")
- float BlurSigma = 4.0f;
+ API_FIELD(Attributes="Limit(0, 100.0f, 0.01f), EditorOrder(3), PostProcessSetting((int)BloomSettingsOverride.ThresholdKnee)")
+ float ThresholdKnee = 0.5f;
///
- /// Bloom effect brightness limit. Pixels with higher luminance will be capped to this brightness level.
+ /// Maximum brightness limit for bloom highlights.
///
- API_FIELD(Attributes="Limit(0, 100.0f, 0.01f), EditorOrder(4), PostProcessSetting((int)BloomSettingsOverride.Limit)")
- float Limit = 10.0f;
+ API_FIELD(Attributes="Limit(0, 100.0f, 0.1f), EditorOrder(4), PostProcessSetting((int)BloomSettingsOverride.Clamp)")
+ float Clamp = 3.0f;
+
+ ///
+ /// Base mip contribution for wider, softer bloom.
+ ///
+ API_FIELD(Attributes="Limit(0, 1.0f, 0.01f), EditorOrder(5), PostProcessSetting((int)BloomSettingsOverride.BaseMix)")
+ float BaseMix = 0.6f;
+
+ ///
+ /// High mip contribution for tighter, core bloom.
+ ///
+ API_FIELD(Attributes="Limit(0, 1.0f, 0.01f), EditorOrder(6), PostProcessSetting((int)BloomSettingsOverride.HighMix)")
+ float HighMix = 1.0f;
public:
///
@@ -487,7 +509,7 @@ public:
///
/// The structure members override flags.
///
-API_ENUM(Attributes="Flags") enum class ToneMappingSettingsOverride : int32
+API_ENUM(Attributes ="Flags") enum class ToneMappingSettingsOverride : int32
{
///
/// None properties.
@@ -1937,25 +1959,25 @@ API_STRUCT() struct FLAXENGINE_API AntiAliasingSettings : ISerializable
///
/// The sharpening strength for the Contrast Adaptive Sharpening (CAS) pass. Ignored when using TAA that contains own contrast filter.
///
- API_FIELD(Attributes = "Limit(0, 10f, 0.001f), EditorOrder(10), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_SharpeningAmount), EditorDisplay(null, \"CAS Sharpening Amount\"), VisibleIf(nameof(ShowTAASettings), true)")
+ API_FIELD(Attributes="Limit(0, 10f, 0.001f), EditorOrder(10), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_SharpeningAmount), EditorDisplay(null, \"CAS Sharpening Amount\"), VisibleIf(nameof(ShowTAASettings), true)")
float CAS_SharpeningAmount = 0.0f;
///
/// The edge sharpening strength for the Contrast Adaptive Sharpening (CAS) pass. Ignored when using TAA that contains own contrast filter.
///
- API_FIELD(Attributes = "Limit(0, 10f, 0.001f), EditorOrder(11), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_EdgeSharpening), EditorDisplay(null, \"CAS Edge Sharpening\"), VisibleIf(nameof(ShowTAASettings), true)")
+ API_FIELD(Attributes="Limit(0, 10f, 0.001f), EditorOrder(11), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_EdgeSharpening), EditorDisplay(null, \"CAS Edge Sharpening\"), VisibleIf(nameof(ShowTAASettings), true)")
float CAS_EdgeSharpening = 0.5f;
///
/// The minimum edge threshold for the Contrast Adaptive Sharpening (CAS) pass. Ignored when using TAA that contains own contrast filter.
///
- API_FIELD(Attributes = "Limit(0, 10f, 0.001f), EditorOrder(12), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_MinEdgeThreshold), EditorDisplay(null, \"CAS Min Edge Threshold\"), VisibleIf(nameof(ShowTAASettings), true)")
+ API_FIELD(Attributes="Limit(0, 10f, 0.001f), EditorOrder(12), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_MinEdgeThreshold), EditorDisplay(null, \"CAS Min Edge Threshold\"), VisibleIf(nameof(ShowTAASettings), true)")
float CAS_MinEdgeThreshold = 0.03f;
///
/// The over-blur limit for the Contrast Adaptive Sharpening (CAS) pass. Ignored when using TAA that contains own contrast filter.
///
- API_FIELD(Attributes = "Limit(0, 100f, 0.001f), EditorOrder(13), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_OverBlurLimit), EditorDisplay(null, \"CAS Over-blur Limit\"), VisibleIf(nameof(ShowTAASettings), true)")
+ API_FIELD(Attributes="Limit(0, 100f, 0.001f), EditorOrder(13), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_OverBlurLimit), EditorDisplay(null, \"CAS Over-blur Limit\"), VisibleIf(nameof(ShowTAASettings), true)")
float CAS_OverBlurLimit = 1.0f;
public:
diff --git a/Source/Engine/Renderer/PostProcessingPass.cpp b/Source/Engine/Renderer/PostProcessingPass.cpp
index 99831b430..3122e847d 100644
--- a/Source/Engine/Renderer/PostProcessingPass.cpp
+++ b/Source/Engine/Renderer/PostProcessingPass.cpp
@@ -9,18 +9,61 @@
#include "Engine/Graphics/RenderTargetPool.h"
#include "Engine/Engine/Time.h"
-PostProcessingPass::PostProcessingPass()
- : _shader(nullptr)
- , _psThreshold(nullptr)
- , _psScale(nullptr)
- , _psBlurH(nullptr)
- , _psBlurV(nullptr)
- , _psGenGhosts(nullptr)
- , _defaultLensColor(nullptr)
- , _defaultLensStar(nullptr)
- , _defaultLensDirt(nullptr)
-{
-}
+#define GB_RADIUS 6
+#define GB_KERNEL_SIZE (GB_RADIUS * 2 + 1)
+
+GPU_CB_STRUCT(Data{ // Constant buffer 0 of the post-processing shader; member order must mirror META_CB_BEGIN(0, Data) in PostProcessing.shader
+ float BloomIntensity; // Overall bloom strength multiplier
+ float BloomClamp; // Maximum brightness limit for bloom
+ float BloomThreshold; // Luminance threshold where bloom begins
+ float BloomThresholdKnee; // Controls the threshold rolloff curve
+ // NOTE(review): members appear grouped four 32-bit values per block, presumably one 16-byte register each - confirm
+ float BloomBaseMix; // Base mip contribution
+ float BloomHighMix; // High mip contribution
+ float BloomMipCount; // Number of mips in the bloom chain (Render stores bloomMipCount here as a float)
+ float BloomLayer; // Mip index being upsampled; Render updates it before each upsample draw
+
+ Float3 VignetteColor;
+ float VignetteShapeFactor;
+
+ Float2 InputSize;
+ float InputAspect;
+ float GrainAmount;
+
+ float GrainTime;
+ float GrainParticleSize;
+ int32 Ghosts;
+ float HaloWidth;
+
+ float HaloIntensity;
+ float Distortion;
+ float GhostDispersal;
+ float LensFlareIntensity;
+
+ Float2 LensInputDistortion;
+ float LensScale;
+ float LensBias;
+
+ Float2 InvInputSize;
+ float ChromaticDistortion;
+ float Time;
+
+ float Dummy1; // NOTE(review): not written in the visible code, presumably padding - confirm
+ float PostExposure;
+ float VignetteIntensity;
+ float LensDirtIntensity;
+
+ Color ScreenFadeColor;
+
+ Matrix LensFlareStarMat;
+ });
+
+GPU_CB_STRUCT(GaussianBlurData{ // Data uploaded to cb1 (bound at slot 1) before each Gaussian blur pass
+ Float2 Size; // Size of the texture being blurred, in pixels (Render passes w4 x h4)
+ float Dummy3; // NOTE(review): not written in the visible code, presumably padding - confirm
+ float Dummy4; // NOTE(review): same as Dummy3
+ Float4 GaussianBlurCache[GB_KERNEL_SIZE]; // Per tap: X = normalized weight, Y = tap offset (i * xOffset or i * yOffset); Z and W are zero
+ });
String PostProcessingPass::ToString() const
{
@@ -30,8 +73,9 @@ String PostProcessingPass::ToString() const
bool PostProcessingPass::Init()
{
// Create pipeline states
- _psThreshold = GPUDevice::Instance->CreatePipelineState();
- _psScale = GPUDevice::Instance->CreatePipelineState();
+ _psBloomBrightPass = GPUDevice::Instance->CreatePipelineState();
+ _psBloomDownsample = GPUDevice::Instance->CreatePipelineState();
+ _psBloomDualFilterUpsample = GPUDevice::Instance->CreatePipelineState();
_psBlurH = GPUDevice::Instance->CreatePipelineState();
_psBlurV = GPUDevice::Instance->CreatePipelineState();
_psGenGhosts = GPUDevice::Instance->CreatePipelineState();
@@ -69,16 +113,22 @@ bool PostProcessingPass::setupResources()
// Create pipeline stages
GPUPipelineState::Description psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle;
- if (!_psThreshold->IsValid())
+ if (!_psBloomBrightPass->IsValid())
{
- psDesc.PS = shader->GetPS("PS_Threshold");
- if (_psThreshold->Init(psDesc))
+ psDesc.PS = shader->GetPS("PS_BloomBrightPass");
+ if (_psBloomBrightPass->Init(psDesc))
return true;
}
- if (!_psScale->IsValid())
+ if (!_psBloomDownsample->IsValid())
{
- psDesc.PS = shader->GetPS("PS_Scale");
- if (_psScale->Init(psDesc))
+ psDesc.PS = shader->GetPS("PS_BloomDownsample");
+ if (_psBloomDownsample->Init(psDesc))
+ return true;
+ }
+ if (!_psBloomDualFilterUpsample->IsValid())
+ {
+ psDesc.PS = shader->GetPS("PS_BloomDualFilterUpsample");
+ if (_psBloomDualFilterUpsample->Init(psDesc))
return true;
}
if (!_psBlurH->IsValid())
@@ -108,7 +158,7 @@ bool PostProcessingPass::setupResources()
return false;
}
-GPUTexture* PostProcessingPass::getCustomOrDefault(Texture* customTexture, AssetReference& defaultTexture, const Char* defaultName)
+GPUTexture* GetCustomOrDefault(Texture* customTexture, AssetReference& defaultTexture, const Char* defaultName)
{
// Check if use custom texture
if (customTexture)
@@ -125,7 +175,15 @@ GPUTexture* PostProcessingPass::getCustomOrDefault(Texture* customTexture, Asset
return defaultTexture ? defaultTexture->GetTexture() : nullptr;
}
-void PostProcessingPass::GB_ComputeKernel(float sigma, float width, float height)
+/// <summary>
+/// Calculates the Gaussian blur filter kernel. This implementation is
+/// ported from the original Java code appearing in chapter 16 of
+/// "Filthy Rich Clients: Developing Animated and Graphical Effects for Desktop Java".
+/// </summary>
+/// <param name="sigma">Gaussian Blur sigma parameter</param>
+/// <param name="width">Texture to blur width in pixels</param>
+/// <param name="height">Texture to blur height in pixels</param>
+void GB_ComputeKernel(float sigma, float width, float height, Float4 gaussianBlurCacheH[GB_KERNEL_SIZE], Float4 gaussianBlurCacheV[GB_KERNEL_SIZE])
{
float total = 0.0f;
float twoSigmaSquare = 2.0f * sigma * sigma;
@@ -146,19 +204,16 @@ void PostProcessingPass::GB_ComputeKernel(float sigma, float width, float height
// Calculate total weights sum
total += weight;
- GaussianBlurCacheH[index] = Float4(weight, i * xOffset, 0, 0);
- GaussianBlurCacheV[index] = Float4(weight, i * yOffset, 0, 0);
+ gaussianBlurCacheH[index] = Float4(weight, i * xOffset, 0, 0);
+ gaussianBlurCacheV[index] = Float4(weight, i * yOffset, 0, 0);
}
// Normalize weights
for (int32 i = 0; i < GB_KERNEL_SIZE; i++)
{
- GaussianBlurCacheH[i].X /= total;
- GaussianBlurCacheV[i].X /= total;
+ gaussianBlurCacheH[i].X /= total;
+ gaussianBlurCacheV[i].X /= total;
}
-
- // Assign size
- _gbData.Size = Float2(width, height);
}
void PostProcessingPass::Dispose()
@@ -167,8 +222,9 @@ void PostProcessingPass::Dispose()
RendererPass::Dispose();
// Cleanup
- SAFE_DELETE_GPU_RESOURCE(_psThreshold);
- SAFE_DELETE_GPU_RESOURCE(_psScale);
+ SAFE_DELETE_GPU_RESOURCE(_psBloomBrightPass);
+ SAFE_DELETE_GPU_RESOURCE(_psBloomDownsample);
+ SAFE_DELETE_GPU_RESOURCE(_psBloomDualFilterUpsample);
SAFE_DELETE_GPU_RESOURCE(_psBlurH);
SAFE_DELETE_GPU_RESOURCE(_psBlurV);
SAFE_DELETE_GPU_RESOURCE(_psGenGhosts);
@@ -179,13 +235,30 @@ void PostProcessingPass::Dispose()
_defaultLensStar = nullptr;
}
+// Returns the bloom mip chain length: one extra mip per halving of the smaller dimension until it is
+// at most MIN_MIP_SIZE, but never fewer than MIN_MIP_COUNT. Render() starts upsampling at mip
+// (count - 2) and the lens flares pass uses mip 1, so a single-mip chain would yield no bloom.
+int32 CalculateBloomMipCount(int32 width, int32 height)
+{
+    const int32 MIN_MIP_SIZE = 16; // Stop halving once the smaller side is at most this many pixels
+    const int32 MIN_MIP_COUNT = 2; // Mips 0 and 1 are always addressed by the bloom/lens flares passes
+    int32 minDimension = Math::Min(width, height);
+    int32 mipCount = 1;
+    while (minDimension > MIN_MIP_SIZE)
+    {
+        minDimension /= 2;
+        mipCount++;
+    }
+    return Math::Max(mipCount, MIN_MIP_COUNT);
+}
+
void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input, GPUTexture* output, GPUTexture* colorGradingLUT)
{
PROFILE_GPU_CPU("Post Processing");
auto device = GPUDevice::Instance;
auto context = device->GetMainContext();
auto& view = renderContext.View;
-
+
context->ResetRenderTarget();
PostProcessSettings& settings = renderContext.List->Settings;
@@ -204,8 +277,10 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input,
int32 h4 = h2 >> 1;
int32 h8 = h4 >> 1;
+ int32 bloomMipCount = CalculateBloomMipCount(w1, h1);
+
// Ensure to have valid data and if at least one effect should be applied
- if (!(useBloom || useToneMapping || useCameraArtifacts) || checkIfSkipPass() || w8 == 0 || h8 ==0)
+ if (!(useBloom || useToneMapping || useCameraArtifacts) || checkIfSkipPass() || w8 == 0 || h8 == 0)
{
// Resources are missing. Do not perform rendering. Just copy raw frame
context->SetViewportAndScissors((float)output->Width(), (float)output->Height());
@@ -245,14 +320,18 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input,
}
if (useBloom)
{
- data.BloomMagnitude = settings.Bloom.Intensity;
+ data.BloomIntensity = settings.Bloom.Intensity;
+ data.BloomClamp = settings.Bloom.Clamp;
data.BloomThreshold = settings.Bloom.Threshold;
- data.BloomBlurSigma = Math::Max(settings.Bloom.BlurSigma, 0.0001f);
- data.BloomLimit = settings.Bloom.Limit;
+ data.BloomThresholdKnee = settings.Bloom.ThresholdKnee;
+ data.BloomBaseMix = settings.Bloom.BaseMix;
+ data.BloomHighMix = settings.Bloom.HighMix;
+ data.BloomMipCount = (float)bloomMipCount;
+ data.BloomLayer = 0.0f;
}
else
{
- data.BloomMagnitude = 0;
+ data.BloomIntensity = 0;
}
if (useLensFlares)
{
@@ -298,94 +377,71 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input,
////////////////////////////////////////////////////////////////////////////////////
// Bloom
- auto tempDesc = GPUTextureDescription::New2D(w2, h2, 0, output->Format(), GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::PerMipViews);
- auto bloomTmp1 = RenderTargetPool::Get(tempDesc);
- RENDER_TARGET_POOL_SET_NAME(bloomTmp1, "PostProcessing.Bloom");
- // TODO: bloomTmp2 could be quarter res because we don't use it's first mip
- auto bloomTmp2 = RenderTargetPool::Get(tempDesc);
- RENDER_TARGET_POOL_SET_NAME(bloomTmp2, "PostProcessing.Bloom");
+ auto tempDesc = GPUTextureDescription::New2D(w2, h2, bloomMipCount, output->Format(), GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::PerMipViews);
+ auto bloomBuffer1 = RenderTargetPool::Get(tempDesc);
+ RENDER_TARGET_POOL_SET_NAME(bloomBuffer1, "PostProcessing.Bloom");
+ auto bloomBuffer2 = RenderTargetPool::Get(tempDesc);
+ RENDER_TARGET_POOL_SET_NAME(bloomBuffer2, "PostProcessing.Bloom");
+
+ for (int32 mip = 0; mip < bloomMipCount; mip++)
+ {
+ context->Clear(bloomBuffer1->View(0, mip), Color::Transparent);
+ context->Clear(bloomBuffer2->View(0, mip), Color::Transparent);
+ }
- // Check if use bloom
if (useBloom)
{
- // Bloom Threshold and downscale to 1/2
- context->SetRenderTarget(bloomTmp1->View(0, 0));
+ context->SetRenderTarget(bloomBuffer1->View(0, 0));
context->SetViewportAndScissors((float)w2, (float)h2);
context->BindSR(0, input->View());
- context->SetState(_psThreshold);
+ context->SetState(_psBloomBrightPass);
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
- // Downscale to 1/4
- context->SetRenderTarget(bloomTmp1->View(0, 1));
- context->SetViewportAndScissors((float)w4, (float)h4);
- context->BindSR(0, bloomTmp1->View(0, 0));
- context->SetState(_psScale);
- context->DrawFullscreenTriangle();
- context->ResetRenderTarget();
-
- // Downscale to 1/8
- context->SetRenderTarget(bloomTmp1->View(0, 2));
- context->SetViewportAndScissors((float)w8, (float)h8);
- context->BindSR(0, bloomTmp1->View(0, 1));
- context->SetState(_psScale);
- context->DrawFullscreenTriangle();
- context->ResetRenderTarget();
-
- // TODO: perform blur when downscaling (13 tap) and when upscaling? (9 tap)
-
- // Gaussian Blur
- GB_ComputeKernel(data.BloomBlurSigma, static_cast(w8), static_cast(h8));
- //int32 blurStages = (int)Rendering.Quality + 1;
- int32 blurStages = 2;
- for (int32 i = 0; i < blurStages; i++)
+ // Progressive downsamples
+ for (int32 mip = 1; mip < bloomMipCount; mip++)
{
- // Horizontal Bloom Blur
- Platform::MemoryCopy(_gbData.GaussianBlurCache, GaussianBlurCacheH, sizeof(GaussianBlurCacheH));
- context->UpdateCB(cb1, &_gbData);
- context->BindCB(1, cb1);
- //
- context->SetRenderTarget(bloomTmp2->View(0, 2));
- context->BindSR(0, bloomTmp1->View(0, 2));
- context->SetState(_psBlurH);
- context->DrawFullscreenTriangle();
- context->ResetRenderTarget();
+ const int32 mipWidth = w2 >> mip;
+ const int32 mipHeight = h2 >> mip;
- // Vertical Bloom Blur
- Platform::MemoryCopy(_gbData.GaussianBlurCache, GaussianBlurCacheV, sizeof(GaussianBlurCacheV));
- context->UpdateCB(cb1, &_gbData);
- context->BindCB(1, cb1);
- //
- context->SetRenderTarget(bloomTmp1->View(0, 2));
- context->BindSR(0, bloomTmp2->View(0, 2));
- context->SetState(_psBlurV);
+ context->SetRenderTarget(bloomBuffer1->View(0, mip));
+ context->SetViewportAndScissors((float)mipWidth, (float)mipHeight);
+ context->BindSR(0, bloomBuffer1->View(0, mip - 1));
+ context->SetState(_psBloomDownsample);
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
}
- // Upscale to 1/4 (use second tmp target to cache that downscale thress data for lens flares)
- context->SetRenderTarget(bloomTmp2->View(0, 1));
- context->SetViewportAndScissors((float)w4, (float)h4);
- context->BindSR(0, bloomTmp1->View(0, 2));
- context->SetState(_psScale);
- context->DrawFullscreenTriangle();
- context->ResetRenderTarget();
+ // Progressive upsamples
+ for (int32 mip = bloomMipCount - 2; mip >= 0; mip--)
+ {
+ auto upscaleBuffer = bloomBuffer2;
+ if (mip == bloomMipCount - 2)
+ {
+ // If it's the first, copy the chain over
+ upscaleBuffer = bloomBuffer1;
+ }
+ const int32 mipWidth = w2 >> mip;
+ const int32 mipHeight = h2 >> mip;
- // Upscale to 1/2
- context->SetRenderTarget(bloomTmp1->View(0, 0));
- context->SetViewportAndScissors((float)w2, (float)h2);
- context->BindSR(0, bloomTmp2->View(0, 1));
- context->SetState(_psScale);
- context->DrawFullscreenTriangle();
- context->ResetRenderTarget();
+ data.BloomLayer = static_cast(mip);
+ context->UpdateCB(cb0, &data);
+ context->SetRenderTarget(bloomBuffer2->View(0, mip));
+ context->SetViewportAndScissors((float)mipWidth, (float)mipHeight);
+ context->BindSR(0, upscaleBuffer->View(0, mip + 1));
+ context->BindSR(1, bloomBuffer1->View(0, mip + 1));
+ context->SetState(_psBloomDualFilterUpsample);
+ context->DrawFullscreenTriangle();
+ context->ResetRenderTarget();
+ }
- // Set bloom
+ // Set bloom output
context->UnBindSR(0);
- context->BindSR(2, bloomTmp1->View(0, 0));
+ context->UnBindSR(1);
+ context->BindSR(2, bloomBuffer2->View(0, 0));
}
else
{
- // No bloom texture
context->UnBindSR(2);
}
@@ -396,43 +452,47 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input,
if (useLensFlares)
{
// Prepare lens flares helper textures
- context->BindSR(5, getCustomOrDefault(settings.LensFlares.LensStar, _defaultLensStar, TEXT("Engine/Textures/DefaultLensStarburst")));
- context->BindSR(6, getCustomOrDefault(settings.LensFlares.LensColor, _defaultLensColor, TEXT("Engine/Textures/DefaultLensColor")));
+ context->BindSR(5, GetCustomOrDefault(settings.LensFlares.LensStar, _defaultLensStar, TEXT("Engine/Textures/DefaultLensStarburst")));
+ context->BindSR(6, GetCustomOrDefault(settings.LensFlares.LensColor, _defaultLensColor, TEXT("Engine/Textures/DefaultLensColor")));
// Render lens flares
- context->SetRenderTarget(bloomTmp2->View(0, 1));
+ context->SetRenderTarget(bloomBuffer2->View(0, 1));
context->SetViewportAndScissors((float)w4, (float)h4);
- context->BindSR(3, bloomTmp1->View(0, 1));
+ context->BindSR(3, bloomBuffer1->View(0, 1)); // Use mip 1 of bloomBuffer1 as source
context->SetState(_psGenGhosts);
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
context->UnBindSR(3);
// Gaussian blur kernel
- GB_ComputeKernel(2.0f, static_cast(w4), static_cast(h4));
+ GaussianBlurData gbData;
+ Float4 GaussianBlurCacheH[GB_KERNEL_SIZE];
+ Float4 GaussianBlurCacheV[GB_KERNEL_SIZE];
+ gbData.Size = Float2(static_cast(w4), static_cast(h4));
+ GB_ComputeKernel(2.0f, gbData.Size.X, gbData.Size.Y, GaussianBlurCacheH, GaussianBlurCacheV);
// Gaussian blur H
- Platform::MemoryCopy(_gbData.GaussianBlurCache, GaussianBlurCacheH, sizeof(GaussianBlurCacheH));
- context->UpdateCB(cb1, &_gbData);
+ Platform::MemoryCopy(gbData.GaussianBlurCache, GaussianBlurCacheH, sizeof(GaussianBlurCacheH));
+ context->UpdateCB(cb1, &gbData);
context->BindCB(1, cb1);
- context->SetRenderTarget(bloomTmp1->View(0, 1));
- context->BindSR(0, bloomTmp2->View(0, 1));
+ context->SetRenderTarget(bloomBuffer1->View(0, 1));
+ context->BindSR(0, bloomBuffer2->View(0, 1));
context->SetState(_psBlurH);
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
// Gaussian blur V
- Platform::MemoryCopy(_gbData.GaussianBlurCache, GaussianBlurCacheV, sizeof(GaussianBlurCacheV));
- context->UpdateCB(cb1, &_gbData);
+ Platform::MemoryCopy(gbData.GaussianBlurCache, GaussianBlurCacheV, sizeof(GaussianBlurCacheV));
+ context->UpdateCB(cb1, &gbData);
context->BindCB(1, cb1);
- context->SetRenderTarget(bloomTmp2->View(0, 1));
- context->BindSR(0, bloomTmp1->View(0, 1));
+ context->SetRenderTarget(bloomBuffer2->View(0, 1));
+ context->BindSR(0, bloomBuffer1->View(0, 1));
context->SetState(_psBlurV);
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
// Set lens flares output
- context->BindSR(3, bloomTmp2->View(0, 1));
+ context->BindSR(3, bloomBuffer2->View(0, 1));
}
else
{
@@ -472,7 +532,7 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input,
// - 5 - LensStar - lens star texture
// - 7 - ColorGradingLUT
context->BindSR(0, input->View());
- context->BindSR(4, getCustomOrDefault(settings.LensFlares.LensDirt, _defaultLensDirt, TEXT("Engine/Textures/DefaultLensDirt")));
+ context->BindSR(4, GetCustomOrDefault(settings.LensFlares.LensDirt, _defaultLensDirt, TEXT("Engine/Textures/DefaultLensDirt")));
context->BindSR(7, colorGradingLutView);
// Composite final frame during single pass (done in full resolution)
@@ -482,6 +542,6 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input,
context->DrawFullscreenTriangle();
// Cleanup
- RenderTargetPool::Release(bloomTmp1);
- RenderTargetPool::Release(bloomTmp2);
+ RenderTargetPool::Release(bloomBuffer1);
+ RenderTargetPool::Release(bloomBuffer2);
}
diff --git a/Source/Engine/Renderer/PostProcessingPass.h b/Source/Engine/Renderer/PostProcessingPass.h
index 22ef5eafc..ed45be1ea 100644
--- a/Source/Engine/Renderer/PostProcessingPass.h
+++ b/Source/Engine/Renderer/PostProcessingPass.h
@@ -5,90 +5,25 @@
#include "RendererPass.h"
#include "Engine/Graphics/GPUPipelineStatePermutations.h"
-#define GB_RADIUS 6
-#define GB_KERNEL_SIZE (GB_RADIUS * 2 + 1)
-
///
-/// Post processing rendering service
+/// Post-processing rendering service.
///
class PostProcessingPass : public RendererPass
{
private:
-
- GPU_CB_STRUCT(Data {
- float BloomLimit;
- float BloomThreshold;
- float BloomMagnitude;
- float BloomBlurSigma;
-
- Float3 VignetteColor;
- float VignetteShapeFactor;
-
- Float2 InputSize;
- float InputAspect;
- float GrainAmount;
-
- float GrainTime;
- float GrainParticleSize;
- int32 Ghosts;
- float HaloWidth;
-
- float HaloIntensity;
- float Distortion;
- float GhostDispersal;
- float LensFlareIntensity;
-
- Float2 LensInputDistortion;
- float LensScale;
- float LensBias;
-
- Float2 InvInputSize;
- float ChromaticDistortion;
- float Time;
-
- float Dummy1;
- float PostExposure;
- float VignetteIntensity;
- float LensDirtIntensity;
-
- Color ScreenFadeColor;
-
- Matrix LensFlareStarMat;
- });
-
- GPU_CB_STRUCT(GaussianBlurData {
- Float2 Size;
- float Dummy3;
- float Dummy4;
- Float4 GaussianBlurCache[GB_KERNEL_SIZE]; // x-weight, y-offset
- });
-
- // Post Processing
AssetReference _shader;
- GPUPipelineState* _psThreshold;
- GPUPipelineState* _psScale;
- GPUPipelineState* _psBlurH;
- GPUPipelineState* _psBlurV;
- GPUPipelineState* _psGenGhosts;
+ GPUPipelineState* _psBloomBrightPass = nullptr;
+ GPUPipelineState* _psBloomDownsample = nullptr;
+ GPUPipelineState* _psBloomDualFilterUpsample = nullptr;
+ GPUPipelineState* _psBlurH = nullptr;
+ GPUPipelineState* _psBlurV = nullptr;
+ GPUPipelineState* _psGenGhosts = nullptr;
GPUPipelineStatePermutationsPs<3> _psComposite;
-
- GaussianBlurData _gbData;
- Float4 GaussianBlurCacheH[GB_KERNEL_SIZE];
- Float4 GaussianBlurCacheV[GB_KERNEL_SIZE];
-
AssetReference _defaultLensColor;
AssetReference _defaultLensStar;
AssetReference _defaultLensDirt;
public:
-
- ///
- /// Init
- ///
- PostProcessingPass();
-
-public:
-
///
/// Perform postFx rendering for the input task
///
@@ -99,24 +34,12 @@ public:
void Render(RenderContext& renderContext, GPUTexture* input, GPUTexture* output, GPUTexture* colorGradingLUT);
private:
-
- GPUTexture* getCustomOrDefault(Texture* customTexture, AssetReference& defaultTexture, const Char* defaultName);
-
- ///
- /// Calculates the Gaussian blur filter kernel. This implementation is
- /// ported from the original Java code appearing in chapter 16 of
- /// "Filthy Rich Clients: Developing Animated and Graphical Effects for Desktop Java".
- ///
- /// Gaussian Blur sigma parameter
- /// Texture to blur width in pixels
- /// Texture to blur height in pixels
- void GB_ComputeKernel(float sigma, float width, float height);
-
#if COMPILE_WITH_DEV_ENV
void OnShaderReloading(Asset* obj)
{
- _psThreshold->ReleaseGPU();
- _psScale->ReleaseGPU();
+ _psBloomBrightPass->ReleaseGPU();
+ _psBloomDownsample->ReleaseGPU();
+ _psBloomDualFilterUpsample->ReleaseGPU();
_psBlurH->ReleaseGPU();
_psBlurV->ReleaseGPU();
_psGenGhosts->ReleaseGPU();
@@ -126,14 +49,12 @@ private:
#endif
public:
-
// [RendererPass]
String ToString() const override;
bool Init() override;
void Dispose() override;
protected:
-
// [RendererPass]
bool setupResources() override;
};
diff --git a/Source/Shaders/PostProcessing.shader b/Source/Shaders/PostProcessing.shader
index c54337b2e..067b0efd3 100644
--- a/Source/Shaders/PostProcessing.shader
+++ b/Source/Shaders/PostProcessing.shader
@@ -36,10 +36,15 @@
META_CB_BEGIN(0, Data)
-float BloomLimit;
-float BloomThreshold;
-float BloomMagnitude;
-float BloomBlurSigma;
+float BloomIntensity;
+float BloomClamp;
+float BloomThreshold;
+float BloomThresholdKnee;
+
+float BloomBaseMix;
+float BloomHighMix;
+float BloomMipCount;
+float BloomLayer;
float3 VignetteColor;
float VignetteShapeFactor;
@@ -254,31 +259,230 @@ float2 coordRot(in float2 tc, in float angle)
// Uses a lower exposure to produce a value suitable for a bloom pass
META_PS(true, FEATURE_LEVEL_ES2)
-float4 PS_Threshold(Quad_VS2PS input) : SV_Target
+float4 PS_BloomBrightPass(Quad_VS2PS input) : SV_Target
{
- float4 color = Input0.SampleLevel(SamplerLinearClamp, input.TexCoord, 0);
- return clamp(color - BloomThreshold, 0, BloomLimit);
+ // Bright pass: averages 13 Karis-weighted taps of Input0, then keeps only the part above the bloom threshold
+ uint width, height;
+ Input0.GetDimensions(width, height);
+ float2 texelSize = 1.0 / float2(width, height); // Size of one Input0 texel in UV space
+ // Accumulators for the weighted average (1 center + 4 inner + 8 outer taps)
+ float3 color = 0;
+ float totalWeight = 0;
+
+ // Center sample gets the largest weight
+ float3 center = Input0.Sample(SamplerLinearClamp, input.TexCoord).rgb;
+
+ // Karis weighting: divide by (1 + luma) so very bright taps cannot dominate the average
+ float centerLuma = max(dot(center, float3(0.2126, 0.7152, 0.0722)), 0.0001);
+ center = center / (1.0 + centerLuma);
+
+ float centerWeight = 4.0;
+ color += center * centerWeight;
+ totalWeight += centerWeight;
+
+ // Inner ring - 4 taps every 90 degrees at 1.0 texel distance
+ UNROLL
+ for (int i = 0; i < 4; i++)
+ {
+ float angle = i * (PI / 2.0);
+ float2 offset = float2(cos(angle), sin(angle)) * texelSize;
+ float3 sample = Input0.Sample(SamplerLinearClamp, input.TexCoord + offset).rgb;
+
+ // Same Karis weighting as the center tap
+ float sampleLuma = max(dot(sample, float3(0.2126, 0.7152, 0.0722)), 0.0001);
+ sample = sample / (1.0 + sampleLuma);
+
+ float weight = 2.0;
+ color += sample * weight;
+ totalWeight += weight;
+ }
+
+ // Outer ring - 8 taps every 45 degrees at 1.4142 texel distance
+ UNROLL
+ for (int j = 0; j < 8; j++)
+ {
+ float angle = j * (PI / 4.0);
+ float2 offset = float2(cos(angle), sin(angle)) * texelSize * 1.4142;
+ float3 sample = Input0.Sample(SamplerLinearClamp, input.TexCoord + offset).rgb;
+
+ // Same Karis weighting as the center tap
+ float sampleLuma = max(dot(sample, float3(0.2126, 0.7152, 0.0722)), 0.0001);
+ sample = sample / (1.0 + sampleLuma);
+
+ float weight = 1.0;
+ color += sample * weight;
+ totalWeight += weight;
+ }
+ color /= totalWeight;
+
+ // Re-expand the averaged color by (1 + luma); this only approximates the inverse of the per-tap weighting
+ float finalLuma = max(dot(color, float3(0.2126, 0.7152, 0.0722)), 0.0001);
+ color = color * (1.0 + finalLuma);
+
+ // Apply threshold with quadratic rolloff for smoother transition
+ float luminance = dot(color, float3(0.2126, 0.7152, 0.0722));
+ float threshold = max(BloomThreshold, 0.2); // The effective threshold never goes below 0.2
+ float knee = threshold * BloomThresholdKnee; // Width of the soft region above the threshold
+ float softMax = threshold + knee;
+
+ float contribution = 0;
+ if (luminance > threshold)
+ {
+ if (luminance < softMax) // Only reachable when knee > 0, so the division below is safe
+ {
+ // Quadratic ramp: 0 at threshold, knee at softMax, so it meets the linear branch below without a jump
+ float x = (luminance - threshold) / knee;
+ contribution = x * x * knee;
+ }
+ else
+ {
+ // Full contribution above softMax
+ contribution = luminance - threshold;
+ }
+ }
+
+ // Limit every channel to BloomClamp so that a few extremely bright
+ // pixels cannot dominate the blurred result
+ float3 clamped = min(color * contribution, BloomClamp);
+
+ // Alpha carries the filtered luminance measured before thresholding.
+ // NOTE(review): PS_BloomDownsample writes alpha = 1.0, so this value is
+ // not propagated down the chain by the shaders visible here
+ return float4(clamped, luminance);
}
-// Uses hw bilinear filtering for upscaling or downscaling
META_PS(true, FEATURE_LEVEL_ES2)
-float4 PS_Scale(Quad_VS2PS input) : SV_Target
+float4 PS_BloomDownsample(Quad_VS2PS input) : SV_Target // Returns a weighted 3x3 average of Input0 (rgb), alpha = 1.0
{
- // TODO: we could use quality switch for bloom effect
+ uint width, height;
+ Input0.GetDimensions(width, height);
+ float2 texelSize = 1.0 / float2(width, height); // Size of one Input0 texel in UV space
- return Input0.SampleLevel(SamplerLinearClamp, input.TexCoord, 0);
- /*
- float3 color;
- // TODO: use gather for dx11 and dx12??
- color = Input0.SampleLevel(SamplerLinearClamp, input.TexCoord, 0, int2( 0, 0)).rgb;
- color += Input0.SampleLevel(SamplerLinearClamp, input.TexCoord, 0, int2( 0, 1)).rgb;
- color += Input0.SampleLevel(SamplerLinearClamp, input.TexCoord, 0, int2( 0,-1)).rgb;
- color += Input0.SampleLevel(SamplerLinearClamp, input.TexCoord, 0, int2(-1, 0)).rgb;
- color += Input0.SampleLevel(SamplerLinearClamp, input.TexCoord, 0, int2( 1, 0)).rgb;
- color *= (1.0f / 5.0f);
+ // 9-tap tent filter: 3x3 taps weighted 4 (center), 2 (cross) and 1 (corners)
+ float3 color = 0;
+ float totalWeight = 0;
- return float4(color, 1);
- */
+ // Tap offsets in units of (2 * texelSize); the order matches weights[] below
+ const float2 offsets[9] =
+ {
+ float2( 0, 0), // Center
+ float2(-1, -1), // Corners
+ float2( 1, -1),
+ float2(-1, 1),
+ float2( 1, 1),
+ float2( 0, -1), // Cross
+ float2(-1, 0),
+ float2( 1, 0),
+ float2( 0, 1)
+ };
+
+ // Tap weights, in the same order as offsets[] above; they sum to 16
+ const float weights[9] =
+ {
+ 4.0, // Center
+ 1.0, // Corners
+ 1.0,
+ 1.0,
+ 1.0,
+ 2.0, // Cross
+ 2.0,
+ 2.0,
+ 2.0
+ };
+
+ UNROLL
+ for (int i = 0; i < 9; i++)
+ {
+ float2 offset = offsets[i] * texelSize * 2.0; // Taps are spaced two Input0 texels apart
+ float4 sample = Input0.Sample(SamplerLinearClamp, input.TexCoord + offset);
+ color += sample.rgb * weights[i]; // Only rgb is accumulated; the sampled alpha is ignored
+ totalWeight += weights[i];
+ }
+
+ return float4(color / totalWeight, 1.0); // Normalize by the summed weights; alpha is always written as 1.0
+}
+
+META_PS(true, FEATURE_LEVEL_ES2)
+float4 PS_BloomDualFilterUpsample(Quad_VS2PS input) : SV_Target
+{
+ // Upsample step: 3x3 weighted average of Input0 scaled by a per-mip weight, plus Input1 when it has a non-zero width
+ uint width, height;
+ Input0.GetDimensions(width, height);
+ float2 texelSize = 1.0 / float2(width, height); // Size of one Input0 texel in UV space
+
+ // Tap distance in Input0 texels; it does not depend on BloomLayer
+ const float offsetScale = 1.0;
+ const float anisotropy = 1.0; // Horizontal stretch of the kernel; 1.0 keeps it symmetric
+ float3 color = 0;
+ float totalWeight = 0;
+
+ // Center tap (weight 4)
+ float4 center = Input0.Sample(SamplerLinearClamp, input.TexCoord);
+ float centerWeight = 4.0;
+ color += center.rgb * centerWeight;
+ totalWeight += centerWeight;
+
+ // Cross - four axis-aligned taps (weight 2 each)
+ float2 crossOffsets[4] = {
+ float2(offsetScale * anisotropy, 0),
+ float2(-offsetScale * anisotropy, 0),
+ float2(0, offsetScale),
+ float2(0, -offsetScale)
+ };
+
+ UNROLL
+ for (int i = 0; i < 4; i++)
+ {
+ float4 sample = Input0.Sample(SamplerLinearClamp, input.TexCoord + crossOffsets[i] * texelSize);
+ float weight = 2.0;
+ color += sample.rgb * weight;
+ totalWeight += weight;
+ }
+
+ // Corners - four diagonal taps (weight 1 each)
+ float2 cornerOffsets[4] =
+ {
+ float2(offsetScale * anisotropy, offsetScale),
+ float2(-offsetScale * anisotropy, offsetScale),
+ float2(offsetScale * anisotropy, -offsetScale),
+ float2(-offsetScale * anisotropy, -offsetScale)
+ };
+
+ UNROLL
+ for (int j = 0; j < 4; j++)
+ {
+ float4 sample = Input0.Sample(SamplerLinearClamp, input.TexCoord + cornerOffsets[j] * texelSize);
+ float weight = 1.0;
+ color += sample.rgb * weight;
+ totalWeight += weight;
+ }
+
+ color /= totalWeight; // Normalize by the summed weights (16)
+
+ uint width1, height1;
+ Input1.GetDimensions(width1, height1); // Only width1 is used, in the check before Input1 is sampled
+
+ // Calculate mip fade factor (0 = smallest mip, 1 = largest mip)
+ float mipFade = BloomLayer / max(BloomMipCount - 1.0, 1.0); // Divisor clamped to 1: a single-mip chain yields 0, not 0/0
+
+ // Per-mip blend weight: interpolate from BloomBaseMix to BloomHighMix
+ // as mipFade goes from 0 to 1. Using 0.6 for both is a decent default;
+ // other values change how strongly the lowest and highest mips
+ // contribute, and extreme values give visibly poor bloom.
+ // Note: this also changes the overall bloom intensity, because the
+ // result is not renormalized after the multiply below.
+
+ float mipIntensity = lerp(BloomBaseMix, BloomHighMix, mipFade);
+ color *= mipIntensity;
+
+ BRANCH
+ if (width1 > 0) // NOTE(review): presumably 0 when no previous mip is bound to Input1 - confirm
+ {
+ float3 previousMip = Input1.Sample(SamplerLinearClamp, input.TexCoord).rgb;
+ color += previousMip; // Accumulate the result of the previous upsample step
+ }
+
+ return float4(color, 1.0);
}
// Horizontal gaussian blur
@@ -286,13 +490,11 @@ META_PS(true, FEATURE_LEVEL_ES2)
float4 PS_GaussainBlurH(Quad_VS2PS input) : SV_Target
{
float4 color = 0;
-
UNROLL
for (int i = 0; i < GB_KERNEL_SIZE; i++)
{
color += Input0.Sample(SamplerLinearClamp, input.TexCoord + float2(GaussianBlurCache[i].y, 0.0)) * GaussianBlurCache[i].x;
}
-
return color;
}
@@ -301,13 +503,11 @@ META_PS(true, FEATURE_LEVEL_ES2)
float4 PS_GaussainBlurV(Quad_VS2PS input) : SV_Target
{
float4 color = 0;
-
UNROLL
for (int i = 0; i < GB_KERNEL_SIZE; i++)
{
color += Input0.Sample(SamplerLinearClamp, input.TexCoord + float2(0.0, GaussianBlurCache[i].y)) * GaussianBlurCache[i].x;
}
-
return color;
}
@@ -471,18 +671,16 @@ float4 PS_Composite(Quad_VS2PS input) : SV_Target
color.rgb += lensFlares;
}
- // Bloom
- BRANCH
- if (BloomMagnitude > 0)
- {
- // Sample the bloom
- float3 bloom = Input2.SampleLevel(SamplerLinearClamp, uv, 0).rgb;
- bloom = bloom * BloomMagnitude;
-
- // Accumulate final bloom lght
- lensLight += max(0, bloom * 3.0f + (- 1.0f * 3.0f));
- color.rgb += bloom;
- }
+ // Bloom
+ BRANCH
+ if (BloomIntensity > 0)
+ {
+ // Sample the final bloom result
+ float3 bloom = Input2.Sample(SamplerLinearClamp, input.TexCoord).rgb;
+ bloom = bloom * BloomIntensity;
+ lensLight += max(0, bloom * 3.0f + (-1.0f * 3.0f));
+ color.rgb += bloom;
+ }
// Lens Dirt
float3 lensDirt = LensDirt.SampleLevel(SamplerLinearClamp, uv, 0).rgb;