Merge branch 'Muzz-Advanced-Bloom' into 1.10

This commit is contained in:
Wojtek Figat
2025-03-08 20:29:57 +01:00
6 changed files with 477 additions and 274 deletions

BIN
Content/Shaders/PostProcessing.flax (Stored with Git LFS)

Binary file not shown.

View File

@@ -42,8 +42,10 @@ void BloomSettings::BlendWith(BloomSettings& other, float weight)
BLEND_BOOL(Enabled);
BLEND_FLOAT(Intensity);
BLEND_FLOAT(Threshold);
BLEND_FLOAT(BlurSigma);
BLEND_FLOAT(Limit);
BLEND_FLOAT(ThresholdKnee);
BLEND_FLOAT(Clamp);
BLEND_FLOAT(BaseMix);
BLEND_FLOAT(HighMix);
}
void ToneMappingSettings::BlendWith(ToneMappingSettings& other, float weight)

View File

@@ -415,19 +415,29 @@ API_ENUM(Attributes="Flags") enum class BloomSettingsOverride : int32
Threshold = 1 << 2,
/// <summary>
/// Overrides <see cref="BloomSettings.BlurSigma"/> property.
/// Overrides <see cref="BloomSettings.ThresholdKnee"/> property.
/// </summary>
BlurSigma = 1 << 3,
ThresholdKnee = 1 << 3,
/// <summary>
/// Overrides <see cref="BloomSettings.Limit"/> property.
/// Overrides <see cref="BloomSettings.Clamp"/> property.
/// </summary>
Limit = 1 << 4,
Clamp = 1 << 4,
/// <summary>
/// Overrides <see cref="BloomSettings.BaseMix"/> property.
/// </summary>
BaseMix = 1 << 5,
/// <summary>
/// Overrides <see cref="BloomSettings.HighMix"/> property.
/// </summary>
HighMix = 1 << 6,
/// <summary>
/// All properties.
/// </summary>
All = Enabled | Intensity | Threshold | BlurSigma | Limit,
All = Enabled | Intensity | Threshold | ThresholdKnee | Clamp | BaseMix | HighMix,
};
/// <summary>
@@ -452,28 +462,40 @@ API_STRUCT() struct FLAXENGINE_API BloomSettings : ISerializable
bool Enabled = true;
/// <summary>
/// Bloom effect strength. Set a value of 0 to disabled it, while higher values increase the effect.
/// Overall bloom effect strength. Higher values create a stronger glow effect.
/// </summary>
API_FIELD(Attributes="Limit(0, 20.0f, 0.01f), EditorOrder(1), PostProcessSetting((int)BloomSettingsOverride.Intensity)")
API_FIELD(Attributes="Limit(0, 100.0f, 0.001f), EditorOrder(1), PostProcessSetting((int)BloomSettingsOverride.Intensity)")
float Intensity = 1.0f;
/// <summary>
/// Minimum pixel brightness value to start blooming. Values below this threshold are skipped.
/// Luminance threshold where bloom begins.
/// </summary>
API_FIELD(Attributes="Limit(0, 15.0f, 0.01f), EditorOrder(2), PostProcessSetting((int)BloomSettingsOverride.Threshold)")
float Threshold = 3.0f;
API_FIELD(Attributes="Limit(0, 100.0f, 0.1f), EditorOrder(2), PostProcessSetting((int)BloomSettingsOverride.Threshold)")
float Threshold = 1.0f;
/// <summary>
/// This affects the fall-off of the bloom. It's the standard deviation (sigma) used in the Gaussian blur formula when calculating the kernel of the bloom.
/// Controls the threshold rolloff curve. Higher values create a softer transition.
/// </summary>
API_FIELD(Attributes="Limit(0, 20.0f, 0.01f), EditorOrder(3), PostProcessSetting((int)BloomSettingsOverride.BlurSigma)")
float BlurSigma = 4.0f;
API_FIELD(Attributes="Limit(0, 100.0f, 0.01f), EditorOrder(3), PostProcessSetting((int)BloomSettingsOverride.ThresholdKnee)")
float ThresholdKnee = 0.5f;
/// <summary>
/// Bloom effect brightness limit. Pixels with higher luminance will be capped to this brightness level.
/// Maximum brightness limit for bloom highlights.
/// </summary>
API_FIELD(Attributes="Limit(0, 100.0f, 0.01f), EditorOrder(4), PostProcessSetting((int)BloomSettingsOverride.Limit)")
float Limit = 10.0f;
API_FIELD(Attributes="Limit(0, 100.0f, 0.1f), EditorOrder(4), PostProcessSetting((int)BloomSettingsOverride.Clamp)")
float Clamp = 3.0f;
/// <summary>
/// Base mip contribution for wider, softer bloom.
/// </summary>
API_FIELD(Attributes="Limit(0, 1.0f, 0.01f), EditorOrder(5), PostProcessSetting((int)BloomSettingsOverride.BaseMix)")
float BaseMix = 0.6f;
/// <summary>
/// High mip contribution for tighter, core bloom.
/// </summary>
API_FIELD(Attributes="Limit(0, 1.0f, 0.01f), EditorOrder(6), PostProcessSetting((int)BloomSettingsOverride.HighMix)")
float HighMix = 1.0f;
public:
/// <summary>
@@ -487,7 +509,7 @@ public:
/// <summary>
/// The structure members override flags.
/// </summary>
API_ENUM(Attributes="Flags") enum class ToneMappingSettingsOverride : int32
API_ENUM(Attributes ="Flags") enum class ToneMappingSettingsOverride : int32
{
/// <summary>
/// None properties.
@@ -1937,25 +1959,25 @@ API_STRUCT() struct FLAXENGINE_API AntiAliasingSettings : ISerializable
/// <summary>
/// The sharpening strength for the Contrast Adaptive Sharpening (CAS) pass. Ignored when using TAA that contains own contrast filter.
/// </summary>
API_FIELD(Attributes = "Limit(0, 10f, 0.001f), EditorOrder(10), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_SharpeningAmount), EditorDisplay(null, \"CAS Sharpening Amount\"), VisibleIf(nameof(ShowTAASettings), true)")
API_FIELD(Attributes="Limit(0, 10f, 0.001f), EditorOrder(10), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_SharpeningAmount), EditorDisplay(null, \"CAS Sharpening Amount\"), VisibleIf(nameof(ShowTAASettings), true)")
float CAS_SharpeningAmount = 0.0f;
/// <summary>
/// The edge sharpening strength for the Contrast Adaptive Sharpening (CAS) pass. Ignored when using TAA that contains own contrast filter.
/// </summary>
API_FIELD(Attributes = "Limit(0, 10f, 0.001f), EditorOrder(11), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_EdgeSharpening), EditorDisplay(null, \"CAS Edge Sharpening\"), VisibleIf(nameof(ShowTAASettings), true)")
API_FIELD(Attributes="Limit(0, 10f, 0.001f), EditorOrder(11), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_EdgeSharpening), EditorDisplay(null, \"CAS Edge Sharpening\"), VisibleIf(nameof(ShowTAASettings), true)")
float CAS_EdgeSharpening = 0.5f;
/// <summary>
/// The minimum edge threshold for the Contrast Adaptive Sharpening (CAS) pass. Ignored when using TAA that contains own contrast filter.
/// </summary>
API_FIELD(Attributes = "Limit(0, 10f, 0.001f), EditorOrder(12), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_MinEdgeThreshold), EditorDisplay(null, \"CAS Min Edge Threshold\"), VisibleIf(nameof(ShowTAASettings), true)")
API_FIELD(Attributes="Limit(0, 10f, 0.001f), EditorOrder(12), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_MinEdgeThreshold), EditorDisplay(null, \"CAS Min Edge Threshold\"), VisibleIf(nameof(ShowTAASettings), true)")
float CAS_MinEdgeThreshold = 0.03f;
/// <summary>
/// The over-blur limit for the Contrast Adaptive Sharpening (CAS) pass. Ignored when using TAA that contains own contrast filter.
/// </summary>
API_FIELD(Attributes = "Limit(0, 100f, 0.001f), EditorOrder(13), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_OverBlurLimit), EditorDisplay(null, \"CAS Over-blur Limit\"), VisibleIf(nameof(ShowTAASettings), true)")
API_FIELD(Attributes="Limit(0, 100f, 0.001f), EditorOrder(13), PostProcessSetting((int)AntiAliasingSettingsOverride.CAS_OverBlurLimit), EditorDisplay(null, \"CAS Over-blur Limit\"), VisibleIf(nameof(ShowTAASettings), true)")
float CAS_OverBlurLimit = 1.0f;
public:

View File

@@ -9,18 +9,61 @@
#include "Engine/Graphics/RenderTargetPool.h"
#include "Engine/Engine/Time.h"
/// <summary>
/// Default constructor. Starts with a null shader reference, null pipeline state pointers and null default lens
/// texture references; the pipeline state objects themselves are created later by Init().
/// </summary>
PostProcessingPass::PostProcessingPass()
: _shader(nullptr)
, _psThreshold(nullptr)
, _psScale(nullptr)
, _psBlurH(nullptr)
, _psBlurV(nullptr)
, _psGenGhosts(nullptr)
, _defaultLensColor(nullptr)
, _defaultLensStar(nullptr)
, _defaultLensDirt(nullptr)
{
}
// Gaussian blur kernel radius (presumably the number of taps on each side of the center tap - confirm against GB_ComputeKernel)
#define GB_RADIUS 6
// Total number of kernel entries: 2 * radius + 1. Sizes GaussianBlurData::GaussianBlurCache and the blur caches passed to GB_ComputeKernel.
// NOTE(review): the blur pixel shaders also loop over GB_KERNEL_SIZE, so the shader-side value presumably has to match - confirm.
#define GB_KERNEL_SIZE (GB_RADIUS * 2 + 1)
// CPU-side mirror of the post-processing shader constant buffer `Data` (META_CB_BEGIN(0, Data) in the shader).
// The field order is the memory layout shared with the shader, so fields must be added/reordered on both sides together.
GPU_CB_STRUCT(Data{
float BloomIntensity; // Overall bloom strength multiplier
float BloomClamp; // Maximum brightness limit for bloom
float BloomThreshold; // Luminance threshold where bloom begins
float BloomThresholdKnee; // Controls the threshold rolloff curve
float BloomBaseMix; // Base mip contribution
float BloomHighMix; // High mip contribution
float BloomMipCount; // Number of mips in the bloom chain (integer count stored as float)
float BloomLayer; // Index of the bloom mip being written by the current upsample pass (updated before each pass)
Float3 VignetteColor;
float VignetteShapeFactor;
Float2 InputSize;
float InputAspect;
float GrainAmount;
float GrainTime;
float GrainParticleSize;
int32 Ghosts;
float HaloWidth;
float HaloIntensity;
float Distortion;
float GhostDispersal;
float LensFlareIntensity;
Float2 LensInputDistortion;
float LensScale;
float LensBias;
Float2 InvInputSize;
float ChromaticDistortion;
float Time;
float Dummy1; // NOTE(review): looks like an unused placeholder slot kept for layout - confirm
float PostExposure;
float VignetteIntensity;
float LensDirtIntensity;
Color ScreenFadeColor;
Matrix LensFlareStarMat;
});
// Constant buffer payload for the Gaussian blur passes (filled per blur direction and uploaded with UpdateCB before PS_GaussainBlurH/V).
GPU_CB_STRUCT(GaussianBlurData{
Float2 Size; // Size in pixels of the texture being blurred
float Dummy3; // NOTE(review): presumably padding so the array below starts on a 16-byte boundary - confirm
float Dummy4; // NOTE(review): presumably padding (see Dummy3) - confirm
Float4 GaussianBlurCache[GB_KERNEL_SIZE]; // x-weight, y-offset
});
String PostProcessingPass::ToString() const
{
@@ -30,8 +73,9 @@ String PostProcessingPass::ToString() const
bool PostProcessingPass::Init()
{
// Create pipeline states
_psThreshold = GPUDevice::Instance->CreatePipelineState();
_psScale = GPUDevice::Instance->CreatePipelineState();
_psBloomBrightPass = GPUDevice::Instance->CreatePipelineState();
_psBloomDownsample = GPUDevice::Instance->CreatePipelineState();
_psBloomDualFilterUpsample = GPUDevice::Instance->CreatePipelineState();
_psBlurH = GPUDevice::Instance->CreatePipelineState();
_psBlurV = GPUDevice::Instance->CreatePipelineState();
_psGenGhosts = GPUDevice::Instance->CreatePipelineState();
@@ -69,16 +113,22 @@ bool PostProcessingPass::setupResources()
// Create pipeline stages
GPUPipelineState::Description psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle;
if (!_psThreshold->IsValid())
if (!_psBloomBrightPass->IsValid())
{
psDesc.PS = shader->GetPS("PS_Threshold");
if (_psThreshold->Init(psDesc))
psDesc.PS = shader->GetPS("PS_BloomBrightPass");
if (_psBloomBrightPass->Init(psDesc))
return true;
}
if (!_psScale->IsValid())
if (!_psBloomDownsample->IsValid())
{
psDesc.PS = shader->GetPS("PS_Scale");
if (_psScale->Init(psDesc))
psDesc.PS = shader->GetPS("PS_BloomDownsample");
if (_psBloomDownsample->Init(psDesc))
return true;
}
if (!_psBloomDualFilterUpsample->IsValid())
{
psDesc.PS = shader->GetPS("PS_BloomDualFilterUpsample");
if (_psBloomDualFilterUpsample->Init(psDesc))
return true;
}
if (!_psBlurH->IsValid())
@@ -108,7 +158,7 @@ bool PostProcessingPass::setupResources()
return false;
}
GPUTexture* PostProcessingPass::getCustomOrDefault(Texture* customTexture, AssetReference<Texture>& defaultTexture, const Char* defaultName)
GPUTexture* GetCustomOrDefault(Texture* customTexture, AssetReference<Texture>& defaultTexture, const Char* defaultName)
{
// Check if use custom texture
if (customTexture)
@@ -125,7 +175,15 @@ GPUTexture* PostProcessingPass::getCustomOrDefault(Texture* customTexture, Asset
return defaultTexture ? defaultTexture->GetTexture() : nullptr;
}
void PostProcessingPass::GB_ComputeKernel(float sigma, float width, float height)
/// <summary>
/// Calculates the Gaussian blur filter kernel. This implementation is
/// ported from the original Java code appearing in chapter 16 of
/// "Filthy Rich Clients: Developing Animated and Graphical Effects for Desktop Java".
/// </summary>
/// <param name="sigma">Gaussian Blur sigma parameter</param>
/// <param name="width">Texture to blur width in pixels</param>
/// <param name="height">Texture to blur height in pixels</param>
void GB_ComputeKernel(float sigma, float width, float height, Float4 gaussianBlurCacheH[GB_KERNEL_SIZE], Float4 gaussianBlurCacheV[GB_KERNEL_SIZE])
{
float total = 0.0f;
float twoSigmaSquare = 2.0f * sigma * sigma;
@@ -146,19 +204,16 @@ void PostProcessingPass::GB_ComputeKernel(float sigma, float width, float height
// Calculate total weights sum
total += weight;
GaussianBlurCacheH[index] = Float4(weight, i * xOffset, 0, 0);
GaussianBlurCacheV[index] = Float4(weight, i * yOffset, 0, 0);
gaussianBlurCacheH[index] = Float4(weight, i * xOffset, 0, 0);
gaussianBlurCacheV[index] = Float4(weight, i * yOffset, 0, 0);
}
// Normalize weights
for (int32 i = 0; i < GB_KERNEL_SIZE; i++)
{
GaussianBlurCacheH[i].X /= total;
GaussianBlurCacheV[i].X /= total;
gaussianBlurCacheH[i].X /= total;
gaussianBlurCacheV[i].X /= total;
}
// Assign size
_gbData.Size = Float2(width, height);
}
void PostProcessingPass::Dispose()
@@ -167,8 +222,9 @@ void PostProcessingPass::Dispose()
RendererPass::Dispose();
// Cleanup
SAFE_DELETE_GPU_RESOURCE(_psThreshold);
SAFE_DELETE_GPU_RESOURCE(_psScale);
SAFE_DELETE_GPU_RESOURCE(_psBloomBrightPass);
SAFE_DELETE_GPU_RESOURCE(_psBloomDownsample);
SAFE_DELETE_GPU_RESOURCE(_psBloomDualFilterUpsample);
SAFE_DELETE_GPU_RESOURCE(_psBlurH);
SAFE_DELETE_GPU_RESOURCE(_psBlurV);
SAFE_DELETE_GPU_RESOURCE(_psGenGhosts);
@@ -179,13 +235,30 @@ void PostProcessingPass::Dispose()
_defaultLensStar = nullptr;
}
/// <summary>
/// Calculates how many mip levels the bloom chain should use for a frame of the given size.
/// The smaller dimension is halved until it is no larger than 16 pixels, counting one extra mip per halving.
/// </summary>
/// <remarks>
/// The result is never lower than 2. The lens flares pass renders to and samples mip 1 of the bloom buffers
/// unconditionally, so a single-mip chain (which the plain halving loop yields when the smaller dimension is
/// 16 pixels or less) would make it use a mip view that does not exist. Callers must still make sure the
/// allocated texture is big enough to hold 2 mips (Render skips the pass when the 1/8 resolution size is 0).
/// </remarks>
/// <param name="width">Frame width in pixels.</param>
/// <param name="height">Frame height in pixels.</param>
/// <returns>The amount of mips for the bloom buffers (2 or more).</returns>
int32 CalculateBloomMipCount(int32 width, int32 height)
{
    // Stop halving once the smaller dimension reaches this size
    const int32 MIN_MIP_SIZE = 16;

    // Mip 0 (bloom bright pass) and mip 1 (lens flares source/target) are always required
    const int32 MIN_MIP_COUNT = 2;

    // The smaller dimension decides how deep the chain can go
    int32 minDimension = Math::Min(width, height);

    // Count how many times the dimension can be halved until it hits the minimum size
    int32 mipCount = 1;
    while (minDimension > MIN_MIP_SIZE)
    {
        minDimension /= 2;
        mipCount++;
    }

    return mipCount < MIN_MIP_COUNT ? MIN_MIP_COUNT : mipCount;
}
void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input, GPUTexture* output, GPUTexture* colorGradingLUT)
{
PROFILE_GPU_CPU("Post Processing");
auto device = GPUDevice::Instance;
auto context = device->GetMainContext();
auto& view = renderContext.View;
context->ResetRenderTarget();
PostProcessSettings& settings = renderContext.List->Settings;
@@ -204,8 +277,10 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input,
int32 h4 = h2 >> 1;
int32 h8 = h4 >> 1;
int32 bloomMipCount = CalculateBloomMipCount(w1, h1);
// Ensure to have valid data and if at least one effect should be applied
if (!(useBloom || useToneMapping || useCameraArtifacts) || checkIfSkipPass() || w8 == 0 || h8 ==0)
if (!(useBloom || useToneMapping || useCameraArtifacts) || checkIfSkipPass() || w8 == 0 || h8 == 0)
{
// Resources are missing. Do not perform rendering. Just copy raw frame
context->SetViewportAndScissors((float)output->Width(), (float)output->Height());
@@ -245,14 +320,18 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input,
}
if (useBloom)
{
data.BloomMagnitude = settings.Bloom.Intensity;
data.BloomIntensity = settings.Bloom.Intensity;
data.BloomClamp = settings.Bloom.Clamp;
data.BloomThreshold = settings.Bloom.Threshold;
data.BloomBlurSigma = Math::Max(settings.Bloom.BlurSigma, 0.0001f);
data.BloomLimit = settings.Bloom.Limit;
data.BloomThresholdKnee = settings.Bloom.ThresholdKnee;
data.BloomBaseMix = settings.Bloom.BaseMix;
data.BloomHighMix = settings.Bloom.HighMix;
data.BloomMipCount = (float)bloomMipCount;
data.BloomLayer = 0.0f;
}
else
{
data.BloomMagnitude = 0;
data.BloomIntensity = 0;
}
if (useLensFlares)
{
@@ -298,94 +377,71 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input,
////////////////////////////////////////////////////////////////////////////////////
// Bloom
auto tempDesc = GPUTextureDescription::New2D(w2, h2, 0, output->Format(), GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::PerMipViews);
auto bloomTmp1 = RenderTargetPool::Get(tempDesc);
RENDER_TARGET_POOL_SET_NAME(bloomTmp1, "PostProcessing.Bloom");
// TODO: bloomTmp2 could be quarter res because we don't use it's first mip
auto bloomTmp2 = RenderTargetPool::Get(tempDesc);
RENDER_TARGET_POOL_SET_NAME(bloomTmp2, "PostProcessing.Bloom");
auto tempDesc = GPUTextureDescription::New2D(w2, h2, bloomMipCount, output->Format(), GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::PerMipViews);
auto bloomBuffer1 = RenderTargetPool::Get(tempDesc);
RENDER_TARGET_POOL_SET_NAME(bloomBuffer1, "PostProcessing.Bloom");
auto bloomBuffer2 = RenderTargetPool::Get(tempDesc);
RENDER_TARGET_POOL_SET_NAME(bloomBuffer2, "PostProcessing.Bloom");
for (int32 mip = 0; mip < bloomMipCount; mip++)
{
context->Clear(bloomBuffer1->View(0, mip), Color::Transparent);
context->Clear(bloomBuffer2->View(0, mip), Color::Transparent);
}
// Check if use bloom
if (useBloom)
{
// Bloom Threshold and downscale to 1/2
context->SetRenderTarget(bloomTmp1->View(0, 0));
context->SetRenderTarget(bloomBuffer1->View(0, 0));
context->SetViewportAndScissors((float)w2, (float)h2);
context->BindSR(0, input->View());
context->SetState(_psThreshold);
context->SetState(_psBloomBrightPass);
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
// Downscale to 1/4
context->SetRenderTarget(bloomTmp1->View(0, 1));
context->SetViewportAndScissors((float)w4, (float)h4);
context->BindSR(0, bloomTmp1->View(0, 0));
context->SetState(_psScale);
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
// Downscale to 1/8
context->SetRenderTarget(bloomTmp1->View(0, 2));
context->SetViewportAndScissors((float)w8, (float)h8);
context->BindSR(0, bloomTmp1->View(0, 1));
context->SetState(_psScale);
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
// TODO: perform blur when downscaling (13 tap) and when upscaling? (9 tap)
// Gaussian Blur
GB_ComputeKernel(data.BloomBlurSigma, static_cast<float>(w8), static_cast<float>(h8));
//int32 blurStages = (int)Rendering.Quality + 1;
int32 blurStages = 2;
for (int32 i = 0; i < blurStages; i++)
// Progressive downsamples
for (int32 mip = 1; mip < bloomMipCount; mip++)
{
// Horizontal Bloom Blur
Platform::MemoryCopy(_gbData.GaussianBlurCache, GaussianBlurCacheH, sizeof(GaussianBlurCacheH));
context->UpdateCB(cb1, &_gbData);
context->BindCB(1, cb1);
//
context->SetRenderTarget(bloomTmp2->View(0, 2));
context->BindSR(0, bloomTmp1->View(0, 2));
context->SetState(_psBlurH);
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
const int32 mipWidth = w2 >> mip;
const int32 mipHeight = h2 >> mip;
// Vertical Bloom Blur
Platform::MemoryCopy(_gbData.GaussianBlurCache, GaussianBlurCacheV, sizeof(GaussianBlurCacheV));
context->UpdateCB(cb1, &_gbData);
context->BindCB(1, cb1);
//
context->SetRenderTarget(bloomTmp1->View(0, 2));
context->BindSR(0, bloomTmp2->View(0, 2));
context->SetState(_psBlurV);
context->SetRenderTarget(bloomBuffer1->View(0, mip));
context->SetViewportAndScissors((float)mipWidth, (float)mipHeight);
context->BindSR(0, bloomBuffer1->View(0, mip - 1));
context->SetState(_psBloomDownsample);
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
}
// Upscale to 1/4 (use second tmp target to cache that downscale thress data for lens flares)
context->SetRenderTarget(bloomTmp2->View(0, 1));
context->SetViewportAndScissors((float)w4, (float)h4);
context->BindSR(0, bloomTmp1->View(0, 2));
context->SetState(_psScale);
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
// Progressive upsamples
for (int32 mip = bloomMipCount - 2; mip >= 0; mip--)
{
auto upscaleBuffer = bloomBuffer2;
if (mip == bloomMipCount - 2)
{
// If it's the first, copy the chain over
upscaleBuffer = bloomBuffer1;
}
const int32 mipWidth = w2 >> mip;
const int32 mipHeight = h2 >> mip;
// Upscale to 1/2
context->SetRenderTarget(bloomTmp1->View(0, 0));
context->SetViewportAndScissors((float)w2, (float)h2);
context->BindSR(0, bloomTmp2->View(0, 1));
context->SetState(_psScale);
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
data.BloomLayer = static_cast<float>(mip);
context->UpdateCB(cb0, &data);
context->SetRenderTarget(bloomBuffer2->View(0, mip));
context->SetViewportAndScissors((float)mipWidth, (float)mipHeight);
context->BindSR(0, upscaleBuffer->View(0, mip + 1));
context->BindSR(1, bloomBuffer1->View(0, mip + 1));
context->SetState(_psBloomDualFilterUpsample);
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
}
// Set bloom
// Set bloom output
context->UnBindSR(0);
context->BindSR(2, bloomTmp1->View(0, 0));
context->UnBindSR(1);
context->BindSR(2, bloomBuffer2->View(0, 0));
}
else
{
// No bloom texture
context->UnBindSR(2);
}
@@ -396,43 +452,47 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input,
if (useLensFlares)
{
// Prepare lens flares helper textures
context->BindSR(5, getCustomOrDefault(settings.LensFlares.LensStar, _defaultLensStar, TEXT("Engine/Textures/DefaultLensStarburst")));
context->BindSR(6, getCustomOrDefault(settings.LensFlares.LensColor, _defaultLensColor, TEXT("Engine/Textures/DefaultLensColor")));
context->BindSR(5, GetCustomOrDefault(settings.LensFlares.LensStar, _defaultLensStar, TEXT("Engine/Textures/DefaultLensStarburst")));
context->BindSR(6, GetCustomOrDefault(settings.LensFlares.LensColor, _defaultLensColor, TEXT("Engine/Textures/DefaultLensColor")));
// Render lens flares
context->SetRenderTarget(bloomTmp2->View(0, 1));
context->SetRenderTarget(bloomBuffer2->View(0, 1));
context->SetViewportAndScissors((float)w4, (float)h4);
context->BindSR(3, bloomTmp1->View(0, 1));
context->BindSR(3, bloomBuffer1->View(0, 1)); // Use mip 1 of bloomBuffer1 as source
context->SetState(_psGenGhosts);
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
context->UnBindSR(3);
// Gaussian blur kernel
GB_ComputeKernel(2.0f, static_cast<float>(w4), static_cast<float>(h4));
GaussianBlurData gbData;
Float4 GaussianBlurCacheH[GB_KERNEL_SIZE];
Float4 GaussianBlurCacheV[GB_KERNEL_SIZE];
gbData.Size = Float2(static_cast<float>(w4), static_cast<float>(h4));
GB_ComputeKernel(2.0f, gbData.Size.X, gbData.Size.Y, GaussianBlurCacheH, GaussianBlurCacheV);
// Gaussian blur H
Platform::MemoryCopy(_gbData.GaussianBlurCache, GaussianBlurCacheH, sizeof(GaussianBlurCacheH));
context->UpdateCB(cb1, &_gbData);
Platform::MemoryCopy(gbData.GaussianBlurCache, GaussianBlurCacheH, sizeof(GaussianBlurCacheH));
context->UpdateCB(cb1, &gbData);
context->BindCB(1, cb1);
context->SetRenderTarget(bloomTmp1->View(0, 1));
context->BindSR(0, bloomTmp2->View(0, 1));
context->SetRenderTarget(bloomBuffer1->View(0, 1));
context->BindSR(0, bloomBuffer2->View(0, 1));
context->SetState(_psBlurH);
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
// Gaussian blur V
Platform::MemoryCopy(_gbData.GaussianBlurCache, GaussianBlurCacheV, sizeof(GaussianBlurCacheV));
context->UpdateCB(cb1, &_gbData);
Platform::MemoryCopy(gbData.GaussianBlurCache, GaussianBlurCacheV, sizeof(GaussianBlurCacheV));
context->UpdateCB(cb1, &gbData);
context->BindCB(1, cb1);
context->SetRenderTarget(bloomTmp2->View(0, 1));
context->BindSR(0, bloomTmp1->View(0, 1));
context->SetRenderTarget(bloomBuffer2->View(0, 1));
context->BindSR(0, bloomBuffer1->View(0, 1));
context->SetState(_psBlurV);
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
// Set lens flares output
context->BindSR(3, bloomTmp2->View(0, 1));
context->BindSR(3, bloomBuffer2->View(0, 1));
}
else
{
@@ -472,7 +532,7 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input,
// - 5 - LensStar - lens star texture
// - 7 - ColorGradingLUT
context->BindSR(0, input->View());
context->BindSR(4, getCustomOrDefault(settings.LensFlares.LensDirt, _defaultLensDirt, TEXT("Engine/Textures/DefaultLensDirt")));
context->BindSR(4, GetCustomOrDefault(settings.LensFlares.LensDirt, _defaultLensDirt, TEXT("Engine/Textures/DefaultLensDirt")));
context->BindSR(7, colorGradingLutView);
// Composite final frame during single pass (done in full resolution)
@@ -482,6 +542,6 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input,
context->DrawFullscreenTriangle();
// Cleanup
RenderTargetPool::Release(bloomTmp1);
RenderTargetPool::Release(bloomTmp2);
RenderTargetPool::Release(bloomBuffer1);
RenderTargetPool::Release(bloomBuffer2);
}

View File

@@ -5,90 +5,25 @@
#include "RendererPass.h"
#include "Engine/Graphics/GPUPipelineStatePermutations.h"
#define GB_RADIUS 6
#define GB_KERNEL_SIZE (GB_RADIUS * 2 + 1)
/// <summary>
/// Post processing rendering service
/// Post-processing rendering service.
/// </summary>
class PostProcessingPass : public RendererPass<PostProcessingPass>
{
private:
GPU_CB_STRUCT(Data {
float BloomLimit;
float BloomThreshold;
float BloomMagnitude;
float BloomBlurSigma;
Float3 VignetteColor;
float VignetteShapeFactor;
Float2 InputSize;
float InputAspect;
float GrainAmount;
float GrainTime;
float GrainParticleSize;
int32 Ghosts;
float HaloWidth;
float HaloIntensity;
float Distortion;
float GhostDispersal;
float LensFlareIntensity;
Float2 LensInputDistortion;
float LensScale;
float LensBias;
Float2 InvInputSize;
float ChromaticDistortion;
float Time;
float Dummy1;
float PostExposure;
float VignetteIntensity;
float LensDirtIntensity;
Color ScreenFadeColor;
Matrix LensFlareStarMat;
});
GPU_CB_STRUCT(GaussianBlurData {
Float2 Size;
float Dummy3;
float Dummy4;
Float4 GaussianBlurCache[GB_KERNEL_SIZE]; // x-weight, y-offset
});
// Post Processing
AssetReference<Shader> _shader;
GPUPipelineState* _psThreshold;
GPUPipelineState* _psScale;
GPUPipelineState* _psBlurH;
GPUPipelineState* _psBlurV;
GPUPipelineState* _psGenGhosts;
GPUPipelineState* _psBloomBrightPass = nullptr;
GPUPipelineState* _psBloomDownsample = nullptr;
GPUPipelineState* _psBloomDualFilterUpsample = nullptr;
GPUPipelineState* _psBlurH = nullptr;
GPUPipelineState* _psBlurV = nullptr;
GPUPipelineState* _psGenGhosts = nullptr;
GPUPipelineStatePermutationsPs<3> _psComposite;
GaussianBlurData _gbData;
Float4 GaussianBlurCacheH[GB_KERNEL_SIZE];
Float4 GaussianBlurCacheV[GB_KERNEL_SIZE];
AssetReference<Texture> _defaultLensColor;
AssetReference<Texture> _defaultLensStar;
AssetReference<Texture> _defaultLensDirt;
public:
/// <summary>
/// Init
/// </summary>
PostProcessingPass();
public:
/// <summary>
/// Perform postFx rendering for the input task
/// </summary>
@@ -99,24 +34,12 @@ public:
void Render(RenderContext& renderContext, GPUTexture* input, GPUTexture* output, GPUTexture* colorGradingLUT);
private:
GPUTexture* getCustomOrDefault(Texture* customTexture, AssetReference<Texture>& defaultTexture, const Char* defaultName);
/// <summary>
/// Calculates the Gaussian blur filter kernel. This implementation is
/// ported from the original Java code appearing in chapter 16 of
/// "Filthy Rich Clients: Developing Animated and Graphical Effects for Desktop Java".
/// </summary>
/// <param name="sigma">Gaussian Blur sigma parameter</param>
/// <param name="width">Texture to blur width in pixels</param>
/// <param name="height">Texture to blur height in pixels</param>
void GB_ComputeKernel(float sigma, float width, float height);
#if COMPILE_WITH_DEV_ENV
void OnShaderReloading(Asset* obj)
{
_psThreshold->ReleaseGPU();
_psScale->ReleaseGPU();
_psBloomBrightPass->ReleaseGPU();
_psBloomDownsample->ReleaseGPU();
_psBloomDualFilterUpsample->ReleaseGPU();
_psBlurH->ReleaseGPU();
_psBlurV->ReleaseGPU();
_psGenGhosts->ReleaseGPU();
@@ -126,14 +49,12 @@ private:
#endif
public:
// [RendererPass]
String ToString() const override;
bool Init() override;
void Dispose() override;
protected:
// [RendererPass]
bool setupResources() override;
};

View File

@@ -36,10 +36,15 @@
META_CB_BEGIN(0, Data)
float BloomLimit;
float BloomThreshold;
float BloomMagnitude;
float BloomBlurSigma;
float BloomIntensity;
float BloomClamp;
float BloomThreshold;
float BloomThresholdKnee;
float BloomBaseMix;
float BloomHighMix;
float BloomMipCount;
float BloomLayer;
float3 VignetteColor;
float VignetteShapeFactor;
@@ -254,31 +259,230 @@ float2 coordRot(in float2 tc, in float angle)
// Uses a lower exposure to produce a value suitable for a bloom pass
META_PS(true, FEATURE_LEVEL_ES2)
float4 PS_Threshold(Quad_VS2PS input) : SV_Target
float4 PS_BloomBrightPass(Quad_VS2PS input) : SV_Target
{
float4 color = Input0.SampleLevel(SamplerLinearClamp, input.TexCoord, 0);
return clamp(color - BloomThreshold, 0, BloomLimit);
// Get dimensions for precise texel calculations
uint width, height;
Input0.GetDimensions(width, height);
float2 texelSize = 1.0 / float2(width, height);
// Use fixed 13-tap sample pattern for initial bright pass
float3 color = 0;
float totalWeight = 0;
// Center sample with high weight for energy preservation
float3 center = Input0.Sample(SamplerLinearClamp, input.TexCoord).rgb;
// Apply Karis average to prevent bright pixels from dominating
float centerLuma = max(dot(center, float3(0.2126, 0.7152, 0.0722)), 0.0001);
center = center / (1.0 + centerLuma);
float centerWeight = 4.0;
color += center * centerWeight;
totalWeight += centerWeight;
// Inner ring - fixed offset at 1.0 texel distance
UNROLL
for (int i = 0; i < 4; i++)
{
float angle = i * (PI / 2.0);
float2 offset = float2(cos(angle), sin(angle)) * texelSize;
float3 sample = Input0.Sample(SamplerLinearClamp, input.TexCoord + offset).rgb;
// Apply Karis average
float sampleLuma = max(dot(sample, float3(0.2126, 0.7152, 0.0722)), 0.0001);
sample = sample / (1.0 + sampleLuma);
float weight = 2.0;
color += sample * weight;
totalWeight += weight;
}
// Outer ring - fixed offset at 1.4142 texel distance (diagonal)
UNROLL
for (int j = 0; j < 8; j++)
{
float angle = j * (PI / 4.0);
float2 offset = float2(cos(angle), sin(angle)) * texelSize * 1.4142;
float3 sample = Input0.Sample(SamplerLinearClamp, input.TexCoord + offset).rgb;
// Apply Karis average
float sampleLuma = max(dot(sample, float3(0.2126, 0.7152, 0.0722)), 0.0001);
sample = sample / (1.0 + sampleLuma);
float weight = 1.0;
color += sample * weight;
totalWeight += weight;
}
color /= totalWeight;
// Un-apply Karis average to maintain energy
float finalLuma = max(dot(color, float3(0.2126, 0.7152, 0.0722)), 0.0001);
color = color * (1.0 + finalLuma);
// Apply threshold with quadratic rolloff for smoother transition
float luminance = dot(color, float3(0.2126, 0.7152, 0.0722));
float threshold = max(BloomThreshold, 0.2);
float knee = threshold * BloomThresholdKnee;
float softMax = threshold + knee;
float contribution = 0;
if (luminance > threshold)
{
if (luminance < softMax)
{
// Quadratic softening between threshold and (threshold + knee)
float x = (luminance - threshold) / knee;
contribution = x * x * 0.5;
}
else
{
// Full contribution above softMax
contribution = luminance - threshold;
}
}
float testc = BloomClamp;
float3 clamped = (color * contribution);
clamped.r = min(clamped.r, testc);
clamped.g = min(clamped.g, testc);
clamped.b = min(clamped.b, testc);
// Store threshold result in alpha for downsample chain
return float4(clamped, luminance);
}
// Uses hw bilinear filtering for upscaling or downscaling
META_PS(true, FEATURE_LEVEL_ES2)
float4 PS_Scale(Quad_VS2PS input) : SV_Target
float4 PS_BloomDownsample(Quad_VS2PS input) : SV_Target
{
// TODO: we could use quality switch for bloom effect
uint width, height;
Input0.GetDimensions(width, height);
float2 texelSize = 1.0 / float2(width, height);
return Input0.SampleLevel(SamplerLinearClamp, input.TexCoord, 0);
/*
float3 color;
// TODO: use gather for dx11 and dx12??
color = Input0.SampleLevel(SamplerLinearClamp, input.TexCoord, 0, int2( 0, 0)).rgb;
color += Input0.SampleLevel(SamplerLinearClamp, input.TexCoord, 0, int2( 0, 1)).rgb;
color += Input0.SampleLevel(SamplerLinearClamp, input.TexCoord, 0, int2( 0,-1)).rgb;
color += Input0.SampleLevel(SamplerLinearClamp, input.TexCoord, 0, int2(-1, 0)).rgb;
color += Input0.SampleLevel(SamplerLinearClamp, input.TexCoord, 0, int2( 1, 0)).rgb;
color *= (1.0f / 5.0f);
// 9-tap tent filter with fixed weights
float3 color = 0;
float totalWeight = 0;
return float4(color, 1);
*/
// Sample offsets (fixed)
const float2 offsets[9] =
{
float2( 0, 0), // Center
float2(-1, -1), // Corners
float2( 1, -1),
float2(-1, 1),
float2( 1, 1),
float2( 0, -1), // Cross
float2(-1, 0),
float2( 1, 0),
float2( 0, 1)
};
// Sample weights (fixed)
const float weights[9] =
{
4.0, // Center
1.0, // Corners
1.0,
1.0,
1.0,
2.0, // Cross
2.0,
2.0,
2.0
};
UNROLL
for (int i = 0; i < 9; i++)
{
float2 offset = offsets[i] * texelSize * 2.0; // Fixed scale factor for stability
float4 sample = Input0.Sample(SamplerLinearClamp, input.TexCoord + offset);
color += sample.rgb * weights[i];
totalWeight += weights[i];
}
return float4(color / totalWeight, 1.0);
}
// Bloom upsample pass. Filters Input0 with a 9-tap kernel (center weight 4, cross weights 2, corner weights 1),
// scales the result by a per-mip intensity and then adds Input1 sampled at the same UV.
// On the CPU side the pass is run once per mip, from the second smallest mip down to mip 0, with shader resource slot 0 set to
// the next smaller mip of the previous upsample result (the downsample chain on the first pass) and slot 1 set to the next
// smaller mip of the downsample chain (presumably Input0/Input1 map to those slots - confirm against the shader inputs).
META_PS(true, FEATURE_LEVEL_ES2)
float4 PS_BloomDualFilterUpsample(Quad_VS2PS input) : SV_Target
{
// Horizontal stretch of the kernel; fixed at 1.0 so horizontal and vertical offsets are equal
float anisotropy = 1.0;
// Texel size of the source mip (Input0)
uint width, height;
Input0.GetDimensions(width, height);
float2 texelSize = 1.0 / float2(width, height);
// Maintain fixed scale through mip chain
float baseOffset = 1.0;
float offsetScale = (1.0) * baseOffset;
float3 color = 0;
float totalWeight = 0;
// Center
float4 center = Input0.Sample(SamplerLinearClamp, input.TexCoord);
float centerWeight = 4.0;
color += center.rgb * centerWeight;
totalWeight += centerWeight;
// Cross - fixed distance samples
float2 crossOffsets[4] = {
float2(offsetScale * anisotropy, 0),
float2(-offsetScale * anisotropy, 0),
float2(0, offsetScale),
float2(0, -offsetScale)
};
UNROLL
for (int i = 0; i < 4; i++)
{
float4 sample = Input0.Sample(SamplerLinearClamp, input.TexCoord + crossOffsets[i] * texelSize);
float weight = 2.0;
color += sample.rgb * weight;
totalWeight += weight;
}
// Corners - fixed distance samples
float2 cornerOffsets[4] =
{
float2(offsetScale * anisotropy, offsetScale),
float2(-offsetScale * anisotropy, offsetScale),
float2(offsetScale * anisotropy, -offsetScale),
float2(-offsetScale * anisotropy, -offsetScale)
};
UNROLL
for (int j = 0; j < 4; j++)
{
float4 sample = Input0.Sample(SamplerLinearClamp, input.TexCoord + cornerOffsets[j] * texelSize);
float weight = 1.0;
color += sample.rgb * weight;
totalWeight += weight;
}
// Normalize by the accumulated weights (4 + 4*2 + 4*1 = 16)
color /= totalWeight;
// Only the width of Input1 is used below, to decide whether it can be sampled
uint width1, height1;
Input1.GetDimensions(width1, height1);
// Calculate mip fade factor from the index of the mip being written: 0 for mip 0 (the largest bloom mip), growing towards 1 for
// higher mip indices (smaller mips). The pass is never run for the last mip, so the factor stays below 1.
// Assumes BloomMipCount > 1 (division by zero otherwise).
float mipFade = BloomLayer / (BloomMipCount - 1);
// Author's note (Muzz):
// The per-mip intensity is a lerp between BloomBaseMix (used at mip 0) and BloomHighMix (approached at the higher mip indices).
// Setting both to 0.6 is a decent default, but playing with these numbers lets you dial in the blending between the lowest and highest mips.
// Going too far can produce really ugly bloom.
// Note that this also changes the overall intensity of the bloom.
float mipIntensity = lerp(BloomBaseMix, BloomHighMix, mipFade);
color *= mipIntensity;
// Add Input1 only when it reports a non-zero width (presumably 0 when no texture is bound - confirm)
BRANCH
if (width1 > 0)
{
float3 previousMip = Input1.Sample(SamplerLinearClamp, input.TexCoord).rgb;
color += previousMip;
}
return float4(color, 1.0);
}
// Horizontal gaussian blur
@@ -286,13 +490,11 @@ META_PS(true, FEATURE_LEVEL_ES2)
float4 PS_GaussainBlurH(Quad_VS2PS input) : SV_Target
{
    // Weighted sum of GB_KERNEL_SIZE taps spread along the X axis.
    // Each GaussianBlurCache entry stores the tap weight in X and the tap UV offset in Y.
    float4 sum = 0;
    UNROLL
    for (int tap = 0; tap < GB_KERNEL_SIZE; tap++)
    {
        float2 tapUV = input.TexCoord + float2(GaussianBlurCache[tap].y, 0.0);
        sum += Input0.Sample(SamplerLinearClamp, tapUV) * GaussianBlurCache[tap].x;
    }
    return sum;
}
@@ -301,13 +503,11 @@ META_PS(true, FEATURE_LEVEL_ES2)
float4 PS_GaussainBlurV(Quad_VS2PS input) : SV_Target
{
    // Weighted sum of GB_KERNEL_SIZE taps spread along the Y axis.
    // Each GaussianBlurCache entry stores the tap weight in X and the tap UV offset in Y.
    float4 sum = 0;
    UNROLL
    for (int tap = 0; tap < GB_KERNEL_SIZE; tap++)
    {
        float2 tapUV = input.TexCoord + float2(0.0, GaussianBlurCache[tap].y);
        sum += Input0.Sample(SamplerLinearClamp, tapUV) * GaussianBlurCache[tap].x;
    }
    return sum;
}
@@ -471,18 +671,16 @@ float4 PS_Composite(Quad_VS2PS input) : SV_Target
color.rgb += lensFlares;
}
// Bloom
BRANCH
if (BloomMagnitude > 0)
{
// Sample the bloom
float3 bloom = Input2.SampleLevel(SamplerLinearClamp, uv, 0).rgb;
bloom = bloom * BloomMagnitude;
// Accumulate final bloom lght
lensLight += max(0, bloom * 3.0f + (- 1.0f * 3.0f));
color.rgb += bloom;
}
// Bloom
BRANCH
if (BloomIntensity > 0)
{
// Sample the final bloom result
float3 bloom = Input2.Sample(SamplerLinearClamp, input.TexCoord).rgb;
bloom = bloom * BloomIntensity;
lensLight += max(0, bloom * 3.0f + (-1.0f * 3.0f));
color.rgb += bloom;
}
// Lens Dirt
float3 lensDirt = LensDirt.SampleLevel(SamplerLinearClamp, uv, 0).rgb;