Add UseAlpha to RenderBuffers for pass-through renderer with alpha channel

2022-08-01 22:58:22 +02:00
parent fe1cab6a7f
commit a6e9abfceb
14 changed files with 136 additions and 107 deletions
--- a/Content/Editor/MaterialTemplates/Features/DeferredShading.hlsl
+++ b/Content/Editor/MaterialTemplates/Features/DeferredShading.hlsl
@@ -25,7 +25,7 @@ void PS_GBuffer(
 #endif
 	)
 {
-	Light = 0;
+	Light = float4(0, 0, 0, 1);
 	
 #if USE_DITHERED_LOD_TRANSITION
 	// LOD masking
--- a/Source/Engine/Graphics/Materials/MaterialShader.h
+++ b/Source/Engine/Graphics/Materials/MaterialShader.h
@@ -10,7 +10,7 @@
 /// <summary>
 /// Current materials shader version.
 /// </summary>
-#define MATERIAL_GRAPH_VERSION 155
+#define MATERIAL_GRAPH_VERSION 156

 class Material;
 class GPUShader;
--- a/Source/Engine/Graphics/RenderBuffers.cpp
+++ b/Source/Engine/Graphics/RenderBuffers.cpp
@@ -111,6 +111,33 @@ GPUTexture* RenderBuffers::RequestHalfResDepth(GPUContext* context)
    return HalfResDepth;
 }

+PixelFormat RenderBuffers::GetOutputFormat() const
+{
+    return _useAlpha ? PixelFormat::R16G16B16A16_Float : PixelFormat::R11G11B10_Float; // 4-channel float format only when alpha output is enabled
+}
+
+bool RenderBuffers::GetUseAlpha() const
+{
+    return _useAlpha; // Changed only through SetUseAlpha; the member is initialized to false
+}
+
+void RenderBuffers::SetUseAlpha(bool value)
+{
+    // Skip when the flag already has the requested state
+    if (_useAlpha == value)
+        return;
+    _useAlpha = value;
+
+    // No size set yet (width is zero), so there is nothing to reallocate
+    if (_width == 0)
+        return;
+    // Recreate both helper HDR targets with the format matching the new flag
+    auto hdrDesc = GPUTextureDescription::New2D(_width, _height, GetOutputFormat(), GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget);
+    hdrDesc.DefaultClearColor = Color::Transparent;
+    RT1_FloatRGB->Init(hdrDesc);
+    RT2_FloatRGB->Init(hdrDesc);
+}
+
 const RenderBuffers::CustomBuffer* RenderBuffers::FindCustomBuffer(const StringView& name) const
 {
    for (const CustomBuffer* e : CustomBuffers)
@@ -169,7 +196,7 @@ bool RenderBuffers::Init(int32 width, int32 height)
    result |= GBuffer3->Init(desc);

    // Helper HDR buffers
-    desc.Format = PixelFormat::R11G11B10_Float;
+    desc.Format = GetOutputFormat();
    desc.DefaultClearColor = Color::Transparent;
    result |= RT1_FloatRGB->Init(desc);
    result |= RT2_FloatRGB->Init(desc);
--- a/Source/Engine/Graphics/RenderBuffers.h
+++ b/Source/Engine/Graphics/RenderBuffers.h
@@ -13,9 +13,6 @@
 #define GBUFFER2_FORMAT PixelFormat::R8G8B8A8_UNorm
 #define GBUFFER3_FORMAT PixelFormat::R8G8B8A8_UNorm

-// Light accumulation buffer format (direct+indirect light, materials emissive)
-#define LIGHT_BUFFER_FORMAT PixelFormat::R11G11B10_Float
-
 /// <summary>
 /// The scene rendering buffers container.
 /// </summary>
@@ -39,6 +36,7 @@ protected:
    int32 _width = 0;
    int32 _height = 0;
    float _aspectRatio = 0.0f;
+    bool _useAlpha = false;
    Viewport _viewport;
    Array<GPUTexture*, FixedAllocation<32>> _resources;

@@ -153,6 +151,21 @@ public:
        return _viewport;
    }

+    /// <summary>
+    /// Gets the output buffers format (R11G11B10_Float, or R16G16B16A16_Float when the UseAlpha property is enabled).
+    /// </summary>
+    API_PROPERTY() PixelFormat GetOutputFormat() const;
+
+    /// <summary>
+    /// True if the rendering buffers support alpha output and pass the alpha mask of the scene through during rendering (at the cost of reduced performance).
+    /// </summary>
+    API_PROPERTY() bool GetUseAlpha() const;
+
+    /// <summary>
+    /// True if the rendering buffers support alpha output and pass the alpha mask of the scene through during rendering (at the cost of reduced performance).
+    /// </summary>
+    API_PROPERTY() void SetUseAlpha(bool value);
+
    const CustomBuffer* FindCustomBuffer(const StringView& name) const;

    template<class T>
--- a/Source/Engine/Renderer/AntiAliasing/FXAA.cpp
+++ b/Source/Engine/Renderer/AntiAliasing/FXAA.cpp
@@ -7,12 +7,19 @@
 #include "Engine/Graphics/Graphics.h"
 #include "Engine/Graphics/RenderTask.h"

+PACK_STRUCT(struct Data // Size must equal constant buffer 0 of the Shaders/FXAA shader (checked in setupResources)
+    {
+    Float4 ScreenSize; // NOTE(review): component meaning is defined by the shader side - confirm against FXAA.shader
+    });
+
+String FXAA::ToString() const
+{
+    return TEXT("FXAA"); // Constant text identifying this renderer pass
+}
+
 bool FXAA::Init()
 {
-    // Create pipeline state
    _psFXAA.CreatePipelineStates();
-
-    // Load shader
    _shader = Content::LoadAsyncInternal<Shader>(TEXT("Shaders/FXAA"));
    if (_shader == nullptr)
        return true;
@@ -25,21 +32,17 @@ bool FXAA::Init()

 bool FXAA::setupResources()
 {
-    // Check shader
    if (!_shader->IsLoaded())
    {
        return true;
    }
    const auto shader = _shader->GetShader();
-
-    // Validate shader constant buffer size
    if (shader->GetCB(0)->GetSize() != sizeof(Data))
    {
        REPORT_INVALID_SHADER_PASS_CB_SIZE(shader, 0, Data);
        return true;
    }

-    // Create pipeline state
    GPUPipelineState::Description psDesc;
    if (!_psFXAA.IsValid())
    {
@@ -56,7 +59,6 @@ void FXAA::Dispose()
    // Base
    RendererPass::Dispose();

-    // Cleanup
    _psFXAA.Delete();
    _shader = nullptr;
 }
@@ -64,9 +66,6 @@ void FXAA::Dispose()
 void FXAA::Render(RenderContext& renderContext, GPUTexture* input, GPUTextureView* output)
 {
    auto context = GPUDevice::Instance->GetMainContext();
-    const auto qualityLevel = Math::Clamp(static_cast<int32>(Graphics::AAQuality), 0, static_cast<int32>(Quality::MAX) - 1);
-
-    // Ensure to have valid data
    if (checkIfSkipPass())
    {
        // Resources are missing. Do not perform rendering, just copy input frame.
@@ -74,7 +73,6 @@ void FXAA::Render(RenderContext& renderContext, GPUTexture* input, GPUTextureVie
        context->Draw(input);
        return;
    }
-
    PROFILE_GPU_CPU("Fast Approximate Antialiasing");

    // Bind input
@@ -87,6 +85,7 @@ void FXAA::Render(RenderContext& renderContext, GPUTexture* input, GPUTextureVie

    // Render
    context->SetRenderTarget(output);
+    const auto qualityLevel = Math::Clamp(static_cast<int32>(Graphics::AAQuality), 0, static_cast<int32>(Quality::MAX) - 1);
    context->SetState(_psFXAA.Get(qualityLevel));
    context->DrawFullscreenTriangle();
 }
--- a/Source/Engine/Renderer/AntiAliasing/FXAA.h
+++ b/Source/Engine/Renderer/AntiAliasing/FXAA.h
@@ -11,17 +11,10 @@
 class FXAA : public RendererPass<FXAA>
 {
 private:
-
-    PACK_STRUCT(struct Data
-        {
-            Float4 ScreenSize;
-        });
-
    AssetReference<Shader> _shader;
    GPUPipelineStatePermutationsPs<static_cast<int32>(Quality::MAX)> _psFXAA;

 public:
-
    /// <summary>
    /// Performs AA pass rendering for the input task.
    /// </summary>
@@ -31,7 +24,6 @@ public:
    void Render(RenderContext& renderContext, GPUTexture* input, GPUTextureView* output);

 private:
-
 #if COMPILE_WITH_DEV_ENV
    void OnShaderReloading(Asset* obj)
    {
@@ -41,18 +33,12 @@ private:
 #endif

 public:
-
    // [RendererPass]
-    String ToString() const override
-    {
-        return TEXT("FXAA");
-    }
-
+    String ToString() const override;
    bool Init() override;
    void Dispose() override;

 protected:
-
    // [RendererPass]
    bool setupResources() override;
 };
--- a/Source/Engine/Renderer/DepthOfFieldPass.cpp
+++ b/Source/Engine/Renderer/DepthOfFieldPass.cpp
@@ -20,6 +20,7 @@
 #define DOF_GRID_SIZE 450
 #define DOF_APRON_SIZE DOF_MAX_SAMPLE_RADIUS
 #define DOF_THREAD_GROUP_SIZE (DOF_GRID_SIZE + (DOF_APRON_SIZE * 2))
+#define DOF_DEPTH_BLUR_FORMAT PixelFormat::R16G16_Float

 DepthOfFieldPass::DepthOfFieldPass()
 {
@@ -320,7 +321,8 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
    }

    // Peek temporary render target for dof pass
-    tempDesc = GPUTextureDescription::New2D(dofWidth, dofHeight, DOF_RT_FORMAT);
+    auto dofFormat = renderContext.Buffers->GetOutputFormat();
+    tempDesc = GPUTextureDescription::New2D(dofWidth, dofHeight, dofFormat);
    GPUTexture* dofInput = RenderTargetPool::Get(tempDesc);

    // Do the bokeh point generation, or just do a copy if disabled
@@ -374,7 +376,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
    context->FlushState();
    {
        // Peek temporary targets for two blur passes
-        tempDesc = GPUTextureDescription::New2D(dofWidth, dofHeight, DOF_RT_FORMAT, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::UnorderedAccess);
+        tempDesc = GPUTextureDescription::New2D(dofWidth, dofHeight, dofFormat, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::UnorderedAccess);
        auto dofTargetH = RenderTargetPool::Get(tempDesc);
        auto dofTargetV = RenderTargetPool::Get(tempDesc);

@@ -421,7 +423,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
    // Render the bokeh points
    if (isBokehGenerationEnabled)
    {
-        tempDesc = GPUTextureDescription::New2D(bokehTargetWidth, bokehTargetHeight, DOF_RT_FORMAT);
+        tempDesc = GPUTextureDescription::New2D(bokehTargetWidth, bokehTargetHeight, dofFormat);
        auto bokehTarget = RenderTargetPool::Get(tempDesc);
        context->Clear(*bokehTarget, Color::Black);

@@ -444,7 +446,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
        }

        // Composite the bokeh rendering results with the depth of field result
-        tempDesc = GPUTextureDescription::New2D(dofWidth, dofHeight, DOF_RT_FORMAT);
+        tempDesc = GPUTextureDescription::New2D(dofWidth, dofHeight, dofFormat);
        auto compositeTarget = RenderTargetPool::Get(tempDesc);
        context->BindSR(0, bokehTarget);
        context->BindSR(1, dofOutput);
--- a/Source/Engine/Renderer/DepthOfFieldPass.h
+++ b/Source/Engine/Renderer/DepthOfFieldPass.h
@@ -5,9 +5,6 @@
 #include "RendererPass.h"
 #include "Engine/Graphics/PostProcessSettings.h"

-#define DOF_DEPTH_BLUR_FORMAT PixelFormat::R16G16_Float
-#define DOF_RT_FORMAT PixelFormat::R11G11B10_Float
-
 /// <summary>
 /// Depth of Field rendering
 /// </summary>
--- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp
+++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp
@@ -397,11 +397,11 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
        uint64 memUsage = 0;
        // TODO: try using BC4/BC5/BC7 block compression for Surface Atlas (eg. for Tiles material properties)
 #define INIT_ATLAS_TEXTURE(texture, format) desc.Format = format; surfaceAtlasData.texture = RenderTargetPool::Get(desc); if (!surfaceAtlasData.texture) return true; memUsage += surfaceAtlasData.texture->GetMemoryUsage()
-        INIT_ATLAS_TEXTURE(AtlasEmissive, LIGHT_BUFFER_FORMAT);
+        INIT_ATLAS_TEXTURE(AtlasEmissive, PixelFormat::R11G11B10_Float);
        INIT_ATLAS_TEXTURE(AtlasGBuffer0, GBUFFER0_FORMAT);
        INIT_ATLAS_TEXTURE(AtlasGBuffer1, GBUFFER1_FORMAT);
        INIT_ATLAS_TEXTURE(AtlasGBuffer2, GBUFFER2_FORMAT);
-        INIT_ATLAS_TEXTURE(AtlasLighting, LIGHT_BUFFER_FORMAT);
+        INIT_ATLAS_TEXTURE(AtlasLighting, PixelFormat::R11G11B10_Float);
        desc.Flags = GPUTextureFlags::DepthStencil | GPUTextureFlags::ShaderResource;
        INIT_ATLAS_TEXTURE(AtlasDepth, PixelFormat::D16_UNorm);
 #undef INIT_ATLAS_TEXTURE
--- a/Source/Engine/Renderer/Renderer.cpp
+++ b/Source/Engine/Renderer/Renderer.cpp
@@ -330,7 +330,8 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext)
    renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::Distortion);

    // Get the light accumulation buffer
-    auto tempDesc = GPUTextureDescription::New2D(renderContext.Buffers->GetWidth(), renderContext.Buffers->GetHeight(), LIGHT_BUFFER_FORMAT);
+    auto outputFormat = renderContext.Buffers->GetOutputFormat();
+    auto tempDesc = GPUTextureDescription::New2D(renderContext.Buffers->GetWidth(), renderContext.Buffers->GetHeight(), outputFormat);
    auto lightBuffer = RenderTargetPool::Get(tempDesc);

 #if USE_EDITOR
--- a/Source/Shaders/DepthOfField.shader
+++ b/Source/Shaders/DepthOfField.shader
@@ -156,10 +156,10 @@ void CS_DepthOfFieldH(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_Group
 	float2 sampleCoord = saturate(samplePos / float2(textureSize));
 #endif
 #if USE_CS_LINEAR_SAMPLING
-	float3 color = Input0.SampleLevel(SamplerLinearClamp, sampleCoord, 0.0f).xyz;
+	float4 color = Input0.SampleLevel(SamplerLinearClamp, sampleCoord, 0.0f).rgba;
 	float2 depthBlur = Input1.SampleLevel(SamplerLinearClamp, sampleCoord, 0.0f).xy;
 #else
-	float3 color = Input0.SampleLevel(SamplerPointClamp, sampleCoord, 0.0f).xyz;
+	float4 color = Input0.SampleLevel(SamplerPointClamp, sampleCoord, 0.0f).rgba;
 	float2 depthBlur = Input1.SampleLevel(SamplerPointClamp, sampleCoord, 0.0f).xy;
 #endif
 	float depth = depthBlur.x;
@@ -167,7 +167,7 @@ void CS_DepthOfFieldH(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_Group
 	float cocSize = blur * DOF_MAX_SAMPLE_RADIUS;

 	// Store in shared memory
-	Samples[groupThreadID.x].Color = color;
+	Samples[groupThreadID.x].Color = color.rgb;
 	Samples[groupThreadID.x].Depth = depth;
 	Samples[groupThreadID.x].Blur = blur;

@@ -203,11 +203,11 @@ void CS_DepthOfFieldH(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_Group

 			// Write out the result
 			outputColor /= totalContribution;
-			OutputTexture[samplePos] = float4(max(outputColor, 0), 1.0f);
+			OutputTexture[samplePos] = float4(max(outputColor, 0), color.a);
 		}
 		else
 		{
-			OutputTexture[samplePos] = float4(color, 1.0f);
+			OutputTexture[samplePos] = color;
 		}
 	}
 }
@@ -237,10 +237,10 @@ void CS_DepthOfFieldV(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_Group
 	float2 sampleCoord = saturate(samplePos / float2(textureSize));
 #endif
 #if USE_CS_LINEAR_SAMPLING
-	float3 color = Input0.SampleLevel(SamplerLinearClamp, sampleCoord, 0.0f).xyz;
+	float4 color = Input0.SampleLevel(SamplerLinearClamp, sampleCoord, 0.0f).rgba;
 	float2 depthBlur = Input1.SampleLevel(SamplerLinearClamp, sampleCoord, 0.0f).xy;
 #else
-	float3 color = Input0.SampleLevel(SamplerPointClamp, sampleCoord, 0.0f).xyz;
+	float4 color = Input0.SampleLevel(SamplerPointClamp, sampleCoord, 0.0f).rgba;
 	float2 depthBlur = Input1.SampleLevel(SamplerPointClamp, sampleCoord, 0.0f).xy;
 #endif
 	float depth = depthBlur.x;
@@ -248,7 +248,7 @@ void CS_DepthOfFieldV(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_Group
 	float cocSize = blur * DOF_MAX_SAMPLE_RADIUS;

 	// Store in shared memory
-	Samples[groupThreadID.y].Color = color;
+	Samples[groupThreadID.y].Color = color.rgb;
 	Samples[groupThreadID.y].Depth = depth;
 	Samples[groupThreadID.y].Blur = blur;

@@ -284,11 +284,11 @@ void CS_DepthOfFieldV(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_Group

 			// Write out the result
 			outputColor /= totalContribution;
-			OutputTexture[samplePos] = float4(max(outputColor, 0), 1.0f);
+			OutputTexture[samplePos] = float4(max(outputColor, 0), color.a);
 		}
 		else
 		{
-			OutputTexture[samplePos] = float4(color, 1.0f);
+			OutputTexture[samplePos] = color;
 		}
 	}
 }
@@ -456,11 +456,10 @@ META_PS(true, FEATURE_LEVEL_SM5)
 float4 PS_GenerateBokeh(Quad_VS2PS input) : SV_Target
 {
 #if FEATURE_LEVEL >= FEATURE_LEVEL_SM5
-	
-	float2 centerCoord = input.TexCoord;
-
 	// Start with center sample color
-	float3 centerColor = Input0.Sample(SamplerPointClamp, centerCoord).rgb;
+	float2 centerCoord = input.TexCoord;
+	float4 centerSample = Input0.Sample(SamplerPointClamp, centerCoord);
+	float3 centerColor = centerSample.rgb;
 	float3 colorSum = centerColor;
 	float totalContribution = 1.0f;

@@ -505,12 +504,9 @@ float4 PS_GenerateBokeh(Quad_VS2PS input) : SV_Target
 		centerColor = 0.0f;
 	}

-	return float4(centerColor, 1.0f);
-	
+	return float4(centerColor, centerSample.a);
 #else
-
 	return float4(0, 0, 0, 1.0f);
-
 #endif
 }

@@ -520,10 +516,10 @@ float4 PS_GenerateBokeh(Quad_VS2PS input) : SV_Target
 META_PS(true, FEATURE_LEVEL_SM5)
 float4 PS_DoNotGenerateBokeh(Quad_VS2PS input) : SV_Target
 {
-	float2 centerCoord = input.TexCoord;
-
 	// Start with center sample color
-	float3 centerColor = Input0.Sample(SamplerPointClamp, centerCoord).rgb;
+	float2 centerCoord = input.TexCoord;
+	float4 centerSample = Input0.Sample(SamplerPointClamp, centerCoord);
+	float3 centerColor = centerSample.rgb;
 	float3 colorSum = centerColor;
 	float totalContribution = 1.0f;

@@ -561,7 +557,7 @@ float4 PS_DoNotGenerateBokeh(Quad_VS2PS input) : SV_Target
 		centerColor = 0.0f;
 	}

-	return float4(centerColor, 1.0f);
+	return float4(centerColor, centerSample.a);
 }

 #else
@@ -642,10 +638,8 @@ float4 PS_BokehComposite(in Quad_VS2PS input) : SV_Target
 {
    float4 bokehSample = Input0.Sample(SamplerLinearClamp, input.TexCoord);
 	float4 dofSample = Input1.Sample(SamplerPointClamp, input.TexCoord);
-
 	float3 composite = bokehSample.rgb + dofSample.rgb;
-
-	return float4(composite, 1.0f);
+	return float4(composite, dofSample.a);
 }

 #endif
--- a/Source/Shaders/EyeAdaptation.shader
+++ b/Source/Shaders/EyeAdaptation.shader
@@ -90,7 +90,7 @@ float4 PS_ApplyLuminance(Quad_VS2PS input) : SV_Target
 {
 	float averageLuminance = AverageLuminance.Load(int3(0, 0, 0)).x;
 	float exposure = 1.0f / averageLuminance;
-	return (PreExposure * exposure).xxxx;
+	return float4((PreExposure * exposure).xxx, 1);
 }

 #endif
--- a/Source/Shaders/FXAA.shader
+++ b/Source/Shaders/FXAA.shader
@@ -18,6 +18,8 @@

 #include "./Flax/Common.hlsl"

+#define FXAA_ALPHA_PASSTHROUGH 1
+
 Texture2D Source : register(t0);

 #define SAMPLE_TEXTURE_LEVEL(texture, uv, level) texture.SampleLevel(SamplerLinearClamp, uv, level)
@@ -67,6 +69,9 @@ FXAA_DEBUG_OFFSET      - Red/blue for -/+ x, gold/skyblue for -/+ y.
 #ifndef     FXAA_DEBUG_OFFSET
    #define FXAA_DEBUG_OFFSET      0
 #endif    
+#ifndef     FXAA_ALPHA_PASSTHROUGH
+    #define FXAA_ALPHA_PASSTHROUGH 0
+#endif

 #if FXAA_DEBUG_PASSTHROUGH || FXAA_DEBUG_HORZVERT || FXAA_DEBUG_PAIR
    #define FXAA_DEBUG 1
@@ -232,14 +237,18 @@ float FxaaLuma(float3 rgb)
 } 

 // Support any extra filtering before returning color
-float4 FxaaFilterReturn(float3 rgb)
+float4 FxaaFilterReturn(float3 rgb, float4 source) // source: unfiltered center sample, only its alpha is read here
 {
    #if FXAA_SRGB_ROP
        // Do sRGB encoded value to linear conversion
 		float3 b = rgb > FxaaToFloat3(0.04045);
 		rgb = rgb * FxaaToFloat3(1.0/12.92) * !b + pow(rgb * FxaaToFloat3(1.0/1.055) + FxaaToFloat3(0.055/1.055), FxaaToFloat3(2.4)) * b; 
    #endif
+#if FXAA_ALPHA_PASSTHROUGH
+	return float4(rgb, source.a); // Keep the alpha of the source pixel instead of forcing opaque output
+#else
 	return float4(rgb, 1);
+#endif
 }

 META_CB_BEGIN(0, Data)
@@ -277,9 +286,10 @@ then the shader early exits (no visible aliasing).
 This threshold is clamped at a minimum value ("FXAA_EDGE_THRESHOLD_MIN")
 to avoid processing in really dark areas.    
 */
+    float4 source = SAMPLE_TEXTURE_OFFSET(Source, input.TexCoord, 0.0, int2( 0, 0));
    float3 rgbN = SAMPLE_TEXTURE_OFFSET(Source, input.TexCoord, 0.0, int2( 0,-1)).xyz;
    float3 rgbW = SAMPLE_TEXTURE_OFFSET(Source, input.TexCoord, 0.0, int2(-1, 0)).xyz;
-    float3 rgbM = SAMPLE_TEXTURE_OFFSET(Source, input.TexCoord, 0.0, int2( 0, 0)).xyz;
+    float3 rgbM = source.xyz;
    float3 rgbE = SAMPLE_TEXTURE_OFFSET(Source, input.TexCoord, 0.0, int2( 1, 0)).xyz;
    float3 rgbS = SAMPLE_TEXTURE_OFFSET(Source, input.TexCoord, 0.0, int2( 0, 1)).xyz;
    float lumaN = FxaaLuma(rgbN);
@@ -295,9 +305,9 @@ to avoid processing in really dark areas.
    #endif        
    if(range < max(FXAA_EDGE_THRESHOLD_MIN, rangeMax * FXAA_EDGE_THRESHOLD)) {
        #if FXAA_DEBUG
-            return FxaaFilterReturn(FxaaToFloat3(lumaO));
+            return FxaaFilterReturn(FxaaToFloat3(lumaO), source);
        #endif
-        return FxaaFilterReturn(rgbM); }
+        return FxaaFilterReturn(rgbM, source); }
    #if FXAA_SUBPIX > 0
        #if FXAA_SUBPIX_FASTER
            float3 rgbL = (rgbN + rgbW + rgbE + rgbS + rgbM) * FxaaToFloat3(1.0/5.0);
@@ -334,7 +344,7 @@ of a lowpass value (computed in the next section) to the final result.
        #if FXAA_SUBPIX == 0
            float blendL = 0.0;
        #endif
-        return FxaaFilterReturn(float3(1.0, blendL/FXAA_SUBPIX_CAP, 0.0));
+        return FxaaFilterReturn(float3(1.0, blendL/FXAA_SUBPIX_CAP, 0.0), source);
    #endif
 /* 
                    CHOOSE VERTICAL OR HORIZONTAL SEARCH
@@ -379,8 +389,8 @@ flow in parallel (reusing the horizontal variables).
        abs((0.25 * lumaNE) + (-0.5 * lumaE) + (0.25 * lumaSE));
    bool horzSpan = edgeHorz >= edgeVert;
    #if FXAA_DEBUG_HORZVERT
-        if(horzSpan) return FxaaFilterReturn(float3(1.0, 0.75, 0.0));
-        else         return FxaaFilterReturn(float3(0.0, 0.50, 1.0));
+        if(horzSpan) return FxaaFilterReturn(float3(1.0, 0.75, 0.0), source);
+        else         return FxaaFilterReturn(float3(0.0, 0.50, 1.0), source);
    #endif
    float lengthSign = horzSpan ? -ScreenHeightInv : -ScreenWidthInv;
    if(!horzSpan) lumaN = lumaW;
@@ -410,8 +420,8 @@ until edge status changes
 */    
    bool pairN = gradientN >= gradientS;
    #if FXAA_DEBUG_PAIR
-        if(pairN) return FxaaFilterReturn(float3(0.0, 0.0, 1.0));
-        else      return FxaaFilterReturn(float3(0.0, 1.0, 0.0));
+        if(pairN) return FxaaFilterReturn(float3(0.0, 0.0, 1.0), source);
+        else      return FxaaFilterReturn(float3(0.0, 1.0, 0.0), source);
    #endif
    if(!pairN) lumaN = lumaS;
    if(!pairN) gradientN = gradientS;
@@ -498,8 +508,8 @@ On negative side if dstN < dstP,
    float dstP = horzSpan ? posP.x - input.TexCoord.x : posP.y - input.TexCoord.y;
    bool directionN = dstN < dstP;
    #if FXAA_DEBUG_NEGPOS
-        if(directionN) return FxaaFilterReturn(float3(1.0, 0.0, 0.0));
-        else           return FxaaFilterReturn(float3(0.0, 0.0, 1.0));
+        if(directionN) return FxaaFilterReturn(float3(1.0, 0.0, 0.0), source);
+        else           return FxaaFilterReturn(float3(0.0, 0.0, 1.0), source);
    #endif
    lumaEndN = directionN ? lumaEndN : lumaEndP;
    
@@ -627,19 +637,19 @@ Position on span is used to compute sub-pixel filter offset using simple ramp,
        float ox = horzSpan ? 0.0 : subPixelOffset * 2.0 / ScreenWidthInv;
        float oy = horzSpan ? subPixelOffset * 2.0 / ScreenHeightInv : 0.0;
        if(ox < 0.0)
-			return FxaaFilterReturn(lerp(float3(1.0, 0.0, 0.0), FxaaToFloat3(lumaO), -ox));
+			return FxaaFilterReturn(lerp(float3(1.0, 0.0, 0.0), FxaaToFloat3(lumaO), -ox), source);
        if(ox > 0.0)
-			return FxaaFilterReturn(lerp(float3(0.0, 0.0, 1.0), FxaaToFloat3(lumaO),  ox));
+			return FxaaFilterReturn(lerp(float3(0.0, 0.0, 1.0), FxaaToFloat3(lumaO),  ox), source);
        if(oy < 0.0)
-			return FxaaFilterReturn(lerp(float3(1.0, 0.6, 0.2), FxaaToFloat3(lumaO), -oy));
+			return FxaaFilterReturn(lerp(float3(1.0, 0.6, 0.2), FxaaToFloat3(lumaO), -oy), source);
        if(oy > 0.0)
-			return FxaaFilterReturn(lerp(float3(0.2, 0.6, 1.0), FxaaToFloat3(lumaO),  oy));
-        return FxaaFilterReturn(float3(lumaO, lumaO, lumaO));
+			return FxaaFilterReturn(lerp(float3(0.2, 0.6, 1.0), FxaaToFloat3(lumaO),  oy), source);
+        return FxaaFilterReturn(float3(lumaO, lumaO, lumaO), source);
    #endif
    float3 rgbF = SAMPLE_TEXTURE_LEVEL(Source, input.TexCoord + float2(horzSpan ? 0.0 : subPixelOffset, horzSpan ? subPixelOffset : 0.0), 0.0).xyz;
    #if FXAA_SUBPIX == 0
-        return FxaaFilterReturn(rgbF); 
+        return FxaaFilterReturn(rgbF, source); 
    #else        
-        return FxaaFilterReturn(lerp(rgbF, rgbL, blendL)); 
+        return FxaaFilterReturn(lerp(rgbF, rgbL, blendL), source); 
    #endif
 }
--- a/Source/Shaders/PostProcessing.shader
+++ b/Source/Shaders/PostProcessing.shader
@@ -416,7 +416,7 @@ float4 PS_Composite(Quad_VS2PS input) : SV_Target
 {
 	float2 uv = input.TexCoord;
 	float3 lensLight = 0;
-	float3 color = 0;
+	float4 color;

 	// Chromatic Abberation
 	if (ChromaticDistortion > 0)
@@ -434,22 +434,22 @@ float4 PS_Composite(Quad_VS2PS input) : SV_Target
 		float rnd = nrand(uv + Time);
 		float t = rnd * stepsiz;

-		float3 sumcol = 0;
-		float3 sumw = 0;
+		float4 sumcol = 0;
+		float4 sumw = 0;
 		for (int i = 0; i < iterations; i++)
 		{
-			float3 w = spectrum_offset(t);
+			float4 w = float4(spectrum_offset(t), 1);
 			sumw += w;
 			float2 uvd = distort(uv, t, min_distort, max_distort);
-			sumcol += Input0.Sample(SamplerLinearClamp, uvd).rgb * w;
+			sumcol += Input0.Sample(SamplerLinearClamp, uvd) * w;
 			t += stepsiz;
 		}
-		sumcol.rgb /= sumw;
-		color = sumcol.rgb + (rnd / 255.0);
+		sumcol /= sumw;
+		color = float4(sumcol.rgb + (rnd / 255.0), sumcol.a); // Dither RGB only so the passed-through alpha is not offset by noise
 	}
 	else
 	{
-		color = Input0.Sample(SamplerLinearClamp, uv).rgb;
+		color = Input0.Sample(SamplerLinearClamp, uv);
 	}

 	// Lens Flares
@@ -468,7 +468,7 @@ float4 PS_Composite(Quad_VS2PS input) : SV_Target

 		// Accumulate final lens flares lght
 		lensLight += lensFlares * 1.5f;
-		color += lensFlares;
+		color.rgb += lensFlares;
 	}

 	// Bloom
@@ -481,19 +481,19 @@ float4 PS_Composite(Quad_VS2PS input) : SV_Target

 		// Accumulate final bloom lght
 		lensLight += max(0, bloom * 3.0f + (- 1.0f * 3.0f));
-		color += bloom;
+		color.rgb += bloom;
 	}

 	// Lens Dirt
 	float3 lensDirt = LensDirt.SampleLevel(SamplerLinearClamp, uv, 0).rgb;
-	color += lensDirt * (lensLight * LensDirtIntensity);
+	color.rgb += lensDirt * (lensLight * LensDirtIntensity);

 	// Eye Adaptation post exposure
-	color *= PostExposure;
+	color.rgb *= PostExposure;

 	// Color Grading and Tone Mapping
 #if !NO_GRADING_LUT
-	color = ColorLookupTable(color);
+	color.rgb = ColorLookupTable(color.rgb);
 #endif

 	// Film Grain
@@ -505,12 +505,12 @@ float4 PS_Composite(Quad_VS2PS input) : SV_Target
 		float noise = pnoise2D(rotCoordsR * (InputSize / GrainParticleSize), GrainTime);

 		// Noisiness response curve based on scene luminance
-		float luminance = Luminance(saturate(color));
+		float luminance = Luminance(saturate(color.rgb));
 		luminance += smoothstep(0.2, 0.0, luminance);

 		// Add noise to the final color
 		noise = lerp(noise, 0, min(pow(luminance, 4.0), 100));
-		color += noise * GrainAmount;
+		color.rgb += noise * GrainAmount;
 	}

 	// Vignette
@@ -520,15 +520,15 @@ float4 PS_Composite(Quad_VS2PS input) : SV_Target
 		float2 uvCircle = uv * (1 - uv);
 		float uvCircleScale = uvCircle.x * uvCircle.y * 16.0f;
 		float mask = lerp(1, pow(uvCircleScale, VignetteShapeFactor), VignetteIntensity);
-		color = lerp(VignetteColor, color, mask);
+		color.rgb = lerp(VignetteColor, color.rgb, mask);
 	}

 	// Screen fade
-	color = lerp(color, ScreenFadeColor.rgb, ScreenFadeColor.a);
+	color.rgb = lerp(color.rgb, ScreenFadeColor.rgb, ScreenFadeColor.a);

 	// Saturate color since it will be rendered to the screen
-	color = saturate(color);
+	color.rgb = saturate(color.rgb);

-	// Return final pixel color
-	return float4(color, 1.0f);
+	// Return final pixel color (preserve input alpha)
+	return color;
 }