diff --git a/Content/Shaders/MotionBlur.flax b/Content/Shaders/MotionBlur.flax index 2f93589f0..282a811d2 100644 --- a/Content/Shaders/MotionBlur.flax +++ b/Content/Shaders/MotionBlur.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1bb224fc630264fcfa58c6d8ce27759e44d3f989a53655dc35959e594890d146 -size 1838 +oid sha256:cc3b15d7619864afa2b0e18aae4901e825a5c655cb151a95507c7c664b87d377 +size 9685 diff --git a/Source/Engine/Renderer/MotionBlurPass.cpp b/Source/Engine/Renderer/MotionBlurPass.cpp index e6264ead6..31cadb877 100644 --- a/Source/Engine/Renderer/MotionBlurPass.cpp +++ b/Source/Engine/Renderer/MotionBlurPass.cpp @@ -10,17 +10,25 @@ #include "Engine/Graphics/GPULimits.h" #include "Engine/Graphics/RenderTargetPool.h" #include "Engine/Graphics/RenderBuffers.h" +#include "Engine/Engine/Time.h" PACK_STRUCT(struct Data { GBufferData GBuffer; Matrix CurrentVP; Matrix PreviousVP; Vector4 TemporalAAJitter; + + float VelocityScale; + float Dummy0; + int32 MaxBlurSamples; + uint32 VariableTileLoopCount; + + Vector2 Input0SizeInv; + Vector2 Input2SizeInv; }); MotionBlurPass::MotionBlurPass() : _motionVectorsFormat(PixelFormat::Unknown) - , _velocityFormat(PixelFormat::Unknown) { } @@ -31,8 +39,13 @@ String MotionBlurPass::ToString() const bool MotionBlurPass::Init() { - // Create pipeline state + // Create pipeline states _psCameraMotionVectors = GPUDevice::Instance->CreatePipelineState(); + _psMotionVectorsDebug = GPUDevice::Instance->CreatePipelineState(); + _psTileMax = GPUDevice::Instance->CreatePipelineState(); + _psTileMaxVariable = GPUDevice::Instance->CreatePipelineState(); + _psNeighborMax = GPUDevice::Instance->CreatePipelineState(); + _psMotionBlur = GPUDevice::Instance->CreatePipelineState(); // Load shader _shader = Content::LoadAsyncInternal(TEXT("Shaders/MotionBlur")); @@ -48,16 +61,12 @@ bool MotionBlurPass::Init() { if (FORMAT_FEATURES_ARE_NOT_SUPPORTED(GPUDevice::Instance->GetFormatFeatures(PixelFormat::R32G32_Float).Support, 
(FormatSupport::RenderTarget | FormatSupport::ShaderSample | FormatSupport::Texture2D))) format = PixelFormat::R32G32_Float; + else if (FORMAT_FEATURES_ARE_NOT_SUPPORTED(GPUDevice::Instance->GetFormatFeatures(PixelFormat::R16G16B16A16_Float).Support, (FormatSupport::RenderTarget | FormatSupport::ShaderSample | FormatSupport::Texture2D))) + format = PixelFormat::R16G16B16A16_Float; else format = PixelFormat::R32G32B32A32_Float; } _motionVectorsFormat = format; - format = PixelFormat::R10G10B10A2_UNorm; - if (FORMAT_FEATURES_ARE_NOT_SUPPORTED(GPUDevice::Instance->FeaturesPerFormat[(int32)format].Support, (FormatSupport::RenderTarget | FormatSupport::ShaderSample | FormatSupport::Texture2D))) - { - format = PixelFormat::R32G32B32A32_Float; - } - _velocityFormat = format; return false; } @@ -86,6 +95,36 @@ bool MotionBlurPass::setupResources() if (_psCameraMotionVectors->Init(psDesc)) return true; } + if (!_psMotionVectorsDebug->IsValid()) + { + psDesc.PS = shader->GetPS("PS_MotionVectorsDebug"); + if (_psMotionVectorsDebug->Init(psDesc)) + return true; + } + if (!_psTileMax->IsValid()) + { + psDesc.PS = shader->GetPS("PS_TileMax"); + if (_psTileMax->Init(psDesc)) + return true; + } + if (!_psTileMaxVariable->IsValid()) + { + psDesc.PS = shader->GetPS("PS_TileMaxVariable"); + if (_psTileMaxVariable->Init(psDesc)) + return true; + } + if (!_psNeighborMax->IsValid()) + { + psDesc.PS = shader->GetPS("PS_NeighborMax"); + if (_psNeighborMax->Init(psDesc)) + return true; + } + if (!_psMotionBlur->IsValid()) + { + psDesc.PS = shader->GetPS("PS_MotionBlur"); + if (_psMotionBlur->Init(psDesc)) + return true; + } return false; } @@ -97,6 +136,11 @@ void MotionBlurPass::Dispose() // Delete pipeline state SAFE_DELETE_GPU_RESOURCE(_psCameraMotionVectors); + SAFE_DELETE_GPU_RESOURCE(_psMotionVectorsDebug); + SAFE_DELETE_GPU_RESOURCE(_psTileMax); + SAFE_DELETE_GPU_RESOURCE(_psTileMaxVariable); + SAFE_DELETE_GPU_RESOURCE(_psNeighborMax); + SAFE_DELETE_GPU_RESOURCE(_psMotionBlur); // 
Release asset _shader.Unlink(); @@ -198,13 +242,18 @@ void MotionBlurPass::RenderDebug(RenderContext& renderContext, GPUTextureView* f { auto context = GPUDevice::Instance->GetMainContext(); const auto motionVectors = renderContext.Buffers->MotionVectors; - //if (!motionVectors->IsAllocated() || setupResources()) + if (!motionVectors->IsAllocated() || setupResources()) { context->Draw(frame); return; } - // .. + PROFILE_GPU_CPU("Motion Vectors Debug"); + context->BindSR(0, frame); + context->BindSR(1, renderContext.Buffers->MotionVectors->View()); + context->SetState(_psMotionVectorsDebug); + context->DrawFullscreenTriangle(); + context->ResetSR(); } void MotionBlurPass::Render(RenderContext& renderContext, GPUTexture*& input, GPUTexture*& output) @@ -218,8 +267,6 @@ void MotionBlurPass::Render(RenderContext& renderContext, GPUTexture*& input, GP const int32 screenHeight = renderContext.Buffers->GetHeight(); const int32 motionVectorsWidth = screenWidth / static_cast(settings.MotionVectorsResolution); const int32 motionVectorsHeight = screenHeight / static_cast(settings.MotionVectorsResolution); - - // Ensure to have valid data if ((renderContext.View.Flags & ViewFlags::MotionBlur) == 0 || !_hasValidResources || isCameraCut || @@ -232,5 +279,105 @@ void MotionBlurPass::Render(RenderContext& renderContext, GPUTexture*& input, GP return; } - // .. + // Need to have valid motion vectors created and rendered before + ASSERT(motionVectors->IsAllocated()); + + PROFILE_GPU_CPU("Motion Blur"); + + // Setup shader inputs + const int32 maxBlurSize = (int32)((float)motionVectorsHeight * 0.05f); + const int32 tileSize = Math::AlignUp(maxBlurSize, 8); + const float timeScale = renderContext.Task->View.IsOfflinePass ? 
1.0f : 1.0f / Time::Draw.UnscaledDeltaTime.GetTotalSeconds() / 60.0f; // 60fps as a reference + Data data; + GBufferPass::SetInputs(renderContext.View, data.GBuffer); + data.TemporalAAJitter = renderContext.View.TemporalAAJitter; + data.VelocityScale = settings.Scale * 0.5f * timeScale; // 2x samples in loop + data.MaxBlurSamples = Math::Clamp(settings.SampleCount / 2, 1, 64); // 2x samples in loop + data.VariableTileLoopCount = tileSize / 8; + data.Input0SizeInv = Vector2(1.0f / (float)motionVectorsWidth, 1.0f / (float)motionVectorsHeight); + const auto cb = _shader->GetShader()->GetCB(0); + context->UpdateCB(cb, &data); + context->BindCB(0, cb); + + // Downscale motion vectors texture down to 1/2 (with max velocity calculation 2x2 kernel) + auto rtDesc = GPUTextureDescription::New2D(motionVectorsWidth / 2, motionVectorsHeight / 2, _motionVectorsFormat); + const auto vMaxBuffer2 = RenderTargetPool::Get(rtDesc); + context->SetRenderTarget(vMaxBuffer2->View()); + context->SetViewportAndScissors((float)rtDesc.Width, (float)rtDesc.Height); + context->BindSR(0, motionVectors->View()); + context->SetState(_psTileMax); + context->DrawFullscreenTriangle(); + + // Downscale motion vectors texture down to 1/4 (with max velocity calculation 2x2 kernel) + rtDesc.Width /= 2; + rtDesc.Height /= 2; + const auto vMaxBuffer4 = RenderTargetPool::Get(rtDesc); + context->ResetRenderTarget(); + context->SetRenderTarget(vMaxBuffer4->View()); + context->SetViewportAndScissors((float)rtDesc.Width, (float)rtDesc.Height); + context->BindSR(0, vMaxBuffer2->View()); + data.Input0SizeInv = Vector2(1.0f / (float)vMaxBuffer2->Width(), 1.0f / (float)vMaxBuffer2->Height()); + context->UpdateCB(cb, &data); + context->SetState(_psTileMax); + context->DrawFullscreenTriangle(); + RenderTargetPool::Release(vMaxBuffer2); + + // Downscale motion vectors texture down to 1/8 (with max velocity calculation 2x2 kernel) + rtDesc.Width /= 2; + rtDesc.Height /= 2; + const auto vMaxBuffer8 = 
RenderTargetPool::Get(rtDesc); + context->ResetRenderTarget(); + context->SetRenderTarget(vMaxBuffer8->View()); + context->SetViewportAndScissors((float)rtDesc.Width, (float)rtDesc.Height); + context->BindSR(0, vMaxBuffer4->View()); + data.Input0SizeInv = Vector2(1.0f / (float)vMaxBuffer4->Width(), 1.0f / (float)vMaxBuffer4->Height()); + context->UpdateCB(cb, &data); + context->SetState(_psTileMax); + context->DrawFullscreenTriangle(); + RenderTargetPool::Release(vMaxBuffer4); + + // Downscale motion vectors texture down to tileSize/tileSize (with max velocity calculation NxN kernel) + rtDesc.Width = motionVectorsWidth / tileSize; + rtDesc.Height = motionVectorsHeight / tileSize; + auto vMaxBuffer = RenderTargetPool::Get(rtDesc); + context->ResetRenderTarget(); + context->SetRenderTarget(vMaxBuffer->View()); + context->SetViewportAndScissors((float)rtDesc.Width, (float)rtDesc.Height); + context->BindSR(0, vMaxBuffer8->View()); + data.Input0SizeInv = Vector2(1.0f / (float)vMaxBuffer8->Width(), 1.0f / (float)vMaxBuffer8->Height()); + context->UpdateCB(cb, &data); + context->SetState(_psTileMaxVariable); + context->DrawFullscreenTriangle(); + RenderTargetPool::Release(vMaxBuffer8); + + // Extract maximum velocities for the tiles based on their neighbors + context->ResetRenderTarget(); + auto vMaxNeighborBuffer = RenderTargetPool::Get(rtDesc); + context->SetRenderTarget(vMaxNeighborBuffer->View()); + context->BindSR(0, vMaxBuffer->View()); + data.Input0SizeInv = Vector2(1.0f / (float)vMaxBuffer->Width(), 1.0f / (float)vMaxBuffer->Height()); // 3x3 kernel steps in texels of the tile buffer it samples, not of vMaxBuffer8 + context->UpdateCB(cb, &data); + context->SetState(_psNeighborMax); + context->DrawFullscreenTriangle(); + RenderTargetPool::Release(vMaxBuffer); + + // Render motion blur + context->ResetRenderTarget(); + context->SetRenderTarget(*output); + context->SetViewportAndScissors((float)screenWidth, (float)screenHeight); + context->BindSR(0, input->View()); + context->BindSR(1, motionVectors->View()); + context->BindSR(2, vMaxNeighborBuffer->View()); + context->BindSR(3, renderContext.Buffers->DepthBuffer->View()); + data.Input0SizeInv = Vector2(1.0f / 
(float)input->Width(), 1.0f / (float)input->Height()); + data.Input2SizeInv = Vector2(1.0f / (float)vMaxNeighborBuffer->Width(), 1.0f / (float)vMaxNeighborBuffer->Height()); // Input2 (t2) is the neighbor-max tile buffer; the shader jitters its UVs by a fraction of this texel size + context->UpdateCB(cb, &data); + context->SetState(_psMotionBlur); + context->DrawFullscreenTriangle(); + + // Cleanup + RenderTargetPool::Release(vMaxNeighborBuffer); + context->ResetSR(); + context->ResetRenderTarget(); + Swap(output, input); } diff --git a/Source/Engine/Renderer/MotionBlurPass.h b/Source/Engine/Renderer/MotionBlurPass.h index d27003f13..3ac9404d4 100644 --- a/Source/Engine/Renderer/MotionBlurPass.h +++ b/Source/Engine/Renderer/MotionBlurPass.h @@ -12,9 +12,13 @@ class MotionBlurPass : public RendererPass private: PixelFormat _motionVectorsFormat; - PixelFormat _velocityFormat; AssetReference _shader; GPUPipelineState* _psCameraMotionVectors = nullptr; + GPUPipelineState* _psMotionVectorsDebug = nullptr; + GPUPipelineState* _psTileMax = nullptr; + GPUPipelineState* _psTileMaxVariable = nullptr; + GPUPipelineState* _psNeighborMax = nullptr; + GPUPipelineState* _psMotionBlur = nullptr; public: @@ -52,6 +56,11 @@ private: void OnShaderReloading(Asset* obj) { _psCameraMotionVectors->ReleaseGPU(); + _psMotionVectorsDebug->ReleaseGPU(); + _psTileMax->ReleaseGPU(); + _psTileMaxVariable->ReleaseGPU(); + _psNeighborMax->ReleaseGPU(); + _psMotionBlur->ReleaseGPU(); invalidateResources(); } #endif diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index c19be8dc3..7e3a74aad 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -485,8 +485,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext) context->ResetRenderTarget(); context->SetRenderTarget(task->GetOutputView()); context->SetViewportAndScissors((float)renderContext.Buffers->GetWidth(), (float)renderContext.Buffers->GetHeight()); - context->Clear(frameBuffer->View(), Color::Black); - 
//MotionBlurPass::Instance()->RenderDebug(renderContext, frameBuffer->View()); + MotionBlurPass::Instance()->RenderDebug(renderContext, frameBuffer->View()); return; } diff --git a/Source/Shaders/MotionBlur.shader b/Source/Shaders/MotionBlur.shader index 53054c799..a1229b748 100644 --- a/Source/Shaders/MotionBlur.shader +++ b/Source/Shaders/MotionBlur.shader @@ -5,15 +5,22 @@ #include "./Flax/Common.hlsl" #include "./Flax/GBuffer.hlsl" +// Motion blur implementation based on: +// Jimenez, 2014, http://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare +// Chapman, 2013, http://john-chapman-graphics.blogspot.com/2013/01/per-object-motion-blur.html +// McGuire et al., 2012, "A reconstruction filter for plausible motion blur" + META_CB_BEGIN(0, Data) - GBufferData GBuffer; - -// Camera Motion Vectors float4x4 CurrentVP; float4x4 PreviousVP; float4 TemporalAAJitter; - +float VelocityScale; +float Dummy0; +int MaxBlurSamples; +uint VariableTileLoopCount; +float2 Input0SizeInv; +float2 Input2SizeInv; META_CB_END DECLARE_GBUFFERDATA_ACCESS(GBuffer) @@ -21,6 +28,7 @@ DECLARE_GBUFFERDATA_ACCESS(GBuffer) Texture2D Input0 : register(t0); Texture2D Input1 : register(t1); Texture2D Input2 : register(t2); +Texture2D Input3 : register(t3); // Pixel shader for camera motion vectors META_PS(true, FEATURE_LEVEL_ES2) @@ -48,3 +56,202 @@ float4 PS_CameraMotionVectors(Quad_VS2PS input) : SV_Target return float4(vPosCur - vPosPrev, 0, 1); } + +// Calculates the debug color for a motion vector (hue from its direction, alpha from its length) +float4 MotionVectorToColor(float2 v) +{ + float angle = atan2(v.y, v.x); + float hue = (angle * (1.0f / PI) + 1.0f) * 0.5f; + return saturate(float4(abs(hue * 6.0f - 3.0f) - 1.0f, 2.0f - abs(hue * 6.0f - 4.0f), 2.0f - abs(hue * 6.0f - 2.0f), length(v))); +} + +// Pixel shader for motion vectors debug view +META_PS(true, FEATURE_LEVEL_ES2) +float4 PS_MotionVectorsDebug(Quad_VS2PS input) : SV_Target +{ + float4 c = SAMPLE_RT(Input0, input.TexCoord); + float2 v = 
SAMPLE_RT(Input1, input.TexCoord).xy * 20.0f; + float4 vC = MotionVectorToColor(v); + return float4(lerp(c.rgb, vC.rgb, vC.a * 0.6f), c.a); +} + +// Returns the longer velocity vector +float2 maxV(float2 a, float2 b) +{ + // Use squared length for branch + return dot(a, a) > dot(b, b) ? a : b; +} + +// Pixel shader for motion vectors downscale with maximum velocity extraction (2x2 kernel) +META_PS(true, FEATURE_LEVEL_ES2) +float4 PS_TileMax(Quad_VS2PS input) : SV_Target +{ + // Reference: [McGuire 2012] (2.3 Filter Passes) + float4 offset = Input0SizeInv.xyxy * float4(-1, -1, 1, 1); + float2 v1 = SAMPLE_RT(Input0, input.TexCoord + offset.xy).xy; + float2 v2 = SAMPLE_RT(Input0, input.TexCoord + offset.xw).xy; + float2 v3 = SAMPLE_RT(Input0, input.TexCoord + offset.zy).xy; + float2 v4 = SAMPLE_RT(Input0, input.TexCoord + offset.zw).xy; + return float4(maxV(maxV(maxV(v1, v2), v3), v4), 0, 0); +} + +// Pixel shader for motion vectors downscale with maximum velocity extraction (NxN kernel) +META_PS(true, FEATURE_LEVEL_ES2) +float4 PS_TileMaxVariable(Quad_VS2PS input) : SV_Target +{ + // Reference: [McGuire 2012] (2.3 Filter Passes) + float2 result = float2(0, 0); + LOOP + for (uint x = 0; x < VariableTileLoopCount; x++) + { + LOOP + for (uint y = 0; y < VariableTileLoopCount; y++) + { + float2 v = SAMPLE_RT(Input0, input.TexCoord + Input0SizeInv * float2(x, y)).xy; + result = maxV(result, v); + } + } + return float4(result, 0, 0); +} + +// Pixel shader for motion vectors tiles maximum neighbors velocities extraction (3x3 kernel) +META_PS(true, FEATURE_LEVEL_ES2) +float4 PS_NeighborMax(Quad_VS2PS input) : SV_Target +{ + // Reference: [McGuire 2012] (2.3 Filter Passes) + float2 result = float2(0, 0); + UNROLL + for (int x = -1; x <= 1; x++) + { + UNROLL + for (int y = -1; y <= 1; y++) + { + float2 v = SAMPLE_RT(Input0, input.TexCoord + Input0SizeInv * float2(x, y)).xy; + result = maxV(result, v); + } + } + return float4(result, 0, 0); +} + +float2 ClampVelocity(float2 v) 
+{ + // Prevent too big blur over the screen + float velocityLimit = 0.2f; + return clamp(v * VelocityScale, -velocityLimit, velocityLimit); +} + +// [Jimenez, 2014] +float2 DepthCmp(float centerDepth, float sampleDepth, float depthScale) +{ + return saturate(0.5f + float2(depthScale, -depthScale) * (sampleDepth - centerDepth)); +} + +// [Jimenez, 2014] +float2 SpreadCmp(float offsetLen, float2 spreadLen, float pixelToSampleUnitsScale) +{ + return saturate(pixelToSampleUnitsScale * spreadLen - offsetLen + 1.0f); + //return saturate(pixelToSampleUnitsScale * spreadLen - max(offsetLen - 1.0f, 0)); +} + +// [Jimenez, 2014] +float SampleWeight(float centerDepth, float sampleDepth, float offsetLen, float centerSpreadLen, float sampleSpreadLen, float pixelToSampleUnitsScale, float depthScale) +{ + float2 depthCmp = DepthCmp(centerDepth, sampleDepth, depthScale); + float2 spreadCmp = SpreadCmp(offsetLen, float2(centerSpreadLen, sampleSpreadLen), pixelToSampleUnitsScale); + return dot(depthCmp, spreadCmp); +} + +// [Jimenez, 2014] +float FullscreenGradientNoise(float2 uv) +{ + uv = floor(uv * GBuffer.ScreenSize.xy); + float f = dot(float2(0.06711056f, 0.00583715f), uv); + return frac(52.9829189f * frac(f)); +} + +float2 NeighborMaxJitter(float2 uv) +{ + // Reduce max velocity tiles visibility by applying some jitter and noise to the uvs + float rx, ry; + float noise = FullscreenGradientNoise(uv + float2(2.0f, 0.0f)) * (PI * 2); + sincos(noise, ry, rx); + return float2(rx, ry) * Input2SizeInv * 0.25f; +} + +// Pixel shader for motion blur rendering +META_PS(true, FEATURE_LEVEL_ES2) +float4 PS_MotionBlur(Quad_VS2PS input) : SV_Target +{ + // Reference: [McGuire 2012, 2013], [Jimenez, 2014] + + // Sample pixel color + float4 pixelColor = SAMPLE_RT(Input0, input.TexCoord); + + // Sample largest velocity in the neighborhood + float2 neighborhoodVelocity = Input2.SampleLevel(SamplerLinearClamp, input.TexCoord + NeighborMaxJitter(input.TexCoord), 0).xy; + neighborhoodVelocity = 
ClampVelocity(neighborhoodVelocity); + float neighborhoodVelocityLength = length(neighborhoodVelocity); + int neighborhoodVelocityPixelsLength = (int)length(neighborhoodVelocity * GBuffer.ScreenSize.xy); + if (neighborhoodVelocityPixelsLength <= 1) + return pixelColor; + + // Sample pixel velocity + float2 pixelVelocity = Input1.SampleLevel(SamplerLinearClamp, input.TexCoord, 0).xy; + pixelVelocity = ClampVelocity(pixelVelocity); + float pixelVelocityLength = length(pixelVelocity); + + // Sample pixel depth + GBufferData gBufferData = GetGBufferData(); + float pixelDepth = LinearizeZ(gBufferData, SAMPLE_RT(Input3, input.TexCoord).x); + + // Calculate noise to make it look better with less samples per pixel + float noise = FullscreenGradientNoise(input.TexCoord); + + // Accumulate color using evenly placed filter taps along maximum neighborhood velocity direction + float2 direction = neighborhoodVelocity; + //float2 direction = pixelVelocity; + uint sampleCount = MaxBlurSamples; + float pixelToSampleUnitsScale = sampleCount * rsqrt(dot(direction, direction)); + float4 sum = 0; + LOOP + for (uint i = 0; i < sampleCount; i++) + { + float2 samplePos = float2(noise - 0.5f, 0.5f - noise) + ((float)i + 0.5f); + float2 samplePosNormalized = samplePos / sampleCount; + + float2 sampleUV1 = input.TexCoord + samplePosNormalized.x * direction; + float2 sampleUV2 = input.TexCoord - samplePosNormalized.y * direction; + + // TODO: use cheaper version if neighborhood min and max are almost equal (then calc min value too) +#if 0 + float weight1 = 1; + float weight2 = 1; +#else + float depth1 = LinearizeZ(gBufferData, SAMPLE_RT(Input3, sampleUV1).x); + float2 velocity1 = Input1.SampleLevel(SamplerPointClamp, sampleUV1, 0).xy; + velocity1 = ClampVelocity(velocity1); + float velocityLength1 = length(velocity1); + + float depth2 = LinearizeZ(gBufferData, SAMPLE_RT(Input3, sampleUV2).x); + float2 velocity2 = Input1.SampleLevel(SamplerPointClamp, sampleUV2, 0).xy; + velocity2 = 
ClampVelocity(velocity2); + float velocityLength2 = length(velocity2); + + float weight1 = SampleWeight(pixelDepth, depth1, samplePos.x, pixelVelocityLength, velocityLength1, pixelToSampleUnitsScale, 1); + float weight2 = SampleWeight(pixelDepth, depth2, samplePos.x, pixelVelocityLength, velocityLength2, pixelToSampleUnitsScale, 1); + + bool2 mirror = bool2(depth1 > depth2, velocityLength2 > velocityLength1); + weight1 = all(mirror) ? weight2 : weight1; + weight2 = any(mirror) ? weight2 : weight1; +#endif + + sum += weight1 * float4(SAMPLE_RT(Input0, sampleUV1).rgb, 1); + sum += weight2 * float4(SAMPLE_RT(Input0, sampleUV2).rgb, 1); + } + + // Normalize result + sum *= 0.5f / sampleCount; + + // Blend result with background + return float4(sum.rgb + (1 - sum.w) * pixelColor.rgb, pixelColor.a); +}