Reimplement Motion Blur

This commit is contained in:
Wojtek Figat
2021-01-03 20:13:09 +01:00
parent e5286510d1
commit 23c51694c7
5 changed files with 382 additions and 22 deletions

BIN
Content/Shaders/MotionBlur.flax (Stored with Git LFS)

Binary file not shown.

View File

@@ -10,17 +10,25 @@
#include "Engine/Graphics/GPULimits.h"
#include "Engine/Graphics/RenderTargetPool.h"
#include "Engine/Graphics/RenderBuffers.h"
#include "Engine/Engine/Time.h"
PACK_STRUCT(struct Data {
GBufferData GBuffer;
Matrix CurrentVP;
Matrix PreviousVP;
Vector4 TemporalAAJitter;
float VelocityScale;
float Dummy0;
int32 MaxBlurSamples;
uint32 VariableTileLoopCount;
Vector2 Input0SizeInv;
Vector2 Input2SizeInv;
});
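// Note (layout assumption): field order follows HLSL constant buffer packing (16-byte registers); Dummy0 keeps
// the scalar block 16 bytes wide so Input0SizeInv and Input2SizeInv line up with the cbuffer layout in the shader.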
MotionBlurPass::MotionBlurPass()
: _motionVectorsFormat(PixelFormat::Unknown)
, _velocityFormat(PixelFormat::Unknown)
{
}
@@ -31,8 +39,13 @@ String MotionBlurPass::ToString() const
bool MotionBlurPass::Init()
{
// Create pipeline state
// Create pipeline states
_psCameraMotionVectors = GPUDevice::Instance->CreatePipelineState();
_psMotionVectorsDebug = GPUDevice::Instance->CreatePipelineState();
_psTileMax = GPUDevice::Instance->CreatePipelineState();
_psTileMaxVariable = GPUDevice::Instance->CreatePipelineState();
_psNeighborMax = GPUDevice::Instance->CreatePipelineState();
_psMotionBlur = GPUDevice::Instance->CreatePipelineState();
// Load shader
_shader = Content::LoadAsyncInternal<Shader>(TEXT("Shaders/MotionBlur"));
@@ -48,16 +61,12 @@ bool MotionBlurPass::Init()
{
if (!FORMAT_FEATURES_ARE_NOT_SUPPORTED(GPUDevice::Instance->GetFormatFeatures(PixelFormat::R32G32_Float).Support, (FormatSupport::RenderTarget | FormatSupport::ShaderSample | FormatSupport::Texture2D)))
format = PixelFormat::R32G32_Float;
else if (!FORMAT_FEATURES_ARE_NOT_SUPPORTED(GPUDevice::Instance->GetFormatFeatures(PixelFormat::R16G16B16A16_Float).Support, (FormatSupport::RenderTarget | FormatSupport::ShaderSample | FormatSupport::Texture2D)))
format = PixelFormat::R16G16B16A16_Float;
else
format = PixelFormat::R32G32B32A32_Float;
}
_motionVectorsFormat = format;
format = PixelFormat::R10G10B10A2_UNorm;
if (FORMAT_FEATURES_ARE_NOT_SUPPORTED(GPUDevice::Instance->FeaturesPerFormat[(int32)format].Support, (FormatSupport::RenderTarget | FormatSupport::ShaderSample | FormatSupport::Texture2D)))
{
format = PixelFormat::R32G32B32A32_Float;
}
_velocityFormat = format;
return false;
}
@@ -86,6 +95,36 @@ bool MotionBlurPass::setupResources()
if (_psCameraMotionVectors->Init(psDesc))
return true;
}
if (!_psMotionVectorsDebug->IsValid())
{
psDesc.PS = shader->GetPS("PS_MotionVectorsDebug");
if (_psMotionVectorsDebug->Init(psDesc))
return true;
}
if (!_psTileMax->IsValid())
{
psDesc.PS = shader->GetPS("PS_TileMax");
if (_psTileMax->Init(psDesc))
return true;
}
if (!_psTileMaxVariable->IsValid())
{
psDesc.PS = shader->GetPS("PS_TileMaxVariable");
if (_psTileMaxVariable->Init(psDesc))
return true;
}
if (!_psNeighborMax->IsValid())
{
psDesc.PS = shader->GetPS("PS_NeighborMax");
if (_psNeighborMax->Init(psDesc))
return true;
}
if (!_psMotionBlur->IsValid())
{
psDesc.PS = shader->GetPS("PS_MotionBlur");
if (_psMotionBlur->Init(psDesc))
return true;
}
return false;
}
@@ -97,6 +136,11 @@ void MotionBlurPass::Dispose()
// Delete pipeline state
SAFE_DELETE_GPU_RESOURCE(_psCameraMotionVectors);
SAFE_DELETE_GPU_RESOURCE(_psMotionVectorsDebug);
SAFE_DELETE_GPU_RESOURCE(_psTileMax);
SAFE_DELETE_GPU_RESOURCE(_psTileMaxVariable);
SAFE_DELETE_GPU_RESOURCE(_psNeighborMax);
SAFE_DELETE_GPU_RESOURCE(_psMotionBlur);
// Release asset
_shader.Unlink();
@@ -198,13 +242,18 @@ void MotionBlurPass::RenderDebug(RenderContext& renderContext, GPUTextureView* f
{
auto context = GPUDevice::Instance->GetMainContext();
const auto motionVectors = renderContext.Buffers->MotionVectors;
//if (!motionVectors->IsAllocated() || setupResources())
if (!motionVectors->IsAllocated() || setupResources())
{
context->Draw(frame);
return;
}
// ..
PROFILE_GPU_CPU("Motion Vectors Debug");
context->BindSR(0, frame);
context->BindSR(1, renderContext.Buffers->MotionVectors->View());
context->SetState(_psMotionVectorsDebug);
context->DrawFullscreenTriangle();
context->ResetSR();
}
void MotionBlurPass::Render(RenderContext& renderContext, GPUTexture*& input, GPUTexture*& output)
@@ -218,8 +267,6 @@ void MotionBlurPass::Render(RenderContext& renderContext, GPUTexture*& input, GP
const int32 screenHeight = renderContext.Buffers->GetHeight();
const int32 motionVectorsWidth = screenWidth / static_cast<int32>(settings.MotionVectorsResolution);
const int32 motionVectorsHeight = screenHeight / static_cast<int32>(settings.MotionVectorsResolution);
// Ensure that the required data is valid
if ((renderContext.View.Flags & ViewFlags::MotionBlur) == 0 ||
!_hasValidResources ||
isCameraCut ||
@@ -232,5 +279,103 @@ void MotionBlurPass::Render(RenderContext& renderContext, GPUTexture*& input, GP
return;
}
// ..
// Motion vectors must have been allocated and rendered before this pass
ASSERT(motionVectors->IsAllocated());
PROFILE_GPU_CPU("Motion Blur");
// Setup shader inputs
const int32 maxBlurSize = (int32)((float)motionVectorsHeight * 0.05f);
const int32 tileSize = Math::AlignUp(maxBlurSize, 8);
const float timeScale = renderContext.Task->View.IsOfflinePass ? 1.0f : 1.0f / Time::Draw.UnscaledDeltaTime.GetTotalSeconds() / 60.0f; // 60fps as a reference
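// timeScale = 1 / (deltaTime * 60), so it equals 1 at the reference 60 FPS and shrinks the blur on longer
// frames, keeping the blur length tied to real time rather than to raw per-frame motion vector magnitude.
// Example values (assuming 1080p with full-resolution motion vectors): maxBlurSize = 1080 * 0.05 = 54 px,
// tileSize = AlignUp(54, 8) = 56, VariableTileLoopCount = 56 / 8 = 7.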
Data data;
GBufferPass::SetInputs(renderContext.View, data.GBuffer);
data.TemporalAAJitter = renderContext.View.TemporalAAJitter;
data.VelocityScale = settings.Scale * 0.5f * timeScale; // 2x samples in loop
data.MaxBlurSamples = Math::Clamp(settings.SampleCount / 2, 1, 64); // 2x samples in loop
data.VariableTileLoopCount = tileSize / 8;
data.Input0SizeInv = Vector2(1.0f / (float)motionVectorsWidth, 1.0f / (float)motionVectorsHeight);
const auto cb = _shader->GetShader()->GetCB(0);
context->UpdateCB(cb, &data);
context->BindCB(0, cb);
// Downscale motion vectors texture down to 1/2 (with max velocity calculation 2x2 kernel)
auto rtDesc = GPUTextureDescription::New2D(motionVectorsWidth / 2, motionVectorsHeight / 2, _motionVectorsFormat);
const auto vMaxBuffer2 = RenderTargetPool::Get(rtDesc);
context->SetRenderTarget(vMaxBuffer2->View());
context->SetViewportAndScissors((float)rtDesc.Width, (float)rtDesc.Height);
context->BindSR(0, motionVectors->View());
context->SetState(_psTileMax);
context->DrawFullscreenTriangle();
// Downscale motion vectors texture down to 1/4 (with max velocity calculation 2x2 kernel)
rtDesc.Width /= 2;
rtDesc.Height /= 2;
const auto vMaxBuffer4 = RenderTargetPool::Get(rtDesc);
context->ResetRenderTarget();
context->SetRenderTarget(vMaxBuffer4->View());
context->SetViewportAndScissors((float)rtDesc.Width, (float)rtDesc.Height);
context->BindSR(0, vMaxBuffer2->View());
data.Input0SizeInv = Vector2(1.0f / (float)vMaxBuffer2->Width(), 1.0f / (float)vMaxBuffer2->Height());
context->UpdateCB(cb, &data);
context->SetState(_psTileMax);
context->DrawFullscreenTriangle();
RenderTargetPool::Release(vMaxBuffer2);
// Downscale motion vectors texture down to 1/8 (with max velocity calculation 2x2 kernel)
rtDesc.Width /= 2;
rtDesc.Height /= 2;
const auto vMaxBuffer8 = RenderTargetPool::Get(rtDesc);
context->ResetRenderTarget();
context->SetRenderTarget(vMaxBuffer8->View());
context->SetViewportAndScissors((float)rtDesc.Width, (float)rtDesc.Height);
context->BindSR(0, vMaxBuffer4->View());
data.Input0SizeInv = Vector2(1.0f / (float)vMaxBuffer4->Width(), 1.0f / (float)vMaxBuffer4->Height());
context->UpdateCB(cb, &data);
context->SetState(_psTileMax);
context->DrawFullscreenTriangle();
RenderTargetPool::Release(vMaxBuffer4);
// Downscale motion vectors texture down to one texel per tile of tileSize x tileSize pixels (with max velocity calculation NxN kernel)
rtDesc.Width = motionVectorsWidth / tileSize;
rtDesc.Height = motionVectorsHeight / tileSize;
auto vMaxBuffer = RenderTargetPool::Get(rtDesc);
context->ResetRenderTarget();
context->SetRenderTarget(vMaxBuffer->View());
context->SetViewportAndScissors((float)rtDesc.Width, (float)rtDesc.Height);
context->BindSR(0, vMaxBuffer8->View());
data.Input0SizeInv = Vector2(1.0f / (float)vMaxBuffer8->Width(), 1.0f / (float)vMaxBuffer8->Height());
context->UpdateCB(cb, &data);
context->SetState(_psTileMaxVariable);
context->DrawFullscreenTriangle();
RenderTargetPool::Release(vMaxBuffer8);
// Extract maximum velocities for the tiles based on their neighbors
context->ResetRenderTarget();
auto vMaxNeighborBuffer = RenderTargetPool::Get(rtDesc);
context->SetRenderTarget(vMaxNeighborBuffer->View());
context->BindSR(0, vMaxBuffer->View());
context->SetState(_psNeighborMax);
context->DrawFullscreenTriangle();
RenderTargetPool::Release(vMaxBuffer);
// Render motion blur
context->ResetRenderTarget();
context->SetRenderTarget(*output);
context->SetViewportAndScissors((float)screenWidth, (float)screenHeight);
context->BindSR(0, input->View());
context->BindSR(1, motionVectors->View());
context->BindSR(2, vMaxNeighborBuffer->View());
context->BindSR(3, renderContext.Buffers->DepthBuffer->View());
data.Input0SizeInv = Vector2(1.0f / (float)input->Width(), 1.0f / (float)input->Height());
data.Input2SizeInv = Vector2(1.0f / (float)renderContext.Buffers->DepthBuffer->Width(), 1.0f / (float)renderContext.Buffers->DepthBuffer->Height());
context->UpdateCB(cb, &data);
context->SetState(_psMotionBlur);
context->DrawFullscreenTriangle();
// Cleanup
RenderTargetPool::Release(vMaxNeighborBuffer);
context->ResetSR();
context->ResetRenderTarget();
Swap(output, input);
}

View File

@@ -12,9 +12,13 @@ class MotionBlurPass : public RendererPass<MotionBlurPass>
private:
PixelFormat _motionVectorsFormat;
PixelFormat _velocityFormat;
AssetReference<Shader> _shader;
GPUPipelineState* _psCameraMotionVectors = nullptr;
GPUPipelineState* _psMotionVectorsDebug = nullptr;
GPUPipelineState* _psTileMax = nullptr;
GPUPipelineState* _psTileMaxVariable = nullptr;
GPUPipelineState* _psNeighborMax = nullptr;
GPUPipelineState* _psMotionBlur = nullptr;
public:
@@ -52,6 +56,11 @@ private:
void OnShaderReloading(Asset* obj)
{
_psCameraMotionVectors->ReleaseGPU();
_psMotionVectorsDebug->ReleaseGPU();
_psTileMax->ReleaseGPU();
_psTileMaxVariable->ReleaseGPU();
_psNeighborMax->ReleaseGPU();
_psMotionBlur->ReleaseGPU();
invalidateResources();
}
#endif

View File

@@ -485,8 +485,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext)
context->ResetRenderTarget();
context->SetRenderTarget(task->GetOutputView());
context->SetViewportAndScissors((float)renderContext.Buffers->GetWidth(), (float)renderContext.Buffers->GetHeight());
context->Clear(frameBuffer->View(), Color::Black);
//MotionBlurPass::Instance()->RenderDebug(renderContext, frameBuffer->View());
MotionBlurPass::Instance()->RenderDebug(renderContext, frameBuffer->View());
return;
}

View File

@@ -5,15 +5,22 @@
#include "./Flax/Common.hlsl"
#include "./Flax/GBuffer.hlsl"
// Motion blur implementation based on:
// Jimenez, 2014, http://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
// Chapman, 2013, http://john-chapman-graphics.blogspot.com/2013/01/per-object-motion-blur.html
// McGuire et al., 2012, "A reconstruction filter for plausible motion blur"
META_CB_BEGIN(0, Data)
GBufferData GBuffer;
// Camera Motion Vectors
float4x4 CurrentVP;
float4x4 PreviousVP;
float4 TemporalAAJitter;
float VelocityScale;
float Dummy0;
int MaxBlurSamples;
uint VariableTileLoopCount;
float2 Input0SizeInv;
float2 Input2SizeInv;
META_CB_END
DECLARE_GBUFFERDATA_ACCESS(GBuffer)
@@ -21,6 +28,7 @@ DECLARE_GBUFFERDATA_ACCESS(GBuffer)
Texture2D Input0 : register(t0);
Texture2D Input1 : register(t1);
Texture2D Input2 : register(t2);
Texture2D Input3 : register(t3);
// Pixel shader for camera motion vectors
META_PS(true, FEATURE_LEVEL_ES2)
@@ -48,3 +56,202 @@ float4 PS_CameraMotionVectors(Quad_VS2PS input) : SV_Target
return float4(vPosCur - vPosPrev, 0, 1);
}
// Calculates the color for the motion vector debug view
float4 MotionVectorToColor(float2 v)
{
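// Velocity direction maps to hue (angle around the color wheel), velocity magnitude to the overlay opacity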
float angle = atan2(v.y, v.x);
float hue = (angle * (1.0f / PI) + 1.0f) * 0.5f;
return saturate(float4(abs(hue * 6.0f - 3.0f) - 1.0f, 2.0f - abs(hue * 6.0f - 4.0f), 2.0f - abs(hue * 6.0f - 2.0f), length(v)));
}
// Pixel shader for motion vectors debug view
META_PS(true, FEATURE_LEVEL_ES2)
float4 PS_MotionVectorsDebug(Quad_VS2PS input) : SV_Target
{
float4 c = SAMPLE_RT(Input0, input.TexCoord);
float2 v = SAMPLE_RT(Input1, input.TexCoord).xy * 20.0f;
float4 vC = MotionVectorToColor(v);
return float4(lerp(c.rgb, vC.rgb, vC.a * 0.6f), c.a);
}
// Returns the longer velocity vector
float2 maxV(float2 a, float2 b)
{
// Compare squared lengths to avoid a square root
return dot(a, a) > dot(b, b) ? a : b;
}
// Pixel shader for motion vectors downscale with maximum velocity extraction (2x2 kernel)
META_PS(true, FEATURE_LEVEL_ES2)
float4 PS_TileMax(Quad_VS2PS input) : SV_Target
{
// Reference: [McGuire 2012] (2.3 Filter Passes)
float4 offset = Input0SizeInv.xyxy * float4(-1, -1, 1, 1);
float2 v1 = SAMPLE_RT(Input0, input.TexCoord + offset.xy).xy;
float2 v2 = SAMPLE_RT(Input0, input.TexCoord + offset.xw).xy;
float2 v3 = SAMPLE_RT(Input0, input.TexCoord + offset.zy).xy;
float2 v4 = SAMPLE_RT(Input0, input.TexCoord + offset.zw).xy;
return float4(maxV(maxV(maxV(v1, v2), v3), v4), 0, 0);
}
// Pixel shader for motion vectors downscale with maximum velocity extraction (NxN kernel)
META_PS(true, FEATURE_LEVEL_ES2)
float4 PS_TileMaxVariable(Quad_VS2PS input) : SV_Target
{
// Reference: [McGuire 2012] (2.3 Filter Passes)
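// Input0 is the 1/8-downscaled velocity buffer, so VariableTileLoopCount = tileSize / 8 taps per axis
// cover one full tileSize x tileSize block of motion vector pixels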
float2 result = float2(0, 0);
LOOP
for (uint x = 0; x < VariableTileLoopCount; x++)
{
LOOP
for (uint y = 0; y < VariableTileLoopCount; y++)
{
float2 v = SAMPLE_RT(Input0, input.TexCoord + Input0SizeInv * float2(x, y)).xy;
result = maxV(result, v);
}
}
return float4(result, 0, 0);
}
// Pixel shader that extracts the maximum velocity among each tile and its neighbors (3x3 kernel)
META_PS(true, FEATURE_LEVEL_ES2)
float4 PS_NeighborMax(Quad_VS2PS input) : SV_Target
{
// Reference: [McGuire 2012] (2.3 Filter Passes)
float2 result = float2(0, 0);
UNROLL
for (int x = -1; x <= 1; x++)
{
UNROLL
for (int y = -1; y <= 1; y++)
{
float2 v = SAMPLE_RT(Input0, input.TexCoord + Input0SizeInv * float2(x, y)).xy;
result = maxV(result, v);
}
}
return float4(result, 0, 0);
}
float2 ClampVelocity(float2 v)
{
// Prevent excessively large blur across the screen
float velocityLimit = 0.2f;
return clamp(v * VelocityScale, -velocityLimit, velocityLimit);
}
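// The sample weighting below follows [Jimenez, 2014]:
// - DepthCmp softly classifies a sample as nearer or farther than the center pixel (one component each)
// - SpreadCmp checks whether the center's and the sample's velocities actually reach across the tap offset
// - SampleWeight combines both so a tap only contributes when it can plausibly blur over the center pixel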
// [Jimenez, 2014]
float2 DepthCmp(float centerDepth, float sampleDepth, float depthScale)
{
return saturate(0.5f + float2(depthScale, -depthScale) * (sampleDepth - centerDepth));
}
// [Jimenez, 2014]
float2 SpreadCmp(float offsetLen, float2 spreadLen, float pixelToSampleUnitsScale)
{
return saturate(pixelToSampleUnitsScale * spreadLen - offsetLen + 1.0f);
//return saturate(pixelToSampleUnitsScale * spreadLen - max(offsetLen - 1.0f, 0));
}
// [Jimenez, 2014]
float SampleWeight(float centerDepth, float sampleDepth, float offsetLen, float centerSpreadLen, float sampleSpreadLen, float pixelToSampleUnitsScale, float depthScale)
{
float2 depthCmp = DepthCmp(centerDepth, sampleDepth, depthScale);
float2 spreadCmp = SpreadCmp(offsetLen, float2(centerSpreadLen, sampleSpreadLen), pixelToSampleUnitsScale);
return dot(depthCmp, spreadCmp);
}
// [Jimenez, 2014]
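// Interleaved gradient noise: a cheap screen-space dither that decorrelates tap offsets between neighboring pixels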
float FullscreenGradientNoise(float2 uv)
{
uv = floor(uv * GBuffer.ScreenSize.xy);
float f = dot(float2(0.06711056f, 0.00583715f), uv);
return frac(52.9829189f * frac(f));
}
float2 NeighborMaxJitter(float2 uv)
{
// Reduce the visibility of the max-velocity tiles by applying jittered noise to the UVs
float rx, ry;
float noise = FullscreenGradientNoise(uv + float2(2.0f, 0.0f)) * (PI * 2);
sincos(noise, ry, rx);
return float2(rx, ry) * Input2SizeInv * 0.25f;
}
// Pixel shader for motion blur rendering
META_PS(true, FEATURE_LEVEL_ES2)
float4 PS_MotionBlur(Quad_VS2PS input) : SV_Target
{
// Reference: [McGuire 2012, 2013], [Jimenez, 2014]
// Sample pixel color
float4 pixelColor = SAMPLE_RT(Input0, input.TexCoord);
// Sample largest velocity in the neighborhood
float2 neighborhoodVelocity = Input2.SampleLevel(SamplerLinearClamp, input.TexCoord + NeighborMaxJitter(input.TexCoord), 0).xy;
neighborhoodVelocity = ClampVelocity(neighborhoodVelocity);
float neighborhoodVelocityLength = length(neighborhoodVelocity);
int neighborhoodVelocityPixelsLength = (int)length(neighborhoodVelocity * GBuffer.ScreenSize.xy);
if (neighborhoodVelocityPixelsLength <= 1)
return pixelColor;
// Sample pixel velocity
float2 pixelVelocity = Input1.SampleLevel(SamplerLinearClamp, input.TexCoord, 0).xy;
pixelVelocity = ClampVelocity(pixelVelocity);
float pixelVelocityLength = length(pixelVelocity);
// Sample pixel depth
GBufferData gBufferData = GetGBufferData();
float pixelDepth = LinearizeZ(gBufferData, SAMPLE_RT(Input3, input.TexCoord).x);
// Calculate noise to make the result look better with fewer samples per pixel
float noise = FullscreenGradientNoise(input.TexCoord);
// Accumulate color using evenly placed filter taps along the maximum neighborhood velocity direction
float2 direction = neighborhoodVelocity;
//float2 direction = pixelVelocity;
uint sampleCount = MaxBlurSamples;
float pixelToSampleUnitsScale = sampleCount * rsqrt(dot(direction, direction));
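// pixelToSampleUnitsScale converts a velocity length (in UV units) into sample-index units,
// so SpreadCmp can compare it directly against the tap offsets computed below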
float4 sum = 0;
LOOP
for (uint i = 0; i < sampleCount; i++)
{
float2 samplePos = float2(noise - 0.5f, 0.5f - noise) + ((float)i + 0.5f);
float2 samplePosNormalized = samplePos / sampleCount;
float2 sampleUV1 = input.TexCoord + samplePosNormalized.x * direction;
float2 sampleUV2 = input.TexCoord - samplePosNormalized.y * direction;
// TODO: use cheaper version if neighborhood min and max are almost equal (then calc min value too)
#if 0
float weight1 = 1;
float weight2 = 1;
#else
float depth1 = LinearizeZ(gBufferData, SAMPLE_RT(Input3, sampleUV1).x);
float2 velocity1 = Input1.SampleLevel(SamplerPointClamp, sampleUV1, 0).xy;
velocity1 = ClampVelocity(velocity1);
float velocityLength1 = length(velocity1);
float depth2 = LinearizeZ(gBufferData, SAMPLE_RT(Input3, sampleUV2).x);
float2 velocity2 = Input1.SampleLevel(SamplerPointClamp, sampleUV2, 0).xy;
velocity2 = ClampVelocity(velocity2);
float velocityLength2 = length(velocity2);
float weight1 = SampleWeight(pixelDepth, depth1, samplePos.x, pixelVelocityLength, velocityLength1, pixelToSampleUnitsScale, 1);
float weight2 = SampleWeight(pixelDepth, depth2, samplePos.x, pixelVelocityLength, velocityLength2, pixelToSampleUnitsScale, 1);
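// Mirror trick from [Jimenez, 2014]: share the weights between the two mirrored taps depending on which
// sample is in front and which moves faster, to reduce artifacts at the silhouettes of moving objects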
bool2 mirror = bool2(depth1 > depth2, velocityLength2 > velocityLength1);
weight1 = all(mirror) ? weight2 : weight1;
weight2 = any(mirror) ? weight2 : weight1;
#endif
sum += weight1 * float4(SAMPLE_RT(Input0, sampleUV1).rgb, 1);
sum += weight2 * float4(SAMPLE_RT(Input0, sampleUV2).rgb, 1);
}
// Normalize result
sum *= 0.5f / sampleCount;
// Blend result with background
return float4(sum.rgb + (1 - sum.w) * pixelColor.rgb, pixelColor.a);
}