Fix the Depth Of Field to be consistent no matter the resolution

2022-08-23 17:26:59 +02:00
parent d50908e10b
commit 57cb15486e
3 changed files with 84 additions and 234 deletions
--- a/Source/Engine/Renderer/DepthOfFieldPass.cpp
+++ b/Source/Engine/Renderer/DepthOfFieldPass.cpp
@@ -15,12 +15,32 @@
 #include "Engine/Graphics/Shaders/GPUShader.h"

 // This must match hlsl defines
-#define DOF_MAX_SAMPLE_RADIUS 10
 #define DOF_GRID_SIZE 450
-#define DOF_APRON_SIZE DOF_MAX_SAMPLE_RADIUS
-#define DOF_THREAD_GROUP_SIZE (DOF_GRID_SIZE + (DOF_APRON_SIZE * 2))
 #define DOF_DEPTH_BLUR_FORMAT PixelFormat::R16G16_Float

+PACK_STRUCT(struct Data {
+    Float2 ProjectionAB;
+    float BokehDepthCullThreshold;
+    float BokehDepthCutoff;
+
+    Float4 DOFDepths;
+
+    float MaxBokehSize;
+    float BokehBrightnessThreshold;
+    float BokehBlurThreshold;
+    float BokehFalloff;
+
+    Float2 BokehTargetSize;
+    Float2 DOFTargetSize;
+
+    Float2 InputSize;
+    float DepthLimit;
+    float BlurStrength;
+
+    Float3 Dummy;
+    float BokehBrightness;
+    });
+
 DepthOfFieldPass::DepthOfFieldPass()
 {
 }
@@ -36,7 +56,7 @@ bool DepthOfFieldPass::Init()
    // (in future we should support it or faster solution using pixel shaders)
    auto& limits = GPUDevice::Instance->Limits;
    _platformSupportsDoF = limits.HasCompute;
-    _platformSupportsBokeh = _platformSupportsDoF && limits.HasGeometryShaders && limits.HasDrawIndirect && limits.HasAppendConsumeBuffers;
+    _platformSupportsBokeh = false && _platformSupportsDoF && limits.HasGeometryShaders && limits.HasDrawIndirect && limits.HasAppendConsumeBuffers;

    // Create pipeline states
    if (_platformSupportsDoF)
@@ -147,7 +167,7 @@ bool DepthOfFieldPass::setupResources()
            _bokehBuffer = GPUDevice::Instance->CreateBuffer(TEXT("Bokeh Buffer"));
        if (_bokehIndirectArgsBuffer == nullptr)
            _bokehIndirectArgsBuffer = GPUDevice::Instance->CreateBuffer(TEXT("Bokeh Indirect Args Buffer"));
-        GPUDrawIndirectArgs indirectArgsBufferInitData{0, 1, 0, 0};
+        GPUDrawIndirectArgs indirectArgsBufferInitData{ 0, 1, 0, 0 };
        if (_bokehIndirectArgsBuffer->Init(GPUBufferDescription::Argument(&indirectArgsBufferInitData, sizeof(indirectArgsBufferInitData))))
            return true;
    }
@@ -182,22 +202,16 @@ GPUTexture* DepthOfFieldPass::getDofBokehShape(DepthOfFieldSettings& dofSettings

 GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* input)
 {
-    // Ensure to have valid data
    if (!_platformSupportsDoF || checkIfSkipPass())
        return nullptr;
-
-    // Cache data
    auto device = GPUDevice::Instance;
    auto context = device->GetMainContext();
    const auto depthBuffer = renderContext.Buffers->DepthBuffer;
    const auto shader = _shader->GetShader();
    DepthOfFieldSettings& dofSettings = renderContext.List->Settings.DepthOfField;
    const bool useDoF = _platformSupportsDoF && (renderContext.View.Flags & ViewFlags::DepthOfField) != 0 && dofSettings.Enabled;
-
-    // Skip if disabled
    if (!useDoF)
        return nullptr;
-
    PROFILE_GPU_CPU("Depth Of Field");

    context->ResetSR();
@@ -207,10 +221,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
    const int32 cocResolutionDivider = 1;
    const int32 dofResolutionDivider = 1;
    const int32 bokehResolutionDivider = 1;
-
    // TODO: in low-res DoF maybe use shared HalfResDepth?
-
-    // Cache viewport sizes
    const int32 w1 = input->Width();
    const int32 h1 = input->Height();
    const int32 cocWidth = w1 / cocResolutionDivider;
@@ -219,13 +230,20 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
    const int32 dofHeight = h1 / dofResolutionDivider;
    const int32 bokehTargetWidth = w1 / bokehResolutionDivider;
    const int32 bokehTargetHeight = h1 / bokehResolutionDivider;
+    float textureSizeScale = (float)Math::Max(w1, h1) * (1.0f / 1920.0f); // Keep the DOF blur the same no matter what the image resolution is (reference: Full HD resolution)
+    int32 blurScalePermutationOffset = 0;
+    const float sampleRadius[] = { 1.0f, 3.6f }; // This has to match CS_DepthOfField permutations
+    if (textureSizeScale > sampleRadius[0])
+    {
+        blurScalePermutationOffset += 2;
+        textureSizeScale /= sampleRadius[1];
+    }

    // TODO: maybe we could render particles (whole transparency in general) to the depth buffer to apply DoF on them as well?

    // TODO: reduce amount of used temporary render targets, we could plan rendering steps in more static way and hardcode some logic to make it run faster with less memory usage (less bandwidth)

    // Setup constant buffer
-    Data cbData;
    {
        float nearPlane = renderContext.View.Near;
        float farPlane = renderContext.View.Far;
@@ -237,6 +255,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
        float farFocusEnd = Math::Min(farPlane - 5.0f, farFocusStart + dofSettings.FarTransitionRange);
        float depthLimitMax = farPlane - 10.0f;

+        Data cbData;
        cbData.DOFDepths.X = nearFocusStart;
        cbData.DOFDepths.Y = nearFocusEnd;
        cbData.DOFDepths.Z = farFocusStart;
@@ -247,7 +266,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
        cbData.BokehFalloff = dofSettings.BokehFalloff;
        cbData.BokehDepthCutoff = dofSettings.BokehDepthCutoff;
        cbData.DepthLimit = dofSettings.DepthLimit > ZeroTolerance ? Math::Min(dofSettings.DepthLimit, depthLimitMax) : depthLimitMax;
-        cbData.BlurStrength = Math::Saturate(dofSettings.BlurStrength);
+        cbData.BlurStrength = Math::Saturate(dofSettings.BlurStrength) * Math::Min(textureSizeScale, 1.0f);
        cbData.BokehBrightness = dofSettings.BokehBrightness;

        cbData.DOFTargetSize.X = static_cast<float>(dofWidth); // TODO: check if this param is binded right. maybe use w1 or bokehTargetWidth?
@@ -260,12 +279,11 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
        // TODO: use projection matrix instead of this far and near stuff?
        cbData.ProjectionAB.X = farPlane / (farPlane - nearPlane);
        cbData.ProjectionAB.Y = (-farPlane * nearPlane) / (farPlane - nearPlane);
-    }

-    // Bind constant buffer
-    auto cb = shader->GetCB(0);
-    context->UpdateCB(cb, &cbData);
-    context->BindCB(0, cb);
+        auto cb = shader->GetCB(0);
+        context->UpdateCB(cb, &cbData);
+        context->BindCB(0, cb);
+    }

    // Depth/blur generation pass
    auto tempDesc = GPUTextureDescription::New2D(cocWidth, cocHeight, DOF_DEPTH_BLUR_FORMAT, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::UnorderedAccess);
@@ -277,55 +295,13 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
    context->DrawFullscreenTriangle();
    context->ResetRenderTarget();

-    // CoC Spread pass
-    // todo: add config for CoC spread in postFx settings?
-    // TODO: test it out
-    bool isCoCSpreadEnabled = false;
-    if (isCoCSpreadEnabled)
-    {
-        context->ResetRenderTarget();
-        context->ResetSR();
-        context->ResetUA();
-        context->FlushState();
-
-        tempDesc = GPUTextureDescription::New2D(cocWidth, cocHeight, DOF_DEPTH_BLUR_FORMAT, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::UnorderedAccess);
-        GPUTexture* tempTarget = RenderTargetPool::Get(tempDesc);
-
-        // Horizontal pass
-        context->BindSR(0, depthBlurTarget);
-        //
-        context->BindUA(0, tempTarget->View());
-        //
-        uint32 groupCountX = (cocWidth / DOF_GRID_SIZE) + ((cocWidth % DOF_GRID_SIZE) > 0 ? 1 : 0);
-        uint32 groupCountY = cocHeight;
-        //
-        context->Dispatch(shader->GetCS("CS_CoCSpread", 0), groupCountX, groupCountY, 1);
-
-        // Vertical pass
-        context->BindSR(0, tempTarget);
-        //
-        context->BindUA(0, depthBlurTarget->View());
-        //
-        groupCountX = cocWidth;
-        groupCountY = (cocHeight / DOF_GRID_SIZE) + (cocHeight % DOF_GRID_SIZE) > 0 ? 1 : 0;
-        //
-        context->Dispatch(shader->GetCS("CS_CoCSpread", 1), groupCountX, groupCountY, 1);
-
-        // Cleanup
-        context->ResetRenderTarget();
-        context->UnBindSR(0);
-        context->UnBindUA(0);
-        context->FlushState();
-        RenderTargetPool::Release(tempTarget);
-    }
-
    // Peek temporary render target for dof pass
    auto dofFormat = renderContext.Buffers->GetOutputFormat();
    tempDesc = GPUTextureDescription::New2D(dofWidth, dofHeight, dofFormat);
    GPUTexture* dofInput = RenderTargetPool::Get(tempDesc);

    // Do the bokeh point generation, or just do a copy if disabled
-    bool isBokehGenerationEnabled = dofSettings.BokehEnabled && _platformSupportsBokeh && dofSettings.BokehBrightness > 0.0f;
+    bool isBokehGenerationEnabled = dofSettings.BokehEnabled && _platformSupportsBokeh && dofSettings.BokehBrightness > 0.0f && dofSettings.BokehSize > 0.0f;
    if (isBokehGenerationEnabled)
    {
        // Update bokeh buffer to have enough size for points
@@ -388,7 +364,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
        uint32 groupCountX = (dofWidth / DOF_GRID_SIZE) + ((dofWidth % DOF_GRID_SIZE) > 0 ? 1 : 0);
        uint32 groupCountY = dofHeight;
        //
-        context->Dispatch(shader->GetCS("CS_DepthOfField", 0), groupCountX, groupCountY, 1);
+        context->Dispatch(shader->GetCS("CS_DepthOfField", blurScalePermutationOffset + 0), groupCountX, groupCountY, 1);

        // Cleanup
        context->ResetRenderTarget();
@@ -405,7 +381,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
        groupCountY = (dofHeight / DOF_GRID_SIZE) + ((dofHeight % DOF_GRID_SIZE) > 0 ? 1 : 0);
        //
        // TODO: cache Compute Shaders
-        context->Dispatch(shader->GetCS("CS_DepthOfField", 1), groupCountX, groupCountY, 1);
+        context->Dispatch(shader->GetCS("CS_DepthOfField", blurScalePermutationOffset + 1), groupCountX, groupCountY, 1);
        context->ResetRenderTarget();

        // Cleanup
--- a/Source/Engine/Renderer/DepthOfFieldPass.h
+++ b/Source/Engine/Renderer/DepthOfFieldPass.h
@@ -11,30 +11,6 @@
 class DepthOfFieldPass : public RendererPass<DepthOfFieldPass>
 {
 private:
-
-    PACK_STRUCT(struct Data {
-        Float2 ProjectionAB;
-        float BokehDepthCullThreshold;
-        float BokehDepthCutoff;
-
-        Float4 DOFDepths;
-
-        float MaxBokehSize;
-        float BokehBrightnessThreshold;
-        float BokehBlurThreshold;
-        float BokehFalloff;
-
-        Float2 BokehTargetSize;
-        Float2 DOFTargetSize;
-
-        Float2 InputSize;
-        float DepthLimit;
-        float BlurStrength;
-
-        Float3 Dummy;
-        float BokehBrightness;
-    });
-
    // Structure used for outputting bokeh points to an AppendStructuredBuffer
    struct BokehPoint
    {
@@ -59,11 +35,9 @@ private:
    AssetReference<Texture> _defaultBokehCross;

 public:
-
    DepthOfFieldPass();

 public:
-
    /// <summary>
    /// Perform Depth Of Field rendering for the input task
    /// </summary>
@@ -73,7 +47,6 @@ public:
    GPUTexture* Render(RenderContext& renderContext, GPUTexture* input);

 private:
-
    GPUTexture* getDofBokehShape(DepthOfFieldSettings& dofSettings);
 #if COMPILE_WITH_DEV_ENV
    void OnShaderReloading(Asset* obj)
@@ -87,14 +60,12 @@ private:
 #endif

 public:
-
    // [RendererPass]
    String ToString() const override;
    bool Init() override;
    void Dispose() override;

 protected:
-
    // [RendererPass]
    bool setupResources() override;
 };
--- a/Source/Shaders/DepthOfField.shader
+++ b/Source/Shaders/DepthOfField.shader
@@ -8,7 +8,9 @@
 #include "./Flax/Common.hlsl"

 // This must match C++ defines
+#ifndef DOF_MAX_SAMPLE_RADIUS
 #define DOF_MAX_SAMPLE_RADIUS 10
+#endif
 #define DOF_GRID_SIZE 450
 #define DOF_APRON_SIZE DOF_MAX_SAMPLE_RADIUS
 #define DOF_THREAD_GROUP_SIZE (DOF_GRID_SIZE + (DOF_APRON_SIZE * 2))
@@ -18,7 +20,6 @@
 #define USE_CS_LINEAR_SAMPLING 0

 META_CB_BEGIN(0, DofData)
-
 float2 ProjectionAB;
 float BokehDepthCullThreshold;
 float BokehDepthCutoff;
@@ -98,23 +99,16 @@ float LinearDepth(in float zBufferDepth)
 	return ProjectionAB.y / (zBufferDepth - ProjectionAB.x);
 }

-// Computes the depth of field blur factor
-float BlurFactor(float depth)
-{
-    float f0 = 1.0f - saturate((depth - DOFDepths.x) / max(DOFDepths.y - DOFDepths.x, 0.01f));
-    float f1 = saturate((depth - DOFDepths.z) / max(DOFDepths.w - DOFDepths.z, 0.01f));
-    float blur = saturate(f0 + f1);
-	float fade = 1 - saturate((depth - DepthLimit) * 100);
-    return blur * fade * BlurStrength;
-}
-
 // Depth of Field depth blur generation (outputs linear depth + blur factor to R16G16 target)
 META_PS(true, FEATURE_LEVEL_ES2)
 float4 PS_DofDepthBlurGeneration(Quad_VS2PS input) : SV_Target
 {
-	float depth = LinearDepth(Input0.SampleLevel(SamplerPointClamp, input.TexCoord, 0).r);
-	float blur = BlurFactor(depth);
-	return float4(depth, blur, 1.0f, 1.0f);
+    float depth = LinearDepth(Input0.SampleLevel(SamplerPointClamp, input.TexCoord, 0).r);
+    float f0 = 1.0f - saturate((depth - DOFDepths.x) / max(DOFDepths.y - DOFDepths.x, 0.01f));
+    float f1 = saturate((depth - DOFDepths.z) / max(DOFDepths.w - DOFDepths.z, 0.01f));
+    float fade = 1 - saturate((depth - DepthLimit) * 100);
+    float blur = saturate(f0 + f1) * fade * BlurStrength;
+    return float4(depth, blur, 1.0f, 1.0f);
 }

 #if defined(_CS_DepthOfField)
@@ -145,24 +139,25 @@ groupshared DOFSample Samples[DOF_THREAD_GROUP_SIZE];
 META_CS(true, FEATURE_LEVEL_SM5)
 META_PERMUTATION_1(HORIZONTAL=1)
 META_PERMUTATION_1(HORIZONTAL=0)
+META_PERMUTATION_2(HORIZONTAL=1,DOF_MAX_SAMPLE_RADIUS=36)
+META_PERMUTATION_2(HORIZONTAL=0,DOF_MAX_SAMPLE_RADIUS=36)
 [numthreads(_CS_DepthOfField_X, _CS_DepthOfField_Y, 1)]
 void CS_DepthOfField(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID)
 {
-	// These positions are relative to the "grid", AKA the horizontal group of pixels that this thread group is writing to
+	// These positions are relative to the "grid", AKA the horizontal/vertical group of pixels that this thread group is writing to
 	const int gridStart = groupID.DOF_COMP * DOF_GRID_SIZE;
 	const int grid = groupThreadID.DOF_COMP - DOF_APRON_SIZE;

 	// These positions are relative to the pixel coordinates
 #if HORIZONTAL
-	const uint2 samplePos = uint2(max(gridStart + grid, 0), groupID.y);
+	const uint2 samplePos = uint2(gridStart + grid, groupID.y);
 #else
-	const uint2 samplePos = uint2(groupID.x, max(gridStart + grid, 0));
+	const uint2 samplePos = uint2(groupID.x, gridStart + grid);
 #endif

+	// Sample the textures
 	uint2 textureSize;
 	Input0.GetDimensions(textureSize.x, textureSize.y);
-
-	// Sample the textures
 #if USE_CS_HALF_PIXEL_OFFSET
 	float2 sampleCoord = saturate(((float2)samplePos + 0.5f) / float2(textureSize));
 #else
@@ -180,141 +175,49 @@ void CS_DepthOfField(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupT
 	float cocSize = blur * DOF_MAX_SAMPLE_RADIUS;

 	// Store in shared memory
-	Samples[groupThreadID.DOF_COMP].Color = color.rgb;
-	Samples[groupThreadID.DOF_COMP].Depth = depth;
-	Samples[groupThreadID.DOF_COMP].Blur = blur;
+    {
+		DOFSample tap;
+	    tap.Color = color.rgb;
+	    tap.Depth = depth;
+	    tap.Blur = blur;
+	    Samples[groupThreadID.DOF_COMP] = tap;
+    }
 	GroupMemoryBarrierWithGroupSync();

 	// Don't continue for threads in the apron, and threads outside the render target size
-	if (grid >= 0 && grid < DOF_GRID_SIZE && samplePos.DOF_COMP < textureSize.DOF_COMP)
+	if (grid >= 0 && grid < DOF_GRID_SIZE && samplePos.DOF_COMP >= 0 && samplePos.DOF_COMP < textureSize.DOF_COMP)
 	{
 		BRANCH
 		if (cocSize > 0.0f)
 		{
-			float3 outputColor = 0.0f;
-			float totalContribution = 0.0f;
+            float3 outputColor = 0.0f;
+            float totalContribution = 0.0f;

-			// Gather sample taps inside the radius
-			for (int i = -DOF_MAX_SAMPLE_RADIUS; i <= DOF_MAX_SAMPLE_RADIUS; i++)
-			{
-				// Grab the sample from shared memory
-				uint groupTap = groupThreadID.DOF_COMP + i;
-				DOFSample tap = Samples[groupTap];
+            // Gather sample taps inside the radius
+            float depthTest = depth + 100;
+            for (int i = -DOF_MAX_SAMPLE_RADIUS; i <= DOF_MAX_SAMPLE_RADIUS; i++)
+            {
+                // Grab the sample from shared memory
+                DOFSample tap = Samples[groupThreadID.DOF_COMP + i];

-				// Reject the sample if it's outside the CoC radius
-				float cocWeight = saturate(cocSize + 1.0f - abs(float(i)));
+                // Reject the sample if it's outside the CoC radius
+                float cocWeight = saturate(cocSize + 1.0f - abs(float(i)));

-				// Reject foreground samples, unless they're blurred as well
-				float depthWeight = tap.Depth >= depth;
-				float blurWeight = tap.Blur;
-				float tapWeight = cocWeight * saturate(depthWeight + blurWeight);
+                // Reject foreground samples, unless they're blurred as well
+                float depthWeight = tap.Depth > depthTest;
+                float blurWeight = tap.Blur;
+                float tapWeight = cocWeight * saturate(depthWeight + blurWeight);

-				outputColor += tap.Color * tapWeight;
-				totalContribution += tapWeight;
-			}
+                outputColor += tap.Color * tapWeight;
+                totalContribution += tapWeight;
+            }

-			// Write out the result
-			outputColor /= totalContribution;
-			OutputTexture[samplePos] = float4(max(outputColor, 0), color.a);
-		}
-		else
-		{
-			OutputTexture[samplePos] = color;
-		}
-	}
-}
-
-#elif defined(_CS_CoCSpread)
-
-struct CoCSample
-{
-	float Depth;
-	float Blur;
-};
-
-RWTexture2D<float2> OutputTexture : register(u0);
-
-groupshared CoCSample Samples[DOF_THREAD_GROUP_SIZE];
-
-#if HORIZONTAL
-#define _CS_CoCSpread_X DOF_THREAD_GROUP_SIZE
-#define _CS_CoCSpread_Y 1
-#define DOF_COMP x
-#else
-#define _CS_CoCSpread_X 1
-#define _CS_CoCSpread_Y DOF_THREAD_GROUP_SIZE
-#define DOF_COMP y
-#endif
-
-// Performs the CoC spread
-META_CS(true, FEATURE_LEVEL_SM5)
-META_PERMUTATION_1(HORIZONTAL=1)
-META_PERMUTATION_1(HORIZONTAL=0)
-[numthreads(_CS_CoCSpread_X, _CS_CoCSpread_Y, 1)]
-void CS_CoCSpread(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID)
-{
-	// These positions are relative to the "grid", AKA the horizontal group of pixels that this thread group is writing to
-	const int gridStart = groupID.DOF_COMP * DOF_GRID_SIZE;
-	const int grid = groupThreadID.DOF_COMP - DOF_APRON_SIZE;
-
-	// These positions are relative to the pixel coordinates
-#if HORIZONTAL
-	const uint2 samplePos = uint2(max(gridStart + grid, 0), groupID.y);
-#else
-	const uint2 samplePos = uint2(groupID.x, max(gridStart + grid, 0));
-#endif
-
-	uint2 textureSize;
-	Input0.GetDimensions(textureSize.x, textureSize.y);
-
-	// Sample the textures
-#if USE_CS_HALF_PIXEL_OFFSET
-	float2 sampleCoord = saturate(((float2)samplePos + 0.5f) / float2(textureSize));
-#else
-	float2 sampleCoord = saturate(samplePos / float2(textureSize));
-#endif
-#if USE_CS_LINEAR_SAMPLING
-	float2 depthBlur = Input0.SampleLevel(SamplerLinearClamp, sampleCoord, 0.0f).xy;
-#else
-	float2 depthBlur = Input0.SampleLevel(SamplerPointClamp, sampleCoord, 0.0f).xy;
-#endif
-	float depth = depthBlur.x;
-	float blur = depthBlur.y;
-	float cocSize = blur * DOF_MAX_SAMPLE_RADIUS;
-
-	// Store in shared memory
-	Samples[groupThreadID.DOF_COMP].Depth = depth;
-	Samples[groupThreadID.DOF_COMP].Blur = blur;
-	GroupMemoryBarrierWithGroupSync();
-
-	// Don't continue for threads in the apron, and threads outside the render target size
-	if (grid >= 0 && grid < DOF_GRID_SIZE && samplePos.DOF_COMP < textureSize.DOF_COMP)
-	{
-		float outputBlur = 0.0f;
-		float totalContribution = 0.0f;
-
-		// Gather sample taps inside the radius
-		for (int i = -DOF_MAX_SAMPLE_RADIUS; i <= DOF_MAX_SAMPLE_RADIUS; i++)
-		{
-			// Grab the sample from shared memory
-			uint groupTap = groupThreadID.DOF_COMP + i;
-			CoCSample tap = Samples[groupTap];
-
-			// Only accept samples if they're from the foreground, and have a higher blur amount
-			float depthWeight = tap.Depth <= depth;
-			float blurWeight = saturate(tap.Blur - blur);
-			float tapWeight = depthWeight * blurWeight;
-
-			// If it's the center tap, set the weight to 1 so and don't reject it
-			float centerWeight = i == 0 ? 1.0 : 0.0f;
-			tapWeight = saturate(tapWeight + centerWeight);
-
-			outputBlur += tap.Blur * tapWeight;
-			totalContribution += tapWeight;
+            outputColor /= totalContribution;
+            color.rgb = max(outputColor, 0);
 		}

 		// Write out the result
-		OutputTexture[samplePos] = float2(depth, outputBlur / totalContribution);
+		OutputTexture[samplePos] = color;
 	}
 }