Fix the Depth Of Field to be consistent no matter the resolution

This commit is contained in:
Wojtek Figat
2022-08-23 17:26:59 +02:00
parent d50908e10b
commit 57cb15486e
3 changed files with 84 additions and 234 deletions

View File

@@ -15,12 +15,32 @@
#include "Engine/Graphics/Shaders/GPUShader.h"
// This must match hlsl defines
// Maximum number of neighbouring taps the blur shader gathers on each side of a pixel.
// NOTE(review): the shader also compiles CS_DepthOfField permutations with DOF_MAX_SAMPLE_RADIUS=36; this value mirrors only the shader's default.
#define DOF_MAX_SAMPLE_RADIUS 10
// Number of pixels along the blur axis written by one compute thread group; Render() uses it to compute the dispatch group counts.
#define DOF_GRID_SIZE 450
// Extra threads on each side of the grid; in the shader they only load samples and skip the output step.
#define DOF_APRON_SIZE DOF_MAX_SAMPLE_RADIUS
// Total threads per group: the grid plus an apron on both sides.
#define DOF_THREAD_GROUP_SIZE (DOF_GRID_SIZE + (DOF_APRON_SIZE * 2))
// Pixel format of the intermediate depth/blur render target (the shader writes linear depth and the blur factor into it).
#define DOF_DEPTH_BLUR_FORMAT PixelFormat::R16G16_Float
// Constant buffer payload for the Depth Of Field shader.
// DepthOfFieldPass::Render fills one instance per frame and uploads it with UpdateCB to the shader's constant buffer 0,
// so the field order and packing must stay in sync with the DofData buffer declared in the shader.
PACK_STRUCT(struct Data {
// X = far / (far - near), Y = (-far * near) / (far - near); the shader uses these to convert z-buffer depth to linear depth.
Float2 ProjectionAB;
float BokehDepthCullThreshold;
// Copied from DepthOfFieldSettings::BokehDepthCutoff.
float BokehDepthCutoff;
// X/Y = near focus start/end, Z = far focus start; W is read by the shader as the end of the far range (presumably far focus end - confirm in Render).
Float4 DOFDepths;
float MaxBokehSize;
float BokehBrightnessThreshold;
float BokehBlurThreshold;
// Copied from DepthOfFieldSettings::BokehFalloff.
float BokehFalloff;
Float2 BokehTargetSize;
// Size of the DoF render target in pixels.
Float2 DOFTargetSize;
Float2 InputSize;
// Depth past which the blur is faded out; clamped in Render to stay in front of the far plane.
float DepthLimit;
// Overall blur multiplier; Render saturates the user setting and scales it by the resolution factor.
float BlurStrength;
// NOTE(review): presumably padding so that BokehBrightness lands where the shader expects it - confirm against DofData.
Float3 Dummy;
// Copied from DepthOfFieldSettings::BokehBrightness.
float BokehBrightness;
});
// Nothing to set up at construction time: pipeline states and GPU buffers are created later by Init() and setupResources().
DepthOfFieldPass::DepthOfFieldPass() = default;
@@ -36,7 +56,7 @@ bool DepthOfFieldPass::Init()
// (in future we should support it or faster solution using pixel shaders)
auto& limits = GPUDevice::Instance->Limits;
_platformSupportsDoF = limits.HasCompute;
_platformSupportsBokeh = _platformSupportsDoF && limits.HasGeometryShaders && limits.HasDrawIndirect && limits.HasAppendConsumeBuffers;
_platformSupportsBokeh = false && _platformSupportsDoF && limits.HasGeometryShaders && limits.HasDrawIndirect && limits.HasAppendConsumeBuffers;
// Create pipeline states
if (_platformSupportsDoF)
@@ -147,7 +167,7 @@ bool DepthOfFieldPass::setupResources()
_bokehBuffer = GPUDevice::Instance->CreateBuffer(TEXT("Bokeh Buffer"));
if (_bokehIndirectArgsBuffer == nullptr)
_bokehIndirectArgsBuffer = GPUDevice::Instance->CreateBuffer(TEXT("Bokeh Indirect Args Buffer"));
GPUDrawIndirectArgs indirectArgsBufferInitData{0, 1, 0, 0};
GPUDrawIndirectArgs indirectArgsBufferInitData{ 0, 1, 0, 0 };
if (_bokehIndirectArgsBuffer->Init(GPUBufferDescription::Argument(&indirectArgsBufferInitData, sizeof(indirectArgsBufferInitData))))
return true;
}
@@ -182,22 +202,16 @@ GPUTexture* DepthOfFieldPass::getDofBokehShape(DepthOfFieldSettings& dofSettings
GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* input)
{
// Ensure to have valid data
if (!_platformSupportsDoF || checkIfSkipPass())
return nullptr;
// Cache data
auto device = GPUDevice::Instance;
auto context = device->GetMainContext();
const auto depthBuffer = renderContext.Buffers->DepthBuffer;
const auto shader = _shader->GetShader();
DepthOfFieldSettings& dofSettings = renderContext.List->Settings.DepthOfField;
const bool useDoF = _platformSupportsDoF && (renderContext.View.Flags & ViewFlags::DepthOfField) != 0 && dofSettings.Enabled;
// Skip if disabled
if (!useDoF)
return nullptr;
PROFILE_GPU_CPU("Depth Of Field");
context->ResetSR();
@@ -207,10 +221,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
const int32 cocResolutionDivider = 1;
const int32 dofResolutionDivider = 1;
const int32 bokehResolutionDivider = 1;
// TODO: in low-res DoF maybe use shared HalfResDepth?
// Cache viewport sizes
const int32 w1 = input->Width();
const int32 h1 = input->Height();
const int32 cocWidth = w1 / cocResolutionDivider;
@@ -219,13 +230,20 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
const int32 dofHeight = h1 / dofResolutionDivider;
const int32 bokehTargetWidth = w1 / bokehResolutionDivider;
const int32 bokehTargetHeight = h1 / bokehResolutionDivider;
float textureSizeScale = (float)Math::Max(w1, h1) * (1.0f / 1920.0f); // Keep DOF blur the same no matter the image resolution is (reference FullHD res)
int32 blurScalePermutationOffset = 0;
const float sampleRadius[] = { 1.0f, 3.6f }; // This has to match CS_DepthOfField permutations
if (textureSizeScale > sampleRadius[0])
{
blurScalePermutationOffset += 2;
textureSizeScale /= sampleRadius[1];
}
// TODO: maybe we could render particles (whole transparency in general) to the depth buffer to apply DoF on them as well?
// TODO: reduce the number of temporary render targets used; we could plan the rendering steps more statically and hardcode some logic to make it run faster with less memory usage (less bandwidth)
// Setup constant buffer
Data cbData;
{
float nearPlane = renderContext.View.Near;
float farPlane = renderContext.View.Far;
@@ -237,6 +255,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
float farFocusEnd = Math::Min(farPlane - 5.0f, farFocusStart + dofSettings.FarTransitionRange);
float depthLimitMax = farPlane - 10.0f;
Data cbData;
cbData.DOFDepths.X = nearFocusStart;
cbData.DOFDepths.Y = nearFocusEnd;
cbData.DOFDepths.Z = farFocusStart;
@@ -247,7 +266,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
cbData.BokehFalloff = dofSettings.BokehFalloff;
cbData.BokehDepthCutoff = dofSettings.BokehDepthCutoff;
cbData.DepthLimit = dofSettings.DepthLimit > ZeroTolerance ? Math::Min(dofSettings.DepthLimit, depthLimitMax) : depthLimitMax;
cbData.BlurStrength = Math::Saturate(dofSettings.BlurStrength);
cbData.BlurStrength = Math::Saturate(dofSettings.BlurStrength) * Math::Min(textureSizeScale, 1.0f);
cbData.BokehBrightness = dofSettings.BokehBrightness;
cbData.DOFTargetSize.X = static_cast<float>(dofWidth); // TODO: check if this param is binded right. maybe use w1 or bokehTargetWidth?
@@ -260,12 +279,11 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
// TODO: use projection matrix instead of this far and near stuff?
cbData.ProjectionAB.X = farPlane / (farPlane - nearPlane);
cbData.ProjectionAB.Y = (-farPlane * nearPlane) / (farPlane - nearPlane);
}
// Bind constant buffer
auto cb = shader->GetCB(0);
context->UpdateCB(cb, &cbData);
context->BindCB(0, cb);
auto cb = shader->GetCB(0);
context->UpdateCB(cb, &cbData);
context->BindCB(0, cb);
}
// Depth/blur generation pass
auto tempDesc = GPUTextureDescription::New2D(cocWidth, cocHeight, DOF_DEPTH_BLUR_FORMAT, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::UnorderedAccess);
@@ -277,55 +295,13 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
// CoC Spread pass
// todo: add config for CoC spread in postFx settings?
// TODO: test it out
bool isCoCSpreadEnabled = false;
if (isCoCSpreadEnabled)
{
context->ResetRenderTarget();
context->ResetSR();
context->ResetUA();
context->FlushState();
tempDesc = GPUTextureDescription::New2D(cocWidth, cocHeight, DOF_DEPTH_BLUR_FORMAT, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::UnorderedAccess);
GPUTexture* tempTarget = RenderTargetPool::Get(tempDesc);
// Horizontal pass
context->BindSR(0, depthBlurTarget);
//
context->BindUA(0, tempTarget->View());
//
uint32 groupCountX = (cocWidth / DOF_GRID_SIZE) + ((cocWidth % DOF_GRID_SIZE) > 0 ? 1 : 0);
uint32 groupCountY = cocHeight;
//
context->Dispatch(shader->GetCS("CS_CoCSpread", 0), groupCountX, groupCountY, 1);
// Vertical pass
context->BindSR(0, tempTarget);
//
context->BindUA(0, depthBlurTarget->View());
//
groupCountX = cocWidth;
groupCountY = (cocHeight / DOF_GRID_SIZE) + (cocHeight % DOF_GRID_SIZE) > 0 ? 1 : 0;
//
context->Dispatch(shader->GetCS("CS_CoCSpread", 1), groupCountX, groupCountY, 1);
// Cleanup
context->ResetRenderTarget();
context->UnBindSR(0);
context->UnBindUA(0);
context->FlushState();
RenderTargetPool::Release(tempTarget);
}
// Peek temporary render target for dof pass
auto dofFormat = renderContext.Buffers->GetOutputFormat();
tempDesc = GPUTextureDescription::New2D(dofWidth, dofHeight, dofFormat);
GPUTexture* dofInput = RenderTargetPool::Get(tempDesc);
// Do the bokeh point generation, or just do a copy if disabled
bool isBokehGenerationEnabled = dofSettings.BokehEnabled && _platformSupportsBokeh && dofSettings.BokehBrightness > 0.0f;
bool isBokehGenerationEnabled = dofSettings.BokehEnabled && _platformSupportsBokeh && dofSettings.BokehBrightness > 0.0f && dofSettings.BokehSize > 0.0f;
if (isBokehGenerationEnabled)
{
// Update bokeh buffer to have enough size for points
@@ -388,7 +364,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
uint32 groupCountX = (dofWidth / DOF_GRID_SIZE) + ((dofWidth % DOF_GRID_SIZE) > 0 ? 1 : 0);
uint32 groupCountY = dofHeight;
//
context->Dispatch(shader->GetCS("CS_DepthOfField", 0), groupCountX, groupCountY, 1);
context->Dispatch(shader->GetCS("CS_DepthOfField", blurScalePermutationOffset + 0), groupCountX, groupCountY, 1);
// Cleanup
context->ResetRenderTarget();
@@ -405,7 +381,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
groupCountY = (dofHeight / DOF_GRID_SIZE) + ((dofHeight % DOF_GRID_SIZE) > 0 ? 1 : 0);
//
// TODO: cache Compute Shaders
context->Dispatch(shader->GetCS("CS_DepthOfField", 1), groupCountX, groupCountY, 1);
context->Dispatch(shader->GetCS("CS_DepthOfField", blurScalePermutationOffset + 1), groupCountX, groupCountY, 1);
context->ResetRenderTarget();
// Cleanup

View File

@@ -11,30 +11,6 @@
class DepthOfFieldPass : public RendererPass<DepthOfFieldPass>
{
private:
// Constant buffer payload for the Depth Of Field shader.
// The field order and packing must stay in sync with the DofData constant buffer declared in the shader.
PACK_STRUCT(struct Data {
// Coefficients the shader uses to convert z-buffer depth to linear depth.
Float2 ProjectionAB;
float BokehDepthCullThreshold;
float BokehDepthCutoff;
// Near (X..Y) and far (Z..W) depth ranges the shader uses to compute the blur factor.
Float4 DOFDepths;
float MaxBokehSize;
float BokehBrightnessThreshold;
float BokehBlurThreshold;
float BokehFalloff;
Float2 BokehTargetSize;
Float2 DOFTargetSize;
Float2 InputSize;
// Depth past which the shader fades the blur out.
float DepthLimit;
// Overall multiplier applied to the blur factor.
float BlurStrength;
// NOTE(review): presumably padding so that BokehBrightness lands where the shader expects it - confirm against DofData.
Float3 Dummy;
float BokehBrightness;
});
// Structure used for outputting bokeh points to an AppendStructuredBuffer
struct BokehPoint
{
@@ -59,11 +35,9 @@ private:
AssetReference<Texture> _defaultBokehCross;
public:
DepthOfFieldPass();
public:
/// <summary>
/// Perform Depth Of Field rendering for the input task
/// </summary>
@@ -73,7 +47,6 @@ public:
GPUTexture* Render(RenderContext& renderContext, GPUTexture* input);
private:
GPUTexture* getDofBokehShape(DepthOfFieldSettings& dofSettings);
#if COMPILE_WITH_DEV_ENV
void OnShaderReloading(Asset* obj)
@@ -87,14 +60,12 @@ private:
#endif
public:
// [RendererPass]
String ToString() const override;
bool Init() override;
void Dispose() override;
protected:
// [RendererPass]
bool setupResources() override;
};

View File

@@ -8,7 +8,9 @@
#include "./Flax/Common.hlsl"
// This must match C++ defines
// Guarded so that a shader permutation can supply its own radius (CS_DepthOfField is also compiled with DOF_MAX_SAMPLE_RADIUS=36).
#ifndef DOF_MAX_SAMPLE_RADIUS
#define DOF_MAX_SAMPLE_RADIUS 10
#endif
// Number of pixels along the blur axis that one thread group writes.
#define DOF_GRID_SIZE 450
// Extra threads on each side of the grid; they load samples into shared memory but write no output.
#define DOF_APRON_SIZE DOF_MAX_SAMPLE_RADIUS
// Total threads per group: the grid plus an apron on both sides.
#define DOF_THREAD_GROUP_SIZE (DOF_GRID_SIZE + (DOF_APRON_SIZE * 2))
@@ -18,7 +20,6 @@
#define USE_CS_LINEAR_SAMPLING 0
META_CB_BEGIN(0, DofData)
float2 ProjectionAB;
float BokehDepthCullThreshold;
float BokehDepthCutoff;
@@ -98,23 +99,16 @@ float LinearDepth(in float zBufferDepth)
return ProjectionAB.y / (zBufferDepth - ProjectionAB.x);
}
// Evaluates how strongly a pixel at the given linear depth should be blurred.
// The near ramp (DOFDepths.xy) falls from 1 to 0, the far ramp (DOFDepths.zw) rises from 0 to 1,
// and the combined value is faded out past DepthLimit and scaled by BlurStrength.
float BlurFactor(float depth)
{
    // Guard both ranges against a zero-length transition
    float nearRange = max(DOFDepths.y - DOFDepths.x, 0.01f);
    float farRange = max(DOFDepths.w - DOFDepths.z, 0.01f);

    float nearBlur = 1.0f - saturate((depth - DOFDepths.x) / nearRange);
    float farBlur = saturate((depth - DOFDepths.z) / farRange);

    // Quickly drops to zero once the depth passes the limit
    float limitFade = 1 - saturate((depth - DepthLimit) * 100);

    return saturate(nearBlur + farBlur) * limitFade * BlurStrength;
}
// Depth of Field depth blur generation (outputs linear depth + blur factor to R16G16 target)
META_PS(true, FEATURE_LEVEL_ES2)
float4 PS_DofDepthBlurGeneration(Quad_VS2PS input) : SV_Target
{
float depth = LinearDepth(Input0.SampleLevel(SamplerPointClamp, input.TexCoord, 0).r);
float blur = BlurFactor(depth);
return float4(depth, blur, 1.0f, 1.0f);
float depth = LinearDepth(Input0.SampleLevel(SamplerPointClamp, input.TexCoord, 0).r);
float f0 = 1.0f - saturate((depth - DOFDepths.x) / max(DOFDepths.y - DOFDepths.x, 0.01f));
float f1 = saturate((depth - DOFDepths.z) / max(DOFDepths.w - DOFDepths.z, 0.01f));
float fade = 1 - saturate((depth - DepthLimit) * 100);
float blur = saturate(f0 + f1) * fade * BlurStrength;
return float4(depth, blur, 1.0f, 1.0f);
}
#if defined(_CS_DepthOfField)
@@ -145,24 +139,25 @@ groupshared DOFSample Samples[DOF_THREAD_GROUP_SIZE];
META_CS(true, FEATURE_LEVEL_SM5)
META_PERMUTATION_1(HORIZONTAL=1)
META_PERMUTATION_1(HORIZONTAL=0)
META_PERMUTATION_2(HORIZONTAL=1,DOF_MAX_SAMPLE_RADIUS=36)
META_PERMUTATION_2(HORIZONTAL=0,DOF_MAX_SAMPLE_RADIUS=36)
[numthreads(_CS_DepthOfField_X, _CS_DepthOfField_Y, 1)]
void CS_DepthOfField(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID)
{
// These positions are relative to the "grid", AKA the horizontal group of pixels that this thread group is writing to
// These positions are relative to the "grid", AKA the horizontal/vertical group of pixels that this thread group is writing to
const int gridStart = groupID.DOF_COMP * DOF_GRID_SIZE;
const int grid = groupThreadID.DOF_COMP - DOF_APRON_SIZE;
// These positions are relative to the pixel coordinates
#if HORIZONTAL
const uint2 samplePos = uint2(max(gridStart + grid, 0), groupID.y);
const uint2 samplePos = uint2(gridStart + grid, groupID.y);
#else
const uint2 samplePos = uint2(groupID.x, max(gridStart + grid, 0));
const uint2 samplePos = uint2(groupID.x, gridStart + grid);
#endif
// Sample the textures
uint2 textureSize;
Input0.GetDimensions(textureSize.x, textureSize.y);
// Sample the textures
#if USE_CS_HALF_PIXEL_OFFSET
float2 sampleCoord = saturate(((float2)samplePos + 0.5f) / float2(textureSize));
#else
@@ -180,141 +175,49 @@ void CS_DepthOfField(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupT
float cocSize = blur * DOF_MAX_SAMPLE_RADIUS;
// Store in shared memory
Samples[groupThreadID.DOF_COMP].Color = color.rgb;
Samples[groupThreadID.DOF_COMP].Depth = depth;
Samples[groupThreadID.DOF_COMP].Blur = blur;
{
DOFSample tap;
tap.Color = color.rgb;
tap.Depth = depth;
tap.Blur = blur;
Samples[groupThreadID.DOF_COMP] = tap;
}
GroupMemoryBarrierWithGroupSync();
// Don't continue for threads in the apron, and threads outside the render target size
if (grid >= 0 && grid < DOF_GRID_SIZE && samplePos.DOF_COMP < textureSize.DOF_COMP)
if (grid >= 0 && grid < DOF_GRID_SIZE && samplePos.DOF_COMP >= 0 && samplePos.DOF_COMP < textureSize.DOF_COMP)
{
BRANCH
if (cocSize > 0.0f)
{
float3 outputColor = 0.0f;
float totalContribution = 0.0f;
float3 outputColor = 0.0f;
float totalContribution = 0.0f;
// Gather sample taps inside the radius
for (int i = -DOF_MAX_SAMPLE_RADIUS; i <= DOF_MAX_SAMPLE_RADIUS; i++)
{
// Grab the sample from shared memory
uint groupTap = groupThreadID.DOF_COMP + i;
DOFSample tap = Samples[groupTap];
// Gather sample taps inside the radius
float depthTest = depth + 100;
for (int i = -DOF_MAX_SAMPLE_RADIUS; i <= DOF_MAX_SAMPLE_RADIUS; i++)
{
// Grab the sample from shared memory
DOFSample tap = Samples[groupThreadID.DOF_COMP + i];
// Reject the sample if it's outside the CoC radius
float cocWeight = saturate(cocSize + 1.0f - abs(float(i)));
// Reject the sample if it's outside the CoC radius
float cocWeight = saturate(cocSize + 1.0f - abs(float(i)));
// Reject foreground samples, unless they're blurred as well
float depthWeight = tap.Depth >= depth;
float blurWeight = tap.Blur;
float tapWeight = cocWeight * saturate(depthWeight + blurWeight);
// Reject foreground samples, unless they're blurred as well
float depthWeight = tap.Depth > depthTest;
float blurWeight = tap.Blur;
float tapWeight = cocWeight * saturate(depthWeight + blurWeight);
outputColor += tap.Color * tapWeight;
totalContribution += tapWeight;
}
outputColor += tap.Color * tapWeight;
totalContribution += tapWeight;
}
// Write out the result
outputColor /= totalContribution;
OutputTexture[samplePos] = float4(max(outputColor, 0), color.a);
}
else
{
OutputTexture[samplePos] = color;
}
}
}
#elif defined(_CS_CoCSpread)
struct CoCSample
{
float Depth;
float Blur;
};
RWTexture2D<float2> OutputTexture : register(u0);
groupshared CoCSample Samples[DOF_THREAD_GROUP_SIZE];
#if HORIZONTAL
#define _CS_CoCSpread_X DOF_THREAD_GROUP_SIZE
#define _CS_CoCSpread_Y 1
#define DOF_COMP x
#else
#define _CS_CoCSpread_X 1
#define _CS_CoCSpread_Y DOF_THREAD_GROUP_SIZE
#define DOF_COMP y
#endif
// Performs the CoC spread
META_CS(true, FEATURE_LEVEL_SM5)
META_PERMUTATION_1(HORIZONTAL=1)
META_PERMUTATION_1(HORIZONTAL=0)
[numthreads(_CS_CoCSpread_X, _CS_CoCSpread_Y, 1)]
void CS_CoCSpread(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID)
{
// These positions are relative to the "grid", AKA the horizontal group of pixels that this thread group is writing to
const int gridStart = groupID.DOF_COMP * DOF_GRID_SIZE;
const int grid = groupThreadID.DOF_COMP - DOF_APRON_SIZE;
// These positions are relative to the pixel coordinates
#if HORIZONTAL
const uint2 samplePos = uint2(max(gridStart + grid, 0), groupID.y);
#else
const uint2 samplePos = uint2(groupID.x, max(gridStart + grid, 0));
#endif
uint2 textureSize;
Input0.GetDimensions(textureSize.x, textureSize.y);
// Sample the textures
#if USE_CS_HALF_PIXEL_OFFSET
float2 sampleCoord = saturate(((float2)samplePos + 0.5f) / float2(textureSize));
#else
float2 sampleCoord = saturate(samplePos / float2(textureSize));
#endif
#if USE_CS_LINEAR_SAMPLING
float2 depthBlur = Input0.SampleLevel(SamplerLinearClamp, sampleCoord, 0.0f).xy;
#else
float2 depthBlur = Input0.SampleLevel(SamplerPointClamp, sampleCoord, 0.0f).xy;
#endif
float depth = depthBlur.x;
float blur = depthBlur.y;
float cocSize = blur * DOF_MAX_SAMPLE_RADIUS;
// Store in shared memory
Samples[groupThreadID.DOF_COMP].Depth = depth;
Samples[groupThreadID.DOF_COMP].Blur = blur;
GroupMemoryBarrierWithGroupSync();
// Don't continue for threads in the apron, and threads outside the render target size
if (grid >= 0 && grid < DOF_GRID_SIZE && samplePos.DOF_COMP < textureSize.DOF_COMP)
{
float outputBlur = 0.0f;
float totalContribution = 0.0f;
// Gather sample taps inside the radius
for (int i = -DOF_MAX_SAMPLE_RADIUS; i <= DOF_MAX_SAMPLE_RADIUS; i++)
{
// Grab the sample from shared memory
uint groupTap = groupThreadID.DOF_COMP + i;
CoCSample tap = Samples[groupTap];
// Only accept samples if they're from the foreground, and have a higher blur amount
float depthWeight = tap.Depth <= depth;
float blurWeight = saturate(tap.Blur - blur);
float tapWeight = depthWeight * blurWeight;
// If it's the center tap, set the weight to 1 so and don't reject it
float centerWeight = i == 0 ? 1.0 : 0.0f;
tapWeight = saturate(tapWeight + centerWeight);
outputBlur += tap.Blur * tapWeight;
totalContribution += tapWeight;
outputColor /= totalContribution;
color.rgb = max(outputColor, 0);
}
// Write out the result
OutputTexture[samplePos] = float2(depth, outputBlur / totalContribution);
OutputTexture[samplePos] = color;
}
}