diff --git a/Content/Shaders/DepthOfField.flax b/Content/Shaders/DepthOfField.flax index be055b325..07ffb8e9d 100644 --- a/Content/Shaders/DepthOfField.flax +++ b/Content/Shaders/DepthOfField.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5126ac1919a411af96b95cd96cdf8a4f2ab676d3c2a1f09d3e42136fdee7899b -size 21153 +oid sha256:5592dd558f25e897bc671a64f3f588e208fa6bb56bfd28661219f7b4df10c26e +size 16616 diff --git a/Source/Engine/Renderer/DepthOfFieldPass.cpp b/Source/Engine/Renderer/DepthOfFieldPass.cpp index 01d370256..24ef815f2 100644 --- a/Source/Engine/Renderer/DepthOfFieldPass.cpp +++ b/Source/Engine/Renderer/DepthOfFieldPass.cpp @@ -9,7 +9,6 @@ #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/GPUDevice.h" #include "Engine/Graphics/GPULimits.h" -#include "Engine/Graphics/PostProcessBase.h" #include "Engine/Graphics/RenderTargetPool.h" #include "Engine/Graphics/RenderBuffers.h" #include "Engine/Graphics/RenderTask.h" @@ -300,7 +299,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i uint32 groupCountX = (cocWidth / DOF_GRID_SIZE) + ((cocWidth % DOF_GRID_SIZE) > 0 ? 1 : 0); uint32 groupCountY = cocHeight; // - context->Dispatch(shader->GetCS("CS_CoCSpreadH"), groupCountX, groupCountY, 1); + context->Dispatch(shader->GetCS("CS_CoCSpread", 0), groupCountX, groupCountY, 1); // Vertical pass context->BindSR(0, tempTarget); @@ -310,7 +309,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i groupCountX = cocWidth; - groupCountY = (cocHeight / DOF_GRID_SIZE) + (cocHeight % DOF_GRID_SIZE) > 0 ? 1 : 0; + groupCountY = (cocHeight / DOF_GRID_SIZE) + ((cocHeight % DOF_GRID_SIZE) > 0 ? 
1 : 0); // - context->Dispatch(shader->GetCS("CS_CoCSpreadV"), groupCountX, groupCountY, 1); + context->Dispatch(shader->GetCS("CS_CoCSpread", 1), groupCountX, groupCountY, 1); // Cleanup context->ResetRenderTarget(); @@ -389,7 +388,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i uint32 groupCountX = (dofWidth / DOF_GRID_SIZE) + ((dofWidth % DOF_GRID_SIZE) > 0 ? 1 : 0); uint32 groupCountY = dofHeight; // - context->Dispatch(shader->GetCS("CS_DepthOfFieldH"), groupCountX, groupCountY, 1); + context->Dispatch(shader->GetCS("CS_DepthOfField", 0), groupCountX, groupCountY, 1); // Cleanup context->ResetRenderTarget(); @@ -406,7 +405,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i groupCountY = (dofHeight / DOF_GRID_SIZE) + ((dofHeight % DOF_GRID_SIZE) > 0 ? 1 : 0); // // TODO: cache Compute Shaders - context->Dispatch(shader->GetCS("CS_DepthOfFieldV"), groupCountX, groupCountY, 1); + context->Dispatch(shader->GetCS("CS_DepthOfField", 1), groupCountX, groupCountY, 1); context->ResetRenderTarget(); // Cleanup diff --git a/Source/Shaders/DepthOfField.shader b/Source/Shaders/DepthOfField.shader index b035eb10f..772d41cc9 100644 --- a/Source/Shaders/DepthOfField.shader +++ b/Source/Shaders/DepthOfField.shader @@ -117,7 +117,7 @@ float4 PS_DofDepthBlurGeneration(Quad_VS2PS input) : SV_Target return float4(depth, blur, 1.0f, 1.0f); } -#if defined(_CS_DepthOfFieldH) || defined(_CS_DepthOfFieldV) +#if defined(_CS_DepthOfField) RWTexture2D OutputTexture : register(u0); @@ -131,24 +131,37 @@ struct DOFSample // Shared memory for actial depth of field pass groupshared DOFSample Samples[DOF_THREAD_GROUP_SIZE]; -// Performs the horizontal pass for the DOF blur +#if HORIZONTAL +#define _CS_DepthOfField_X DOF_THREAD_GROUP_SIZE +#define _CS_DepthOfField_Y 1 +#define DOF_COMP x +#else +#define _CS_DepthOfField_X 1 +#define _CS_DepthOfField_Y DOF_THREAD_GROUP_SIZE +#define DOF_COMP y +#endif + +// Performs the blur 
pass for the DOF META_CS(true, FEATURE_LEVEL_SM5) -[numthreads(DOF_THREAD_GROUP_SIZE, 1, 1)] -void CS_DepthOfFieldH(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID) +META_PERMUTATION_1(HORIZONTAL=1) +META_PERMUTATION_1(HORIZONTAL=0) +[numthreads(_CS_DepthOfField_X, _CS_DepthOfField_Y, 1)] +void CS_DepthOfField(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID) { - // These positions are relative to the "grid", AKA the horizontal group of pixels that this thread group is writing to + // These positions are relative to the "grid", AKA the horizontal (HORIZONTAL=1) or vertical (HORIZONTAL=0) group of pixels that this thread group is writing to - const int gridStartX = groupID.x * DOF_GRID_SIZE; - const int gridX = groupThreadID.x - DOF_APRON_SIZE; + const int gridStart = groupID.DOF_COMP * DOF_GRID_SIZE; + const int grid = groupThreadID.DOF_COMP - DOF_APRON_SIZE; // These positions are relative to the pixel coordinates - const uint sampleX = max(gridStartX + gridX, 0); - const uint sampleY = groupID.y; +#if HORIZONTAL + const uint2 samplePos = uint2(max(gridStart + grid, 0), groupID.y); +#else + const uint2 samplePos = uint2(groupID.x, max(gridStart + grid, 0)); +#endif uint2 textureSize; Input0.GetDimensions(textureSize.x, textureSize.y); - const uint2 samplePos = uint2(sampleX, sampleY); - // Sample the textures #if USE_CS_HALF_PIXEL_OFFSET float2 sampleCoord = saturate(((float2)samplePos + 0.5f) / float2(textureSize)); @@ -167,14 +180,13 @@ void CS_DepthOfFieldH(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_Group float cocSize = blur * DOF_MAX_SAMPLE_RADIUS; // Store in shared memory - Samples[groupThreadID.x].Color = color.rgb; - Samples[groupThreadID.x].Depth = depth; - Samples[groupThreadID.x].Blur = blur; - + Samples[groupThreadID.DOF_COMP].Color = color.rgb; + Samples[groupThreadID.DOF_COMP].Depth = depth; + Samples[groupThreadID.DOF_COMP].Blur = blur; GroupMemoryBarrierWithGroupSync(); // Don't continue for threads in the apron, and threads outside the render target size - if (gridX >= 0 && gridX < DOF_GRID_SIZE && sampleX < textureSize.x) + if (grid >= 0 && grid < DOF_GRID_SIZE 
&& samplePos.DOF_COMP < textureSize.DOF_COMP) { BRANCH if (cocSize > 0.0f) @@ -183,14 +195,14 @@ void CS_DepthOfFieldH(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_Group float totalContribution = 0.0f; // Gather sample taps inside the radius - for (int x = -DOF_MAX_SAMPLE_RADIUS; x <= DOF_MAX_SAMPLE_RADIUS; x++) + for (int i = -DOF_MAX_SAMPLE_RADIUS; i <= DOF_MAX_SAMPLE_RADIUS; i++) { // Grab the sample from shared memory - uint groupTapX = groupThreadID.x + x; - DOFSample tap = Samples[groupTapX]; + uint groupTap = groupThreadID.DOF_COMP + i; + DOFSample tap = Samples[groupTap]; // Reject the sample if it's outside the CoC radius - float cocWeight = saturate(cocSize + 1.0f - abs(float(x))); + float cocWeight = saturate(cocSize + 1.0f - abs(float(i))); // Reject foreground samples, unless they're blurred as well float depthWeight = tap.Depth >= depth; @@ -212,88 +224,7 @@ void CS_DepthOfFieldH(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_Group } } -// Performs the vertical DOF pass -META_CS(true, FEATURE_LEVEL_SM5) -[numthreads(1, DOF_THREAD_GROUP_SIZE, 1)] -void CS_DepthOfFieldV(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID) -{ - // These positions are relative to the "grid", AKA the vertical group of pixels that this thread group is writing to - const int gridStartY = groupID.y * DOF_GRID_SIZE; - const int gridY = groupThreadID.y - DOF_APRON_SIZE; - - // These positions are relative to the pixel coordinates - const uint sampleX = groupID.x; - const uint sampleY = max(gridStartY + gridY, 0); - - uint2 textureSize; - Input0.GetDimensions(textureSize.x, textureSize.y); - - const uint2 samplePos = uint2(sampleX, sampleY); - - // Sample the textures -#if USE_CS_HALF_PIXEL_OFFSET - float2 sampleCoord = saturate(((float2)samplePos + 0.5f) / float2(textureSize)); -#else - float2 sampleCoord = saturate(samplePos / float2(textureSize)); -#endif -#if USE_CS_LINEAR_SAMPLING - float4 color = Input0.SampleLevel(SamplerLinearClamp, 
sampleCoord, 0.0f).rgba; - float2 depthBlur = Input1.SampleLevel(SamplerLinearClamp, sampleCoord, 0.0f).xy; -#else - float4 color = Input0.SampleLevel(SamplerPointClamp, sampleCoord, 0.0f).rgba; - float2 depthBlur = Input1.SampleLevel(SamplerPointClamp, sampleCoord, 0.0f).xy; -#endif - float depth = depthBlur.x; - float blur = depthBlur.y; - float cocSize = blur * DOF_MAX_SAMPLE_RADIUS; - - // Store in shared memory - Samples[groupThreadID.y].Color = color.rgb; - Samples[groupThreadID.y].Depth = depth; - Samples[groupThreadID.y].Blur = blur; - - GroupMemoryBarrierWithGroupSync(); - - // Don't continue for threads in the apron, and threads outside the render target size - if (gridY >= 0 && gridY < DOF_GRID_SIZE && sampleY < textureSize.y) - { - BRANCH - if (cocSize > 0.0f) - { - float3 outputColor = 0.0f; - float totalContribution = 0.0f; - - // Gather sample taps inside the radius - for (int y = -DOF_MAX_SAMPLE_RADIUS; y <= DOF_MAX_SAMPLE_RADIUS; y++) - { - // Grab the sample from shared memory - uint groupTapY = groupThreadID.y + y; - DOFSample tap = Samples[groupTapY]; - - // Reject the sample if it's outside the CoC radius - float cocWeight = saturate(cocSize + 1.0f - abs(float(y))); - - // Reject foreground samples, unless they're blurred as well - float depthWeight = tap.Depth >= depth; - float blurWeight = tap.Blur; - float tapWeight = cocWeight * saturate(depthWeight + blurWeight); - - outputColor += tap.Color * tapWeight; - totalContribution += tapWeight; - } - - // Write out the result - outputColor /= totalContribution; - OutputTexture[samplePos] = float4(max(outputColor, 0), color.a); - } - else - { - OutputTexture[samplePos] = color; - } - } -} - -#elif defined(_CS_CoCSpreadH) || defined(_CS_CoCSpreadV) +#elif defined(_CS_CoCSpread) struct CoCSample { @@ -305,24 +236,37 @@ RWTexture2D OutputTexture : register(u0); groupshared CoCSample Samples[DOF_THREAD_GROUP_SIZE]; -// Performs the horizontal CoC spread +#if HORIZONTAL +#define _CS_CoCSpread_X 
DOF_THREAD_GROUP_SIZE +#define _CS_CoCSpread_Y 1 +#define DOF_COMP x +#else +#define _CS_CoCSpread_X 1 +#define _CS_CoCSpread_Y DOF_THREAD_GROUP_SIZE +#define DOF_COMP y +#endif + +// Performs the CoC spread META_CS(true, FEATURE_LEVEL_SM5) -[numthreads(DOF_THREAD_GROUP_SIZE, 1, 1)] -void CS_CoCSpreadH(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID) +META_PERMUTATION_1(HORIZONTAL=1) +META_PERMUTATION_1(HORIZONTAL=0) +[numthreads(_CS_CoCSpread_X, _CS_CoCSpread_Y, 1)] +void CS_CoCSpread(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID) { - // These positions are relative to the "grid", AKA the horizontal group of pixels that this thread group is writing to + // These positions are relative to the "grid", AKA the horizontal (HORIZONTAL=1) or vertical (HORIZONTAL=0) group of pixels that this thread group is writing to - const int gridStartX = groupID.x * DOF_GRID_SIZE; - const int gridX = groupThreadID.x - DOF_APRON_SIZE; + const int gridStart = groupID.DOF_COMP * DOF_GRID_SIZE; + const int grid = groupThreadID.DOF_COMP - DOF_APRON_SIZE; // These positions are relative to the pixel coordinates - const uint sampleX = max(gridStartX + gridX, 0); - const uint sampleY = groupID.y; +#if HORIZONTAL + const uint2 samplePos = uint2(max(gridStart + grid, 0), groupID.y); +#else + const uint2 samplePos = uint2(groupID.x, max(gridStart + grid, 0)); +#endif uint2 textureSize; Input0.GetDimensions(textureSize.x, textureSize.y); - const uint2 samplePos = uint2(sampleX, sampleY); - // Sample the textures #if USE_CS_HALF_PIXEL_OFFSET float2 sampleCoord = saturate(((float2)samplePos + 0.5f) / float2(textureSize)); @@ -334,29 +278,27 @@ void CS_CoCSpreadH(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupThr #else float2 depthBlur = Input0.SampleLevel(SamplerPointClamp, sampleCoord, 0.0f).xy; #endif - float depth = depthBlur.x; float blur = depthBlur.y; float cocSize = blur * DOF_MAX_SAMPLE_RADIUS; // Store in shared memory - Samples[groupThreadID.x].Depth = depth; - Samples[groupThreadID.x].Blur = blur; - + Samples[groupThreadID.DOF_COMP].Depth = depth; + Samples[groupThreadID.DOF_COMP].Blur = blur; 
GroupMemoryBarrierWithGroupSync(); // Don't continue for threads in the apron, and threads outside the render target size - if (gridX >= 0 && gridX < DOF_GRID_SIZE && sampleX < textureSize.x) + if (grid >= 0 && grid < DOF_GRID_SIZE && samplePos.DOF_COMP < textureSize.DOF_COMP) { float outputBlur = 0.0f; float totalContribution = 0.0f; // Gather sample taps inside the radius - for (int x = -DOF_MAX_SAMPLE_RADIUS; x <= DOF_MAX_SAMPLE_RADIUS; x++) + for (int i = -DOF_MAX_SAMPLE_RADIUS; i <= DOF_MAX_SAMPLE_RADIUS; i++) { // Grab the sample from shared memory - uint groupTapX = groupThreadID.x + x; - CoCSample tap = Samples[groupTapX]; + uint groupTap = groupThreadID.DOF_COMP + i; + CoCSample tap = Samples[groupTap]; // Only accept samples if they're from the foreground, and have a higher blur amount float depthWeight = tap.Depth <= depth; @@ -364,77 +306,7 @@ void CS_CoCSpreadH(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupThr float tapWeight = depthWeight * blurWeight; // If it's the center tap, set the weight to 1 so and don't reject it - float centerWeight = x == 0 ? 
1.0 : 0.0f; - tapWeight = saturate(tapWeight + centerWeight); - - outputBlur += tap.Blur * tapWeight; - totalContribution += tapWeight; - } - - // Write out the result - OutputTexture[samplePos] = float2(depth, outputBlur / totalContribution); - } -} - -// Performs the vertical CoC spread -META_CS(true, FEATURE_LEVEL_SM5) -[numthreads(1, DOF_THREAD_GROUP_SIZE, 1)] -void CS_CoCSpreadV(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID) -{ - // These positions are relative to the "grid", AKA the vertical group of pixels that this thread group is writing to - const int gridStartY = groupID.y * DOF_GRID_SIZE; - const int gridY = groupThreadID.y - DOF_APRON_SIZE; - - // These positions are relative to the pixel coordinates - const uint sampleX = groupID.x; - const uint sampleY = max(gridStartY + gridY, 0); - - uint2 textureSize; - Input0.GetDimensions(textureSize.x, textureSize.y); - - const uint2 samplePos = uint2(sampleX, sampleY); - - // Sample the textures -#if USE_CS_HALF_PIXEL_OFFSET - float2 sampleCoord = saturate(((float2)samplePos + 0.5f) / float2(textureSize)); -#else - float2 sampleCoord = saturate(samplePos / float2(textureSize)); -#endif -#if USE_CS_LINEAR_SAMPLING - float2 depthBlur = Input0.SampleLevel(SamplerLinearClamp, sampleCoord, 0.0f).xy; -#else - float2 depthBlur = Input0.SampleLevel(SamplerPointClamp, sampleCoord, 0.0f).xy; -#endif - float depth = depthBlur.x; - float blur = depthBlur.y; - float cocSize = blur * DOF_MAX_SAMPLE_RADIUS; - - // Store in shared memory - Samples[groupThreadID.y].Depth = depth; - Samples[groupThreadID.y].Blur = blur; - - GroupMemoryBarrierWithGroupSync(); - - // Don't continue for threads in the apron, and threads outside the render target size - if (gridY >= 0 && gridY < DOF_GRID_SIZE && sampleY < textureSize.y) - { - float outputBlur = 0.0f; - float totalContribution = 0.0f; - - // Gather sample taps inside the radius - for (int y = -DOF_MAX_SAMPLE_RADIUS; y <= DOF_MAX_SAMPLE_RADIUS; y++) - { - // Grab 
the sample from shared memory - uint groupTapY = groupThreadID.y + y; - CoCSample tap = Samples[groupTapY]; - - // Only accept samples if they're from the foreground, and have a higher blur amount - float depthWeight = tap.Depth <= depth; - float blurWeight = saturate(tap.Blur - blur); - float tapWeight = depthWeight * blurWeight; - - // If it's the center tap, set the weight to 1 and don't reject it - float centerWeight = y == 0 ? 1.0 : 0.0f; + float centerWeight = i == 0 ? 1.0 : 0.0f; tapWeight = saturate(tapWeight + centerWeight); outputBlur += tap.Blur * tapWeight;