Fix regression crash when using DoF, optimize DoF usage of GPU memory

This commit is contained in:
Wojciech Figat
2023-01-09 16:06:47 +01:00
parent 8c9a3055e1
commit 70cce0e1ee
5 changed files with 34 additions and 60 deletions

View File

@@ -200,10 +200,10 @@ GPUTexture* DepthOfFieldPass::getDofBokehShape(DepthOfFieldSettings& dofSettings
return result ? result->GetTexture() : nullptr;
}
GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* input)
void DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture*& frame, GPUTexture*& tmp)
{
if (!_platformSupportsDoF || checkIfSkipPass())
return nullptr;
return;
auto device = GPUDevice::Instance;
auto context = device->GetMainContext();
const auto depthBuffer = renderContext.Buffers->DepthBuffer;
@@ -211,7 +211,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
DepthOfFieldSettings& dofSettings = renderContext.List->Settings.DepthOfField;
const bool useDoF = _platformSupportsDoF && (renderContext.View.Flags & ViewFlags::DepthOfField) != 0 && dofSettings.Enabled;
if (!useDoF)
return nullptr;
return;
PROFILE_GPU_CPU("Depth Of Field");
context->ResetSR();
@@ -222,8 +222,8 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
const int32 dofResolutionDivider = 1;
const int32 bokehResolutionDivider = 1;
// TODO: in low-res DoF maybe use shared HalfResDepth?
const int32 w1 = input->Width();
const int32 h1 = input->Height();
const int32 w1 = frame->Width();
const int32 h1 = frame->Height();
const int32 cocWidth = w1 / cocResolutionDivider;
const int32 cocHeight = h1 / cocResolutionDivider;
const int32 dofWidth = w1 / dofResolutionDivider;
@@ -241,8 +241,6 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
// TODO: maybe we could render particles (whole transparency in general) to the depth buffer to apply DoF on them as well?
// TODO: reduce the number of temporary render targets used; we could plan the rendering steps in a more static way and hardcode some logic to make it run faster with less memory usage (less bandwidth)
// Setup constant buffer
{
float nearPlane = renderContext.View.Near;
@@ -299,8 +297,6 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
// Peek temporary render target for dof pass
auto dofFormat = renderContext.Buffers->GetOutputFormat();
tempDesc = GPUTextureDescription::New2D(dofWidth, dofHeight, dofFormat);
GPUTexture* dofInput = RenderTargetPool::Get(tempDesc);
RENDER_TARGET_POOL_SET_NAME(dofInput, "DOF.Output");
// Do the bokeh point generation, or just do a copy if disabled
bool isBokehGenerationEnabled = dofSettings.BokehEnabled && _platformSupportsBokeh && dofSettings.BokehBrightness > 0.0f && dofSettings.BokehSize > 0.0f;
@@ -318,7 +314,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
if (_bokehBuffer->Init(GPUBufferDescription::StructuredAppend(minRequiredElements, elementStride)))
{
LOG(Fatal, "Cannot create buffer {0}.", TEXT("Bokeh Buffer"));
return nullptr;
return;
}
}
@@ -326,10 +322,10 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
context->ResetCounter(_bokehBuffer);
// Generate bokeh points
context->BindSR(0, input);
context->BindSR(0, frame);
context->BindSR(1, depthBlurTarget);
context->BindUA(1, _bokehBuffer->View());
context->SetRenderTarget(*dofInput);
context->SetRenderTarget(*tmp);
context->SetViewportAndScissors((float)dofWidth, (float)dofHeight);
context->SetState(_psBokehGeneration);
context->DrawFullscreenTriangle();
@@ -337,33 +333,26 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
else
{
// Bokeh generation disabled: just copy the frame (no bokeh points are generated)
context->BindSR(0, input);
context->BindSR(0, frame);
context->BindSR(1, depthBlurTarget);
context->SetRenderTarget(*dofInput);
context->SetRenderTarget(*tmp);
context->SetViewportAndScissors((float)dofWidth, (float)dofHeight);
context->SetState(_psDoNotGenerateBokeh);
context->DrawFullscreenTriangle();
}
Swap(frame, tmp);
// Do depth of field (using compute shaders in full resolution)
GPUTexture* dofOutput;
context->ResetRenderTarget();
context->ResetSR();
context->ResetUA();
context->FlushState();
{
// Peek temporary targets for two blur passes
tempDesc = GPUTextureDescription::New2D(dofWidth, dofHeight, dofFormat, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::UnorderedAccess);
auto dofTargetH = RenderTargetPool::Get(tempDesc);
auto dofTargetV = RenderTargetPool::Get(tempDesc);
RENDER_TARGET_POOL_SET_NAME(dofTargetH, "DOF.TargetH");
RENDER_TARGET_POOL_SET_NAME(dofTargetV, "DOF.TargetV");
// Horizontal pass
context->BindSR(0, dofInput);
context->BindSR(0, frame);
context->BindSR(1, depthBlurTarget);
//
context->BindUA(0, dofTargetH->View());
context->BindUA(0, tmp->View());
//
uint32 groupCountX = (dofWidth / DOF_GRID_SIZE) + ((dofWidth % DOF_GRID_SIZE) > 0 ? 1 : 0);
uint32 groupCountY = dofHeight;
@@ -376,9 +365,9 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
context->ResetSR();
// Vertical pass
context->BindUA(0, dofTargetV->View());
context->BindUA(0, frame->View());
//
context->BindSR(0, dofTargetH);
context->BindSR(0, tmp);
context->BindSR(1, depthBlurTarget);
//
groupCountX = dofWidth;
@@ -391,20 +380,14 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
// Cleanup
context->ResetUA();
context->ResetSR();
RenderTargetPool::Release(dofTargetH);
dofOutput = dofTargetV;
}
// Cleanup temporary texture
RenderTargetPool::Release(dofInput);
// Render the bokeh points
if (isBokehGenerationEnabled)
{
tempDesc = GPUTextureDescription::New2D(bokehTargetWidth, bokehTargetHeight, dofFormat);
auto bokehTarget = RenderTargetPool::Get(tempDesc);
RENDER_TARGET_POOL_SET_NAME(depthBlurTarget, "DOF.Bokeh");
RENDER_TARGET_POOL_SET_NAME(bokehTarget, "DOF.Bokeh");
context->Clear(*bokehTarget, Color::Black);
{
@@ -426,24 +409,17 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
}
// Composite the bokeh rendering results with the depth of field result
tempDesc = GPUTextureDescription::New2D(dofWidth, dofHeight, dofFormat);
auto compositeTarget = RenderTargetPool::Get(tempDesc);
RENDER_TARGET_POOL_SET_NAME(depthBlurTarget, "DOF.Composite");
context->BindSR(0, bokehTarget);
context->BindSR(1, dofOutput);
context->SetRenderTarget(*compositeTarget);
context->BindSR(1, frame);
context->SetRenderTarget(*tmp);
context->SetViewportAndScissors((float)dofWidth, (float)dofHeight);
context->SetState(_psBokehComposite);
context->DrawFullscreenTriangle();
context->ResetRenderTarget();
RenderTargetPool::Release(bokehTarget);
RenderTargetPool::Release(dofOutput);
dofOutput = compositeTarget;
Swap(frame, tmp);
}
RenderTargetPool::Release(depthBlurTarget);
// Return output temporary render target
return dofOutput;
}

View File

@@ -42,9 +42,9 @@ public:
/// Perform Depth Of Field rendering for the input task
/// </summary>
/// <param name="renderContext">The rendering context.</param>
/// <param name="input">Target with rendered HDR frame</param>
/// <returns>Allocated temporary render target, should be released by the caller. Can be null if the pass was skipped.</returns>
GPUTexture* Render(RenderContext& renderContext, GPUTexture* input);
/// <param name="frame">Input and output frame (leave unchanged when not using this effect).</param>
/// <param name="tmp">Temporary frame (the same format as frame)</param>
void Render(RenderContext& renderContext, GPUTexture*& frame, GPUTexture*& tmp);
private:
GPUTexture* getDofBokehShape(DepthOfFieldSettings& dofSettings);

View File

@@ -258,15 +258,15 @@ void MotionBlurPass::RenderDebug(RenderContext& renderContext, GPUTextureView* f
context->ResetSR();
}
void MotionBlurPass::Render(RenderContext& renderContext, GPUTexture*& input, GPUTexture*& output)
void MotionBlurPass::Render(RenderContext& renderContext, GPUTexture*& frame, GPUTexture*& tmp)
{
const bool isCameraCut = renderContext.Task->IsCameraCut;
const auto motionVectors = renderContext.Buffers->MotionVectors;
ASSERT(motionVectors);
auto context = GPUDevice::Instance->GetMainContext();
MotionBlurSettings& settings = renderContext.List->Settings.MotionBlur;
const int32 screenWidth = input->Width();
const int32 screenHeight = input->Height();
const int32 screenWidth = frame->Width();
const int32 screenHeight = frame->Height();
const int32 motionVectorsWidth = screenWidth / static_cast<int32>(settings.MotionVectorsResolution);
const int32 motionVectorsHeight = screenHeight / static_cast<int32>(settings.MotionVectorsResolution);
if ((renderContext.View.Flags & ViewFlags::MotionBlur) == 0 ||
@@ -368,13 +368,13 @@ void MotionBlurPass::Render(RenderContext& renderContext, GPUTexture*& input, GP
// Render motion blur
context->ResetRenderTarget();
context->SetRenderTarget(*output);
context->SetRenderTarget(*tmp);
context->SetViewportAndScissors((float)screenWidth, (float)screenHeight);
context->BindSR(0, input->View());
context->BindSR(0, frame->View());
context->BindSR(1, motionVectors->View());
context->BindSR(2, vMaxNeighborBuffer->View());
context->BindSR(3, renderContext.Buffers->DepthBuffer->View());
data.Input0SizeInv = Float2(1.0f / (float)input->Width(), 1.0f / (float)input->Height());
data.Input0SizeInv = Float2(1.0f / (float)screenWidth, 1.0f / (float)screenHeight);
data.Input2SizeInv = Float2(1.0f / (float)renderContext.Buffers->DepthBuffer->Width(), 1.0f / (float)renderContext.Buffers->DepthBuffer->Height());
context->UpdateCB(cb, &data);
context->SetState(_psMotionBlur);
@@ -384,5 +384,5 @@ void MotionBlurPass::Render(RenderContext& renderContext, GPUTexture*& input, GP
RenderTargetPool::Release(vMaxNeighborBuffer);
context->ResetSR();
context->ResetRenderTarget();
Swap(output, input);
Swap(frame, tmp);
}

View File

@@ -46,9 +46,9 @@ public:
/// Renders the motion blur. Swaps the input with output if rendering is performed. Does nothing if rendering is not performed.
/// </summary>
/// <param name="renderContext">The rendering context.</param>
/// <param name="input">The input frame.</param>
/// <param name="output">The output frame.</param>
void Render(RenderContext& renderContext, GPUTexture*& input, GPUTexture*& output);
/// <param name="frame">Input and output frame (leave unchanged when not using this effect).</param>
/// <param name="tmp">Temporary frame (the same format as frame)</param>
void Render(RenderContext& renderContext, GPUTexture*& frame, GPUTexture*& tmp);
private:

View File

@@ -397,7 +397,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont
// Get the light accumulation buffer
auto outputFormat = renderContext.Buffers->GetOutputFormat();
auto tempDesc = GPUTextureDescription::New2D(renderContext.Buffers->GetWidth(), renderContext.Buffers->GetHeight(), outputFormat);
auto tempDesc = GPUTextureDescription::New2D(renderContext.Buffers->GetWidth(), renderContext.Buffers->GetHeight(), outputFormat, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::UnorderedAccess);
auto lightBuffer = RenderTargetPool::Get(tempDesc);
RENDER_TARGET_POOL_SET_NAME(lightBuffer, "LightBuffer");
@@ -587,8 +587,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont
}
// Depth of Field
auto dofTemporary = DepthOfFieldPass::Instance()->Render(renderContext, frameBuffer);
frameBuffer = dofTemporary ? dofTemporary : frameBuffer;
DepthOfFieldPass::Instance()->Render(renderContext, frameBuffer, tempBuffer);
// Motion Blur
MotionBlurPass::Instance()->Render(renderContext, frameBuffer, tempBuffer);
@@ -600,7 +599,6 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont
EyeAdaptationPass::Instance()->Render(renderContext, frameBuffer);
PostProcessingPass::Instance()->Render(renderContext, frameBuffer, tempBuffer, colorGradingLUT);
RenderTargetPool::Release(colorGradingLUT);
RenderTargetPool::Release(dofTemporary);
Swap(frameBuffer, tempBuffer);
// Cleanup