From 70cce0e1ee957c698f76882dfee114be3ff66801 Mon Sep 17 00:00:00 2001 From: Wojciech Figat Date: Mon, 9 Jan 2023 16:06:47 +0100 Subject: [PATCH] Fix regression crash when using DoF, optimize DoF usage of GPU memory --- Source/Engine/Renderer/DepthOfFieldPass.cpp | 62 +++++++-------------- Source/Engine/Renderer/DepthOfFieldPass.h | 6 +- Source/Engine/Renderer/MotionBlurPass.cpp | 14 ++--- Source/Engine/Renderer/MotionBlurPass.h | 6 +- Source/Engine/Renderer/Renderer.cpp | 6 +- 5 files changed, 34 insertions(+), 60 deletions(-) diff --git a/Source/Engine/Renderer/DepthOfFieldPass.cpp b/Source/Engine/Renderer/DepthOfFieldPass.cpp index e3a76ad84..aad8a035f 100644 --- a/Source/Engine/Renderer/DepthOfFieldPass.cpp +++ b/Source/Engine/Renderer/DepthOfFieldPass.cpp @@ -200,10 +200,10 @@ GPUTexture* DepthOfFieldPass::getDofBokehShape(DepthOfFieldSettings& dofSettings return result ? result->GetTexture() : nullptr; } -GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* input) +void DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture*& frame, GPUTexture*& tmp) { if (!_platformSupportsDoF || checkIfSkipPass()) - return nullptr; + return; auto device = GPUDevice::Instance; auto context = device->GetMainContext(); const auto depthBuffer = renderContext.Buffers->DepthBuffer; @@ -211,7 +211,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i DepthOfFieldSettings& dofSettings = renderContext.List->Settings.DepthOfField; const bool useDoF = _platformSupportsDoF && (renderContext.View.Flags & ViewFlags::DepthOfField) != 0 && dofSettings.Enabled; if (!useDoF) - return nullptr; + return; PROFILE_GPU_CPU("Depth Of Field"); context->ResetSR(); @@ -222,8 +222,8 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i const int32 dofResolutionDivider = 1; const int32 bokehResolutionDivider = 1; // TODO: in low-res DoF maybe use shared HalfResDepth? - const int32 w1 = input->Width(); - const int32 h1 = input->Height(); + const int32 w1 = frame->Width(); + const int32 h1 = frame->Height(); const int32 cocWidth = w1 / cocResolutionDivider; const int32 cocHeight = h1 / cocResolutionDivider; const int32 dofWidth = w1 / dofResolutionDivider; @@ -241,8 +241,6 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i // TODO: maybe we could render particles (whole transparency in general) to the depth buffer to apply DoF on them as well? - // TODO: reduce amount of used temporary render targets, we could plan rendering steps in more static way and hardcode some logic to make it run faster with less memory usage (less bandwitch) - // Setup constant buffer { float nearPlane = renderContext.View.Near; @@ -299,8 +297,6 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i // Peek temporary render target for dof pass auto dofFormat = renderContext.Buffers->GetOutputFormat(); tempDesc = GPUTextureDescription::New2D(dofWidth, dofHeight, dofFormat); - GPUTexture* dofInput = RenderTargetPool::Get(tempDesc); - RENDER_TARGET_POOL_SET_NAME(dofInput, "DOF.Output"); // Do the bokeh point generation, or just do a copy if disabled bool isBokehGenerationEnabled = dofSettings.BokehEnabled && _platformSupportsBokeh && dofSettings.BokehBrightness > 0.0f && dofSettings.BokehSize > 0.0f; @@ -318,7 +314,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i if (_bokehBuffer->Init(GPUBufferDescription::StructuredAppend(minRequiredElements, elementStride))) { LOG(Fatal, "Cannot create buffer {0}.", TEXT("Bokeh Buffer")); - return nullptr; + return; } } @@ -326,10 +322,10 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i context->ResetCounter(_bokehBuffer); // Generate bokeh points - context->BindSR(0, input); + context->BindSR(0, frame); context->BindSR(1, depthBlurTarget); context->BindUA(1, _bokehBuffer->View()); - context->SetRenderTarget(*dofInput); + context->SetRenderTarget(*tmp); context->SetViewportAndScissors((float)dofWidth, (float)dofHeight); context->SetState(_psBokehGeneration); context->DrawFullscreenTriangle(); @@ -337,33 +333,26 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i else { // Generate bokeh points - context->BindSR(0, input); + context->BindSR(0, frame); context->BindSR(1, depthBlurTarget); - context->SetRenderTarget(*dofInput); + context->SetRenderTarget(*tmp); context->SetViewportAndScissors((float)dofWidth, (float)dofHeight); context->SetState(_psDoNotGenerateBokeh); context->DrawFullscreenTriangle(); } + Swap(frame, tmp); // Do depth of field (using compute shaders in full resolution) - GPUTexture* dofOutput; context->ResetRenderTarget(); context->ResetSR(); context->ResetUA(); context->FlushState(); { - // Peek temporary targets for two blur passes - tempDesc = GPUTextureDescription::New2D(dofWidth, dofHeight, dofFormat, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::UnorderedAccess); - auto dofTargetH = RenderTargetPool::Get(tempDesc); - auto dofTargetV = RenderTargetPool::Get(tempDesc); - RENDER_TARGET_POOL_SET_NAME(dofTargetH, "DOF.TargetH"); - RENDER_TARGET_POOL_SET_NAME(dofTargetV, "DOF.TargetV"); - // Horizontal pass - context->BindSR(0, dofInput); + context->BindSR(0, frame); context->BindSR(1, depthBlurTarget); // - context->BindUA(0, dofTargetH->View()); + context->BindUA(0, tmp->View()); // uint32 groupCountX = (dofWidth / DOF_GRID_SIZE) + ((dofWidth % DOF_GRID_SIZE) > 0 ? 1 : 0); uint32 groupCountY = dofHeight; @@ -376,9 +365,9 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i context->ResetSR(); // Vertical pass - context->BindUA(0, dofTargetV->View()); + context->BindUA(0, frame->View()); // - context->BindSR(0, dofTargetH); + context->BindSR(0, tmp); context->BindSR(1, depthBlurTarget); // groupCountX = dofWidth; @@ -391,20 +380,14 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i // Cleanup context->ResetUA(); context->ResetSR(); - RenderTargetPool::Release(dofTargetH); - - dofOutput = dofTargetV; } - // Cleanup temporary texture - RenderTargetPool::Release(dofInput); - // Render the bokeh points if (isBokehGenerationEnabled) { tempDesc = GPUTextureDescription::New2D(bokehTargetWidth, bokehTargetHeight, dofFormat); auto bokehTarget = RenderTargetPool::Get(tempDesc); - RENDER_TARGET_POOL_SET_NAME(depthBlurTarget, "DOF.Bokeh"); + RENDER_TARGET_POOL_SET_NAME(bokehTarget, "DOF.Bokeh"); context->Clear(*bokehTarget, Color::Black); { @@ -426,24 +409,17 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i } // Composite the bokeh rendering results with the depth of field result - tempDesc = GPUTextureDescription::New2D(dofWidth, dofHeight, dofFormat); - auto compositeTarget = RenderTargetPool::Get(tempDesc); - RENDER_TARGET_POOL_SET_NAME(depthBlurTarget, "DOF.Composite"); context->BindSR(0, bokehTarget); - context->BindSR(1, dofOutput); - context->SetRenderTarget(*compositeTarget); + context->BindSR(1, frame); + context->SetRenderTarget(*tmp); context->SetViewportAndScissors((float)dofWidth, (float)dofHeight); context->SetState(_psBokehComposite); context->DrawFullscreenTriangle(); context->ResetRenderTarget(); RenderTargetPool::Release(bokehTarget); - RenderTargetPool::Release(dofOutput); - dofOutput = compositeTarget; + Swap(frame, tmp); } RenderTargetPool::Release(depthBlurTarget); - - // Return output temporary render target - return dofOutput; } diff --git a/Source/Engine/Renderer/DepthOfFieldPass.h b/Source/Engine/Renderer/DepthOfFieldPass.h index 9b2841fbd..5350e59be 100644 --- a/Source/Engine/Renderer/DepthOfFieldPass.h +++ b/Source/Engine/Renderer/DepthOfFieldPass.h @@ -42,9 +42,9 @@ public: /// Perform Depth Of Field rendering for the input task /// /// The rendering context. - /// Target with rendered HDR frame - /// Allocated temporary render target, should be released by the called. Can be null if pass skipped. - GPUTexture* Render(RenderContext& renderContext, GPUTexture* input); + /// Input and output frame (leave unchanged when not using this effect). + /// Temporary frame (the same format as frame) + void Render(RenderContext& renderContext, GPUTexture*& frame, GPUTexture*& tmp); private: GPUTexture* getDofBokehShape(DepthOfFieldSettings& dofSettings); diff --git a/Source/Engine/Renderer/MotionBlurPass.cpp b/Source/Engine/Renderer/MotionBlurPass.cpp index ba7b20569..dbb9b801e 100644 --- a/Source/Engine/Renderer/MotionBlurPass.cpp +++ b/Source/Engine/Renderer/MotionBlurPass.cpp @@ -258,15 +258,15 @@ void MotionBlurPass::RenderDebug(RenderContext& renderContext, GPUTextureView* f context->ResetSR(); } -void MotionBlurPass::Render(RenderContext& renderContext, GPUTexture*& input, GPUTexture*& output) +void MotionBlurPass::Render(RenderContext& renderContext, GPUTexture*& frame, GPUTexture*& tmp) { const bool isCameraCut = renderContext.Task->IsCameraCut; const auto motionVectors = renderContext.Buffers->MotionVectors; ASSERT(motionVectors); auto context = GPUDevice::Instance->GetMainContext(); MotionBlurSettings& settings = renderContext.List->Settings.MotionBlur; - const int32 screenWidth = input->Width(); - const int32 screenHeight = input->Height(); + const int32 screenWidth = frame->Width(); + const int32 screenHeight = frame->Height(); const int32 motionVectorsWidth = screenWidth / static_cast(settings.MotionVectorsResolution); const int32 motionVectorsHeight = screenHeight / static_cast(settings.MotionVectorsResolution); if ((renderContext.View.Flags & ViewFlags::MotionBlur) == 0 || @@ -368,13 +368,13 @@ void MotionBlurPass::Render(RenderContext& renderContext, GPUTexture*& input, GP // Render motion blur context->ResetRenderTarget(); - context->SetRenderTarget(*output); + context->SetRenderTarget(*tmp); context->SetViewportAndScissors((float)screenWidth, (float)screenHeight); - context->BindSR(0, input->View()); + context->BindSR(0, frame->View()); context->BindSR(1, motionVectors->View()); context->BindSR(2, vMaxNeighborBuffer->View()); context->BindSR(3, renderContext.Buffers->DepthBuffer->View()); - data.Input0SizeInv = Float2(1.0f / (float)input->Width(), 1.0f / (float)input->Height()); + data.Input0SizeInv = Float2(1.0f / (float)screenWidth, 1.0f / (float)screenHeight); data.Input2SizeInv = Float2(1.0f / (float)renderContext.Buffers->DepthBuffer->Width(), 1.0f / (float)renderContext.Buffers->DepthBuffer->Height()); context->UpdateCB(cb, &data); context->SetState(_psMotionBlur); @@ -384,5 +384,5 @@ void MotionBlurPass::Render(RenderContext& renderContext, GPUTexture*& input, GP RenderTargetPool::Release(vMaxNeighborBuffer); context->ResetSR(); context->ResetRenderTarget(); - Swap(output, input); + Swap(frame, tmp); } diff --git a/Source/Engine/Renderer/MotionBlurPass.h b/Source/Engine/Renderer/MotionBlurPass.h index 3ea11897d..1ff1dc8eb 100644 --- a/Source/Engine/Renderer/MotionBlurPass.h +++ b/Source/Engine/Renderer/MotionBlurPass.h @@ -46,9 +46,9 @@ public: /// Renders the motion blur. Swaps the input with output if rendering is performed. Does nothing if rendering is not performed. /// /// The rendering context. - /// The input frame. - /// The output frame. - void Render(RenderContext& renderContext, GPUTexture*& input, GPUTexture*& output); + /// Input and output frame (leave unchanged when not using this effect). + /// Temporary frame (the same format as frame) + void Render(RenderContext& renderContext, GPUTexture*& frame, GPUTexture*& tmp); private: diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index 0ab25f6e5..f7f5c88ba 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -397,7 +397,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont // Get the light accumulation buffer auto outputFormat = renderContext.Buffers->GetOutputFormat(); - auto tempDesc = GPUTextureDescription::New2D(renderContext.Buffers->GetWidth(), renderContext.Buffers->GetHeight(), outputFormat); + auto tempDesc = GPUTextureDescription::New2D(renderContext.Buffers->GetWidth(), renderContext.Buffers->GetHeight(), outputFormat, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::UnorderedAccess); auto lightBuffer = RenderTargetPool::Get(tempDesc); RENDER_TARGET_POOL_SET_NAME(lightBuffer, "LightBuffer"); @@ -587,8 +587,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont } // Depth of Field - auto dofTemporary = DepthOfFieldPass::Instance()->Render(renderContext, frameBuffer); - frameBuffer = dofTemporary ? dofTemporary : frameBuffer; + DepthOfFieldPass::Instance()->Render(renderContext, frameBuffer, tempBuffer); // Motion Blur MotionBlurPass::Instance()->Render(renderContext, frameBuffer, tempBuffer); @@ -600,7 +599,6 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont EyeAdaptationPass::Instance()->Render(renderContext, frameBuffer); PostProcessingPass::Instance()->Render(renderContext, frameBuffer, tempBuffer, colorGradingLUT); RenderTargetPool::Release(colorGradingLUT); - RenderTargetPool::Release(dofTemporary); Swap(frameBuffer, tempBuffer); // Cleanup