Fix regression crash when using DoF, optimize DoF usage of GPU memory

2023-01-09 16:06:47 +01:00
parent 8c9a3055e1
commit 70cce0e1ee
5 changed files with 34 additions and 60 deletions
--- a/Source/Engine/Renderer/DepthOfFieldPass.cpp
+++ b/Source/Engine/Renderer/DepthOfFieldPass.cpp
@@ -200,10 +200,10 @@ GPUTexture* DepthOfFieldPass::getDofBokehShape(DepthOfFieldSettings& dofSettings
    return result ? result->GetTexture() : nullptr;
 }

-GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* input)
+void DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture*& frame, GPUTexture*& tmp)
 {
    if (!_platformSupportsDoF || checkIfSkipPass())
-        return nullptr;
+        return;
    auto device = GPUDevice::Instance;
    auto context = device->GetMainContext();
    const auto depthBuffer = renderContext.Buffers->DepthBuffer;
@@ -211,7 +211,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
    DepthOfFieldSettings& dofSettings = renderContext.List->Settings.DepthOfField;
    const bool useDoF = _platformSupportsDoF && (renderContext.View.Flags & ViewFlags::DepthOfField) != 0 && dofSettings.Enabled;
    if (!useDoF)
-        return nullptr;
+        return;
    PROFILE_GPU_CPU("Depth Of Field");

    context->ResetSR();
@@ -222,8 +222,8 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
    const int32 dofResolutionDivider = 1;
    const int32 bokehResolutionDivider = 1;
    // TODO: in low-res DoF maybe use shared HalfResDepth?
-    const int32 w1 = input->Width();
-    const int32 h1 = input->Height();
+    const int32 w1 = frame->Width();
+    const int32 h1 = frame->Height();
    const int32 cocWidth = w1 / cocResolutionDivider;
    const int32 cocHeight = h1 / cocResolutionDivider;
    const int32 dofWidth = w1 / dofResolutionDivider;
@@ -241,8 +241,6 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i

    // TODO: maybe we could render particles (whole transparency in general) to the depth buffer to apply DoF on them as well?

-    // TODO: reduce amount of used temporary render targets, we could plan rendering steps in more static way and hardcode some logic to make it run faster with less memory usage (less bandwitch)
-
    // Setup constant buffer
    {
        float nearPlane = renderContext.View.Near;
@@ -299,8 +297,6 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
    // Peek temporary render target for dof pass
    auto dofFormat = renderContext.Buffers->GetOutputFormat();
    tempDesc = GPUTextureDescription::New2D(dofWidth, dofHeight, dofFormat);
-    GPUTexture* dofInput = RenderTargetPool::Get(tempDesc);
-    RENDER_TARGET_POOL_SET_NAME(dofInput, "DOF.Output");

    // Do the bokeh point generation, or just do a copy if disabled
    bool isBokehGenerationEnabled = dofSettings.BokehEnabled && _platformSupportsBokeh && dofSettings.BokehBrightness > 0.0f && dofSettings.BokehSize > 0.0f;
@@ -318,7 +314,7 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
            if (_bokehBuffer->Init(GPUBufferDescription::StructuredAppend(minRequiredElements, elementStride)))
            {
                LOG(Fatal, "Cannot create buffer {0}.", TEXT("Bokeh Buffer"));
-                return nullptr;
+                return;
            }
        }

@@ -326,10 +322,10 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
        context->ResetCounter(_bokehBuffer);

        // Generate bokeh points
-        context->BindSR(0, input);
+        context->BindSR(0, frame);
        context->BindSR(1, depthBlurTarget);
        context->BindUA(1, _bokehBuffer->View());
-        context->SetRenderTarget(*dofInput);
+        context->SetRenderTarget(*tmp);
        context->SetViewportAndScissors((float)dofWidth, (float)dofHeight);
        context->SetState(_psBokehGeneration);
        context->DrawFullscreenTriangle();
@@ -337,33 +333,26 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
    else
    {
        // Generate bokeh points
-        context->BindSR(0, input);
+        context->BindSR(0, frame);
        context->BindSR(1, depthBlurTarget);
-        context->SetRenderTarget(*dofInput);
+        context->SetRenderTarget(*tmp);
        context->SetViewportAndScissors((float)dofWidth, (float)dofHeight);
        context->SetState(_psDoNotGenerateBokeh);
        context->DrawFullscreenTriangle();
    }
+    Swap(frame, tmp);

    // Do depth of field (using compute shaders in full resolution)
-    GPUTexture* dofOutput;
    context->ResetRenderTarget();
    context->ResetSR();
    context->ResetUA();
    context->FlushState();
    {
-        // Peek temporary targets for two blur passes
-        tempDesc = GPUTextureDescription::New2D(dofWidth, dofHeight, dofFormat, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::UnorderedAccess);
-        auto dofTargetH = RenderTargetPool::Get(tempDesc);
-        auto dofTargetV = RenderTargetPool::Get(tempDesc);
-        RENDER_TARGET_POOL_SET_NAME(dofTargetH, "DOF.TargetH");
-        RENDER_TARGET_POOL_SET_NAME(dofTargetV, "DOF.TargetV");
-
        // Horizontal pass
-        context->BindSR(0, dofInput);
+        context->BindSR(0, frame);
        context->BindSR(1, depthBlurTarget);
        //
-        context->BindUA(0, dofTargetH->View());
+        context->BindUA(0, tmp->View());
        //
        uint32 groupCountX = (dofWidth / DOF_GRID_SIZE) + ((dofWidth % DOF_GRID_SIZE) > 0 ? 1 : 0);
        uint32 groupCountY = dofHeight;
@@ -376,9 +365,9 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
        context->ResetSR();

        // Vertical pass
-        context->BindUA(0, dofTargetV->View());
+        context->BindUA(0, frame->View());
        //
-        context->BindSR(0, dofTargetH);
+        context->BindSR(0, tmp);
        context->BindSR(1, depthBlurTarget);
        //
        groupCountX = dofWidth;
@@ -391,20 +380,14 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
        // Cleanup
        context->ResetUA();
        context->ResetSR();
-        RenderTargetPool::Release(dofTargetH);
-
-        dofOutput = dofTargetV;
    }

-    // Cleanup temporary texture
-    RenderTargetPool::Release(dofInput);
-
    // Render the bokeh points
    if (isBokehGenerationEnabled)
    {
        tempDesc = GPUTextureDescription::New2D(bokehTargetWidth, bokehTargetHeight, dofFormat);
        auto bokehTarget = RenderTargetPool::Get(tempDesc);
-        RENDER_TARGET_POOL_SET_NAME(depthBlurTarget, "DOF.Bokeh");
+        RENDER_TARGET_POOL_SET_NAME(bokehTarget, "DOF.Bokeh");
        context->Clear(*bokehTarget, Color::Black);

        {
@@ -426,24 +409,17 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
        }

        // Composite the bokeh rendering results with the depth of field result
-        tempDesc = GPUTextureDescription::New2D(dofWidth, dofHeight, dofFormat);
-        auto compositeTarget = RenderTargetPool::Get(tempDesc);
-        RENDER_TARGET_POOL_SET_NAME(depthBlurTarget, "DOF.Composite");
        context->BindSR(0, bokehTarget);
-        context->BindSR(1, dofOutput);
-        context->SetRenderTarget(*compositeTarget);
+        context->BindSR(1, frame);
+        context->SetRenderTarget(*tmp);
        context->SetViewportAndScissors((float)dofWidth, (float)dofHeight);
        context->SetState(_psBokehComposite);
        context->DrawFullscreenTriangle();
        context->ResetRenderTarget();

        RenderTargetPool::Release(bokehTarget);
-        RenderTargetPool::Release(dofOutput);
-        dofOutput = compositeTarget;
+        Swap(frame, tmp);
    }

    RenderTargetPool::Release(depthBlurTarget);
-
-    // Return output temporary render target
-    return dofOutput;
 }
--- a/Source/Engine/Renderer/DepthOfFieldPass.h
+++ b/Source/Engine/Renderer/DepthOfFieldPass.h
@@ -42,9 +42,9 @@ public:
    /// Perform Depth Of Field rendering for the input task
    /// </summary>
    /// <param name="renderContext">The rendering context.</param>
-    /// <param name="input">Target with rendered HDR frame</param>
-    /// <returns>Allocated temporary render target, should be released by the called. Can be null if pass skipped.</returns>
-    GPUTexture* Render(RenderContext& renderContext, GPUTexture* input);
+    /// <param name="frame">Input and output frame (left unchanged when this effect is skipped).</param>
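+    /// <param name="tmp">Temporary frame (the same format as frame). Both frame and tmp are bound as unordered access outputs by the blur passes and are swapped whenever a pass renders into tmp.</param>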
+    void Render(RenderContext& renderContext, GPUTexture*& frame, GPUTexture*& tmp);

 private:
    GPUTexture* getDofBokehShape(DepthOfFieldSettings& dofSettings);
--- a/Source/Engine/Renderer/MotionBlurPass.cpp
+++ b/Source/Engine/Renderer/MotionBlurPass.cpp
@@ -258,15 +258,15 @@ void MotionBlurPass::RenderDebug(RenderContext& renderContext, GPUTextureView* f
    context->ResetSR();
 }

-void MotionBlurPass::Render(RenderContext& renderContext, GPUTexture*& input, GPUTexture*& output)
+void MotionBlurPass::Render(RenderContext& renderContext, GPUTexture*& frame, GPUTexture*& tmp)
 {
    const bool isCameraCut = renderContext.Task->IsCameraCut;
    const auto motionVectors = renderContext.Buffers->MotionVectors;
    ASSERT(motionVectors);
    auto context = GPUDevice::Instance->GetMainContext();
    MotionBlurSettings& settings = renderContext.List->Settings.MotionBlur;
-    const int32 screenWidth = input->Width();
-    const int32 screenHeight = input->Height();
+    const int32 screenWidth = frame->Width();
+    const int32 screenHeight = frame->Height();
    const int32 motionVectorsWidth = screenWidth / static_cast<int32>(settings.MotionVectorsResolution);
    const int32 motionVectorsHeight = screenHeight / static_cast<int32>(settings.MotionVectorsResolution);
    if ((renderContext.View.Flags & ViewFlags::MotionBlur) == 0 ||
@@ -368,13 +368,13 @@ void MotionBlurPass::Render(RenderContext& renderContext, GPUTexture*& input, GP

    // Render motion blur
    context->ResetRenderTarget();
-    context->SetRenderTarget(*output);
+    context->SetRenderTarget(*tmp);
    context->SetViewportAndScissors((float)screenWidth, (float)screenHeight);
-    context->BindSR(0, input->View());
+    context->BindSR(0, frame->View());
    context->BindSR(1, motionVectors->View());
    context->BindSR(2, vMaxNeighborBuffer->View());
    context->BindSR(3, renderContext.Buffers->DepthBuffer->View());
-    data.Input0SizeInv = Float2(1.0f / (float)input->Width(), 1.0f / (float)input->Height());
+    data.Input0SizeInv = Float2(1.0f / (float)screenWidth, 1.0f / (float)screenHeight);
    data.Input2SizeInv = Float2(1.0f / (float)renderContext.Buffers->DepthBuffer->Width(), 1.0f / (float)renderContext.Buffers->DepthBuffer->Height());
    context->UpdateCB(cb, &data);
    context->SetState(_psMotionBlur);
@@ -384,5 +384,5 @@ void MotionBlurPass::Render(RenderContext& renderContext, GPUTexture*& input, GP
    RenderTargetPool::Release(vMaxNeighborBuffer);
    context->ResetSR();
    context->ResetRenderTarget();
-    Swap(output, input);
+    Swap(frame, tmp);
 }
--- a/Source/Engine/Renderer/MotionBlurPass.h
+++ b/Source/Engine/Renderer/MotionBlurPass.h
@@ -46,9 +46,9 @@ public:
    /// Renders the motion blur. Swaps the input with output if rendering is performed. Does nothing if rendering is not performed.
    /// </summary>
    /// <param name="renderContext">The rendering context.</param>
-    /// <param name="input">The input frame.</param>
-    /// <param name="output">The output frame.</param>
-    void Render(RenderContext& renderContext, GPUTexture*& input, GPUTexture*& output);
+    /// <param name="frame">Input and output frame (left unchanged when this effect is skipped).</param>
+    /// <param name="tmp">Temporary frame (the same format as frame). Receives the blurred result and is then swapped with frame.</param>
+    void Render(RenderContext& renderContext, GPUTexture*& frame, GPUTexture*& tmp);

 private:

--- a/Source/Engine/Renderer/Renderer.cpp
+++ b/Source/Engine/Renderer/Renderer.cpp
@@ -397,7 +397,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont

    // Get the light accumulation buffer
    auto outputFormat = renderContext.Buffers->GetOutputFormat();
-    auto tempDesc = GPUTextureDescription::New2D(renderContext.Buffers->GetWidth(), renderContext.Buffers->GetHeight(), outputFormat);
+    auto tempDesc = GPUTextureDescription::New2D(renderContext.Buffers->GetWidth(), renderContext.Buffers->GetHeight(), outputFormat, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::UnorderedAccess);
    auto lightBuffer = RenderTargetPool::Get(tempDesc);
    RENDER_TARGET_POOL_SET_NAME(lightBuffer, "LightBuffer");

@@ -587,8 +587,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont
    }

    // Depth of Field
-    auto dofTemporary = DepthOfFieldPass::Instance()->Render(renderContext, frameBuffer);
-    frameBuffer = dofTemporary ? dofTemporary : frameBuffer;
+    DepthOfFieldPass::Instance()->Render(renderContext, frameBuffer, tempBuffer);

    // Motion Blur
    MotionBlurPass::Instance()->Render(renderContext, frameBuffer, tempBuffer);
@@ -600,7 +599,6 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont
    EyeAdaptationPass::Instance()->Render(renderContext, frameBuffer);
    PostProcessingPass::Instance()->Render(renderContext, frameBuffer, tempBuffer, colorGradingLUT);
    RenderTargetPool::Release(colorGradingLUT);
-    RenderTargetPool::Release(dofTemporary);
    Swap(frameBuffer, tempBuffer);

    // Cleanup