From 88cf1bcd28582aae7a93d331a3d47ed7f6e906b1 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 7 Oct 2021 14:15:45 +0200 Subject: [PATCH] Refactor UAV slots binding when rendering from PS into UAV --- Source/Engine/Graphics/GPUContext.h | 7 -- .../DirectX/DX11/GPUContextDX11.cpp | 99 +++++-------------- .../DirectX/DX11/GPUContextDX11.h | 4 +- .../DirectX/DX12/GPUContextDX12.cpp | 16 +-- .../DirectX/DX12/GPUContextDX12.h | 3 +- .../GraphicsDevice/Null/GPUContextNull.h | 4 - .../Vulkan/GPUContextVulkan.cpp | 6 -- .../GraphicsDevice/Vulkan/GPUContextVulkan.h | 1 - Source/Engine/Renderer/DepthOfFieldPass.cpp | 14 ++- 9 files changed, 38 insertions(+), 116 deletions(-) diff --git a/Source/Engine/Graphics/GPUContext.h b/Source/Engine/Graphics/GPUContext.h index c46e81d46..721bd406a 100644 --- a/Source/Engine/Graphics/GPUContext.h +++ b/Source/Engine/Graphics/GPUContext.h @@ -305,13 +305,6 @@ public: /// The array with render targets to bind. API_FUNCTION() virtual void SetRenderTarget(GPUTextureView* depthBuffer, const Span& rts) = 0; - /// - /// Sets the render target and unordered access output. - /// - /// The render target to bind to output. - /// The unordered access buffer to bind to output. - API_FUNCTION() virtual void SetRenderTarget(GPUTextureView* rt, GPUBuffer* uaOutput) = 0; - public: /// diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp index ed2a13ae0..7221ee05b 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp @@ -49,7 +49,6 @@ GPUContextDX11::GPUContextDX11(GPUDeviceDX11* device, ID3D11DeviceContext* conte , _omDirtyFlag(false) , _rtCount(0) , _rtDepth(nullptr) - , _uaOutput(nullptr) , _srDirtyFlag(false) , _uaDirtyFlag(false) , _cbDirtyFlag(false) @@ -61,9 +60,9 @@ GPUContextDX11::GPUContextDX11(GPUDeviceDX11* device, ID3D11DeviceContext* conte #endif // Only DirectX 11 supports more than 1 UAV - _maxUASlotsForCS = GPU_MAX_UA_BINDED; + _maxUASlots = GPU_MAX_UA_BINDED; if (_device->GetRendererType() != RendererType::DirectX11) - _maxUASlotsForCS = 1; + _maxUASlots = 1; } GPUContextDX11::~GPUContextDX11() @@ -86,7 +85,6 @@ void GPUContextDX11::FrameBegin() _rtCount = 0; _currentState = nullptr; _rtDepth = nullptr; - _uaOutput = nullptr; Platform::MemoryClear(_rtHandles, sizeof(_rtHandles)); Platform::MemoryClear(_srHandles, sizeof(_srHandles)); Platform::MemoryClear(_uaHandles, sizeof(_uaHandles)); @@ -180,12 +178,11 @@ void GPUContextDX11::ClearUA(GPUBuffer* buf, const Vector4& value) void GPUContextDX11::ResetRenderTarget() { - if (_rtCount != 0 || _uaOutput || _rtDepth) + if (_rtCount != 0 || _rtDepth) { _omDirtyFlag = true; _rtCount = 0; _rtDepth = nullptr; - _uaOutput = nullptr; Platform::MemoryClear(_rtHandles, sizeof(_rtHandles)); @@ -200,13 +197,12 @@ void GPUContextDX11::SetRenderTarget(GPUTextureView* rt) ID3D11RenderTargetView* rtv = rtDX11 ? rtDX11->RTV() : nullptr; int32 newRtCount = rtv ? 1 : 0; - if (_rtCount != newRtCount || _rtHandles[0] != rtv || _rtDepth != nullptr || _uaOutput) + if (_rtCount != newRtCount || _rtHandles[0] != rtv || _rtDepth != nullptr) { _omDirtyFlag = true; _rtCount = newRtCount; _rtDepth = nullptr; _rtHandles[0] = rtv; - _uaOutput = nullptr; } } @@ -219,13 +215,12 @@ void GPUContextDX11::SetRenderTarget(GPUTextureView* depthBuffer, GPUTextureView ID3D11DepthStencilView* dsv = depthBufferDX11 ? depthBufferDX11->DSV() : nullptr; int32 newRtCount = rtv ? 1 : 0; - if (_rtCount != newRtCount || _rtHandles[0] != rtv || _rtDepth != dsv || _uaOutput) + if (_rtCount != newRtCount || _rtHandles[0] != rtv || _rtDepth != dsv) { _omDirtyFlag = true; _rtCount = newRtCount; _rtDepth = dsv; _rtHandles[0] = rtv; - _uaOutput = nullptr; } } @@ -244,35 +239,15 @@ void GPUContextDX11::SetRenderTarget(GPUTextureView* depthBuffer, const Span(rt); - auto uaOutputDX11 = reinterpret_cast(uaOutput); - - ID3D11RenderTargetView* rtv = rtDX11 ? rtDX11->RTV() : nullptr; - ID3D11UnorderedAccessView* uav = uaOutputDX11 ? ((GPUBufferViewDX11*)uaOutputDX11->View())->UAV() : nullptr; - int32 newRtCount = rtv ? 1 : 0; - - if (_rtCount != newRtCount || _rtHandles[0] != rtv || _rtDepth != nullptr || _uaOutput != uav) - { - _omDirtyFlag = true; - _rtCount = newRtCount; - _rtDepth = nullptr; - _rtHandles[0] = rtv; - _uaOutput = uav; - } -} - void GPUContextDX11::ResetSR() { _srDirtyFlag = false; @@ -291,7 +266,8 @@ void GPUContextDX11::ResetUA() _uaDirtyFlag = false; Platform::MemoryClear(_uaHandles, sizeof(_uaHandles)); - _context->CSSetUnorderedAccessViews(0, _maxUASlotsForCS, _uaHandles, nullptr); + _context->CSSetUnorderedAccessViews(0, _maxUASlots, _uaHandles, nullptr); + _context->OMSetRenderTargetsAndUnorderedAccessViews(D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL, nullptr, nullptr, 0, 0, nullptr, nullptr); } void GPUContextDX11::ResetCB() @@ -791,10 +767,8 @@ void GPUContextDX11::CopySubresource(GPUResource* dstResource, uint32 dstSubreso void GPUContextDX11::flushSRVs() { - // Check if need to flush shader resources if (_srDirtyFlag) { - // Clear flag _srDirtyFlag = false; // Flush with the driver @@ -816,24 +790,23 @@ void GPUContextDX11::flushSRVs() void GPUContextDX11::flushUAVs() { - // Check if need to flush unordered access if (_uaDirtyFlag) { - // Clear flag _uaDirtyFlag = false; // Flush with the driver uint32 initialCounts[GPU_MAX_UA_BINDED] = { 0 }; - _context->CSSetUnorderedAccessViews(0, _maxUASlotsForCS, _uaHandles, initialCounts); + if (CurrentCS) + _context->CSSetUnorderedAccessViews(0, _maxUASlots, _uaHandles, initialCounts); + else + _context->OMSetRenderTargetsAndUnorderedAccessViews(D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL, nullptr, nullptr, _rtCount, _maxUASlots - _rtCount, _uaHandles + _rtCount, initialCounts); } } void GPUContextDX11::flushCBs() { - // Check if need to flush constant buffers if (_cbDirtyFlag) { - // Clear flag _cbDirtyFlag = false; // Flush with the driver @@ -855,47 +828,29 @@ void GPUContextDX11::flushCBs() void GPUContextDX11::flushOM() { - // Check if need to flush output merger state or/and unordered access views if (_omDirtyFlag) { -#if _DEBUG - // Validate binded render targets amount - int32 rtCount = 0; - for (int i = 0; i < ARRAY_COUNT(_rtHandles) && i < _rtCount; i++) - { - if (_rtHandles[i] != nullptr) - rtCount++; - else - break; - } - ASSERT(rtCount == _rtCount); -#endif - - // Check if don't use UAVs and set output merger render targets table - if (_uaOutput == nullptr) - { - _context->OMSetRenderTargets(_rtCount, _rtHandles, _rtDepth); - } - else - { - // Note: current dx11 content implementation assumes that there must be one or more render targets binded in order to use uav output - ASSERT(_rtCount > 0); - - uint32 initialCounts[1] = { 0 }; - // TODO: set -1 if buffer had no ResetCounter call since last time - _context->OMSetRenderTargetsAndUnorderedAccessViews(_rtCount, _rtHandles, _rtDepth, _rtCount, 1, &_uaOutput, initialCounts); - } - - // Clear flag _omDirtyFlag = false; + int32 uaCount = 0; + for (int32 i = _maxUASlots - 1; i >= 0; i--) + { + if (_uaHandles[i]) + { + uaCount = i + 1; + break; + } + } + + // Flush with the driver + if (uaCount > 0) + _context->OMSetRenderTargetsAndUnorderedAccessViews(_rtCount, _rtHandles, _rtDepth, 0, D3D11_KEEP_UNORDERED_ACCESS_VIEWS, nullptr, nullptr); + else + _context->OMSetRenderTargets(_rtCount, _rtHandles, _rtDepth); } } void GPUContextDX11::onDrawCall() { - ASSERT(_currentState); - - // Flush flushCBs(); flushSRVs(); flushUAVs(); diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h index 0ec811565..76b91f26e 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h @@ -23,14 +23,13 @@ private: #if GPU_ALLOW_PROFILE_EVENTS ID3DUserDefinedAnnotation* _userDefinedAnnotations; #endif - int32 _maxUASlotsForCS; + int32 _maxUASlots; // Output Merger bool _omDirtyFlag; int32 _rtCount; ID3D11DepthStencilView* _rtDepth; ID3D11RenderTargetView* _rtHandles[GPU_MAX_RT_BINDED]; - ID3D11UnorderedAccessView* _uaOutput; // Shader Resources bool _srDirtyFlag; @@ -113,7 +112,6 @@ public: void SetRenderTarget(GPUTextureView* rt) override; void SetRenderTarget(GPUTextureView* depthBuffer, GPUTextureView* rt) override; void SetRenderTarget(GPUTextureView* depthBuffer, const Span& rts) override; - void SetRenderTarget(GPUTextureView* rt, GPUBuffer* uaOutput) override; void ResetSR() override; void ResetUA() override; void ResetCB() override; diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp index 42b51f993..45df37e4d 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp @@ -417,11 +417,11 @@ void GPUContextDX12::flushUAVs() // Count UAVs required to be bind to the pipeline (the index of the most significant bit that's set) const uint32 uaCount = Math::FloorLog2(uaMask) + 1; - ASSERT(uaCount <= GPU_MAX_UA_BINDED + 1); + ASSERT(uaCount <= GPU_MAX_UA_BINDED); // Fill table with source descriptors DxShaderHeader& header = _currentCompute ? ((GPUShaderProgramCSDX12*)_currentCompute)->Header : _currentState->Header; - D3D12_CPU_DESCRIPTOR_HANDLE srcDescriptorRangeStarts[GPU_MAX_UA_BINDED + 1]; + D3D12_CPU_DESCRIPTOR_HANDLE srcDescriptorRangeStarts[GPU_MAX_UA_BINDED]; for (uint32 i = 0; i < uaCount; i++) { const auto handle = _uaHandles[i]; @@ -716,7 +716,6 @@ void GPUContextDX12::ClearDepth(GPUTextureView* depthBuffer, float depthValue) void GPUContextDX12::ClearUA(GPUBuffer* buf, const Vector4& value) { ASSERT(buf != nullptr && buf->IsUnorderedAccess()); - auto bufDX12 = reinterpret_cast(buf); SetResourceState(bufDX12, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); @@ -797,17 +796,6 @@ void GPUContextDX12::SetRenderTarget(GPUTextureView* depthBuffer, const SpanView() : nullptr; - - // Set render target normally - SetRenderTarget(nullptr, rt); - - // Bind UAV output to the 2nd slot (after render target to match DX11 binding model) - _uaHandles[1] = uaOutputDX12; -} - void GPUContextDX12::ResetSR() { for (int32 slot = 0; slot < GPU_MAX_SR_BINDED; slot++) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h index 1d5fa2f42..52e6b8226 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h @@ -158,11 +158,12 @@ public: void Clear(GPUTextureView* rt, const Color& color) override; void ClearDepth(GPUTextureView* depthBuffer, float depthValue) override; void ClearUA(GPUBuffer* buf, const Vector4& value) override; + void ClearUA(GPUBuffer* buf, const uint32 value[4]) override; + void ClearUA(GPUTexture* texture, const uint32 value[4]) override; void ResetRenderTarget() override; void SetRenderTarget(GPUTextureView* rt) override; void SetRenderTarget(GPUTextureView* depthBuffer, GPUTextureView* rt) override; void SetRenderTarget(GPUTextureView* depthBuffer, const Span& rts) override; - void SetRenderTarget(GPUTextureView* rt, GPUBuffer* uaOutput) override; void ResetSR() override; void ResetUA() override; void ResetCB() override; diff --git a/Source/Engine/GraphicsDevice/Null/GPUContextNull.h b/Source/Engine/GraphicsDevice/Null/GPUContextNull.h index 96f2e9485..5d47b51bc 100644 --- a/Source/Engine/GraphicsDevice/Null/GPUContextNull.h +++ b/Source/Engine/GraphicsDevice/Null/GPUContextNull.h @@ -72,10 +72,6 @@ public: { } - void SetRenderTarget(GPUTextureView* rt, GPUBuffer* uaOutput) override - { - } - void ResetSR() override { } diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp index 1ad2c948b..42b93ca4c 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp @@ -912,12 +912,6 @@ void GPUContextVulkan::SetRenderTarget(GPUTextureView* depthBuffer, const Span& rts) override; - void SetRenderTarget(GPUTextureView* rt, GPUBuffer* uaOutput) override; void ResetSR() override; void ResetUA() override; void ResetCB() override; diff --git a/Source/Engine/Renderer/DepthOfFieldPass.cpp b/Source/Engine/Renderer/DepthOfFieldPass.cpp index 3affcdfda..4ad3fdaf5 100644 --- a/Source/Engine/Renderer/DepthOfFieldPass.cpp +++ b/Source/Engine/Renderer/DepthOfFieldPass.cpp @@ -349,7 +349,8 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i // Generate bokeh points context->BindSR(0, input); context->BindSR(1, depthBlurTarget); - context->SetRenderTarget(*dofInput, _bokehBuffer); + context->BindUA(1, _bokehBuffer->View()); + context->SetRenderTarget(*dofInput); context->SetViewportAndScissors((float)dofWidth, (float)dofHeight); context->SetState(_psBokehGeneration); context->DrawFullscreenTriangle(); @@ -390,9 +391,8 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i // Cleanup context->ResetRenderTarget(); - context->UnBindSR(0); - context->UnBindUA(0); - context->FlushState(); + context->ResetUA(); + context->ResetSR(); // Vertical pass context->BindUA(0, dofTargetV->View()); @@ -408,10 +408,8 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i context->ResetRenderTarget(); // Cleanup - context->UnBindSR(0); - context->UnBindSR(1); - context->UnBindUA(0); - context->FlushState(); + context->ResetUA(); + context->ResetSR(); RenderTargetPool::Release(dofTargetH); dofOutput = dofTargetV;