diff --git a/Source/Engine/Graphics/GPUContext.h b/Source/Engine/Graphics/GPUContext.h
index c46e81d46..721bd406a 100644
--- a/Source/Engine/Graphics/GPUContext.h
+++ b/Source/Engine/Graphics/GPUContext.h
@@ -305,13 +305,6 @@ public:
/// The array with render targets to bind.
API_FUNCTION() virtual void SetRenderTarget(GPUTextureView* depthBuffer, const Span& rts) = 0;
- ///
- /// Sets the render target and unordered access output.
- ///
- /// The render target to bind to output.
- /// The unordered access buffer to bind to output.
- API_FUNCTION() virtual void SetRenderTarget(GPUTextureView* rt, GPUBuffer* uaOutput) = 0;
-
public:
///
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp
index ed2a13ae0..7221ee05b 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp
+++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp
@@ -49,7 +49,6 @@ GPUContextDX11::GPUContextDX11(GPUDeviceDX11* device, ID3D11DeviceContext* conte
, _omDirtyFlag(false)
, _rtCount(0)
, _rtDepth(nullptr)
- , _uaOutput(nullptr)
, _srDirtyFlag(false)
, _uaDirtyFlag(false)
, _cbDirtyFlag(false)
@@ -61,9 +60,9 @@ GPUContextDX11::GPUContextDX11(GPUDeviceDX11* device, ID3D11DeviceContext* conte
#endif
// Only DirectX 11 supports more than 1 UAV
- _maxUASlotsForCS = GPU_MAX_UA_BINDED;
+ _maxUASlots = GPU_MAX_UA_BINDED;
if (_device->GetRendererType() != RendererType::DirectX11)
- _maxUASlotsForCS = 1;
+ _maxUASlots = 1;
}
GPUContextDX11::~GPUContextDX11()
@@ -86,7 +85,6 @@ void GPUContextDX11::FrameBegin()
_rtCount = 0;
_currentState = nullptr;
_rtDepth = nullptr;
- _uaOutput = nullptr;
Platform::MemoryClear(_rtHandles, sizeof(_rtHandles));
Platform::MemoryClear(_srHandles, sizeof(_srHandles));
Platform::MemoryClear(_uaHandles, sizeof(_uaHandles));
@@ -180,12 +178,11 @@ void GPUContextDX11::ClearUA(GPUBuffer* buf, const Vector4& value)
void GPUContextDX11::ResetRenderTarget()
{
- if (_rtCount != 0 || _uaOutput || _rtDepth)
+ if (_rtCount != 0 || _rtDepth)
{
_omDirtyFlag = true;
_rtCount = 0;
_rtDepth = nullptr;
- _uaOutput = nullptr;
Platform::MemoryClear(_rtHandles, sizeof(_rtHandles));
@@ -200,13 +197,12 @@ void GPUContextDX11::SetRenderTarget(GPUTextureView* rt)
ID3D11RenderTargetView* rtv = rtDX11 ? rtDX11->RTV() : nullptr;
int32 newRtCount = rtv ? 1 : 0;
- if (_rtCount != newRtCount || _rtHandles[0] != rtv || _rtDepth != nullptr || _uaOutput)
+ if (_rtCount != newRtCount || _rtHandles[0] != rtv || _rtDepth != nullptr)
{
_omDirtyFlag = true;
_rtCount = newRtCount;
_rtDepth = nullptr;
_rtHandles[0] = rtv;
- _uaOutput = nullptr;
}
}
@@ -219,13 +215,12 @@ void GPUContextDX11::SetRenderTarget(GPUTextureView* depthBuffer, GPUTextureView
ID3D11DepthStencilView* dsv = depthBufferDX11 ? depthBufferDX11->DSV() : nullptr;
int32 newRtCount = rtv ? 1 : 0;
- if (_rtCount != newRtCount || _rtHandles[0] != rtv || _rtDepth != dsv || _uaOutput)
+ if (_rtCount != newRtCount || _rtHandles[0] != rtv || _rtDepth != dsv)
{
_omDirtyFlag = true;
_rtCount = newRtCount;
_rtDepth = dsv;
_rtHandles[0] = rtv;
- _uaOutput = nullptr;
}
}
@@ -244,35 +239,15 @@ void GPUContextDX11::SetRenderTarget(GPUTextureView* depthBuffer, const Span(rt);
- auto uaOutputDX11 = reinterpret_cast(uaOutput);
-
- ID3D11RenderTargetView* rtv = rtDX11 ? rtDX11->RTV() : nullptr;
- ID3D11UnorderedAccessView* uav = uaOutputDX11 ? ((GPUBufferViewDX11*)uaOutputDX11->View())->UAV() : nullptr;
- int32 newRtCount = rtv ? 1 : 0;
-
- if (_rtCount != newRtCount || _rtHandles[0] != rtv || _rtDepth != nullptr || _uaOutput != uav)
- {
- _omDirtyFlag = true;
- _rtCount = newRtCount;
- _rtDepth = nullptr;
- _rtHandles[0] = rtv;
- _uaOutput = uav;
- }
-}
-
void GPUContextDX11::ResetSR()
{
_srDirtyFlag = false;
@@ -291,7 +266,8 @@ void GPUContextDX11::ResetUA()
_uaDirtyFlag = false;
Platform::MemoryClear(_uaHandles, sizeof(_uaHandles));
- _context->CSSetUnorderedAccessViews(0, _maxUASlotsForCS, _uaHandles, nullptr);
+ _context->CSSetUnorderedAccessViews(0, _maxUASlots, _uaHandles, nullptr);
+ _context->OMSetRenderTargetsAndUnorderedAccessViews(D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL, nullptr, nullptr, 0, 0, nullptr, nullptr);
}
void GPUContextDX11::ResetCB()
@@ -791,10 +767,8 @@ void GPUContextDX11::CopySubresource(GPUResource* dstResource, uint32 dstSubreso
void GPUContextDX11::flushSRVs()
{
- // Check if need to flush shader resources
if (_srDirtyFlag)
{
- // Clear flag
_srDirtyFlag = false;
// Flush with the driver
@@ -816,24 +790,23 @@ void GPUContextDX11::flushSRVs()
void GPUContextDX11::flushUAVs()
{
- // Check if need to flush unordered access
if (_uaDirtyFlag)
{
- // Clear flag
_uaDirtyFlag = false;
// Flush with the driver
uint32 initialCounts[GPU_MAX_UA_BINDED] = { 0 };
- _context->CSSetUnorderedAccessViews(0, _maxUASlotsForCS, _uaHandles, initialCounts);
+ if (CurrentCS) // CurrentCS is set (presumably a compute dispatch is in flight - confirm): bind slots [0, _maxUASlots) through the compute stage
+ _context->CSSetUnorderedAccessViews(0, _maxUASlots, _uaHandles, initialCounts);
+ else // No compute shader: route the UAVs through the output merger instead
+ _context->OMSetRenderTargetsAndUnorderedAccessViews(D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL, nullptr, nullptr, _rtCount, _maxUASlots - _rtCount, _uaHandles + _rtCount, initialCounts); // Keeps the bound RTVs/DSV; UAV slots start right after the render targets. NOTE(review): assumes _rtCount <= _maxUASlots, otherwise the UAV count goes negative and _uaHandles + _rtCount points past the table - confirm against GPU_MAX_RT_BINDED/GPU_MAX_UA_BINDED
}
}
void GPUContextDX11::flushCBs()
{
- // Check if need to flush constant buffers
if (_cbDirtyFlag)
{
- // Clear flag
_cbDirtyFlag = false;
// Flush with the driver
@@ -855,47 +828,29 @@ void GPUContextDX11::flushCBs()
void GPUContextDX11::flushOM()
{
- // Check if need to flush output merger state or/and unordered access views
if (_omDirtyFlag)
{
-#if _DEBUG
- // Validate binded render targets amount
- int32 rtCount = 0;
- for (int i = 0; i < ARRAY_COUNT(_rtHandles) && i < _rtCount; i++)
- {
- if (_rtHandles[i] != nullptr)
- rtCount++;
- else
- break;
- }
- ASSERT(rtCount == _rtCount);
-#endif
-
- // Check if don't use UAVs and set output merger render targets table
- if (_uaOutput == nullptr)
- {
- _context->OMSetRenderTargets(_rtCount, _rtHandles, _rtDepth);
- }
- else
- {
- // Note: current dx11 content implementation assumes that there must be one or more render targets binded in order to use uav output
- ASSERT(_rtCount > 0);
-
- uint32 initialCounts[1] = { 0 };
- // TODO: set -1 if buffer had no ResetCounter call since last time
- _context->OMSetRenderTargetsAndUnorderedAccessViews(_rtCount, _rtHandles, _rtDepth, _rtCount, 1, &_uaOutput, initialCounts);
- }
-
- // Clear flag
_omDirtyFlag = false;
+ int32 uaCount = 0; // One past the highest occupied UAV slot (0 = no UAV bound); only compared against zero below
+ for (int32 i = _maxUASlots - 1; i >= 0; i--) // Scan the UAV table from the last slot downwards
+ {
+ if (_uaHandles[i]) // First non-null handle found from the top is the highest slot in use
+ {
+ uaCount = i + 1;
+ break;
+ }
+ }
+
+ // Flush with the driver (picks the bind call depending on whether any UAV slot is occupied)
+ if (uaCount > 0)
+ _context->OMSetRenderTargetsAndUnorderedAccessViews(_rtCount, _rtHandles, _rtDepth, 0, D3D11_KEEP_UNORDERED_ACCESS_VIEWS, nullptr, nullptr); // Updates RTVs/DSV only; D3D11_KEEP_UNORDERED_ACCESS_VIEWS leaves the bound UAVs as they are
+ else
+ _context->OMSetRenderTargets(_rtCount, _rtHandles, _rtDepth); // No UAV handles tracked: plain render target + depth bind
}
}
void GPUContextDX11::onDrawCall()
{
- ASSERT(_currentState);
-
- // Flush
flushCBs();
flushSRVs();
flushUAVs();
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h
index 0ec811565..76b91f26e 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h
+++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h
@@ -23,14 +23,13 @@ private:
#if GPU_ALLOW_PROFILE_EVENTS
ID3DUserDefinedAnnotation* _userDefinedAnnotations;
#endif
- int32 _maxUASlotsForCS;
+ int32 _maxUASlots;
// Output Merger
bool _omDirtyFlag;
int32 _rtCount;
ID3D11DepthStencilView* _rtDepth;
ID3D11RenderTargetView* _rtHandles[GPU_MAX_RT_BINDED];
- ID3D11UnorderedAccessView* _uaOutput;
// Shader Resources
bool _srDirtyFlag;
@@ -113,7 +112,6 @@ public:
void SetRenderTarget(GPUTextureView* rt) override;
void SetRenderTarget(GPUTextureView* depthBuffer, GPUTextureView* rt) override;
void SetRenderTarget(GPUTextureView* depthBuffer, const Span& rts) override;
- void SetRenderTarget(GPUTextureView* rt, GPUBuffer* uaOutput) override;
void ResetSR() override;
void ResetUA() override;
void ResetCB() override;
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp
index 42b51f993..45df37e4d 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp
+++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp
@@ -417,11 +417,11 @@ void GPUContextDX12::flushUAVs()
// Count UAVs required to be bind to the pipeline (the index of the most significant bit that's set)
const uint32 uaCount = Math::FloorLog2(uaMask) + 1;
- ASSERT(uaCount <= GPU_MAX_UA_BINDED + 1);
+ ASSERT(uaCount <= GPU_MAX_UA_BINDED);
// Fill table with source descriptors
DxShaderHeader& header = _currentCompute ? ((GPUShaderProgramCSDX12*)_currentCompute)->Header : _currentState->Header;
- D3D12_CPU_DESCRIPTOR_HANDLE srcDescriptorRangeStarts[GPU_MAX_UA_BINDED + 1];
+ D3D12_CPU_DESCRIPTOR_HANDLE srcDescriptorRangeStarts[GPU_MAX_UA_BINDED];
for (uint32 i = 0; i < uaCount; i++)
{
const auto handle = _uaHandles[i];
@@ -716,7 +716,6 @@ void GPUContextDX12::ClearDepth(GPUTextureView* depthBuffer, float depthValue)
void GPUContextDX12::ClearUA(GPUBuffer* buf, const Vector4& value)
{
ASSERT(buf != nullptr && buf->IsUnorderedAccess());
-
auto bufDX12 = reinterpret_cast(buf);
SetResourceState(bufDX12, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
@@ -797,17 +796,6 @@ void GPUContextDX12::SetRenderTarget(GPUTextureView* depthBuffer, const SpanView() : nullptr;
-
- // Set render target normally
- SetRenderTarget(nullptr, rt);
-
- // Bind UAV output to the 2nd slot (after render target to match DX11 binding model)
- _uaHandles[1] = uaOutputDX12;
-}
-
void GPUContextDX12::ResetSR()
{
for (int32 slot = 0; slot < GPU_MAX_SR_BINDED; slot++)
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h
index 1d5fa2f42..52e6b8226 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h
+++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h
@@ -158,11 +158,12 @@ public:
void Clear(GPUTextureView* rt, const Color& color) override;
void ClearDepth(GPUTextureView* depthBuffer, float depthValue) override;
void ClearUA(GPUBuffer* buf, const Vector4& value) override;
+ void ClearUA(GPUBuffer* buf, const uint32 value[4]) override;
+ void ClearUA(GPUTexture* texture, const uint32 value[4]) override;
void ResetRenderTarget() override;
void SetRenderTarget(GPUTextureView* rt) override;
void SetRenderTarget(GPUTextureView* depthBuffer, GPUTextureView* rt) override;
void SetRenderTarget(GPUTextureView* depthBuffer, const Span& rts) override;
- void SetRenderTarget(GPUTextureView* rt, GPUBuffer* uaOutput) override;
void ResetSR() override;
void ResetUA() override;
void ResetCB() override;
diff --git a/Source/Engine/GraphicsDevice/Null/GPUContextNull.h b/Source/Engine/GraphicsDevice/Null/GPUContextNull.h
index 96f2e9485..5d47b51bc 100644
--- a/Source/Engine/GraphicsDevice/Null/GPUContextNull.h
+++ b/Source/Engine/GraphicsDevice/Null/GPUContextNull.h
@@ -72,10 +72,6 @@ public:
{
}
- void SetRenderTarget(GPUTextureView* rt, GPUBuffer* uaOutput) override
- {
- }
-
void ResetSR() override
{
}
diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp
index 1ad2c948b..42b93ca4c 100644
--- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp
+++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp
@@ -912,12 +912,6 @@ void GPUContextVulkan::SetRenderTarget(GPUTextureView* depthBuffer, const Span& rts) override;
- void SetRenderTarget(GPUTextureView* rt, GPUBuffer* uaOutput) override;
void ResetSR() override;
void ResetUA() override;
void ResetCB() override;
diff --git a/Source/Engine/Renderer/DepthOfFieldPass.cpp b/Source/Engine/Renderer/DepthOfFieldPass.cpp
index 3affcdfda..4ad3fdaf5 100644
--- a/Source/Engine/Renderer/DepthOfFieldPass.cpp
+++ b/Source/Engine/Renderer/DepthOfFieldPass.cpp
@@ -349,7 +349,8 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
// Generate bokeh points
context->BindSR(0, input);
context->BindSR(1, depthBlurTarget);
- context->SetRenderTarget(*dofInput, _bokehBuffer);
+ context->BindUA(1, _bokehBuffer->View());
+ context->SetRenderTarget(*dofInput);
context->SetViewportAndScissors((float)dofWidth, (float)dofHeight);
context->SetState(_psBokehGeneration);
context->DrawFullscreenTriangle();
@@ -390,9 +391,8 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
// Cleanup
context->ResetRenderTarget();
- context->UnBindSR(0);
- context->UnBindUA(0);
- context->FlushState();
+ context->ResetUA();
+ context->ResetSR();
// Vertical pass
context->BindUA(0, dofTargetV->View());
@@ -408,10 +408,8 @@ GPUTexture* DepthOfFieldPass::Render(RenderContext& renderContext, GPUTexture* i
context->ResetRenderTarget();
// Cleanup
- context->UnBindSR(0);
- context->UnBindSR(1);
- context->UnBindUA(0);
- context->FlushState();
+ context->ResetUA();
+ context->ResetSR();
RenderTargetPool::Release(dofTargetH);
dofOutput = dofTargetV;