Add explicit GPU resource transitions, memory and compute barriers
This commit is contained in:
@@ -297,7 +297,7 @@ void GPUContextDX11::SetRenderTarget(GPUTextureView* depthBuffer, const Span<GPU
|
||||
__declspec(align(16)) ID3D11RenderTargetView* rtvs[GPU_MAX_RT_BINDED];
|
||||
for (int32 i = 0; i < rts.Length(); i++)
|
||||
{
|
||||
auto rtDX11 = reinterpret_cast<GPUTextureViewDX11*>(rts[i]);
|
||||
auto rtDX11 = reinterpret_cast<GPUTextureViewDX11*>(rts.Get()[i]);
|
||||
rtvs[i] = rtDX11 ? rtDX11->RTV() : nullptr;
|
||||
}
|
||||
int32 rtvsSize = sizeof(ID3D11RenderTargetView*) * rts.Length();
|
||||
@@ -431,7 +431,7 @@ void GPUContextDX11::BindVB(const Span<GPUBuffer*>& vertexBuffers, const uint32*
|
||||
bool vbEdited = false;
|
||||
for (int32 i = 0; i < vertexBuffers.Length(); i++)
|
||||
{
|
||||
const auto vbDX11 = static_cast<GPUBufferDX11*>(vertexBuffers[i]);
|
||||
const auto vbDX11 = static_cast<GPUBufferDX11*>(vertexBuffers.Get()[i]);
|
||||
const auto vb = vbDX11 ? vbDX11->GetBuffer() : nullptr;
|
||||
vbEdited |= vb != _vbHandles[i];
|
||||
_vbHandles[i] = vb;
|
||||
|
||||
@@ -35,6 +35,7 @@
|
||||
#include "GPUShaderProgramDX12.h"
|
||||
#include "CommandSignatureDX12.h"
|
||||
#include "Engine/Profiler/RenderStats.h"
|
||||
#include "Engine/Graphics/GPUResourceAccess.h"
|
||||
#include "Engine/Graphics/Shaders/GPUShader.h"
|
||||
#include "Engine/Threading/Threading.h"
|
||||
|
||||
@@ -51,6 +52,47 @@ inline bool operator!=(const D3D12_INDEX_BUFFER_VIEW& l, const D3D12_INDEX_BUFFE
|
||||
return l.SizeInBytes != r.SizeInBytes || l.Format != r.Format || l.BufferLocation != r.BufferLocation;
|
||||
}
|
||||
|
||||
/// <summary>
/// Maps a GPUResourceAccess value onto the D3D12_RESOURCE_STATES value used for it by this backend.
/// </summary>
/// <param name="access">The resource access mode to translate.</param>
/// <returns>The D3D12 resource state for the access mode, or D3D12_RESOURCE_STATE_COMMON when the value is not handled.</returns>
FORCE_INLINE D3D12_RESOURCE_STATES GetResourceState(GPUResourceAccess access)
{
    switch (access)
    {
    case GPUResourceAccess::None:
        return D3D12_RESOURCE_STATE_COMMON;
    case GPUResourceAccess::CopyRead:
        return D3D12_RESOURCE_STATE_COPY_SOURCE;
    case GPUResourceAccess::CopyWrite:
        return D3D12_RESOURCE_STATE_COPY_DEST;
    case GPUResourceAccess::CpuRead:
        return D3D12_RESOURCE_STATE_GENERIC_READ;
    case GPUResourceAccess::CpuWrite:
        return D3D12_RESOURCE_STATE_COMMON;
    case GPUResourceAccess::DepthRead:
        return D3D12_RESOURCE_STATE_DEPTH_READ;
    case GPUResourceAccess::DepthWrite:
        return D3D12_RESOURCE_STATE_DEPTH_WRITE;
    case GPUResourceAccess::DepthBuffer:
        // D3D12 does not allow a write state to be combined with any read state, so DEPTH_READ | DEPTH_WRITE is not a valid state.
        // DEPTH_WRITE alone is the state for a writable depth-stencil view (depth test together with depth write).
        return D3D12_RESOURCE_STATE_DEPTH_WRITE;
    case GPUResourceAccess::RenderTarget:
        return D3D12_RESOURCE_STATE_RENDER_TARGET;
    case GPUResourceAccess::UnorderedAccess:
        return D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
    case GPUResourceAccess::IndirectArgs:
        return D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT;
    // All shader-read modes intentionally share the combined pixel + non-pixel state below (the narrower returns are disabled)
    case GPUResourceAccess::ShaderReadPixel:
        //return D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; // TODO: optimize SRV states in flushSRVs to be based on current binding usage slots
    case GPUResourceAccess::ShaderReadCompute:
    case GPUResourceAccess::ShaderReadNonPixel:
        //return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; // TODO: optimize SRV states in flushSRVs to be based on current binding usage slots
    case GPUResourceAccess::ShaderReadGraphics:
        return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
#if !BUILD_RELEASE
    default:
        // Report unknown access modes in non-release builds, then fall out of the switch to the common state
        LOG(Error, "Unsupported GPU Resource Access: {}", (uint32)access);
#endif
    }
    return D3D12_RESOURCE_STATE_COMMON;
}
|
||||
|
||||
// Ensure to match the indirect commands arguments layout
|
||||
static_assert(sizeof(GPUDispatchIndirectArgs) == sizeof(D3D12_DISPATCH_ARGUMENTS), "Wrong size of GPUDrawIndirectArgs.");
|
||||
static_assert(OFFSET_OF(GPUDispatchIndirectArgs, ThreadGroupCountX) == OFFSET_OF(D3D12_DISPATCH_ARGUMENTS, ThreadGroupCountX), "Wrong offset for GPUDrawIndirectArgs::ThreadGroupCountX");
|
||||
@@ -1124,7 +1166,8 @@ void GPUContextDX12::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCoun
|
||||
_psDirtyFlag = true;
|
||||
|
||||
// Insert UAV barrier to ensure proper memory access for multiple sequential dispatches
|
||||
AddUAVBarrier();
|
||||
if (_pass == 0)
|
||||
AddUAVBarrier();
|
||||
}
|
||||
|
||||
void GPUContextDX12::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* bufferForArgs, uint32 offsetForArgs)
|
||||
@@ -1158,7 +1201,8 @@ void GPUContextDX12::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* buf
|
||||
_psDirtyFlag = true;
|
||||
|
||||
// Insert UAV barrier to ensure proper memory access for multiple sequential dispatches
|
||||
AddUAVBarrier();
|
||||
if (_pass == 0)
|
||||
AddUAVBarrier();
|
||||
}
|
||||
|
||||
void GPUContextDX12::ResolveMultisample(GPUTexture* sourceMultisampleTexture, GPUTexture* destTexture, int32 sourceSubResource, int32 destSubResource, PixelFormat format)
|
||||
@@ -1549,4 +1593,15 @@ void GPUContextDX12::ForceRebindDescriptors()
|
||||
_commandList->SetDescriptorHeaps(ARRAY_COUNT(ppHeaps), ppHeaps);
|
||||
}
|
||||
|
||||
/// <summary>
/// Requests the D3D12 resource state that corresponds to the given access mode for the resource.
/// </summary>
/// <param name="resource">The resource to transition. Ignored when null or when it is not a ResourceOwnerDX12.</param>
/// <param name="access">The access mode, translated with GetResourceState.</param>
void GPUContextDX12::Transition(GPUResource* resource, GPUResourceAccess access)
{
    // dynamic_cast yields null for a null resource or for one that is not a ResourceOwnerDX12; there is nothing to transition then
    const auto owner = dynamic_cast<ResourceOwnerDX12*>(resource);
    if (owner == nullptr)
        return;
    SetResourceState(owner, GetResourceState(access));
}
|
||||
|
||||
void GPUContextDX12::OverlapUA(bool end)
|
||||
{
|
||||
if (end)
|
||||
AddUAVBarrier();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -21,7 +21,7 @@ class GPUVertexLayoutDX12;
|
||||
/// <summary>
|
||||
/// Capacity of the resource barriers buffer (will be flushed on overflow)
|
||||
/// </summary>
|
||||
#define DX12_RB_BUFFER_SIZE 16
|
||||
#define DX12_RB_BUFFER_SIZE 64
|
||||
|
||||
/// <summary>
|
||||
/// GPU Commands Context implementation for DirectX 12
|
||||
@@ -214,6 +214,8 @@ public:
|
||||
void CopySubresource(GPUResource* dstResource, uint32 dstSubresource, GPUResource* srcResource, uint32 srcSubresource) override;
|
||||
void SetResourceState(GPUResource* resource, uint64 state, int32 subresource) override;
|
||||
void ForceRebindDescriptors() override;
|
||||
void Transition(GPUResource* resource, GPUResourceAccess access) override;
|
||||
void OverlapUA(bool end) override;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user