Add explicit GPU resource transitions, memory and compute barriers

Wojtek Figat
2025-08-09 23:57:43 +02:00
parent 3907bc4957
commit b5a431d2f5
12 changed files with 353 additions and 48 deletions
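In summary: the commit adds an explicit transition API to the command context. Transition(GPUResource*, GPUResourceAccess) maps a high-level access intent onto backend resource states (see GetResourceState below), and OverlapUA(bool end) brackets a region of UAV work so that, presumably, the per-dispatch UAV barrier is skipped while a pass is active and a single barrier is emitted at the end. A minimal caller-side sketch follows; only Transition, OverlapUA, Dispatch and the GPUResourceAccess values appear in this diff, while the binding helpers and includes are assumptions about the engine API:

// Hypothetical compute pass using the new explicit-transition API (caller-side sketch).
#include "Engine/Graphics/GPUContext.h"        // assumed engine header
#include "Engine/Graphics/GPUResourceAccess.h" // added by this commit

void RunReducePass(GPUContext* context, GPUTexture* input, GPUBuffer* output, GPUShaderProgramCS* csReduce)
{
    // Declare the intended access up front so the backend can queue the right barriers.
    context->Transition(input, GPUResourceAccess::ShaderReadCompute);
    context->Transition(output, GPUResourceAccess::UnorderedAccess);

    // Begin a UAV-overlap region: independent dispatches run without intermediate UAV barriers.
    context->OverlapUA(false);
    context->BindSR(0, input);             // assumed binding helpers
    context->BindUA(0, output->View());
    context->Dispatch(csReduce, 64, 1, 1);
    context->Dispatch(csReduce, 64, 1, 1); // writes a disjoint range, so no barrier is needed in between
    context->OverlapUA(true);              // end of region: a single UAV barrier is inserted here

    // Make the result readable by a following copy operation.
    context->Transition(output, GPUResourceAccess::CopyRead);
}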

View File

@@ -297,7 +297,7 @@ void GPUContextDX11::SetRenderTarget(GPUTextureView* depthBuffer, const Span<GPU
__declspec(align(16)) ID3D11RenderTargetView* rtvs[GPU_MAX_RT_BINDED];
for (int32 i = 0; i < rts.Length(); i++)
{
auto rtDX11 = reinterpret_cast<GPUTextureViewDX11*>(rts[i]);
auto rtDX11 = reinterpret_cast<GPUTextureViewDX11*>(rts.Get()[i]);
rtvs[i] = rtDX11 ? rtDX11->RTV() : nullptr;
}
int32 rtvsSize = sizeof(ID3D11RenderTargetView*) * rts.Length();
@@ -431,7 +431,7 @@ void GPUContextDX11::BindVB(const Span<GPUBuffer*>& vertexBuffers, const uint32*
bool vbEdited = false;
for (int32 i = 0; i < vertexBuffers.Length(); i++)
{
const auto vbDX11 = static_cast<GPUBufferDX11*>(vertexBuffers[i]);
const auto vbDX11 = static_cast<GPUBufferDX11*>(vertexBuffers.Get()[i]);
const auto vb = vbDX11 ? vbDX11->GetBuffer() : nullptr;
vbEdited |= vb != _vbHandles[i];
_vbHandles[i] = vb;
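The two DX11 hunks above swap the Span indexed accessor for Get()[i] inside the binding loops, which on a hot path presumably avoids the per-element bounds checking of operator[]. A small sketch of the pattern, assuming only the Get() and Length() accessors visible in the diff (the bounds-check rationale is an assumption, and BindAll is a made-up helper):

// Hot-loop pattern: index through the raw data pointer instead of the checked accessor.
void BindAll(const Span<GPUBuffer*>& buffers)
{
    for (int32 i = 0; i < buffers.Length(); i++)
    {
        GPUBuffer* const buffer = buffers.Get()[i]; // raw pointer access, as in the hunks above
        // ... bind buffer to slot i ...
    }
}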

View File

@@ -35,6 +35,7 @@
#include "GPUShaderProgramDX12.h"
#include "CommandSignatureDX12.h"
#include "Engine/Profiler/RenderStats.h"
#include "Engine/Graphics/GPUResourceAccess.h"
#include "Engine/Graphics/Shaders/GPUShader.h"
#include "Engine/Threading/Threading.h"
@@ -51,6 +52,47 @@ inline bool operator!=(const D3D12_INDEX_BUFFER_VIEW& l, const D3D12_INDEX_BUFFE
return l.SizeInBytes != r.SizeInBytes || l.Format != r.Format || l.BufferLocation != r.BufferLocation;
}
FORCE_INLINE D3D12_RESOURCE_STATES GetResourceState(GPUResourceAccess access)
{
switch (access)
{
case GPUResourceAccess::None:
return D3D12_RESOURCE_STATE_COMMON;
case GPUResourceAccess::CopyRead:
return D3D12_RESOURCE_STATE_COPY_SOURCE;
case GPUResourceAccess::CopyWrite:
return D3D12_RESOURCE_STATE_COPY_DEST;
case GPUResourceAccess::CpuRead:
return D3D12_RESOURCE_STATE_GENERIC_READ;
case GPUResourceAccess::CpuWrite:
return D3D12_RESOURCE_STATE_COMMON;
case GPUResourceAccess::DepthRead:
return D3D12_RESOURCE_STATE_DEPTH_READ;
case GPUResourceAccess::DepthWrite:
return D3D12_RESOURCE_STATE_DEPTH_WRITE;
case GPUResourceAccess::DepthBuffer:
return D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_DEPTH_WRITE;
case GPUResourceAccess::RenderTarget:
return D3D12_RESOURCE_STATE_RENDER_TARGET;
case GPUResourceAccess::UnorderedAccess:
return D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
case GPUResourceAccess::IndirectArgs:
return D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT;
case GPUResourceAccess::ShaderReadPixel:
//return D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; // TODO: optimize SRV states in flushSRVs to be based on current binding usage slots
case GPUResourceAccess::ShaderReadCompute:
case GPUResourceAccess::ShaderReadNonPixel:
//return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; // TODO: optimize SRV states in flushSRVs to be based on current binding usage slots
case GPUResourceAccess::ShaderReadGraphics:
return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
#if !BUILD_RELEASE
default:
LOG(Error, "Unsupported GPU Resource Access: {}", (uint32)access);
#endif
}
return D3D12_RESOURCE_STATE_COMMON;
}
// Ensure to match the indirect commands arguments layout
static_assert(sizeof(GPUDispatchIndirectArgs) == sizeof(D3D12_DISPATCH_ARGUMENTS), "Wrong size of GPUDispatchIndirectArgs.");
static_assert(OFFSET_OF(GPUDispatchIndirectArgs, ThreadGroupCountX) == OFFSET_OF(D3D12_DISPATCH_ARGUMENTS, ThreadGroupCountX), "Wrong offset for GPUDispatchIndirectArgs::ThreadGroupCountX");
@@ -1124,7 +1166,8 @@ void GPUContextDX12::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCoun
_psDirtyFlag = true;
// Insert UAV barrier to ensure proper memory access for multiple sequential dispatches (skipped while inside an overlap pass)
AddUAVBarrier();
if (_pass == 0)
AddUAVBarrier();
}
void GPUContextDX12::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* bufferForArgs, uint32 offsetForArgs)
@@ -1158,7 +1201,8 @@ void GPUContextDX12::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* buf
_psDirtyFlag = true;
// Insert UAV barrier to ensure proper memory access for multiple sequential dispatches (skipped while inside an overlap pass)
AddUAVBarrier();
if (_pass == 0)
AddUAVBarrier();
}
void GPUContextDX12::ResolveMultisample(GPUTexture* sourceMultisampleTexture, GPUTexture* destTexture, int32 sourceSubResource, int32 destSubResource, PixelFormat format)
@@ -1549,4 +1593,15 @@ void GPUContextDX12::ForceRebindDescriptors()
_commandList->SetDescriptorHeaps(ARRAY_COUNT(ppHeaps), ppHeaps);
}
void GPUContextDX12::Transition(GPUResource* resource, GPUResourceAccess access)
{
SetResourceState(dynamic_cast<ResourceOwnerDX12*>(resource), GetResourceState(access));
}
void GPUContextDX12::OverlapUA(bool end)
{
if (end)
AddUAVBarrier();
}
#endif
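For reference, the Transition override above resolves to plain D3D12 transition barriers via GetResourceState, and AddUAVBarrier corresponds to a D3D12_RESOURCE_BARRIER_TYPE_UAV barrier. An illustrative raw-D3D12 sketch of what ends up being recorded (unbatched and standalone, not the engine's buffered implementation):

#include <d3d12.h>

// Transition a resource between states, e.g. UNORDERED_ACCESS -> COPY_SOURCE.
static void RecordTransition(ID3D12GraphicsCommandList* cmdList, ID3D12Resource* resource,
                             D3D12_RESOURCE_STATES before, D3D12_RESOURCE_STATES after)
{
    D3D12_RESOURCE_BARRIER barrier = {};
    barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
    barrier.Transition.pResource = resource;
    barrier.Transition.StateBefore = before;
    barrier.Transition.StateAfter = after;
    barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
    cmdList->ResourceBarrier(1, &barrier);
}

// Order UAV accesses on a resource: prior writes complete before later reads/writes start.
static void RecordUAVBarrier(ID3D12GraphicsCommandList* cmdList, ID3D12Resource* resource)
{
    D3D12_RESOURCE_BARRIER barrier = {};
    barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
    barrier.UAV.pResource = resource; // nullptr would apply to all UAV-accessible resources
    cmdList->ResourceBarrier(1, &barrier);
}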

View File

@@ -21,7 +21,7 @@ class GPUVertexLayoutDX12;
/// <summary>
/// Size of the resource barriers buffer (will be flushed on overflow)
/// </summary>
#define DX12_RB_BUFFER_SIZE 16
#define DX12_RB_BUFFER_SIZE 64
/// <summary>
/// GPU Commands Context implementation for DirectX 12
@@ -214,6 +214,8 @@ public:
void CopySubresource(GPUResource* dstResource, uint32 dstSubresource, GPUResource* srcResource, uint32 srcSubresource) override;
void SetResourceState(GPUResource* resource, uint64 state, int32 subresource) override;
void ForceRebindDescriptors() override;
void Transition(GPUResource* resource, GPUResourceAccess access) override;
void OverlapUA(bool end) override;
};
#endif
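The header changes raise DX12_RB_BUFFER_SIZE from 16 to 64: with explicit transitions in play, more barriers can pile up between draws and dispatches, and a larger buffer means fewer forced flushes. A hedged sketch of the batching idea behind that define (the struct and member names here are illustrative, not the engine's):

#include <d3d12.h>
#include <cstdint>

// Pending barriers are collected and submitted in one ResourceBarrier() call,
// which is cheaper than one call per barrier; an overflow forces an early flush.
struct BarrierBatch
{
    D3D12_RESOURCE_BARRIER Buffer[DX12_RB_BUFFER_SIZE];
    int32_t Count = 0;

    void Add(ID3D12GraphicsCommandList* cmdList, const D3D12_RESOURCE_BARRIER& barrier)
    {
        if (Count == DX12_RB_BUFFER_SIZE)
            Flush(cmdList); // buffer overflow: submit what has been queued so far
        Buffer[Count++] = barrier;
    }

    void Flush(ID3D12GraphicsCommandList* cmdList)
    {
        if (Count == 0)
            return;
        cmdList->ResourceBarrier((UINT)Count, Buffer);
        Count = 0;
    }
};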