Add explicit GPU resource transitions, memory and compute barriers

This commit is contained in:
Wojtek Figat
2025-08-09 23:57:43 +02:00
parent 3907bc4957
commit b5a431d2f5
12 changed files with 353 additions and 48 deletions

View File

@@ -35,6 +35,7 @@
#include "GPUShaderProgramDX12.h"
#include "CommandSignatureDX12.h"
#include "Engine/Profiler/RenderStats.h"
#include "Engine/Graphics/GPUResourceAccess.h"
#include "Engine/Graphics/Shaders/GPUShader.h"
#include "Engine/Threading/Threading.h"
@@ -51,6 +52,47 @@ inline bool operator!=(const D3D12_INDEX_BUFFER_VIEW& l, const D3D12_INDEX_BUFFE
return l.SizeInBytes != r.SizeInBytes || l.Format != r.Format || l.BufferLocation != r.BufferLocation;
}
// Converts the backend-agnostic GPUResourceAccess value into the D3D12 resource state used for transitions.
// Values without an explicit case map to D3D12_RESOURCE_STATE_COMMON (and are logged as an error in non-release builds).
FORCE_INLINE D3D12_RESOURCE_STATES GetResourceState(GPUResourceAccess access)
{
    switch (access)
    {
    case GPUResourceAccess::None:
        return D3D12_RESOURCE_STATE_COMMON;
    case GPUResourceAccess::CopyRead:
        return D3D12_RESOURCE_STATE_COPY_SOURCE;
    case GPUResourceAccess::CopyWrite:
        return D3D12_RESOURCE_STATE_COPY_DEST;
    case GPUResourceAccess::CpuRead:
        return D3D12_RESOURCE_STATE_GENERIC_READ;
    case GPUResourceAccess::CpuWrite:
        return D3D12_RESOURCE_STATE_COMMON;
    case GPUResourceAccess::DepthRead:
        return D3D12_RESOURCE_STATE_DEPTH_READ;
    case GPUResourceAccess::DepthWrite:
        return D3D12_RESOURCE_STATE_DEPTH_WRITE;
    case GPUResourceAccess::DepthBuffer:
        // D3D12 defines DEPTH_WRITE as mutually exclusive with every other state and allows DEPTH_READ to be combined
        // only with read states, so (DEPTH_READ | DEPTH_WRITE) is not a valid barrier state.
        // DEPTH_WRITE alone is the state for a writable depth-stencil view.
        return D3D12_RESOURCE_STATE_DEPTH_WRITE;
    case GPUResourceAccess::RenderTarget:
        return D3D12_RESOURCE_STATE_RENDER_TARGET;
    case GPUResourceAccess::UnorderedAccess:
        return D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
    case GPUResourceAccess::IndirectArgs:
        return D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT;
    // All shader-read variants intentionally share the combined pixel + non-pixel SRV state for now (see TODOs below)
    case GPUResourceAccess::ShaderReadPixel:
        //return D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; // TODO: optimize SRV states in flushSRVs to be based on current binding usage slots
    case GPUResourceAccess::ShaderReadCompute:
    case GPUResourceAccess::ShaderReadNonPixel:
        //return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; // TODO: optimize SRV states in flushSRVs to be based on current binding usage slots
    case GPUResourceAccess::ShaderReadGraphics:
        return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
#if !BUILD_RELEASE
    default:
        LOG(Error, "Unsupported GPU Resource Access: {}", (uint32)access);
#endif
    }
    // Fallback for values not handled above
    return D3D12_RESOURCE_STATE_COMMON;
}
// Ensure to match the indirect commands arguments layout
static_assert(sizeof(GPUDispatchIndirectArgs) == sizeof(D3D12_DISPATCH_ARGUMENTS), "Wrong size of GPUDispatchIndirectArgs.");
static_assert(OFFSET_OF(GPUDispatchIndirectArgs, ThreadGroupCountX) == OFFSET_OF(D3D12_DISPATCH_ARGUMENTS, ThreadGroupCountX), "Wrong offset for GPUDispatchIndirectArgs::ThreadGroupCountX");
@@ -1124,7 +1166,8 @@ void GPUContextDX12::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCoun
_psDirtyFlag = true;
// Insert UAV barrier to ensure proper memory access for multiple sequential dispatches
AddUAVBarrier();
if (_pass == 0)
AddUAVBarrier();
}
void GPUContextDX12::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* bufferForArgs, uint32 offsetForArgs)
@@ -1158,7 +1201,8 @@ void GPUContextDX12::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* buf
_psDirtyFlag = true;
// Insert UAV barrier to ensure proper memory access for multiple sequential dispatches
AddUAVBarrier();
if (_pass == 0)
AddUAVBarrier();
}
void GPUContextDX12::ResolveMultisample(GPUTexture* sourceMultisampleTexture, GPUTexture* destTexture, int32 sourceSubResource, int32 destSubResource, PixelFormat format)
@@ -1549,4 +1593,15 @@ void GPUContextDX12::ForceRebindDescriptors()
_commandList->SetDescriptorHeaps(ARRAY_COUNT(ppHeaps), ppHeaps);
}
// Transitions the given resource into the D3D12 state that corresponds to the requested access mode.
// Does nothing when the resource is null or is not a ResourceOwnerDX12.
void GPUContextDX12::Transition(GPUResource* resource, GPUResourceAccess access)
{
    // dynamic_cast yields null for a null input or for an object that is not a ResourceOwnerDX12;
    // do not forward a null owner to the state tracking
    ResourceOwnerDX12* owner = dynamic_cast<ResourceOwnerDX12*>(resource);
    if (owner == nullptr)
        return;
    SetResourceState(owner, GetResourceState(access));
}
// Inserts a UAV barrier when called with end == true; a call with end == false does nothing here.
// NOTE(review): presumably 'end' marks the close of a region of overlapping unordered-access work - confirm against the caller/base class.
void GPUContextDX12::OverlapUA(bool end)
{
    // Nothing to emit unless this is the closing call
    if (!end)
        return;

    AddUAVBarrier();
}
#endif