Add explicit GPU resource transitions, memory and compute barriers
@@ -9,6 +9,11 @@
 #include "PixelFormat.h"
 #include "Config.h"
 
+#if PLATFORM_WIN32
+// Fix nasty Win32 define garbage
+#undef MemoryBarrier
+#endif
+
 class GPUConstantBuffer;
 class GPUShaderProgramCS;
 class GPUBuffer;
@@ -21,6 +26,8 @@ class GPUResourceView;
 class GPUTextureView;
 class GPUBufferView;
 class GPUVertexLayout;
+struct GPUPass;
+enum class GPUResourceAccess;
 
 // Gets the GPU texture view. Checks if pointer is not null and texture has one or more mip levels loaded.
 #define GET_TEXTURE_VIEW_SAFE(t) (t && t->ResidentMipLevels() > 0 ? t->View() : nullptr)
@@ -632,4 +639,24 @@ public:
     /// Forces graphics backend to rebind descriptors after command list was used by external graphics library.
     /// </summary>
     virtual void ForceRebindDescriptors();
+
+protected:
+    friend GPUPass;
+    int32 _pass = 0;
+
+public:
+    // Performs resource state transition into a specific access (mask).
+    virtual void Transition(GPUResource* resource, GPUResourceAccess access)
+    {
+    }
+
+    // Inserts a global memory barrier on data copies between resources.
+    virtual void MemoryBarrier()
+    {
+    }
+
+    // Begins or ends unordered access resource overlap region that allows running different compute shader dispatches simultaneously.
+    virtual void OverlapUA(bool end)
+    {
+    }
 };
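As a rough illustration of the new GPUContext API (the function and resource names below are hypothetical, and the base implementations above are no-ops, so the calls only take effect on backends that override them), rendering code can request a target access state up-front and let the backend batch or elide the barrier:

// Hypothetical usage sketch; assumes a valid GPUContext* and GPUTexture* are available.
void DrawIntoTexture(GPUContext* renderContext, GPUTexture* noiseTexture)
{
    // Preemptively move the texture into the render-target state so the backend
    // can schedule the barrier instead of stalling right before the draw.
    renderContext->Transition(noiseTexture, GPUResourceAccess::RenderTarget);

    // ... bind render targets and issue draw calls here ...

    // Make the results visible to later compute work that samples the texture.
    renderContext->Transition(noiseTexture, GPUResourceAccess::ShaderReadCompute);
}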
Source/Engine/Graphics/GPUPass.h (new file, 68 lines)
@@ -0,0 +1,68 @@
+// Copyright (c) Wojciech Figat. All rights reserved.
+
+#pragma once
+
+#include "GPUContext.h"
+#include "Engine/Graphics/GPUResourceAccess.h"
+
+/// <summary>
+/// Base for GPU rendering passes that control low-level memory access and GPU resources states with usage to optimize rendering.
+/// </summary>
+struct FLAXENGINE_API GPUPass
+{
+    NON_COPYABLE(GPUPass);
+
+    GPUContext* Context;
+
+    GPUPass(GPUContext* context)
+        : Context(context)
+    {
+        Context->_pass++;
+    }
+
+    ~GPUPass()
+    {
+        Context->_pass--;
+    }
+
+    // Performs resource state transition into a specific access (mask). Can be done preemptively in the prologue of the pass to execute more efficient barriers.
+    void Transition(GPUResource* resource, GPUResourceAccess access)
+    {
+        Context->Transition(resource, access);
+    }
+};
+
+/// <summary>
+/// GPU pass that manually controls memory barriers and cache flushes when performing batched copy/upload operations with GPU context. Can be used to optimize GPU buffers usage by running different copy operations simultaneously.
+/// </summary>
+struct FLAXENGINE_API GPUMemoryPass : GPUPass
+{
+    GPUMemoryPass(GPUContext* context)
+        : GPUPass(context)
+    {
+    }
+
+    ~GPUMemoryPass()
+    {
+        Context->MemoryBarrier();
+    }
+};
+
+/// <summary>
+/// GPU pass that controls memory barriers when performing batched Compute shader dispatches with GPU context. Can be used to optimize GPU utilization by running different dispatches simultaneously (by overlapping work).
+/// </summary>
+struct FLAXENGINE_API GPUComputePass : GPUPass
+{
+    GPUComputePass(GPUContext* context)
+        : GPUPass(context)
+    {
+        Context->OverlapUA(false);
+    }
+
+    ~GPUComputePass()
+    {
+        Context->OverlapUA(true);
+    }
+};
+
+// TODO: add GPUDrawPass for render targets and depth/stencil setup with optimized clear for faster drawing on tiled-GPUs (mobile)
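A rough usage sketch for the compute pass (the shader handles, buffers, and dispatch sizes below are placeholders, not code from this commit): scoping several dispatches that write to independent UAVs inside a GPUComputePass suppresses the per-dispatch UAV barrier and lets the work overlap, with a single barrier emitted when the pass object goes out of scope.

// Hypothetical example; csBlurX/csBlurY and the two buffers are placeholders.
void RunIndependentDispatches(GPUContext* context, GPUShaderProgramCS* csBlurX, GPUShaderProgramCS* csBlurY, GPUBuffer* bufferA, GPUBuffer* bufferB)
{
    GPUComputePass pass(context); // calls OverlapUA(false) and bumps _pass so Dispatch skips its implicit UAV barrier

    // Optionally transition resources up-front so barriers are batched in the pass prologue.
    pass.Transition(bufferA, GPUResourceAccess::UnorderedAccess);
    pass.Transition(bufferB, GPUResourceAccess::UnorderedAccess);

    // These dispatches touch different buffers, so the GPU is allowed to overlap them.
    context->BindUA(0, bufferA->View());
    context->Dispatch(csBlurX, 64, 1, 1);
    context->BindUA(0, bufferB->View());
    context->Dispatch(csBlurY, 64, 1, 1);
} // ~GPUComputePass calls OverlapUA(true), which inserts the UAV barrier once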
Source/Engine/Graphics/GPUResourceAccess.h (new file, 29 lines)
@@ -0,0 +1,29 @@
+// Copyright (c) Wojciech Figat. All rights reserved.
+
+#pragma once
+
+#include "Engine/Core/Types/BaseTypes.h"
+
+// GPU resource access flags. Used to describe how resource can be accessed which allows GPU to optimize data layout and memory access.
+enum class GPUResourceAccess
+{
+    None = 0,
+    CopyRead = 1 << 0,
+    CopyWrite = 1 << 1,
+    CpuRead = 1 << 2,
+    CpuWrite = 1 << 3,
+    DepthRead = 1 << 4,
+    DepthWrite = 1 << 5,
+    DepthBuffer = DepthRead | DepthWrite,
+    RenderTarget = 1 << 6,
+    UnorderedAccess = 1 << 7,
+    IndirectArgs = 1 << 8,
+    ShaderReadCompute = 1 << 9,
+    ShaderReadPixel = 1 << 10,
+    ShaderReadNonPixel = 1 << 11,
+    ShaderReadGraphics = ShaderReadPixel | ShaderReadNonPixel,
+    Last,
+    All = (Last << 1) - 1,
+};
+
+DECLARE_ENUM_OPERATORS(GPUResourceAccess);
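DECLARE_ENUM_OPERATORS makes the access values combinable as bit flags (ShaderReadGraphics is itself defined as ShaderReadPixel | ShaderReadNonPixel). A hedged sketch of how the flags and GPUMemoryPass might be used together for batched uploads, assuming hypothetical buffers and data pointers (this is an illustration, not code from the commit):

// Hypothetical sketch of batching several uploads under one trailing memory barrier.
void UploadParticleData(GPUContext* context, GPUBuffer* argsBuffer, GPUBuffer* particlesBuffer, const void* args, uint32 argsSize, const void* particles, uint32 particlesSize)
{
    GPUMemoryPass pass(context); // bumps _pass, so per-copy memory barriers are skipped

    context->UpdateBuffer(argsBuffer, args, argsSize);
    context->UpdateBuffer(particlesBuffer, particles, particlesSize);

    // Request the state the indirect-args buffer will be consumed in next (eg. by DispatchIndirect).
    pass.Transition(argsBuffer, GPUResourceAccess::IndirectArgs);
} // ~GPUMemoryPass calls MemoryBarrier() once for all copies above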
@@ -297,7 +297,7 @@ void GPUContextDX11::SetRenderTarget(GPUTextureView* depthBuffer, const Span<GPU
     __declspec(align(16)) ID3D11RenderTargetView* rtvs[GPU_MAX_RT_BINDED];
     for (int32 i = 0; i < rts.Length(); i++)
     {
-        auto rtDX11 = reinterpret_cast<GPUTextureViewDX11*>(rts[i]);
+        auto rtDX11 = reinterpret_cast<GPUTextureViewDX11*>(rts.Get()[i]);
         rtvs[i] = rtDX11 ? rtDX11->RTV() : nullptr;
     }
     int32 rtvsSize = sizeof(ID3D11RenderTargetView*) * rts.Length();
@@ -431,7 +431,7 @@ void GPUContextDX11::BindVB(const Span<GPUBuffer*>& vertexBuffers, const uint32*
     bool vbEdited = false;
     for (int32 i = 0; i < vertexBuffers.Length(); i++)
     {
-        const auto vbDX11 = static_cast<GPUBufferDX11*>(vertexBuffers[i]);
+        const auto vbDX11 = static_cast<GPUBufferDX11*>(vertexBuffers.Get()[i]);
         const auto vb = vbDX11 ? vbDX11->GetBuffer() : nullptr;
         vbEdited |= vb != _vbHandles[i];
         _vbHandles[i] = vb;
@@ -35,6 +35,7 @@
 #include "GPUShaderProgramDX12.h"
 #include "CommandSignatureDX12.h"
 #include "Engine/Profiler/RenderStats.h"
+#include "Engine/Graphics/GPUResourceAccess.h"
 #include "Engine/Graphics/Shaders/GPUShader.h"
 #include "Engine/Threading/Threading.h"
 
@@ -51,6 +52,47 @@ inline bool operator!=(const D3D12_INDEX_BUFFER_VIEW& l, const D3D12_INDEX_BUFFE
     return l.SizeInBytes != r.SizeInBytes || l.Format != r.Format || l.BufferLocation != r.BufferLocation;
 }
 
+FORCE_INLINE D3D12_RESOURCE_STATES GetResourceState(GPUResourceAccess access)
+{
+    switch (access)
+    {
+    case GPUResourceAccess::None:
+        return D3D12_RESOURCE_STATE_COMMON;
+    case GPUResourceAccess::CopyRead:
+        return D3D12_RESOURCE_STATE_COPY_SOURCE;
+    case GPUResourceAccess::CopyWrite:
+        return D3D12_RESOURCE_STATE_COPY_DEST;
+    case GPUResourceAccess::CpuRead:
+        return D3D12_RESOURCE_STATE_GENERIC_READ;
+    case GPUResourceAccess::CpuWrite:
+        return D3D12_RESOURCE_STATE_COMMON;
+    case GPUResourceAccess::DepthRead:
+        return D3D12_RESOURCE_STATE_DEPTH_READ;
+    case GPUResourceAccess::DepthWrite:
+        return D3D12_RESOURCE_STATE_DEPTH_WRITE;
+    case GPUResourceAccess::DepthBuffer:
+        return D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_DEPTH_WRITE;
+    case GPUResourceAccess::RenderTarget:
+        return D3D12_RESOURCE_STATE_RENDER_TARGET;
+    case GPUResourceAccess::UnorderedAccess:
+        return D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
+    case GPUResourceAccess::IndirectArgs:
+        return D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT;
+    case GPUResourceAccess::ShaderReadPixel:
+        //return D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; // TODO: optimize SRV states in flushSRVs to be based on current binding usage slots
+    case GPUResourceAccess::ShaderReadCompute:
+    case GPUResourceAccess::ShaderReadNonPixel:
+        //return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; // TODO: optimize SRV states in flushSRVs to be based on current binding usage slots
+    case GPUResourceAccess::ShaderReadGraphics:
+        return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
+#if !BUILD_RELEASE
+    default:
+        LOG(Error, "Unsupported GPU Resource Access: {}", (uint32)access);
+#endif
+    }
+    return D3D12_RESOURCE_STATE_COMMON;
+}
+
 // Ensure to match the indirect commands arguments layout
 static_assert(sizeof(GPUDispatchIndirectArgs) == sizeof(D3D12_DISPATCH_ARGUMENTS), "Wrong size of GPUDrawIndirectArgs.");
 static_assert(OFFSET_OF(GPUDispatchIndirectArgs, ThreadGroupCountX) == OFFSET_OF(D3D12_DISPATCH_ARGUMENTS, ThreadGroupCountX), "Wrong offset for GPUDrawIndirectArgs::ThreadGroupCountX");
@@ -1124,7 +1166,8 @@ void GPUContextDX12::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCoun
     _psDirtyFlag = true;
 
     // Insert UAV barrier to ensure proper memory access for multiple sequential dispatches
-    AddUAVBarrier();
+    if (_pass == 0)
+        AddUAVBarrier();
 }
 
 void GPUContextDX12::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* bufferForArgs, uint32 offsetForArgs)
@@ -1158,7 +1201,8 @@ void GPUContextDX12::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* buf
     _psDirtyFlag = true;
 
     // Insert UAV barrier to ensure proper memory access for multiple sequential dispatches
-    AddUAVBarrier();
+    if (_pass == 0)
+        AddUAVBarrier();
 }
 
 void GPUContextDX12::ResolveMultisample(GPUTexture* sourceMultisampleTexture, GPUTexture* destTexture, int32 sourceSubResource, int32 destSubResource, PixelFormat format)
@@ -1549,4 +1593,15 @@ void GPUContextDX12::ForceRebindDescriptors()
     _commandList->SetDescriptorHeaps(ARRAY_COUNT(ppHeaps), ppHeaps);
 }
 
+void GPUContextDX12::Transition(GPUResource* resource, GPUResourceAccess access)
+{
+    SetResourceState(dynamic_cast<ResourceOwnerDX12*>(resource), GetResourceState(access));
+}
+
+void GPUContextDX12::OverlapUA(bool end)
+{
+    if (end)
+        AddUAVBarrier();
+}
+
 #endif
@@ -21,7 +21,7 @@ class GPUVertexLayoutDX12;
 /// <summary>
 /// Size of the resource barriers buffer size (will be flushed on overflow)
 /// </summary>
-#define DX12_RB_BUFFER_SIZE 16
+#define DX12_RB_BUFFER_SIZE 64
 
 /// <summary>
 /// GPU Commands Context implementation for DirectX 12
@@ -214,6 +214,8 @@ public:
     void CopySubresource(GPUResource* dstResource, uint32 dstSubresource, GPUResource* srcResource, uint32 srcSubresource) override;
     void SetResourceState(GPUResource* resource, uint64 state, int32 subresource) override;
     void ForceRebindDescriptors() override;
+    void Transition(GPUResource* resource, GPUResourceAccess access) override;
+    void OverlapUA(bool end) override;
 };
 
 #endif
@@ -19,7 +19,7 @@ void GPUBufferViewVulkan::Init(GPUDeviceVulkan* device, GPUBufferVulkan* owner,
     Buffer = buffer;
     Size = size;
 
-    if ((owner->IsShaderResource() && !(owner->GetDescription().Flags & GPUBufferFlags::Structured)) || (usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) == VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)
+    if ((EnumHasAnyFlags(owner->GetDescription().Flags, GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess) && !(owner->GetDescription().Flags & GPUBufferFlags::Structured)) || (usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) == VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)
     {
         VkBufferViewCreateInfo viewInfo;
         RenderToolsVulkan::ZeroStruct(viewInfo, VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO);
@@ -103,7 +103,7 @@ bool GPUBufferVulkan::OnInit()
         bufferInfo.usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
     if (useUAV || EnumHasAnyFlags(_desc.Flags, GPUBufferFlags::RawBuffer | GPUBufferFlags::Structured))
         bufferInfo.usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
-    if (useUAV && useSRV)
+    if (useUAV)
         bufferInfo.usage |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
     if (EnumHasAnyFlags(_desc.Flags, GPUBufferFlags::Argument))
         bufferInfo.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
@@ -78,13 +78,14 @@ const Char* ToString(VkImageLayout layout)
 void PipelineBarrierVulkan::Execute(const CmdBufferVulkan* cmdBuffer)
 {
     ASSERT(cmdBuffer->IsOutsideRenderPass());
-    vkCmdPipelineBarrier(cmdBuffer->GetHandle(), SourceStage, DestStage, 0, 0, nullptr, BufferBarriers.Count(), BufferBarriers.Get(), ImageBarriers.Count(), ImageBarriers.Get());
+    vkCmdPipelineBarrier(cmdBuffer->GetHandle(), SourceStage, DestStage, 0, MemoryBarriers.Count(), MemoryBarriers.Get(), BufferBarriers.Count(), BufferBarriers.Get(), ImageBarriers.Count(), ImageBarriers.Get());
 
     // Reset
     SourceStage = 0;
     DestStage = 0;
     ImageBarriers.Clear();
     BufferBarriers.Clear();
+    MemoryBarriers.Clear();
 #if VK_ENABLE_BARRIERS_DEBUG
     ImageBarriersDebug.Clear();
 #endif
@@ -153,12 +154,7 @@ void GPUContextVulkan::AddImageBarrier(VkImage image, VkImageLayout srcLayout, V
 #if VK_ENABLE_BARRIERS_BATCHING
     // Auto-flush on overflow
     if (_barriers.IsFull())
-    {
-        const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
-        if (cmdBuffer->IsInsideRenderPass())
-            EndRenderPass();
-        _barriers.Execute(cmdBuffer);
-    }
+        FlushBarriers();
 #endif
 
     // Insert barrier
@@ -190,10 +186,7 @@ void GPUContextVulkan::AddImageBarrier(VkImage image, VkImageLayout srcLayout, V
 
 #if !VK_ENABLE_BARRIERS_BATCHING
     // Auto-flush without batching
-    const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
-    if (cmdBuffer->IsInsideRenderPass())
-        EndRenderPass();
-    _barriers.Execute(cmdBuffer);
+    FlushBarriers();
 #endif
 }
 
@@ -315,12 +308,7 @@ void GPUContextVulkan::AddBufferBarrier(GPUBufferVulkan* buffer, VkAccessFlags d
 #if VK_ENABLE_BARRIERS_BATCHING
     // Auto-flush on overflow
     if (_barriers.IsFull())
-    {
-        const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
-        if (cmdBuffer->IsInsideRenderPass())
-            EndRenderPass();
-        _barriers.Execute(cmdBuffer);
-    }
+        FlushBarriers();
 #endif
 
     // Insert barrier
@@ -339,13 +327,38 @@ void GPUContextVulkan::AddBufferBarrier(GPUBufferVulkan* buffer, VkAccessFlags d
 
 #if !VK_ENABLE_BARRIERS_BATCHING
     // Auto-flush without batching
-    const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
-    if (cmdBuffer->IsInsideRenderPass())
-        EndRenderPass();
-    _barriers.Execute(cmdBuffer);
+    FlushBarriers();
 #endif
 }
 
+void GPUContextVulkan::AddMemoryBarrier()
+{
+#if VK_ENABLE_BARRIERS_BATCHING
+    // Auto-flush on overflow
+    if (_barriers.IsFull())
+        FlushBarriers();
+#endif
+
+    // Insert barrier
+    VkMemoryBarrier& memoryBarrier = _barriers.MemoryBarriers.AddOne();
+    RenderToolsVulkan::ZeroStruct(memoryBarrier, VK_STRUCTURE_TYPE_MEMORY_BARRIER);
+    memoryBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+    memoryBarrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
+    _barriers.SourceStage |= VK_PIPELINE_STAGE_TRANSFER_BIT;
+    _barriers.DestStage |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+
+#if !VK_ENABLE_BARRIERS_BATCHING
+    // Auto-flush without batching
+    FlushBarriers();
+#endif
+}
+
+void GPUContextVulkan::AddUABarrier()
+{
+    _barriers.SourceStage |= VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+    _barriers.DestStage |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+}
+
 void GPUContextVulkan::FlushBarriers()
 {
 #if VK_ENABLE_BARRIERS_BATCHING
@@ -475,7 +488,7 @@ void GPUContextVulkan::EndRenderPass()
     cmdBuffer->EndRenderPass();
     _renderPass = nullptr;
 
-    // Place a barrier between RenderPasses, so that color / depth outputs can be read in subsequent passes
+    // Place a barrier between RenderPasses, so that color/depth outputs can be read in subsequent passes
     // TODO: remove it in future and use proper barriers without whole pipeline stalls
     vkCmdPipelineBarrier(cmdBuffer->GetHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 0, nullptr);
 }
@@ -1155,8 +1168,8 @@ void GPUContextVulkan::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCo
     RENDER_STAT_DISPATCH_CALL();
 
     // Place a barrier between dispatches, so that UAVs can be read+write in subsequent passes
-    // TODO: optimize it by moving inputs/outputs into higher-layer so eg. Global SDF can manually optimize it
-    vkCmdPipelineBarrier(cmdBuffer->GetHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 0, nullptr);
+    if (_pass == 0)
+        AddUABarrier();
 
 #if VK_ENABLE_BARRIERS_DEBUG
     LOG(Warning, "Dispatch");
@@ -1191,8 +1204,8 @@ void GPUContextVulkan::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* b
     RENDER_STAT_DISPATCH_CALL();
 
     // Place a barrier between dispatches, so that UAVs can be read+write in subsequent passes
-    // TODO: optimize it by moving inputs/outputs into higher-layer so eg. Global SDF can manually optimize it
-    vkCmdPipelineBarrier(cmdBuffer->GetHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 0, nullptr);
+    if (_pass == 0)
+        AddUABarrier();
 
 #if VK_ENABLE_BARRIERS_DEBUG
     LOG(Warning, "DispatchIndirect");
@@ -1351,18 +1364,14 @@ void GPUContextVulkan::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32
 
     const auto bufferVulkan = static_cast<GPUBufferVulkan*>(buffer);
 
-    // Memory transfer barrier
-    // TODO: batch pipeline barriers
-    const VkMemoryBarrier barrierBefore = { VK_STRUCTURE_TYPE_MEMORY_BARRIER, nullptr, VK_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_MEMORY_READ_BIT };
-    vkCmdPipelineBarrier(cmdBuffer->GetHandle(), VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &barrierBefore, 0, nullptr, 0, nullptr);
+    // Transition resource
+    AddBufferBarrier(bufferVulkan, VK_ACCESS_TRANSFER_WRITE_BIT);
+    FlushBarriers();
 
     // Use direct update for small buffers
     const uint32 alignedSize = Math::AlignUp<uint32>(size, 4);
     if (size <= 4 * 1024 && alignedSize <= buffer->GetSize())
     {
-        //AddBufferBarrier(bufferVulkan, VK_ACCESS_TRANSFER_WRITE_BIT);
-        //FlushBarriers();
-
         vkCmdUpdateBuffer(cmdBuffer->GetHandle(), bufferVulkan->GetHandle(), offset, alignedSize, data);
     }
     else
@@ -1379,10 +1388,9 @@ void GPUContextVulkan::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32
         _device->StagingManager.ReleaseBuffer(cmdBuffer, staging);
     }
 
-    // Memory transfer barrier
-    // TODO: batch pipeline barriers
-    const VkMemoryBarrier barrierAfter = { VK_STRUCTURE_TYPE_MEMORY_BARRIER, nullptr, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT };
-    vkCmdPipelineBarrier(cmdBuffer->GetHandle(), VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1, &barrierAfter, 0, nullptr, 0, nullptr);
+    // Memory transfer barrier to ensure buffer is ready to read (eg. by Draw or Dispatch)
+    if (_pass == 0)
+        AddMemoryBarrier();
 }
 
 void GPUContextVulkan::CopyBuffer(GPUBuffer* dstBuffer, GPUBuffer* srcBuffer, uint32 size, uint32 dstOffset, uint32 srcOffset)
@@ -1407,6 +1415,10 @@ void GPUContextVulkan::CopyBuffer(GPUBuffer* dstBuffer, GPUBuffer* srcBuffer, ui
     bufferCopy.dstOffset = dstOffset;
     bufferCopy.size = size;
     vkCmdCopyBuffer(cmdBuffer->GetHandle(), srcBufferVulkan->GetHandle(), dstBufferVulkan->GetHandle(), 1, &bufferCopy);
+
+    // Memory transfer barrier to ensure buffer is ready to read (eg. by Draw or Dispatch)
+    if (_pass == 0)
+        AddMemoryBarrier();
 }
 
 void GPUContextVulkan::UpdateTexture(GPUTexture* texture, int32 arrayIndex, int32 mipIndex, const void* data, uint32 rowPitch, uint32 slicePitch)
@@ -1816,4 +1828,27 @@ void GPUContextVulkan::CopySubresource(GPUResource* dstResource, uint32 dstSubre
     }
 }
 
+void GPUContextVulkan::Transition(GPUResource* resource, GPUResourceAccess access)
+{
+    if (auto buffer = dynamic_cast<GPUBufferVulkan*>(resource))
+    {
+        AddBufferBarrier(buffer, RenderToolsVulkan::GetAccess(access));
+    }
+    else if (auto texture = dynamic_cast<GPUTextureVulkan*>(resource))
+    {
+        AddImageBarrier(texture, RenderToolsVulkan::GetImageLayout(access));
+    }
+}
+
+void GPUContextVulkan::MemoryBarrier()
+{
+    AddMemoryBarrier();
+}
+
+void GPUContextVulkan::OverlapUA(bool end)
+{
+    if (end)
+        AddUABarrier();
+}
+
 #endif
@@ -34,7 +34,7 @@ class DescriptorSetLayoutVulkan;
 /// <summary>
 /// Size of the pipeline barriers buffer size (will be auto-flushed on overflow).
 /// </summary>
-#define VK_BARRIER_BUFFER_SIZE 16
+#define VK_BARRIER_BUFFER_SIZE 64
 
 /// <summary>
 /// The Vulkan pipeline resources layout barrier batching structure.
@@ -45,18 +45,19 @@ struct PipelineBarrierVulkan
     VkPipelineStageFlags DestStage = 0;
     Array<VkImageMemoryBarrier, FixedAllocation<VK_BARRIER_BUFFER_SIZE>> ImageBarriers;
     Array<VkBufferMemoryBarrier, FixedAllocation<VK_BARRIER_BUFFER_SIZE>> BufferBarriers;
+    Array<VkMemoryBarrier, FixedAllocation<4>> MemoryBarriers;
 #if VK_ENABLE_BARRIERS_DEBUG
     Array<GPUTextureViewVulkan*, FixedAllocation<VK_BARRIER_BUFFER_SIZE>> ImageBarriersDebug;
 #endif
 
     FORCE_INLINE bool IsFull() const
     {
-        return ImageBarriers.Count() == VK_BARRIER_BUFFER_SIZE || BufferBarriers.Count() == VK_BARRIER_BUFFER_SIZE;
+        return ImageBarriers.Count() == VK_BARRIER_BUFFER_SIZE || BufferBarriers.Count() == VK_BARRIER_BUFFER_SIZE || MemoryBarriers.Count() == 4;
     }
 
     FORCE_INLINE bool HasBarrier() const
     {
-        return ImageBarriers.Count() + BufferBarriers.Count() != 0;
+        return ImageBarriers.Count() + BufferBarriers.Count() + MemoryBarriers.Count() != 0;
     }
 
     void Execute(const CmdBufferVulkan* cmdBuffer);
@@ -130,6 +131,8 @@ public:
     void AddImageBarrier(GPUTextureVulkan* texture, int32 mipSlice, int32 arraySlice, VkImageLayout dstLayout);
     void AddImageBarrier(GPUTextureVulkan* texture, VkImageLayout dstLayout);
     void AddBufferBarrier(GPUBufferVulkan* buffer, VkAccessFlags dstAccess);
+    void AddMemoryBarrier();
+    void AddUABarrier();
 
     void FlushBarriers();
 
@@ -199,6 +202,9 @@ public:
     void CopyCounter(GPUBuffer* dstBuffer, uint32 dstOffset, GPUBuffer* srcBuffer) override;
     void CopyResource(GPUResource* dstResource, GPUResource* srcResource) override;
     void CopySubresource(GPUResource* dstResource, uint32 dstSubresource, GPUResource* srcResource, uint32 srcSubresource) override;
+    void Transition(GPUResource* resource, GPUResourceAccess access) override;
+    void MemoryBarrier() override;
+    void OverlapUA(bool end) override;
 };
 
 #endif
@@ -5,6 +5,7 @@
 #include "RenderToolsVulkan.h"
 #include "Engine/Core/Types/StringBuilder.h"
 #include "Engine/Core/Log.h"
+#include "Engine/Graphics/GPUResourceAccess.h"
 
 // @formatter:off
 
@@ -258,6 +259,80 @@ void RenderToolsVulkan::LogVkResult(VkResult result, const char* file, uint32 li
 #endif
 }
 
+VkAccessFlags RenderToolsVulkan::GetAccess(GPUResourceAccess access)
+{
+    switch (access)
+    {
+    case GPUResourceAccess::None:
+        return VK_ACCESS_NONE;
+    case GPUResourceAccess::CopyRead:
+        return VK_ACCESS_TRANSFER_READ_BIT;
+    case GPUResourceAccess::CopyWrite:
+        return VK_ACCESS_TRANSFER_WRITE_BIT;
+    case GPUResourceAccess::CpuRead:
+        return VK_ACCESS_HOST_READ_BIT;
+    case GPUResourceAccess::CpuWrite:
+        return VK_ACCESS_HOST_WRITE_BIT;
+    case GPUResourceAccess::DepthRead:
+        return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
+    case GPUResourceAccess::DepthWrite:
+        return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+    case GPUResourceAccess::DepthBuffer:
+        return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+    case GPUResourceAccess::RenderTarget:
+        return VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+    case GPUResourceAccess::UnorderedAccess:
+        return VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+    case GPUResourceAccess::IndirectArgs:
+        return VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
+    case GPUResourceAccess::ShaderReadCompute:
+    case GPUResourceAccess::ShaderReadPixel:
+    case GPUResourceAccess::ShaderReadNonPixel:
+    case GPUResourceAccess::ShaderReadGraphics:
+        return VK_ACCESS_SHADER_READ_BIT;
+#if !BUILD_RELEASE
+    default:
+        LOG(Error, "Unsupported GPU Resource Access: {}", (uint32)access);
+#endif
+    }
+    return VK_ACCESS_NONE;
+}
+
+VkImageLayout RenderToolsVulkan::GetImageLayout(GPUResourceAccess access)
+{
+    switch (access)
+    {
+    case GPUResourceAccess::None:
+        return VK_IMAGE_LAYOUT_UNDEFINED;
+    case GPUResourceAccess::CopyRead:
+        return VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
+    case GPUResourceAccess::CopyWrite:
+        return VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+    case GPUResourceAccess::CpuRead:
+    case GPUResourceAccess::CpuWrite:
+        return VK_IMAGE_LAYOUT_GENERAL;
+    case GPUResourceAccess::DepthRead:
+        return VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL;
+    case GPUResourceAccess::DepthWrite:
+    case GPUResourceAccess::DepthBuffer:
+        return VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL;
+        return VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL;
+    case GPUResourceAccess::RenderTarget:
+        return VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL;
+    case GPUResourceAccess::UnorderedAccess:
+    case GPUResourceAccess::ShaderReadCompute:
+    case GPUResourceAccess::ShaderReadPixel:
+    case GPUResourceAccess::ShaderReadNonPixel:
+    case GPUResourceAccess::ShaderReadGraphics:
+        return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+#if !BUILD_RELEASE
+    default:
+        LOG(Error, "Unsupported GPU Resource Access: {}", (uint32)access);
+#endif
+    }
+    return VK_IMAGE_LAYOUT_UNDEFINED;
+}
+
 bool RenderToolsVulkan::HasExtension(const Array<const char*>& extensions, const char* name)
 {
     for (int32 i = 0; i < extensions.Count(); i++)
@@ -20,6 +20,8 @@
 #define VK_SET_DEBUG_NAME(device, handle, type, name)
 #endif
 
+enum class GPUResourceAccess;
+
 /// <summary>
 /// Set of utilities for rendering on Vulkan platform.
 /// </summary>
@@ -40,6 +42,9 @@ public:
     static String GetVkErrorString(VkResult result);
     static void LogVkResult(VkResult result, const char* file = nullptr, uint32 line = 0, bool fatal = false);
 
+    static VkAccessFlags GetAccess(GPUResourceAccess access);
+    static VkImageLayout GetImageLayout(GPUResourceAccess access);
+
     static inline VkPipelineStageFlags GetBufferBarrierFlags(VkAccessFlags accessFlags)
     {
         VkPipelineStageFlags stageFlags = (VkPipelineStageFlags)0;
@@ -67,6 +72,9 @@ public:
         case VK_ACCESS_SHADER_WRITE_BIT:
             stageFlags = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
             break;
+        case VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT:
+            stageFlags = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+            break;
         case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT:
         case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
             stageFlags = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
@@ -82,7 +82,7 @@ void BitonicSort::Sort(GPUContext* context, GPUBuffer* indicesBuffer, GPUBuffer*
     if (checkIfSkipPass())
         return;
     PROFILE_GPU_CPU("Bitonic Sort");
-    uint32 maxNumElements = indicesBuffer->GetElementsCount();
+    int32 maxNumElements = (int32)indicesBuffer->GetElementsCount();
     if (maxElements > 0 && maxElements < maxNumElements)
         maxNumElements = maxElements;
     const uint32 alignedMaxNumElements = Math::RoundUpToPowerOf2(maxNumElements);