Optimize textures/buffers uploading on Vulkan with page allocator

This commit is contained in:
Wojtek Figat
2025-09-04 21:38:07 +02:00
parent cd22cd059d
commit 831fb0f442
10 changed files with 427 additions and 441 deletions

View File

@@ -1119,7 +1119,7 @@ void GPUContextDX12::UpdateCB(GPUConstantBuffer* cb, const void* data)
return;
// Allocate bytes for the buffer
DynamicAllocation allocation = _device->UploadBuffer->Allocate(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
auto allocation = _device->UploadBuffer.Allocate(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
// Copy data
Platform::MemoryCopy(allocation.CPUAddress, data, allocation.Size);
@@ -1343,7 +1343,7 @@ void GPUContextDX12::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 si
SetResourceState(bufferDX12, D3D12_RESOURCE_STATE_COPY_DEST);
flushRBs();
_device->UploadBuffer->UploadBuffer(this, bufferDX12->GetResource(), offset, data, size);
_device->UploadBuffer.UploadBuffer(GetCommandList(), bufferDX12->GetResource(), offset, data, size);
}
void GPUContextDX12::CopyBuffer(GPUBuffer* dstBuffer, GPUBuffer* srcBuffer, uint32 size, uint32 dstOffset, uint32 srcOffset)
@@ -1369,7 +1369,7 @@ void GPUContextDX12::UpdateTexture(GPUTexture* texture, int32 arrayIndex, int32
SetResourceState(textureDX12, D3D12_RESOURCE_STATE_COPY_DEST);
flushRBs();
_device->UploadBuffer->UploadTexture(this, textureDX12->GetResource(), data, rowPitch, slicePitch, mipIndex, arrayIndex);
_device->UploadBuffer.UploadTexture(GetCommandList(), textureDX12->GetResource(), data, rowPitch, slicePitch, mipIndex, arrayIndex);
}
void GPUContextDX12::CopyTexture(GPUTexture* dstResource, uint32 dstSubresource, uint32 dstX, uint32 dstY, uint32 dstZ, GPUTexture* srcResource, uint32 srcSubresource)
@@ -1424,7 +1424,7 @@ void GPUContextDX12::ResetCounter(GPUBuffer* buffer)
flushRBs();
uint32 value = 0;
_device->UploadBuffer->UploadBuffer(this, counter->GetResource(), 0, &value, 4);
_device->UploadBuffer.UploadBuffer(GetCommandList(), counter->GetResource(), 0, &value, 4);
SetResourceState(counter, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
}

View File

@@ -244,7 +244,7 @@ GPUDeviceDX12::GPUDeviceDX12(IDXGIFactory4* dxgiFactory, GPUAdapterDX* adapter)
, _rootSignature(nullptr)
, _commandQueue(nullptr)
, _mainContext(nullptr)
, UploadBuffer(nullptr)
, UploadBuffer(this)
, TimestampQueryHeap(this, D3D12_QUERY_HEAP_TYPE_TIMESTAMP, DX12_BACK_BUFFER_COUNT * 1024)
, Heap_CBV_SRV_UAV(this, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 4 * 1024, false)
, Heap_RTV(this, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 1 * 1024, false)
@@ -701,9 +701,6 @@ bool GPUDeviceDX12::Init()
VALIDATE_DIRECTX_CALL(_device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&_rootSignature)));
}
// Upload buffer
UploadBuffer = New<UploadBufferDX12>(this);
if (TimestampQueryHeap.Init())
return true;
@@ -740,7 +737,7 @@ void GPUDeviceDX12::DrawBegin()
GPUDeviceDX::DrawBegin();
updateRes2Dispose();
UploadBuffer->BeginGeneration(Engine::FrameCount);
UploadBuffer.BeginGeneration(Engine::FrameCount);
}
void GPUDeviceDX12::RenderEnd()
@@ -811,7 +808,7 @@ void GPUDeviceDX12::Dispose()
Heap_Sampler.ReleaseGPU();
RingHeap_CBV_SRV_UAV.ReleaseGPU();
RingHeap_Sampler.ReleaseGPU();
SAFE_DELETE(UploadBuffer);
UploadBuffer.ReleaseGPU();
SAFE_DELETE(DrawIndirectCommandSignature);
SAFE_DELETE(_mainContext);
SAFE_DELETE(_commandQueue);

View File

@@ -8,6 +8,7 @@
#include "Engine/Graphics/GPUResource.h"
#include "../IncludeDirectXHeaders.h"
#include "ResourceOwnerDX12.h"
#include "UploadBufferDX12.h"
#include "QueryHeapDX12.h"
#include "DescriptorHeapDX12.h"
@@ -26,7 +27,6 @@ class Engine;
class WindowsWindow;
class GPUContextDX12;
class GPUSwapChainDX12;
class UploadBufferDX12;
class CommandQueueDX12;
class CommandSignatureDX12;
@@ -70,11 +70,10 @@ public:
~GPUDeviceDX12();
public:
/// <summary>
/// Upload buffer for general purpose
/// Data uploading utility via pages.
/// </summary>
UploadBufferDX12* UploadBuffer;
UploadBufferDX12 UploadBuffer;
/// <summary>
/// The timestamp queries heap.

View File

@@ -4,10 +4,57 @@
#include "UploadBufferDX12.h"
#include "GPUTextureDX12.h"
#include "GPUContextDX12.h"
#include "../RenderToolsDX.h"
#include "Engine/Graphics/GPUResource.h"
#include "Engine/Profiler/ProfilerMemory.h"
/// <summary>
/// One GPU resource used as a page of upload memory by UploadBufferDX12.
/// </summary>
class UploadBufferPageDX12 : public GPUResourceBase<GPUDeviceDX12, GPUResource>, public ResourceOwnerDX12
{
public:
    UploadBufferPageDX12(GPUDeviceDX12* device, uint64 size);

    /// <summary>
    /// Generation index stamped by the allocator the last time memory was taken from this page.
    /// </summary>
    uint64 LastGen;

    /// <summary>
    /// CPU-side pointer to the beginning of the page memory.
    /// </summary>
    void* CPUAddress;

    /// <summary>
    /// GPU virtual address of the beginning of the page memory.
    /// </summary>
    D3D12_GPU_VIRTUAL_ADDRESS GPUAddress;

    /// <summary>
    /// Capacity of the page (in bytes).
    /// </summary>
    uint64 Size;

    // [GPUResourceBase]
    // Upload pages are reported as buffer resources.
    GPUResourceType GetResourceType() const final override
    {
        return GPUResourceType::Buffer;
    }

    // [ResourceOwnerDX12]
    // The page is its own GPU resource object.
    GPUResource* AsGPUResource() const override
    {
        return (GPUResource*)this;
    }

protected:
    // [GPUResourceBase]
    void OnReleaseGPU() final override;
};
UploadBufferDX12::UploadBufferDX12(GPUDeviceDX12* device)
: _device(device)
, _currentPage(nullptr)
@@ -16,24 +63,11 @@ UploadBufferDX12::UploadBufferDX12(GPUDeviceDX12* device)
{
}
UploadBufferDX12::~UploadBufferDX12()
{
_freePages.Add(_usedPages);
for (auto page : _freePages)
{
page->ReleaseGPU();
Delete(page);
}
}
DynamicAllocation UploadBufferDX12::Allocate(uint64 size, uint64 align)
UploadBufferDX12::Allocation UploadBufferDX12::Allocate(uint64 size, uint64 align)
{
const uint64 alignmentMask = align - 1;
ASSERT((alignmentMask & align) == 0);
// Check if use default or bigger page
const bool useDefaultSize = size <= DX12_DEFAULT_UPLOAD_PAGE_SIZE;
const uint64 pageSize = useDefaultSize ? DX12_DEFAULT_UPLOAD_PAGE_SIZE : size;
ASSERT_LOW_LAYER((alignmentMask & align) == 0);
const uint64 pageSize = Math::Max<uint64>(size, DX12_DEFAULT_UPLOAD_PAGE_SIZE);
const uint64 alignedSize = Math::AlignUpWithMask(size, alignmentMask);
// Align the allocation
@@ -41,14 +75,26 @@ DynamicAllocation UploadBufferDX12::Allocate(uint64 size, uint64 align)
// Check if there is enough space for that chunk of the data in the current page
if (_currentPage && _currentOffset + alignedSize > _currentPage->Size)
{
_currentPage = nullptr;
}
// Check if need to get new page
if (_currentPage == nullptr)
{
_currentPage = requestPage(pageSize);
// Try reusing existing page
for (int32 i = 0; i < _freePages.Count(); i++)
{
UploadBufferPageDX12* page = _freePages.Get()[i];
if (page->Size == pageSize)
{
_freePages.RemoveAt(i);
_currentPage = page;
break;
}
}
if (_currentPage == nullptr)
_currentPage = New<UploadBufferPageDX12>(_device, pageSize);
_usedPages.Add(_currentPage);
ASSERT_LOW_LAYER(_currentPage->GetResource());
_currentOffset = 0;
}
@@ -56,32 +102,27 @@ DynamicAllocation UploadBufferDX12::Allocate(uint64 size, uint64 align)
_currentPage->LastGen = _currentGeneration;
// Create allocation result
const DynamicAllocation result(static_cast<byte*>(_currentPage->CPUAddress) + _currentOffset, _currentOffset, size, _currentPage->GPUAddress + _currentOffset, _currentPage, _currentGeneration);
const Allocation result { (byte*)_currentPage->CPUAddress + _currentOffset, _currentOffset, size, _currentPage->GPUAddress + _currentOffset, _currentPage->GetResource(), _currentGeneration };
// Move in the page
// Move within a page
_currentOffset += size;
ASSERT(_currentPage->GetResource());
return result;
}
bool UploadBufferDX12::UploadBuffer(GPUContextDX12* context, ID3D12Resource* buffer, uint32 bufferOffset, const void* data, uint64 size)
void UploadBufferDX12::UploadBuffer(ID3D12GraphicsCommandList* commandList, ID3D12Resource* buffer, uint32 bufferOffset, const void* data, uint64 size)
{
// Allocate data
const DynamicAllocation allocation = Allocate(size, 4);
if (allocation.IsInvalid())
return true;
const auto allocation = Allocate(size, GPU_SHADER_DATA_ALIGNMENT);
// Copy data
Platform::MemoryCopy(allocation.CPUAddress, data, static_cast<size_t>(size));
Platform::MemoryCopy(allocation.CPUAddress, data, size);
// Copy buffer region
context->GetCommandList()->CopyBufferRegion(buffer, bufferOffset, allocation.Page->GetResource(), allocation.Offset, size);
return false;
commandList->CopyBufferRegion(buffer, bufferOffset, allocation.Resource, allocation.Offset, size);
}
bool UploadBufferDX12::UploadTexture(GPUContextDX12* context, ID3D12Resource* texture, const void* srcData, uint32 srcRowPitch, uint32 srcSlicePitch, int32 mipIndex, int32 arrayIndex)
void UploadBufferDX12::UploadTexture(ID3D12GraphicsCommandList* commandList, ID3D12Resource* texture, const void* srcData, uint32 srcRowPitch, uint32 srcSlicePitch, int32 mipIndex, int32 arrayIndex)
{
D3D12_RESOURCE_DESC resourceDesc = texture->GetDesc();
const UINT subresourceIndex = RenderToolsDX::CalcSubresourceIndex(mipIndex, arrayIndex, resourceDesc.MipLevels);
@@ -95,9 +136,7 @@ bool UploadBufferDX12::UploadTexture(GPUContextDX12* context, ID3D12Resource* te
const uint64 sliceSizeAligned = numSlices * mipSizeAligned;
// Allocate data
const DynamicAllocation allocation = Allocate(sliceSizeAligned, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT);
if (allocation.Size != sliceSizeAligned)
return true;
const auto allocation = Allocate(sliceSizeAligned, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT);
byte* ptr = (byte*)srcData;
ASSERT(srcSlicePitch <= sliceSizeAligned);
@@ -128,15 +167,13 @@ bool UploadBufferDX12::UploadTexture(GPUContextDX12* context, ID3D12Resource* te
// Source buffer copy location description
D3D12_TEXTURE_COPY_LOCATION srcLocation;
srcLocation.pResource = allocation.Page->GetResource();
srcLocation.pResource = allocation.Resource;
srcLocation.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
srcLocation.PlacedFootprint.Offset = allocation.Offset;
srcLocation.PlacedFootprint.Footprint = footprint.Footprint;
// Copy texture region
context->GetCommandList()->CopyTextureRegion(&dstLocation, 0, 0, 0, &srcLocation, nullptr);
return false;
commandList->CopyTextureRegion(&dstLocation, 0, 0, 0, &srcLocation, nullptr);
}
void UploadBufferDX12::BeginGeneration(uint64 generation)
@@ -170,41 +207,18 @@ void UploadBufferDX12::BeginGeneration(uint64 generation)
_currentGeneration = generation;
}
UploadBufferPageDX12* UploadBufferDX12::requestPage(uint64 size)
void UploadBufferDX12::ReleaseGPU()
{
// Try to find valid page
int32 freePageIndex = -1;
for (int32 i = 0; i < _freePages.Count(); i++)
_freePages.Add(_usedPages);
for (auto page : _freePages)
{
if (_freePages[i]->Size == size)
{
freePageIndex = i;
break;
}
page->ReleaseGPU();
Delete(page);
}
// Check if create a new page
UploadBufferPageDX12* page;
if (freePageIndex == -1)
{
// Get a new page to use
page = New<UploadBufferPageDX12>(_device, size);
}
else
{
// Remove from free pages
page = _freePages[freePageIndex];
_freePages.RemoveAt(freePageIndex);
}
// Mark page as used
_usedPages.Add(page);
return page;
}
UploadBufferPageDX12::UploadBufferPageDX12(GPUDeviceDX12* device, uint64 size)
: GPUResourceDX12(device, TEXT("Upload Buffer Page"))
: GPUResourceBase(device, TEXT("Upload Buffer Page"))
, LastGen(0)
, CPUAddress(nullptr)
, GPUAddress(0)
@@ -234,7 +248,7 @@ UploadBufferPageDX12::UploadBufferPageDX12(GPUDeviceDX12* device, uint64 size)
// Set state
initResource(resource, D3D12_RESOURCE_STATE_GENERIC_READ, 1);
DX_SET_DEBUG_NAME(_resource, GPUResourceDX12::GetName());
DX_SET_DEBUG_NAME(_resource, GetName());
_memoryUsage = size;
PROFILE_MEM_INC(GraphicsCommands, _memoryUsage);
GPUAddress = _resource->GetGPUVirtualAddress();
@@ -249,9 +263,7 @@ void UploadBufferPageDX12::OnReleaseGPU()
// Unmap
if (_resource && CPUAddress)
{
_resource->Unmap(0, nullptr);
}
GPUAddress = 0;
CPUAddress = nullptr;

View File

@@ -2,11 +2,15 @@
#pragma once
#include "GPUDeviceDX12.h"
#include "Engine/Graphics/GPUDevice.h"
#include "ResourceOwnerDX12.h"
#if GRAPHICS_API_DIRECTX12
class GPUDeviceDX12;
class UploadBufferPageDX12;
// Upload buffer page size (default capacity of a single page; bigger requests get a dedicated page of their own size)
#define DX12_DEFAULT_UPLOAD_PAGE_SIZE (4 * 1024 * 1024) // 4 MB
// Upload buffer generations timeout to dispose
@@ -15,223 +19,93 @@
// Upload buffer pages that are not used for a few frames are disposed
#define DX12_UPLOAD_PAGE_NOT_USED_FRAME_TIMEOUT 60
class GPUTextureDX12;
/// <summary>
/// Single page for the upload buffer
/// </summary>
class UploadBufferPageDX12 : public GPUResourceDX12<GPUResource>, public ResourceOwnerDX12
{
public:
/// <summary>
/// Init
/// </summary>
/// <param name="device">Graphics Device</param>
/// <param name="size">Page size</param>
UploadBufferPageDX12(GPUDeviceDX12* device, uint64 size);
public:
/// <summary>
/// Last generation that has been using that page
/// </summary>
uint64 LastGen;
/// <summary>
/// CPU memory address of the page
/// </summary>
void* CPUAddress;
/// <summary>
/// GPU memory address of the page
/// </summary>
D3D12_GPU_VIRTUAL_ADDRESS GPUAddress;
/// <summary>
/// Page size in bytes
/// </summary>
uint64 Size;
public:
// [GPUResourceDX12]
GPUResourceType GetResourceType() const final override
{
return GPUResourceType::Buffer;
}
// [ResourceOwnerDX12]
GPUResource* AsGPUResource() const override
{
return (GPUResource*)this;
}
protected:
// [GPUResourceDX12]
void OnReleaseGPU() final override;
};
/// <summary>
/// Upload buffer allocation
/// </summary>
struct DynamicAllocation
{
/// <summary>
/// CPU memory address of the allocation start.
/// </summary>
void* CPUAddress;
/// <summary>
/// Allocation offset in bytes (from the start of the heap buffer).
/// </summary>
uint64 Offset;
/// <summary>
/// Allocation size in bytes
/// </summary>
uint64 Size;
/// <summary>
/// GPU virtual memory address of the allocation start.
/// </summary>
D3D12_GPU_VIRTUAL_ADDRESS GPUAddress;
/// <summary>
/// Upload buffer page that owns that allocation
/// </summary>
UploadBufferPageDX12* Page;
/// <summary>
/// Generation number of that allocation (generally allocation is invalid after one or two generations)
/// </summary>
uint64 Generation;
/// <summary>
/// Init
/// </summary>
DynamicAllocation()
: CPUAddress(nullptr)
, Offset(0)
, Size(0)
, GPUAddress(0)
, Page(nullptr)
, Generation(0)
{
}
/// <summary>
/// Init
/// </summary>
/// <param name="address">CPU memory address</param>
/// <param name="offset">Offset in byes</param>
/// <param name="size">Size in byes</param>
/// <param name="gpuAddress">GPU memory address</param>
/// <param name="page">Parent page</param>
/// <param name="generation">Generation</param>
DynamicAllocation(void* address, uint64 offset, uint64 size, D3D12_GPU_VIRTUAL_ADDRESS gpuAddress, UploadBufferPageDX12* page, uint64 generation)
: CPUAddress(address)
, Offset(offset)
, Size(size)
, GPUAddress(gpuAddress)
, Page(page)
, Generation(generation)
{
}
/// <summary>
/// Returns true if allocation is invalid.
/// </summary>
bool IsInvalid() const
{
return CPUAddress == nullptr || Size == 0 || Page == nullptr;
}
};
/// <summary>
/// Uploading data to GPU buffer utility
/// </summary>
class UploadBufferDX12
{
private:
public:
/// <summary>
/// Upload buffer allocation
/// </summary>
struct Allocation
{
/// <summary>
/// CPU memory address of the allocation start.
/// </summary>
void* CPUAddress;
/// <summary>
/// Allocation offset in bytes (from the start of the heap buffer).
/// </summary>
uint64 Offset;
/// <summary>
/// Allocation size in bytes
/// </summary>
uint64 Size;
/// <summary>
/// GPU virtual memory address of the allocation start.
/// </summary>
D3D12_GPU_VIRTUAL_ADDRESS GPUAddress;
/// <summary>
/// Upload buffer page resource that owns that allocation
/// </summary>
ID3D12Resource* Resource;
/// <summary>
/// Generation number of that allocation (generally allocation is invalid after one or two generations)
/// </summary>
uint64 Generation;
};
private:
GPUDeviceDX12* _device;
UploadBufferPageDX12* _currentPage;
uint64 _currentOffset;
uint64 _currentGeneration;
Array<UploadBufferPageDX12*, InlinedAllocation<64>> _freePages;
Array<UploadBufferPageDX12*, InlinedAllocation<64>> _usedPages;
public:
/// <summary>
/// Init
/// </summary>
/// <param name="device">Graphics Device</param>
UploadBufferDX12(GPUDeviceDX12* device);
/// <summary>
/// Destructor
/// </summary>
~UploadBufferDX12();
public:
/// <summary>
/// Gets the current generation number.
/// </summary>
FORCE_INLINE uint64 GetCurrentGeneration() const
{
return _currentGeneration;
}
public:
/// <summary>
/// Allocates memory for custom data in the buffer.
/// </summary>
/// <param name="size">Size of the data in bytes</param>
/// <param name="align">Data alignment in buffer in bytes</param>
/// <returns>Dynamic location</returns>
DynamicAllocation Allocate(uint64 size, uint64 align);
Allocation Allocate(uint64 size, uint64 align);
/// <summary>
/// Uploads data to the buffer.
/// </summary>
/// <param name="context">GPU context to record upload command to it</param>
/// <param name="commandList">GPU command list to record upload command to it</param>
/// <param name="buffer">Destination buffer</param>
/// <param name="bufferOffset">Destination buffer offset in bytes.</param>
/// <param name="data">Data to allocate</param>
/// <param name="size">Size of the data in bytes</param>
/// <returns>True if cannot upload data, otherwise false.</returns>
bool UploadBuffer(GPUContextDX12* context, ID3D12Resource* buffer, uint32 bufferOffset, const void* data, uint64 size);
void UploadBuffer(ID3D12GraphicsCommandList* commandList, ID3D12Resource* buffer, uint32 bufferOffset, const void* data, uint64 size);
/// <summary>
/// Uploads data to the texture.
/// </summary>
/// <param name="context">GPU context to record upload command to it</param>
/// <param name="commandList">GPU command list to record upload command to it</param>
/// <param name="texture">Destination texture</param>
/// <param name="srcData">Data to allocate</param>
/// <param name="srcRowPitch">Source data row pitch value to upload.</param>
/// <param name="srcSlicePitch">Source data slice pitch value to upload.</param>
/// <param name="mipIndex">Mip map to stream index</param>
/// <param name="arrayIndex">Texture array index</param>
/// <returns>True if cannot upload data, otherwise false.</returns>
bool UploadTexture(GPUContextDX12* context, ID3D12Resource* texture, const void* srcData, uint32 srcRowPitch, uint32 srcSlicePitch, int32 mipIndex, int32 arrayIndex);
void UploadTexture(ID3D12GraphicsCommandList* commandList, ID3D12Resource* texture, const void* srcData, uint32 srcRowPitch, uint32 srcSlicePitch, int32 mipIndex, int32 arrayIndex);
public:
/// <summary>
/// Begins new generation.
/// </summary>
/// <param name="generation">The generation ID to begin.</param>
void BeginGeneration(uint64 generation);
private:
UploadBufferPageDX12* requestPage(uint64 size);
void ReleaseGPU();
};
#endif

View File

@@ -1386,16 +1386,13 @@ void GPUContextVulkan::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32
}
else
{
auto staging = _device->StagingManager.AcquireBuffer(size, GPUResourceUsage::StagingUpload);
staging->SetData(data, size);
auto allocation = _device->UploadBuffer.Upload(data, size, 4);
VkBufferCopy region;
region.size = size;
region.srcOffset = 0;
region.srcOffset = allocation.Offset;
region.dstOffset = offset;
vkCmdCopyBuffer(cmdBuffer->GetHandle(), ((GPUBufferVulkan*)staging)->GetHandle(), ((GPUBufferVulkan*)buffer)->GetHandle(), 1, &region);
_device->StagingManager.ReleaseBuffer(cmdBuffer, staging);
vkCmdCopyBuffer(cmdBuffer->GetHandle(), allocation.Buffer, ((GPUBufferVulkan*)buffer)->GetHandle(), 1, &region);
}
// Memory transfer barrier to ensure buffer is ready to read (eg. by Draw or Dispatch)
@@ -1444,14 +1441,14 @@ void GPUContextVulkan::UpdateTexture(GPUTexture* texture, int32 arrayIndex, int3
AddImageBarrier(textureVulkan, mipIndex, arrayIndex, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
FlushBarriers();
auto buffer = _device->StagingManager.AcquireBuffer(slicePitch, GPUResourceUsage::StagingUpload);
buffer->SetData(data, slicePitch);
auto allocation = _device->UploadBuffer.Upload(data, slicePitch, 512);
// Setup buffer copy region
int32 mipWidth, mipHeight, mipDepth;
texture->GetMipSize(mipIndex, mipWidth, mipHeight, mipDepth);
VkBufferImageCopy bufferCopyRegion;
Platform::MemoryClear(&bufferCopyRegion, sizeof(bufferCopyRegion));
bufferCopyRegion.bufferOffset = allocation.Offset;
bufferCopyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
bufferCopyRegion.imageSubresource.mipLevel = mipIndex;
bufferCopyRegion.imageSubresource.baseArrayLayer = arrayIndex;
@@ -1461,9 +1458,7 @@ void GPUContextVulkan::UpdateTexture(GPUTexture* texture, int32 arrayIndex, int3
bufferCopyRegion.imageExtent.depth = static_cast<uint32_t>(mipDepth);
// Copy mip level from staging buffer
vkCmdCopyBufferToImage(cmdBuffer->GetHandle(), ((GPUBufferVulkan*)buffer)->GetHandle(), textureVulkan->GetHandle(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &bufferCopyRegion);
_device->StagingManager.ReleaseBuffer(cmdBuffer, buffer);
vkCmdCopyBufferToImage(cmdBuffer->GetHandle(), allocation.Buffer, textureVulkan->GetHandle(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &bufferCopyRegion);
}
void GPUContextVulkan::CopyTexture(GPUTexture* dstResource, uint32 dstSubresource, uint32 dstX, uint32 dstY, uint32 dstZ, GPUTexture* srcResource, uint32 srcSubresource)

View File

@@ -972,133 +972,6 @@ void HelperResourcesVulkan::Dispose()
}
}
StagingManagerVulkan::StagingManagerVulkan(GPUDeviceVulkan* device)
: _device(device)
{
}
GPUBuffer* StagingManagerVulkan::AcquireBuffer(uint32 size, GPUResourceUsage usage)
{
// Try reuse free buffer
{
ScopeLock lock(_locker);
for (int32 i = 0; i < _freeBuffers.Count(); i++)
{
auto& freeBuffer = _freeBuffers[i];
if (freeBuffer.Buffer->GetSize() == size && freeBuffer.Buffer->GetDescription().Usage == usage)
{
const auto buffer = freeBuffer.Buffer;
_freeBuffers.RemoveAt(i);
return buffer;
}
}
}
// Allocate new buffer
auto buffer = _device->CreateBuffer(TEXT("Pooled Staging"));
if (buffer->Init(GPUBufferDescription::Buffer(size, GPUBufferFlags::None, PixelFormat::Unknown, nullptr, 0, usage)))
{
LOG(Warning, "Failed to create pooled staging buffer.");
return nullptr;
}
// Cache buffer
{
ScopeLock lock(_locker);
_allBuffers.Add(buffer);
#if !BUILD_RELEASE
_allBuffersAllocSize += size;
_allBuffersTotalSize += size;
_allBuffersPeekSize = Math::Max(_allBuffersTotalSize, _allBuffersPeekSize);
#endif
}
return buffer;
}
void StagingManagerVulkan::ReleaseBuffer(CmdBufferVulkan* cmdBuffer, GPUBuffer*& buffer)
{
ScopeLock lock(_locker);
if (cmdBuffer)
{
// Return to pending pool (need to wait until command buffer will be executed and buffer will be reusable)
auto& item = _pendingBuffers.AddOne();
item.Buffer = buffer;
item.CmdBuffer = cmdBuffer;
item.FenceCounter = cmdBuffer->GetFenceSignaledCounter();
}
else
{
// Return to pool
_freeBuffers.Add({ buffer, Engine::FrameCount });
}
// Clear reference
buffer = nullptr;
}
void StagingManagerVulkan::ProcessPendingFree()
{
ScopeLock lock(_locker);
// Find staging buffers that has been processed by the GPU and can be reused
for (int32 i = _pendingBuffers.Count() - 1; i >= 0; i--)
{
auto& e = _pendingBuffers[i];
if (e.FenceCounter < e.CmdBuffer->GetFenceSignaledCounter())
{
// Return to pool
_freeBuffers.Add({ e.Buffer, Engine::FrameCount });
_pendingBuffers.RemoveAt(i);
}
}
// Free staging buffers that has not been used for a few frames
for (int32 i = _freeBuffers.Count() - 1; i >= 0; i--)
{
auto& e = _freeBuffers.Get()[i];
if (e.FrameNumber + VULKAN_RESOURCE_DELETE_SAFE_FRAMES_COUNT < Engine::FrameCount)
{
auto buffer = e.Buffer;
// Remove buffer from lists
_allBuffers.Remove(buffer);
_freeBuffers.RemoveAt(i);
#if !BUILD_RELEASE
// Update stats
_allBuffersFreeSize += buffer->GetSize();
_allBuffersTotalSize -= buffer->GetSize();
#endif
// Release memory
buffer->ReleaseGPU();
Delete(buffer);
}
}
}
void StagingManagerVulkan::Dispose()
{
ScopeLock lock(_locker);
#if BUILD_DEBUG
LOG(Info, "Vulkan staging buffers peek memory usage: {0}, allocs: {1}, frees: {2}", Utilities::BytesToText(_allBuffersPeekSize), Utilities::BytesToText(_allBuffersAllocSize), Utilities::BytesToText(_allBuffersFreeSize));
#endif
// Release buffers and clear memory
for (auto buffer : _allBuffers)
{
buffer->ReleaseGPU();
Delete(buffer);
}
_allBuffers.Resize(0);
_pendingBuffers.Resize(0);
}
GPUDeviceVulkan::GPUDeviceVulkan(ShaderProfile shaderProfile, GPUAdapterVulkan* adapter)
: GPUDevice(RendererType::Vulkan, shaderProfile)
, _renderPasses(512)
@@ -1106,7 +979,7 @@ GPUDeviceVulkan::GPUDeviceVulkan(ShaderProfile shaderProfile, GPUAdapterVulkan*
, _layouts(4096)
, Adapter(adapter)
, DeferredDeletionQueue(this)
, StagingManager(this)
, UploadBuffer(this)
, HelperResources(this)
{
}
@@ -2088,8 +1961,8 @@ void GPUDeviceVulkan::DrawBegin()
// Flush resources
DeferredDeletionQueue.ReleaseResources();
StagingManager.ProcessPendingFree();
DescriptorPoolsManager->GC();
UploadBuffer.BeginGeneration(Engine::FrameCount);
#if VULKAN_USE_PIPELINE_CACHE
// Serialize pipeline cache periodically for less PSO hitches on next app run
@@ -2125,7 +1998,7 @@ void GPUDeviceVulkan::Dispose()
_renderPasses.ClearDelete();
_layouts.ClearDelete();
HelperResources.Dispose();
StagingManager.Dispose();
UploadBuffer.Dispose();
TimestampQueryPools.ClearDelete();
OcclusionQueryPools.ClearDelete();
SAFE_DELETE_GPU_RESOURCE(UniformBufferUploader);

View File

@@ -7,6 +7,7 @@
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Graphics/GPUResource.h"
#include "DescriptorSetVulkan.h"
#include "UploadBufferVulkan.h"
#include "IncludeVulkanHeaders.h"
#include "Config.h"
@@ -326,45 +327,6 @@ public:
void Dispose();
};
/// <summary>
/// Vulkan staging buffers manager.
/// </summary>
class StagingManagerVulkan
{
private:
struct PendingEntry
{
GPUBuffer* Buffer;
CmdBufferVulkan* CmdBuffer;
uint64 FenceCounter;
};
struct FreeEntry
{
GPUBuffer* Buffer;
uint64 FrameNumber;
};
GPUDeviceVulkan* _device;
CriticalSection _locker;
Array<GPUBuffer*> _allBuffers;
Array<FreeEntry> _freeBuffers;
Array<PendingEntry> _pendingBuffers;
#if !BUILD_RELEASE
uint64 _allBuffersTotalSize = 0;
uint64 _allBuffersPeekSize = 0;
uint64 _allBuffersAllocSize = 0;
uint64 _allBuffersFreeSize = 0;
#endif
public:
StagingManagerVulkan(GPUDeviceVulkan* device);
GPUBuffer* AcquireBuffer(uint32 size, GPUResourceUsage usage);
void ReleaseBuffer(CmdBufferVulkan* cmdBuffer, GPUBuffer*& buffer);
void ProcessPendingFree();
void Dispose();
};
/// <summary>
/// Implementation of Graphics Device for Vulkan backend.
/// </summary>
@@ -464,9 +426,9 @@ public:
DeferredDeletionQueueVulkan DeferredDeletionQueue;
/// <summary>
/// The staging buffers manager.
/// Data uploading utility via pages.
/// </summary>
StagingManagerVulkan StagingManager;
UploadBufferVulkan UploadBuffer;
/// <summary>
/// The helper device resources manager.

View File

@@ -0,0 +1,195 @@
// Copyright (c) Wojciech Figat. All rights reserved.
#if GRAPHICS_API_VULKAN
#include "UploadBufferVulkan.h"
#include "GPUDeviceVulkan.h"
#include "RenderToolsVulkan.h"
#include "Engine/Graphics/GPUResource.h"
#include "Engine/Profiler/ProfilerMemory.h"
/// <summary>
/// One Vulkan buffer used as a page of upload memory by UploadBufferVulkan.
/// </summary>
class UploadBufferPageVulkan : public GPUResourceBase<GPUDeviceVulkan, GPUResource>, public ResourceOwnerVulkan
{
public:
    UploadBufferPageVulkan(GPUDeviceVulkan* device, uint64 size);

    /// <summary>
    /// Generation index stamped by the allocator the last time memory was taken from this page.
    /// </summary>
    uint64 LastGen;

    /// <summary>
    /// Capacity of the page (in bytes).
    /// </summary>
    uint64 Size;

    /// <summary>
    /// CPU-side pointer to the beginning of the mapped page memory.
    /// </summary>
    void* Mapped;

    /// <summary>
    /// Vulkan buffer handle that holds the page data.
    /// </summary>
    VkBuffer Buffer;

    /// <summary>
    /// Memory allocation backing the buffer.
    /// </summary>
    VmaAllocation Allocation;

    // [GPUResourceBase]
    // Upload pages are reported as buffer resources.
    GPUResourceType GetResourceType() const final override
    {
        return GPUResourceType::Buffer;
    }

    // [ResourceOwnerVulkan]
    // The page is its own GPU resource object.
    GPUResource* AsGPUResource() const override
    {
        return (GPUResource*)this;
    }

protected:
    // [GPUResourceBase]
    void OnReleaseGPU() final override;
};
UploadBufferVulkan::UploadBufferVulkan(GPUDeviceVulkan* device)
: _device(device)
, _currentPage(nullptr)
, _currentOffset(0)
, _currentGeneration(0)
{
}
// Sub-allocates `size` bytes from the current upload page at an offset aligned to `align`
// (expected to be a power of two). When the request doesn't fit, a page is taken from the
// free list (exact size match) or newly created and becomes the current one.
UploadBufferVulkan::Allocation UploadBufferVulkan::Allocate(uint64 size, uint64 align)
{
    const uint64 mask = align - 1;
    ASSERT_LOW_LAYER((mask & align) == 0);
    const uint64 paddedSize = Math::AlignUpWithMask(size, mask);
    // Requests bigger than the default page get a dedicated page of exactly their size
    const uint64 wantedPageSize = Math::Max<uint64>(size, VULKAN_DEFAULT_UPLOAD_PAGE_SIZE);

    // Start the allocation on the requested alignment boundary
    _currentOffset = Math::AlignUpWithMask(_currentOffset, mask);

    // Keep the current page only if the aligned request still fits into it
    const bool fitsCurrentPage = _currentPage != nullptr && _currentOffset + paddedSize <= _currentPage->Size;
    if (!fitsCurrentPage)
    {
        _currentPage = nullptr;

        // Prefer recycling a pooled page of the wanted size
        for (int32 index = 0; index < _freePages.Count() && _currentPage == nullptr; index++)
        {
            UploadBufferPageVulkan* candidate = _freePages.Get()[index];
            if (candidate->Size != wantedPageSize)
                continue;
            _freePages.RemoveAt(index);
            _currentPage = candidate;
        }

        // Nothing to recycle so create a brand new page
        if (!_currentPage)
            _currentPage = New<UploadBufferPageVulkan>(_device, wantedPageSize);

        _usedPages.Add(_currentPage);
        ASSERT_LOW_LAYER(_currentPage->Buffer);
        _currentOffset = 0;
    }

    // Stamp the page so it is not recycled while this generation may still read from it
    _currentPage->LastGen = _currentGeneration;

    // Describe the range handed out to the caller
    const Allocation allocation{ (byte*)_currentPage->Mapped + _currentOffset, _currentOffset, size, _currentPage->Buffer, _currentGeneration };

    // Advance the write cursor (the next request re-aligns it)
    _currentOffset += size;

    return allocation;
}
// Allocates `size` bytes (aligned to `align`) and fills them with a copy of `data`.
// Returns the allocation so the caller can record a copy command that reads from it.
UploadBufferVulkan::Allocation UploadBufferVulkan::Upload(const void* data, uint64 size, uint64 align)
{
    auto uploaded = Allocate(size, align);
    void* destination = uploaded.Mapped;
    Platform::MemoryCopy(destination, data, size);
    return uploaded;
}
// Starts a new generation (called once per frame with the frame counter):
// - used pages not touched for more than VULKAN_UPLOAD_PAGE_GEN_TIMEOUT generations go back to the free list,
// - free pages idle for an additional VULKAN_UPLOAD_PAGE_NOT_USED_FRAME_TIMEOUT generations are destroyed.
void UploadBufferVulkan::BeginGeneration(uint64 generation)
{
    // Restore ready pages to be reused
    for (int32 i = 0; i < _usedPages.Count(); i++)
    {
        auto page = _usedPages[i];
        if (page->LastGen + VULKAN_UPLOAD_PAGE_GEN_TIMEOUT < generation)
        {
            // Never keep sub-allocating from a page that sits in the free list: Allocate could pick it again
            // from the pool within the same generation (restarting at offset 0 and overwriting pending data)
            // or it could get destroyed below while still being referenced as the current page.
            if (page == _currentPage)
            {
                _currentPage = nullptr;
                _currentOffset = 0;
            }

            _usedPages.RemoveAt(i);
            i--; // Revisit this slot as another item has taken the place of the removed one
            _freePages.Add(page);
        }
    }

    // Remove old pages (iterating backwards so removing an item never affects the slots that are still to be visited)
    for (int32 i = _freePages.Count() - 1; i >= 0; i--)
    {
        auto page = _freePages[i];
        if (page->LastGen + VULKAN_UPLOAD_PAGE_GEN_TIMEOUT + VULKAN_UPLOAD_PAGE_NOT_USED_FRAME_TIMEOUT < generation)
        {
            _freePages.RemoveAt(i);
            page->ReleaseGPU();
            Delete(page);
        }
    }

    // Set new generation
    _currentGeneration = generation;
}
void UploadBufferVulkan::Dispose()
{
_freePages.Add(_usedPages);
for (auto page : _freePages)
{
page->ReleaseGPU();
Delete(page);
}
}
// Creates a transfer-source buffer of the given size (in bytes) and keeps it mapped for CPU writes.
// All handles start in a null state so a failed creation never leaves garbage values behind.
UploadBufferPageVulkan::UploadBufferPageVulkan(GPUDeviceVulkan* device, uint64 size)
    : GPUResourceBase(device, TEXT("Upload Buffer Page"))
    , LastGen(0)
    , Size(size)
    , Mapped(nullptr)
    , Buffer(VK_NULL_HANDLE)
    , Allocation(VK_NULL_HANDLE)
{
    // The page is only ever read by the GPU as a copy source
    VkBufferCreateInfo bufferInfo;
    RenderToolsVulkan::ZeroStruct(bufferInfo, VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
    bufferInfo.size = size;
    bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

    // Host memory written sequentially by the CPU and mapped for the whole lifetime of the allocation
    VmaAllocationCreateInfo allocCreateInfo = {};
    allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
    allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT;

    // Zero-initialized so pMappedData is null (not garbage) when the creation fails
    VmaAllocationInfo allocInfo = {};
    VALIDATE_VULKAN_RESULT(vmaCreateBuffer(_device->Allocator, &bufferInfo, &allocCreateInfo, &Buffer, &Allocation, &allocInfo));
    Mapped = allocInfo.pMappedData;
    ASSERT_LOW_LAYER(Mapped);

    _memoryUsage = size;
    PROFILE_MEM_INC(GraphicsCommands, _memoryUsage);
}
// Destroys the page buffer together with its memory and clears all handles.
void UploadBufferPageVulkan::OnReleaseGPU()
{
    // Update memory stats before the resource goes away
    PROFILE_MEM_DEC(GraphicsCommands, _memoryUsage);

    // Free the buffer and its allocation in one call
    vmaDestroyBuffer(_device->Allocator, Buffer, Allocation);

    // Reset the state so stale handles are never reused
    Mapped = nullptr;
    Allocation = VK_NULL_HANDLE;
    Buffer = VK_NULL_HANDLE;
}
#endif

View File

@@ -0,0 +1,79 @@
// Copyright (c) Wojciech Figat. All rights reserved.
#pragma once
#include "Engine/Graphics/GPUDevice.h"
#include "ResourceOwnerVulkan.h"
#if GRAPHICS_API_VULKAN
class GPUDeviceVulkan;
class UploadBufferPageVulkan;
// Upload buffer page size (default capacity of a single page; bigger requests get a dedicated page of their own size)
#define VULKAN_DEFAULT_UPLOAD_PAGE_SIZE (4 * 1024 * 1024) // 4 MB
// Upload buffer generations timeout to dispose
#define VULKAN_UPLOAD_PAGE_GEN_TIMEOUT 3
// Upload buffer pages that are not used for a few frames are disposed
#define VULKAN_UPLOAD_PAGE_NOT_USED_FRAME_TIMEOUT 60
/// <summary>
/// Utility for uploading data to GPU resources via pages of CPU-writable memory that get recycled over generations.
/// </summary>
class UploadBufferVulkan
{
public:
    /// <summary>
    /// Range of memory sub-allocated from an upload page.
    /// </summary>
    struct Allocation
    {
        /// <summary>
        /// CPU pointer to the first byte of the allocation.
        /// </summary>
        void* Mapped;

        /// <summary>
        /// Offset of the allocation (in bytes) from the beginning of the page buffer.
        /// </summary>
        uint64 Offset;

        /// <summary>
        /// Size of the allocation (in bytes).
        /// </summary>
        uint64 Size;

        /// <summary>
        /// Buffer of the upload page that contains the allocation.
        /// </summary>
        VkBuffer Buffer;

        /// <summary>
        /// Generation in which the allocation has been made (pages get recycled once they are not used for a few generations).
        /// </summary>
        uint64 Generation;
    };

private:
    GPUDeviceVulkan* _device;
    UploadBufferPageVulkan* _currentPage;
    uint64 _currentOffset;
    uint64 _currentGeneration;
    Array<UploadBufferPageVulkan*, InlinedAllocation<64>> _freePages;
    Array<UploadBufferPageVulkan*, InlinedAllocation<64>> _usedPages;

public:
    /// <summary>
    /// Initializes an empty uploader (no pages are created up-front).
    /// </summary>
    /// <param name="device">The graphics device.</param>
    UploadBufferVulkan(GPUDeviceVulkan* device);

    /// <summary>
    /// Allocates memory from the current page (a new page is acquired if the request doesn't fit).
    /// </summary>
    /// <param name="size">Size of the data (in bytes).</param>
    /// <param name="align">Alignment of the data offset within the page (in bytes). Has to be a power of two.</param>
    /// <returns>The allocated memory range.</returns>
    Allocation Allocate(uint64 size, uint64 align);

    /// <summary>
    /// Allocates memory and copies the given data into it.
    /// </summary>
    /// <param name="data">Source data to copy.</param>
    /// <param name="size">Size of the data (in bytes).</param>
    /// <param name="align">Alignment of the data offset within the page (in bytes). Has to be a power of two.</param>
    /// <returns>The allocated memory range that contains a copy of the data.</returns>
    Allocation Upload(const void* data, uint64 size, uint64 align);

    /// <summary>
    /// Begins a new generation: moves pages that are no longer in use to the free list and destroys pages idle for too long.
    /// </summary>
    /// <param name="generation">The generation ID to begin.</param>
    void BeginGeneration(uint64 generation);

    /// <summary>
    /// Releases all the pages.
    /// </summary>
    void Dispose();
};
#endif