From 831fb0f4425aebd3bd6b2713fd08faa66a699681 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 4 Sep 2025 21:38:07 +0200 Subject: [PATCH] Optimize textures/buffers uploading on Vulkan with page allocator --- .../DirectX/DX12/GPUContextDX12.cpp | 8 +- .../DirectX/DX12/GPUDeviceDX12.cpp | 9 +- .../DirectX/DX12/GPUDeviceDX12.h | 7 +- .../DirectX/DX12/UploadBufferDX12.cpp | 154 ++++++------ .../DirectX/DX12/UploadBufferDX12.h | 222 ++++-------------- .../Vulkan/GPUContextVulkan.cpp | 17 +- .../GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp | 133 +---------- .../GraphicsDevice/Vulkan/GPUDeviceVulkan.h | 44 +--- .../Vulkan/UploadBufferVulkan.cpp | 195 +++++++++++++++ .../Vulkan/UploadBufferVulkan.h | 79 +++++++ 10 files changed, 427 insertions(+), 441 deletions(-) create mode 100644 Source/Engine/GraphicsDevice/Vulkan/UploadBufferVulkan.cpp create mode 100644 Source/Engine/GraphicsDevice/Vulkan/UploadBufferVulkan.h diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp index c132c7343..c68e7f262 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp @@ -1119,7 +1119,7 @@ void GPUContextDX12::UpdateCB(GPUConstantBuffer* cb, const void* data) return; // Allocate bytes for the buffer - DynamicAllocation allocation = _device->UploadBuffer->Allocate(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); + auto allocation = _device->UploadBuffer.Allocate(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); // Copy data Platform::MemoryCopy(allocation.CPUAddress, data, allocation.Size); @@ -1343,7 +1343,7 @@ void GPUContextDX12::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 si SetResourceState(bufferDX12, D3D12_RESOURCE_STATE_COPY_DEST); flushRBs(); - _device->UploadBuffer->UploadBuffer(this, bufferDX12->GetResource(), offset, data, size); + _device->UploadBuffer.UploadBuffer(GetCommandList(), 
bufferDX12->GetResource(), offset, data, size); } void GPUContextDX12::CopyBuffer(GPUBuffer* dstBuffer, GPUBuffer* srcBuffer, uint32 size, uint32 dstOffset, uint32 srcOffset) @@ -1369,7 +1369,7 @@ void GPUContextDX12::UpdateTexture(GPUTexture* texture, int32 arrayIndex, int32 SetResourceState(textureDX12, D3D12_RESOURCE_STATE_COPY_DEST); flushRBs(); - _device->UploadBuffer->UploadTexture(this, textureDX12->GetResource(), data, rowPitch, slicePitch, mipIndex, arrayIndex); + _device->UploadBuffer.UploadTexture(GetCommandList(), textureDX12->GetResource(), data, rowPitch, slicePitch, mipIndex, arrayIndex); } void GPUContextDX12::CopyTexture(GPUTexture* dstResource, uint32 dstSubresource, uint32 dstX, uint32 dstY, uint32 dstZ, GPUTexture* srcResource, uint32 srcSubresource) @@ -1424,7 +1424,7 @@ void GPUContextDX12::ResetCounter(GPUBuffer* buffer) flushRBs(); uint32 value = 0; - _device->UploadBuffer->UploadBuffer(this, counter->GetResource(), 0, &value, 4); + _device->UploadBuffer.UploadBuffer(GetCommandList(), counter->GetResource(), 0, &value, 4); SetResourceState(counter, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); } diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp index e176d199c..e140da37e 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp @@ -244,7 +244,7 @@ GPUDeviceDX12::GPUDeviceDX12(IDXGIFactory4* dxgiFactory, GPUAdapterDX* adapter) , _rootSignature(nullptr) , _commandQueue(nullptr) , _mainContext(nullptr) - , UploadBuffer(nullptr) + , UploadBuffer(this) , TimestampQueryHeap(this, D3D12_QUERY_HEAP_TYPE_TIMESTAMP, DX12_BACK_BUFFER_COUNT * 1024) , Heap_CBV_SRV_UAV(this, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 4 * 1024, false) , Heap_RTV(this, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 1 * 1024, false) @@ -701,9 +701,6 @@ bool GPUDeviceDX12::Init() VALIDATE_DIRECTX_CALL(_device->CreateRootSignature(0, 
signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&_rootSignature))); } - // Upload buffer - UploadBuffer = New(this); - if (TimestampQueryHeap.Init()) return true; @@ -740,7 +737,7 @@ void GPUDeviceDX12::DrawBegin() GPUDeviceDX::DrawBegin(); updateRes2Dispose(); - UploadBuffer->BeginGeneration(Engine::FrameCount); + UploadBuffer.BeginGeneration(Engine::FrameCount); } void GPUDeviceDX12::RenderEnd() @@ -811,7 +808,7 @@ void GPUDeviceDX12::Dispose() Heap_Sampler.ReleaseGPU(); RingHeap_CBV_SRV_UAV.ReleaseGPU(); RingHeap_Sampler.ReleaseGPU(); - SAFE_DELETE(UploadBuffer); + UploadBuffer.ReleaseGPU(); SAFE_DELETE(DrawIndirectCommandSignature); SAFE_DELETE(_mainContext); SAFE_DELETE(_commandQueue); diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.h index d2ddeb1a6..064ed9a01 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.h @@ -8,6 +8,7 @@ #include "Engine/Graphics/GPUResource.h" #include "../IncludeDirectXHeaders.h" #include "ResourceOwnerDX12.h" +#include "UploadBufferDX12.h" #include "QueryHeapDX12.h" #include "DescriptorHeapDX12.h" @@ -26,7 +27,6 @@ class Engine; class WindowsWindow; class GPUContextDX12; class GPUSwapChainDX12; -class UploadBufferDX12; class CommandQueueDX12; class CommandSignatureDX12; @@ -70,11 +70,10 @@ public: ~GPUDeviceDX12(); public: - /// - /// Upload buffer for general purpose + /// Data uploading utility via pages. /// - UploadBufferDX12* UploadBuffer; + UploadBufferDX12 UploadBuffer; /// /// The timestamp queries heap. 
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/UploadBufferDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/UploadBufferDX12.cpp index 8fdfd5ac3..a0ae79f51 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/UploadBufferDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/UploadBufferDX12.cpp @@ -4,10 +4,57 @@ #include "UploadBufferDX12.h" #include "GPUTextureDX12.h" -#include "GPUContextDX12.h" #include "../RenderToolsDX.h" +#include "Engine/Graphics/GPUResource.h" #include "Engine/Profiler/ProfilerMemory.h" +/// +/// Single page for the upload buffer +/// +class UploadBufferPageDX12 : public GPUResourceBase, public ResourceOwnerDX12 +{ +public: + UploadBufferPageDX12(GPUDeviceDX12* device, uint64 size); + +public: + /// + /// Last generation that has been using that page + /// + uint64 LastGen; + + /// + /// CPU memory address of the page + /// + void* CPUAddress; + + /// + /// GPU memory address of the page + /// + D3D12_GPU_VIRTUAL_ADDRESS GPUAddress; + + /// + /// Page size in bytes + /// + uint64 Size; + +public: + // [GPUResourceDX12] + GPUResourceType GetResourceType() const final override + { + return GPUResourceType::Buffer; + } + + // [ResourceOwnerDX12] + GPUResource* AsGPUResource() const override + { + return (GPUResource*)this; + } + +protected: + // [GPUResourceDX12] + void OnReleaseGPU() final override; +}; + UploadBufferDX12::UploadBufferDX12(GPUDeviceDX12* device) : _device(device) , _currentPage(nullptr) @@ -16,24 +63,11 @@ UploadBufferDX12::UploadBufferDX12(GPUDeviceDX12* device) { } -UploadBufferDX12::~UploadBufferDX12() -{ - _freePages.Add(_usedPages); - for (auto page : _freePages) - { - page->ReleaseGPU(); - Delete(page); - } -} - -DynamicAllocation UploadBufferDX12::Allocate(uint64 size, uint64 align) +UploadBufferDX12::Allocation UploadBufferDX12::Allocate(uint64 size, uint64 align) { const uint64 alignmentMask = align - 1; - ASSERT((alignmentMask & align) == 0); - - // Check if use default or bigger page - const bool 
useDefaultSize = size <= DX12_DEFAULT_UPLOAD_PAGE_SIZE; - const uint64 pageSize = useDefaultSize ? DX12_DEFAULT_UPLOAD_PAGE_SIZE : size; + ASSERT_LOW_LAYER((alignmentMask & align) == 0); + const uint64 pageSize = Math::Max(size, DX12_DEFAULT_UPLOAD_PAGE_SIZE); const uint64 alignedSize = Math::AlignUpWithMask(size, alignmentMask); // Align the allocation @@ -41,14 +75,26 @@ DynamicAllocation UploadBufferDX12::Allocate(uint64 size, uint64 align) // Check if there is enough space for that chunk of the data in the current page if (_currentPage && _currentOffset + alignedSize > _currentPage->Size) - { _currentPage = nullptr; - } // Check if need to get new page if (_currentPage == nullptr) { - _currentPage = requestPage(pageSize); + // Try reusing existing page + for (int32 i = 0; i < _freePages.Count(); i++) + { + UploadBufferPageDX12* page = _freePages.Get()[i]; + if (page->Size == pageSize) + { + _freePages.RemoveAt(i); + _currentPage = page; + break; + } + } + if (_currentPage == nullptr) + _currentPage = New(_device, pageSize); + _usedPages.Add(_currentPage); + ASSERT_LOW_LAYER(_currentPage->GetResource()); _currentOffset = 0; } @@ -56,32 +102,27 @@ DynamicAllocation UploadBufferDX12::Allocate(uint64 size, uint64 align) _currentPage->LastGen = _currentGeneration; // Create allocation result - const DynamicAllocation result(static_cast(_currentPage->CPUAddress) + _currentOffset, _currentOffset, size, _currentPage->GPUAddress + _currentOffset, _currentPage, _currentGeneration); + const Allocation result { (byte*)_currentPage->CPUAddress + _currentOffset, _currentOffset, size, _currentPage->GPUAddress + _currentOffset, _currentPage->GetResource(), _currentGeneration }; - // Move in the page + // Move within a page _currentOffset += size; - ASSERT(_currentPage->GetResource()); return result; } -bool UploadBufferDX12::UploadBuffer(GPUContextDX12* context, ID3D12Resource* buffer, uint32 bufferOffset, const void* data, uint64 size) +void 
UploadBufferDX12::UploadBuffer(ID3D12GraphicsCommandList* commandList, ID3D12Resource* buffer, uint32 bufferOffset, const void* data, uint64 size) { // Allocate data - const DynamicAllocation allocation = Allocate(size, 4); - if (allocation.IsInvalid()) - return true; + const auto allocation = Allocate(size, GPU_SHADER_DATA_ALIGNMENT); // Copy data - Platform::MemoryCopy(allocation.CPUAddress, data, static_cast(size)); + Platform::MemoryCopy(allocation.CPUAddress, data, size); // Copy buffer region - context->GetCommandList()->CopyBufferRegion(buffer, bufferOffset, allocation.Page->GetResource(), allocation.Offset, size); - - return false; + commandList->CopyBufferRegion(buffer, bufferOffset, allocation.Resource, allocation.Offset, size); } -bool UploadBufferDX12::UploadTexture(GPUContextDX12* context, ID3D12Resource* texture, const void* srcData, uint32 srcRowPitch, uint32 srcSlicePitch, int32 mipIndex, int32 arrayIndex) +void UploadBufferDX12::UploadTexture(ID3D12GraphicsCommandList* commandList, ID3D12Resource* texture, const void* srcData, uint32 srcRowPitch, uint32 srcSlicePitch, int32 mipIndex, int32 arrayIndex) { D3D12_RESOURCE_DESC resourceDesc = texture->GetDesc(); const UINT subresourceIndex = RenderToolsDX::CalcSubresourceIndex(mipIndex, arrayIndex, resourceDesc.MipLevels); @@ -95,9 +136,7 @@ bool UploadBufferDX12::UploadTexture(GPUContextDX12* context, ID3D12Resource* te const uint64 sliceSizeAligned = numSlices * mipSizeAligned; // Allocate data - const DynamicAllocation allocation = Allocate(sliceSizeAligned, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); - if (allocation.Size != sliceSizeAligned) - return true; + const auto allocation = Allocate(sliceSizeAligned, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); byte* ptr = (byte*)srcData; ASSERT(srcSlicePitch <= sliceSizeAligned); @@ -128,15 +167,13 @@ bool UploadBufferDX12::UploadTexture(GPUContextDX12* context, ID3D12Resource* te // Source buffer copy location description D3D12_TEXTURE_COPY_LOCATION srcLocation; 
- srcLocation.pResource = allocation.Page->GetResource(); + srcLocation.pResource = allocation.Resource; srcLocation.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; srcLocation.PlacedFootprint.Offset = allocation.Offset; srcLocation.PlacedFootprint.Footprint = footprint.Footprint; // Copy texture region - context->GetCommandList()->CopyTextureRegion(&dstLocation, 0, 0, 0, &srcLocation, nullptr); - - return false; + commandList->CopyTextureRegion(&dstLocation, 0, 0, 0, &srcLocation, nullptr); } void UploadBufferDX12::BeginGeneration(uint64 generation) @@ -170,41 +207,18 @@ void UploadBufferDX12::BeginGeneration(uint64 generation) _currentGeneration = generation; } -UploadBufferPageDX12* UploadBufferDX12::requestPage(uint64 size) +void UploadBufferDX12::ReleaseGPU() { - // Try to find valid page - int32 freePageIndex = -1; - for (int32 i = 0; i < _freePages.Count(); i++) + _freePages.Add(_usedPages); + for (auto page : _freePages) { - if (_freePages[i]->Size == size) - { - freePageIndex = i; - break; - } + page->ReleaseGPU(); + Delete(page); } - - // Check if create a new page - UploadBufferPageDX12* page; - if (freePageIndex == -1) - { - // Get a new page to use - page = New(_device, size); - } - else - { - // Remove from free pages - page = _freePages[freePageIndex]; - _freePages.RemoveAt(freePageIndex); - } - - // Mark page as used - _usedPages.Add(page); - - return page; } UploadBufferPageDX12::UploadBufferPageDX12(GPUDeviceDX12* device, uint64 size) - : GPUResourceDX12(device, TEXT("Upload Buffer Page")) + : GPUResourceBase(device, TEXT("Upload Buffer Page")) , LastGen(0) , CPUAddress(nullptr) , GPUAddress(0) @@ -234,7 +248,7 @@ UploadBufferPageDX12::UploadBufferPageDX12(GPUDeviceDX12* device, uint64 size) // Set state initResource(resource, D3D12_RESOURCE_STATE_GENERIC_READ, 1); - DX_SET_DEBUG_NAME(_resource, GPUResourceDX12::GetName()); + DX_SET_DEBUG_NAME(_resource, GetName()); _memoryUsage = size; PROFILE_MEM_INC(GraphicsCommands, _memoryUsage); GPUAddress 
= _resource->GetGPUVirtualAddress(); @@ -249,9 +263,7 @@ void UploadBufferPageDX12::OnReleaseGPU() // Unmap if (_resource && CPUAddress) - { _resource->Unmap(0, nullptr); - } GPUAddress = 0; CPUAddress = nullptr; diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/UploadBufferDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/UploadBufferDX12.h index a2b18a7a5..a43b25a01 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/UploadBufferDX12.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/UploadBufferDX12.h @@ -2,11 +2,15 @@ #pragma once -#include "GPUDeviceDX12.h" +#include "Engine/Graphics/GPUDevice.h" #include "ResourceOwnerDX12.h" #if GRAPHICS_API_DIRECTX12 +class GPUDeviceDX12; +class UploadBufferPageDX12; + +// Upload buffer page size #define DX12_DEFAULT_UPLOAD_PAGE_SIZE (4 * 1014 * 1024) // 4 MB // Upload buffer generations timeout to dispose @@ -15,223 +19,93 @@ // Upload buffer pages that are not used for a few frames are disposed #define DX12_UPLOAD_PAGE_NOT_USED_FRAME_TIMEOUT 60 -class GPUTextureDX12; - -/// -/// Single page for the upload buffer -/// -class UploadBufferPageDX12 : public GPUResourceDX12, public ResourceOwnerDX12 -{ -public: - - /// - /// Init - /// - /// Graphics Device - /// Page size - UploadBufferPageDX12(GPUDeviceDX12* device, uint64 size); - -public: - - /// - /// Last generation that has been using that page - /// - uint64 LastGen; - - /// - /// CPU memory address of the page - /// - void* CPUAddress; - - /// - /// GPU memory address of the page - /// - D3D12_GPU_VIRTUAL_ADDRESS GPUAddress; - - /// - /// Page size in bytes - /// - uint64 Size; - -public: - - // [GPUResourceDX12] - GPUResourceType GetResourceType() const final override - { - return GPUResourceType::Buffer; - } - - // [ResourceOwnerDX12] - GPUResource* AsGPUResource() const override - { - return (GPUResource*)this; - } - -protected: - - // [GPUResourceDX12] - void OnReleaseGPU() final override; -}; - -/// -/// Upload buffer allocation -/// -struct 
DynamicAllocation -{ - /// - /// CPU memory address of the allocation start. - /// - void* CPUAddress; - - /// - /// Allocation offset in bytes (from the start of the heap buffer). - /// - uint64 Offset; - - /// - /// Allocation size in bytes - /// - uint64 Size; - - /// - /// GPU virtual memory address of the allocation start. - /// - D3D12_GPU_VIRTUAL_ADDRESS GPUAddress; - - /// - /// Upload buffer page that owns that allocation - /// - UploadBufferPageDX12* Page; - - /// - /// Generation number of that allocation (generally allocation is invalid after one or two generations) - /// - uint64 Generation; - - /// - /// Init - /// - DynamicAllocation() - : CPUAddress(nullptr) - , Offset(0) - , Size(0) - , GPUAddress(0) - , Page(nullptr) - , Generation(0) - { - } - - /// - /// Init - /// - /// CPU memory address - /// Offset in byes - /// Size in byes - /// GPU memory address - /// Parent page - /// Generation - DynamicAllocation(void* address, uint64 offset, uint64 size, D3D12_GPU_VIRTUAL_ADDRESS gpuAddress, UploadBufferPageDX12* page, uint64 generation) - : CPUAddress(address) - , Offset(offset) - , Size(size) - , GPUAddress(gpuAddress) - , Page(page) - , Generation(generation) - { - } - - /// - /// Returns true if allocation is invalid. - /// - bool IsInvalid() const - { - return CPUAddress == nullptr || Size == 0 || Page == nullptr; - } -}; - /// /// Uploading data to GPU buffer utility /// class UploadBufferDX12 { -private: +public: + /// + /// Upload buffer allocation + /// + struct Allocation + { + /// + /// CPU memory address of the allocation start. + /// + void* CPUAddress; + /// + /// Allocation offset in bytes (from the start of the heap buffer). + /// + uint64 Offset; + + /// + /// Allocation size in bytes + /// + uint64 Size; + + /// + /// GPU virtual memory address of the allocation start. 
+ /// + D3D12_GPU_VIRTUAL_ADDRESS GPUAddress; + + /// + /// Upload buffer page resource that owns that allocation + /// + ID3D12Resource* Resource; + + /// + /// Generation number of that allocation (generally allocation is invalid after one or two generations) + /// + uint64 Generation; + }; + +private: GPUDeviceDX12* _device; UploadBufferPageDX12* _currentPage; uint64 _currentOffset; uint64 _currentGeneration; - Array> _freePages; Array> _usedPages; public: - - /// - /// Init - /// - /// Graphics Device UploadBufferDX12(GPUDeviceDX12* device); - /// - /// Destructor - /// - ~UploadBufferDX12(); - public: - - /// - /// Gets the current generation number. - /// - FORCE_INLINE uint64 GetCurrentGeneration() const - { - return _currentGeneration; - } - -public: - /// /// Allocates memory for custom data in the buffer. /// /// Size of the data in bytes /// Data alignment in buffer in bytes /// Dynamic location - DynamicAllocation Allocate(uint64 size, uint64 align); + Allocation Allocate(uint64 size, uint64 align); /// /// Uploads data to the buffer. /// - /// GPU context to record upload command to it + /// GPU command list to record upload command to it /// Destination buffer /// Destination buffer offset in bytes. /// Data to allocate /// Size of the data in bytes - /// True if cannot upload data, otherwise false. - bool UploadBuffer(GPUContextDX12* context, ID3D12Resource* buffer, uint32 bufferOffset, const void* data, uint64 size); + void UploadBuffer(ID3D12GraphicsCommandList* commandList, ID3D12Resource* buffer, uint32 bufferOffset, const void* data, uint64 size); /// /// Uploads data to the texture. /// - /// GPU context to record upload command to it + /// GPU command list to record upload command to it /// Destination texture /// Data to allocate /// Source data row pitch value to upload. /// Source data slice pitch value to upload. /// Mip map to stream index /// Texture array index - /// True if cannot upload data, otherwise false. 
- bool UploadTexture(GPUContextDX12* context, ID3D12Resource* texture, const void* srcData, uint32 srcRowPitch, uint32 srcSlicePitch, int32 mipIndex, int32 arrayIndex); + void UploadTexture(ID3D12GraphicsCommandList* commandList, ID3D12Resource* texture, const void* srcData, uint32 srcRowPitch, uint32 srcSlicePitch, int32 mipIndex, int32 arrayIndex); public: - - /// - /// Begins new generation. - /// - /// The generation ID to begin. void BeginGeneration(uint64 generation); - -private: - - UploadBufferPageDX12* requestPage(uint64 size); + void ReleaseGPU(); }; #endif diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp index 34ad63a66..45dccf467 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp @@ -1386,16 +1386,13 @@ void GPUContextVulkan::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 } else { - auto staging = _device->StagingManager.AcquireBuffer(size, GPUResourceUsage::StagingUpload); - staging->SetData(data, size); + auto allocation = _device->UploadBuffer.Upload(data, size, 4); VkBufferCopy region; region.size = size; - region.srcOffset = 0; + region.srcOffset = allocation.Offset; region.dstOffset = offset; - vkCmdCopyBuffer(cmdBuffer->GetHandle(), ((GPUBufferVulkan*)staging)->GetHandle(), ((GPUBufferVulkan*)buffer)->GetHandle(), 1, ®ion); - - _device->StagingManager.ReleaseBuffer(cmdBuffer, staging); + vkCmdCopyBuffer(cmdBuffer->GetHandle(), allocation.Buffer, ((GPUBufferVulkan*)buffer)->GetHandle(), 1, ®ion); } // Memory transfer barrier to ensure buffer is ready to read (eg. 
by Draw or Dispatch) @@ -1444,14 +1441,14 @@ void GPUContextVulkan::UpdateTexture(GPUTexture* texture, int32 arrayIndex, int3 AddImageBarrier(textureVulkan, mipIndex, arrayIndex, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); FlushBarriers(); - auto buffer = _device->StagingManager.AcquireBuffer(slicePitch, GPUResourceUsage::StagingUpload); - buffer->SetData(data, slicePitch); + auto allocation = _device->UploadBuffer.Upload(data, slicePitch, 512); // Setup buffer copy region int32 mipWidth, mipHeight, mipDepth; texture->GetMipSize(mipIndex, mipWidth, mipHeight, mipDepth); VkBufferImageCopy bufferCopyRegion; Platform::MemoryClear(&bufferCopyRegion, sizeof(bufferCopyRegion)); + bufferCopyRegion.bufferOffset = allocation.Offset; bufferCopyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; bufferCopyRegion.imageSubresource.mipLevel = mipIndex; bufferCopyRegion.imageSubresource.baseArrayLayer = arrayIndex; @@ -1461,9 +1458,7 @@ void GPUContextVulkan::UpdateTexture(GPUTexture* texture, int32 arrayIndex, int3 bufferCopyRegion.imageExtent.depth = static_cast(mipDepth); // Copy mip level from staging buffer - vkCmdCopyBufferToImage(cmdBuffer->GetHandle(), ((GPUBufferVulkan*)buffer)->GetHandle(), textureVulkan->GetHandle(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &bufferCopyRegion); - - _device->StagingManager.ReleaseBuffer(cmdBuffer, buffer); + vkCmdCopyBufferToImage(cmdBuffer->GetHandle(), allocation.Buffer, textureVulkan->GetHandle(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &bufferCopyRegion); } void GPUContextVulkan::CopyTexture(GPUTexture* dstResource, uint32 dstSubresource, uint32 dstX, uint32 dstY, uint32 dstZ, GPUTexture* srcResource, uint32 srcSubresource) diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp index 9ff9ab77b..dfed9bbb8 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp @@ -972,133 +972,6 @@ 
void HelperResourcesVulkan::Dispose() } } -StagingManagerVulkan::StagingManagerVulkan(GPUDeviceVulkan* device) - : _device(device) -{ -} - -GPUBuffer* StagingManagerVulkan::AcquireBuffer(uint32 size, GPUResourceUsage usage) -{ - // Try reuse free buffer - { - ScopeLock lock(_locker); - - for (int32 i = 0; i < _freeBuffers.Count(); i++) - { - auto& freeBuffer = _freeBuffers[i]; - if (freeBuffer.Buffer->GetSize() == size && freeBuffer.Buffer->GetDescription().Usage == usage) - { - const auto buffer = freeBuffer.Buffer; - _freeBuffers.RemoveAt(i); - return buffer; - } - } - } - - // Allocate new buffer - auto buffer = _device->CreateBuffer(TEXT("Pooled Staging")); - if (buffer->Init(GPUBufferDescription::Buffer(size, GPUBufferFlags::None, PixelFormat::Unknown, nullptr, 0, usage))) - { - LOG(Warning, "Failed to create pooled staging buffer."); - return nullptr; - } - - // Cache buffer - { - ScopeLock lock(_locker); - - _allBuffers.Add(buffer); -#if !BUILD_RELEASE - _allBuffersAllocSize += size; - _allBuffersTotalSize += size; - _allBuffersPeekSize = Math::Max(_allBuffersTotalSize, _allBuffersPeekSize); -#endif - } - - return buffer; -} - -void StagingManagerVulkan::ReleaseBuffer(CmdBufferVulkan* cmdBuffer, GPUBuffer*& buffer) -{ - ScopeLock lock(_locker); - - if (cmdBuffer) - { - // Return to pending pool (need to wait until command buffer will be executed and buffer will be reusable) - auto& item = _pendingBuffers.AddOne(); - item.Buffer = buffer; - item.CmdBuffer = cmdBuffer; - item.FenceCounter = cmdBuffer->GetFenceSignaledCounter(); - } - else - { - // Return to pool - _freeBuffers.Add({ buffer, Engine::FrameCount }); - } - - // Clear reference - buffer = nullptr; -} - -void StagingManagerVulkan::ProcessPendingFree() -{ - ScopeLock lock(_locker); - - // Find staging buffers that has been processed by the GPU and can be reused - for (int32 i = _pendingBuffers.Count() - 1; i >= 0; i--) - { - auto& e = _pendingBuffers[i]; - if (e.FenceCounter < 
e.CmdBuffer->GetFenceSignaledCounter()) - { - // Return to pool - _freeBuffers.Add({ e.Buffer, Engine::FrameCount }); - _pendingBuffers.RemoveAt(i); - } - } - - // Free staging buffers that has not been used for a few frames - for (int32 i = _freeBuffers.Count() - 1; i >= 0; i--) - { - auto& e = _freeBuffers.Get()[i]; - if (e.FrameNumber + VULKAN_RESOURCE_DELETE_SAFE_FRAMES_COUNT < Engine::FrameCount) - { - auto buffer = e.Buffer; - - // Remove buffer from lists - _allBuffers.Remove(buffer); - _freeBuffers.RemoveAt(i); - -#if !BUILD_RELEASE - // Update stats - _allBuffersFreeSize += buffer->GetSize(); - _allBuffersTotalSize -= buffer->GetSize(); -#endif - - // Release memory - buffer->ReleaseGPU(); - Delete(buffer); - } - } -} - -void StagingManagerVulkan::Dispose() -{ - ScopeLock lock(_locker); - -#if BUILD_DEBUG - LOG(Info, "Vulkan staging buffers peek memory usage: {0}, allocs: {1}, frees: {2}", Utilities::BytesToText(_allBuffersPeekSize), Utilities::BytesToText(_allBuffersAllocSize), Utilities::BytesToText(_allBuffersFreeSize)); -#endif - - // Release buffers and clear memory - for (auto buffer : _allBuffers) - { - buffer->ReleaseGPU(); - Delete(buffer); - } - _allBuffers.Resize(0); - _pendingBuffers.Resize(0); -} - GPUDeviceVulkan::GPUDeviceVulkan(ShaderProfile shaderProfile, GPUAdapterVulkan* adapter) : GPUDevice(RendererType::Vulkan, shaderProfile) , _renderPasses(512) @@ -1106,7 +979,7 @@ GPUDeviceVulkan::GPUDeviceVulkan(ShaderProfile shaderProfile, GPUAdapterVulkan* , _layouts(4096) , Adapter(adapter) , DeferredDeletionQueue(this) - , StagingManager(this) + , UploadBuffer(this) , HelperResources(this) { } @@ -2088,8 +1961,8 @@ void GPUDeviceVulkan::DrawBegin() // Flush resources DeferredDeletionQueue.ReleaseResources(); - StagingManager.ProcessPendingFree(); DescriptorPoolsManager->GC(); + UploadBuffer.BeginGeneration(Engine::FrameCount); #if VULKAN_USE_PIPELINE_CACHE // Serialize pipeline cache periodically for less PSO hitches on next app run @@ -2125,7 
+1998,7 @@ void GPUDeviceVulkan::Dispose() _renderPasses.ClearDelete(); _layouts.ClearDelete(); HelperResources.Dispose(); - StagingManager.Dispose(); + UploadBuffer.Dispose(); TimestampQueryPools.ClearDelete(); OcclusionQueryPools.ClearDelete(); SAFE_DELETE_GPU_RESOURCE(UniformBufferUploader); diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h index a30dbda1c..ae265100c 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h @@ -7,6 +7,7 @@ #include "Engine/Graphics/GPUDevice.h" #include "Engine/Graphics/GPUResource.h" #include "DescriptorSetVulkan.h" +#include "UploadBufferVulkan.h" #include "IncludeVulkanHeaders.h" #include "Config.h" @@ -326,45 +327,6 @@ public: void Dispose(); }; -/// -/// Vulkan staging buffers manager. -/// -class StagingManagerVulkan -{ -private: - struct PendingEntry - { - GPUBuffer* Buffer; - CmdBufferVulkan* CmdBuffer; - uint64 FenceCounter; - }; - - struct FreeEntry - { - GPUBuffer* Buffer; - uint64 FrameNumber; - }; - - GPUDeviceVulkan* _device; - CriticalSection _locker; - Array _allBuffers; - Array _freeBuffers; - Array _pendingBuffers; -#if !BUILD_RELEASE - uint64 _allBuffersTotalSize = 0; - uint64 _allBuffersPeekSize = 0; - uint64 _allBuffersAllocSize = 0; - uint64 _allBuffersFreeSize = 0; -#endif - -public: - StagingManagerVulkan(GPUDeviceVulkan* device); - GPUBuffer* AcquireBuffer(uint32 size, GPUResourceUsage usage); - void ReleaseBuffer(CmdBufferVulkan* cmdBuffer, GPUBuffer*& buffer); - void ProcessPendingFree(); - void Dispose(); -}; - /// /// Implementation of Graphics Device for Vulkan backend. /// @@ -464,9 +426,9 @@ public: DeferredDeletionQueueVulkan DeferredDeletionQueue; /// - /// The staging buffers manager. + /// Data uploading utility via pages. /// - StagingManagerVulkan StagingManager; + UploadBufferVulkan UploadBuffer; /// /// The helper device resources manager. 
diff --git a/Source/Engine/GraphicsDevice/Vulkan/UploadBufferVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/UploadBufferVulkan.cpp
new file mode 100644
index 000000000..15b58a8b4
--- /dev/null
+++ b/Source/Engine/GraphicsDevice/Vulkan/UploadBufferVulkan.cpp
@@ -0,0 +1,229 @@
+// Copyright (c) Wojciech Figat. All rights reserved.
+
+#if GRAPHICS_API_VULKAN
+
+#include "UploadBufferVulkan.h"
+#include "GPUDeviceVulkan.h"
+#include "RenderToolsVulkan.h"
+#include "Engine/Graphics/GPUResource.h"
+#include "Engine/Profiler/ProfilerMemory.h"
+
+/// <summary>
+/// Single page for the upload buffer
+/// </summary>
+class UploadBufferPageVulkan : public GPUResourceBase<GPUDeviceVulkan, GPUResource>, public ResourceOwnerVulkan
+{
+public:
+    UploadBufferPageVulkan(GPUDeviceVulkan* device, uint64 size);
+
+public:
+    /// <summary>
+    /// Last generation that has been using that page
+    /// </summary>
+    uint64 LastGen;
+
+    /// <summary>
+    /// Page size in bytes
+    /// </summary>
+    uint64 Size;
+
+    /// <summary>
+    /// CPU memory address of the page
+    /// </summary>
+    void* Mapped;
+
+    /// <summary>
+    /// Buffer that stores the page data
+    /// </summary>
+    VkBuffer Buffer;
+
+    /// <summary>
+    /// Buffer memory allocation
+    /// </summary>
+    VmaAllocation Allocation;
+
+public:
+    // [GPUResourceVulkan]
+    GPUResourceType GetResourceType() const final override
+    {
+        return GPUResourceType::Buffer;
+    }
+
+    // [ResourceOwnerVulkan]
+    GPUResource* AsGPUResource() const override
+    {
+        return (GPUResource*)this;
+    }
+
+protected:
+    // [GPUResourceVulkan]
+    void OnReleaseGPU() final override;
+};
+
+UploadBufferVulkan::UploadBufferVulkan(GPUDeviceVulkan* device)
+    : _device(device)
+    , _currentPage(nullptr)
+    , _currentOffset(0)
+    , _currentGeneration(0)
+{
+}
+
+/// <summary>
+/// Allocates a chunk of upload memory from the current page. Switches to a free page of a matching size (or creates a new one) when the chunk doesn't fit.
+/// </summary>
+/// <param name="size">Size of the allocation in bytes.</param>
+/// <param name="align">Alignment of the allocation offset in bytes (has to be a power of two).</param>
+/// <returns>Allocation with the mapped CPU address, offset within the page buffer, size, page buffer and the current generation.</returns>
+UploadBufferVulkan::Allocation UploadBufferVulkan::Allocate(uint64 size, uint64 align)
+{
+    const uint64 alignmentMask = align - 1;
+    ASSERT_LOW_LAYER((alignmentMask & align) == 0);
+    const uint64 pageSize = Math::Max<uint64>(size, VULKAN_DEFAULT_UPLOAD_PAGE_SIZE);
+    const uint64 alignedSize = Math::AlignUpWithMask(size, alignmentMask);
+
+    // Align the allocation
+    _currentOffset = Math::AlignUpWithMask(_currentOffset, alignmentMask);
+
+    // Check if there is enough space for that chunk of the data in the current page
+    if (_currentPage && _currentOffset + alignedSize > _currentPage->Size)
+        _currentPage = nullptr;
+
+    // Check if need to get new page
+    if (_currentPage == nullptr)
+    {
+        // Try reusing existing page
+        for (int32 i = 0; i < _freePages.Count(); i++)
+        {
+            UploadBufferPageVulkan* page = _freePages.Get()[i];
+            if (page->Size == pageSize)
+            {
+                _freePages.RemoveAt(i);
+                _currentPage = page;
+                break;
+            }
+        }
+        if (_currentPage == nullptr)
+            _currentPage = New<UploadBufferPageVulkan>(_device, pageSize);
+        _usedPages.Add(_currentPage);
+        ASSERT_LOW_LAYER(_currentPage->Buffer);
+        _currentOffset = 0;
+    }
+
+    // Mark page as used in this generation
+    _currentPage->LastGen = _currentGeneration;
+
+    // Create allocation result
+    const Allocation result{ (byte*)_currentPage->Mapped + _currentOffset, _currentOffset, size, _currentPage->Buffer, _currentGeneration };
+
+    // Move within a page
+    _currentOffset += size;
+
+    return result;
+}
+
+/// <summary>
+/// Allocates a chunk of upload memory and copies the given data into it.
+/// </summary>
+/// <param name="data">Source data to copy (size bytes are read).</param>
+/// <param name="size">Size of the data in bytes.</param>
+/// <param name="align">Alignment of the allocation offset in bytes (has to be a power of two).</param>
+/// <returns>Allocation that contains a copy of the data.</returns>
+UploadBufferVulkan::Allocation UploadBufferVulkan::Upload(const void* data, uint64 size, uint64 align)
+{
+    auto allocation = Allocate(size, align);
+    Platform::MemoryCopy(allocation.Mapped, data, size);
+    return allocation;
+}
+
+/// <summary>
+/// Begins a new generation: moves the pages not used for a few generations to the free list and deletes the free pages that stayed unused for too long.
+/// </summary>
+/// <param name="generation">The new generation number.</param>
+void UploadBufferVulkan::BeginGeneration(uint64 generation)
+{
+    // Restore ready pages to be reused
+    for (int32 i = 0; i < _usedPages.Count(); i++)
+    {
+        auto page = _usedPages[i];
+        if (page->LastGen + VULKAN_UPLOAD_PAGE_GEN_TIMEOUT < generation)
+        {
+            // Stop allocating from a retired page (otherwise it could be handed out again from the free list or deleted while still being the current one)
+            if (page == _currentPage)
+                _currentPage = nullptr;
+            _usedPages.RemoveAt(i);
+            i--;
+            _freePages.Add(page);
+        }
+    }
+
+    // Remove old pages (backward iteration, so removing an item doesn't affect the items left to visit)
+    for (int32 i = _freePages.Count() - 1; i >= 0; i--)
+    {
+        auto page = _freePages[i];
+        if (page->LastGen + VULKAN_UPLOAD_PAGE_GEN_TIMEOUT + VULKAN_UPLOAD_PAGE_NOT_USED_FRAME_TIMEOUT < generation)
+        {
+            _freePages.RemoveAt(i);
+            page->ReleaseGPU();
+            Delete(page);
+        }
+    }
+
+    // Set new generation
+    _currentGeneration = generation;
+}
+
+/// <summary>
+/// Releases all the pages (used and free ones) and resets the allocator state.
+/// </summary>
+void UploadBufferVulkan::Dispose()
+{
+    _freePages.Add(_usedPages);
+    _usedPages.Clear();
+    for (auto page : _freePages)
+    {
+        page->ReleaseGPU();
+        Delete(page);
+    }
+    _freePages.Clear();
+
+    // Drop the references to the deleted pages
+    _currentPage = nullptr;
+    _currentOffset = 0;
+}
+
+/// <summary>
+/// Creates the transfer-source buffer of the given size that stays mapped for sequential CPU writes.
+/// </summary>
+UploadBufferPageVulkan::UploadBufferPageVulkan(GPUDeviceVulkan* device, uint64 size)
+    : GPUResourceBase(device, TEXT("Upload Buffer Page"))
+    , LastGen(0)
+    , Size(size)
+    , Mapped(nullptr)
+    , Buffer(VK_NULL_HANDLE)
+    , Allocation(VK_NULL_HANDLE)
+{
+    VkBufferCreateInfo bufferInfo;
+    RenderToolsVulkan::ZeroStruct(bufferInfo, VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
+    bufferInfo.size = size;
+    bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+    VmaAllocationCreateInfo allocCreateInfo = {};
+    allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+    allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT;
+    VmaAllocationInfo allocInfo = {}; // Zeroed, so a failed creation is caught by the assert below
+    vmaCreateBuffer(_device->Allocator, &bufferInfo, &allocCreateInfo, &Buffer, &Allocation, &allocInfo);
+    Mapped = allocInfo.pMappedData;
+    ASSERT_LOW_LAYER(Mapped);
+    _memoryUsage = size;
+    PROFILE_MEM_INC(GraphicsCommands, _memoryUsage);
+}
+
+void UploadBufferPageVulkan::OnReleaseGPU()
+{
+    PROFILE_MEM_DEC(GraphicsCommands, _memoryUsage);
+    vmaDestroyBuffer(_device->Allocator, Buffer, Allocation);
+    Buffer = VK_NULL_HANDLE;
+    Allocation = VK_NULL_HANDLE;
+    Mapped = nullptr;
+}
+
+#endif
diff --git a/Source/Engine/GraphicsDevice/Vulkan/UploadBufferVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/UploadBufferVulkan.h
new file mode 100644
index 000000000..bf13340d1
--- /dev/null
+++ b/Source/Engine/GraphicsDevice/Vulkan/UploadBufferVulkan.h
@@ -0,0 +1,79 @@
+// Copyright (c) Wojciech Figat. All rights reserved.
+
+#pragma once
+
+#include "Engine/Graphics/GPUDevice.h"
+#include "ResourceOwnerVulkan.h"
+
+#if GRAPHICS_API_VULKAN
+
+class GPUDeviceVulkan;
+class UploadBufferPageVulkan;
+
+// Default size of a single upload buffer page in bytes (allocations larger than that get a page of their own size)
+#define VULKAN_DEFAULT_UPLOAD_PAGE_SIZE (4 * 1024 * 1024) // 4 MB
+
+// Amount of generations that have to pass since the last use of a page before it can be reused
+#define VULKAN_UPLOAD_PAGE_GEN_TIMEOUT 3
+
+// Amount of additional generations after which a free page that is still not used gets disposed
+#define VULKAN_UPLOAD_PAGE_NOT_USED_FRAME_TIMEOUT 60
+
+/// <summary>
+/// Uploading data to GPU buffer utility
+/// </summary>
+class UploadBufferVulkan
+{
+public:
+    /// <summary>
+    /// Upload buffer allocation
+    /// </summary>
+    struct Allocation
+    {
+        /// <summary>
+        /// CPU memory address of the allocation start.
+        /// </summary>
+        void* Mapped;
+
+        /// <summary>
+        /// Allocation offset in bytes (from the start of the page buffer).
+        /// </summary>
+        uint64 Offset;
+
+        /// <summary>
+        /// Allocation size in bytes
+        /// </summary>
+        uint64 Size;
+
+        /// <summary>
+        /// Upload buffer page resource that owns that allocation
+        /// </summary>
+        VkBuffer Buffer;
+
+        /// <summary>
+        /// Generation number of that allocation (generally allocation is invalid after one or two generations)
+        /// </summary>
+        uint64 Generation;
+    };
+
+private:
+    GPUDeviceVulkan* _device;
+    UploadBufferPageVulkan* _currentPage;
+    uint64 _currentOffset;
+    uint64 _currentGeneration;
+    Array<UploadBufferPageVulkan*, InlinedAllocation<64>> _freePages;
+    Array<UploadBufferPageVulkan*, InlinedAllocation<64>> _usedPages;
+
+public:
+    UploadBufferVulkan(GPUDeviceVulkan* device);
+
+public:
+    Allocation Allocate(uint64 size, uint64 align);
+    Allocation Upload(const void* data, uint64 size, uint64 align);
+
+public:
+    void BeginGeneration(uint64 generation);
+    void Dispose();
+};
+
+#endif