From 9d95bbaa8cdfc1a6344fefbdfb9c774b1aa5ac55 Mon Sep 17 00:00:00 2001
From: Wojtek Figat
Date: Tue, 24 Feb 2026 23:18:07 +0100
Subject: [PATCH] Add GPU data upload allocator with shared page pool

---
 .../WebGPU/GPUContextWebGPU.cpp               |  24 +++-
 .../GraphicsDevice/WebGPU/GPUContextWebGPU.h  |   1 +
 .../GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp | 104 +++++++++++++++---
 .../GraphicsDevice/WebGPU/GPUDeviceWebGPU.h   |  35 ++++++
 .../GraphicsDevice/WebGPU/GPUShaderWebGPU.cpp |  18 +--
 .../GraphicsDevice/WebGPU/GPUShaderWebGPU.h   |   9 +-
 6 files changed, 147 insertions(+), 44 deletions(-)

diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp
index 48f5c0fc6..2ec86e47a 100644
--- a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp
@@ -41,6 +41,7 @@ GPUContextWebGPU::GPUContextWebGPU(GPUDeviceWebGPU* device)
     , _device(device)
 {
     _vertexBufferNullLayout = WGPU_VERTEX_BUFFER_LAYOUT_INIT;
+    _minUniformBufferOffsetAlignment = device->MinUniformBufferOffsetAlignment; // Cached copy of the device value, used as the alignment of uniform suballocations in UpdateCB
 }
 
 GPUContextWebGPU::~GPUContextWebGPU()
@@ -309,7 +310,12 @@ void GPUContextWebGPU::UpdateCB(GPUConstantBuffer* cb, const void* data)
     const uint32 size = cbWebGPU->GetSize();
     if (size != 0)
     {
-        wgpuQueueWriteBuffer(_device->Queue, cbWebGPU->Buffer, 0, data, size);
+        // Suballocate a uniform-capable chunk from the shared page pool and remember it on the constant buffer
+        auto allocation = _device->DataUploader.Allocate(size, _minUniformBufferOffsetAlignment, WGPUBufferUsage_Uniform);
+        cbWebGPU->Allocation = allocation;
+        // TODO: consider holding CPU-side staging buffer and copying data to the GPU buffer in a single batch for all uniforms (before flushing the active command encoder)
+        wgpuQueueWriteBuffer(_device->Queue, allocation.Buffer, allocation.Offset, data, size);
+        _bindGroupDirty = true; // The constant buffer now points at a different buffer/offset
     }
 }
 
@@ -461,9 +467,21 @@ void GPUContextWebGPU::Flush()
 void GPUContextWebGPU::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 size, uint32 offset)
 {
     ASSERT(data);
-    ASSERT(buffer && buffer->GetSize() >= size);
+    ASSERT(buffer && buffer->GetSize() >= size + offset);
     auto bufferWebGPU = (GPUBufferWebGPU*)buffer;
-    wgpuQueueWriteBuffer(_device->Queue, bufferWebGPU->Buffer, offset, data, size);
+    if (bufferWebGPU->IsDynamic())
+    {
+        // Synchronous upload: stage the data in a shared upload page, then copy it into the target buffer on the command encoder
+        // TODO: test using map/unmap sequence
+        auto allocation = _device->DataUploader.Allocate(size, 16, WGPUBufferUsage_CopySrc); // Stage exactly 'size' bytes (the write and the copy below both use 'size'); the page is a copy source so it needs CopySrc
+        wgpuQueueWriteBuffer(_device->Queue, allocation.Buffer, allocation.Offset, data, size);
+        wgpuCommandEncoderCopyBufferToBuffer(Encoder, allocation.Buffer, allocation.Offset, bufferWebGPU->Buffer, offset, size);
+    }
+    else
+    {
+        // Efficient upload via queue (writes directly into the target buffer at 'offset')
+        wgpuQueueWriteBuffer(_device->Queue, bufferWebGPU->Buffer, offset, data, size);
+    }
 }
 
 void GPUContextWebGPU::CopyBuffer(GPUBuffer* dstBuffer, GPUBuffer* srcBuffer, uint32 size, uint32 dstOffset, uint32 srcOffset)
diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h
index ae7093627..a9e8be54f 100644
--- a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h
@@ -43,6 +43,7 @@ private:
 
     GPUDeviceWebGPU* _device;
     WGPUVertexBufferLayout _vertexBufferNullLayout;
+    uint32 _minUniformBufferOffsetAlignment; // Set in the constructor from GPUDeviceWebGPU::MinUniformBufferOffsetAlignment
 
     // State tracking
     int32 _renderPassDirty : 1;
diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp
index 772335a66..27e0e9e59 100644
--- a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp
@@ -19,6 +19,7 @@
 #include "Engine/Core/Collections/Sorting.h"
 #endif
 #include "Engine/Graphics/PixelFormatExtensions.h"
+#include "Engine/Engine/Engine.h"
 #include "Engine/Profiler/ProfilerMemory.h"
 #include
 
@@ -44,6 +45,81 @@ GPUVertexLayoutWebGPU::GPUVertexLayoutWebGPU(GPUDeviceWebGPU* device, const Elem
     }
 }
 
+GPUDataUploaderWebGPU::Allocation GPUDataUploaderWebGPU::Allocate(uint32 size, uint32 alignment, WGPUBufferUsage usage)
+{
+    // Suballocate from a page already used in the current frame (plain uploads take CopyDst-only pages, others need every requested usage flag)
+    for (auto& e : _entries)
+    {
+        uint32 alignedOffset = Math::AlignUp(e.ActiveOffset, alignment);
+        if (e.ActiveFrame == _frame && (usage ? (e.Usage & usage) == usage : e.Usage == WGPUBufferUsage_CopyDst) && alignedOffset + size <= e.Size)
+        {
+            e.ActiveOffset = alignedOffset + size;
+            return { e.Buffer, alignedOffset };
+        }
+    }
+
+    // Recycle a page last used more than 3 frames ago (written as an addition because '_frame - 3' wraps around for the first frames; same usage rule as above)
+    for (auto& e : _entries)
+    {
+        if (e.ActiveFrame + 3 < _frame && (usage ? (e.Usage & usage) == usage : e.Usage == WGPUBufferUsage_CopyDst) && size <= e.Size)
+        {
+            e.ActiveOffset = size;
+            e.ActiveFrame = _frame;
+            return { e.Buffer, 0 };
+        }
+    }
+
+    // Allocate a new page (returns a null Buffer on failure)
+    {
+        WGPUBufferDescriptor desc = WGPU_BUFFER_DESCRIPTOR_INIT;
+#if GPU_ENABLE_RESOURCE_NAMING
+        if (usage & WGPUBufferUsage_Uniform)
+            desc.label = WEBGPU_STR("Upload Uniforms");
+        else
+            desc.label = WEBGPU_STR("Upload Buffer");
+#endif
+        desc.size = Math::Max<uint32>(16 * 1024, Math::RoundUpToPowerOf2(size)); // Allocate larger pages for good suballocations
+        desc.usage = WGPUBufferUsage_CopyDst | usage;
+        WGPUBuffer buffer = wgpuDeviceCreateBuffer(_device, &desc);
+        if (buffer == nullptr)
+        {
+            LOG(Error, "Failed to create buffer of size {} bytes", size);
+            return { nullptr, 0 };
+        }
+        _entries.Insert(0, { buffer, (uint32)desc.size, size, _frame, desc.usage });
+        PROFILE_MEM_INC(GraphicsBuffers, desc.size); // Track bytes, matching the PROFILE_MEM_DEC of Entry::Size on release
+        return { buffer, 0 };
+    }
+}
+
+void GPUDataUploaderWebGPU::DrawBegin()
+{
+    // Release pages that were not used for more than 100 frames (reverse iteration keeps indices valid while removing)
+    uint64 frame = Engine::FrameCount;
+    for (int32 i = _entries.Count() - 1; i >= 0; i--)
+    {
+        auto& e = _entries[i];
+        if (frame - e.ActiveFrame > 100)
+        {
+            wgpuBufferRelease(e.Buffer);
+            PROFILE_MEM_DEC(GraphicsBuffers, e.Size);
+            _entries.RemoveAt(i);
+        }
+    }
+    _frame = frame;
+}
+
+void GPUDataUploaderWebGPU::ReleaseGPU()
+{
+    // Release every page and forget all entries
+    for (auto& e : _entries)
+    {
+        wgpuBufferRelease(e.Buffer);
+        PROFILE_MEM_DEC(GraphicsBuffers, e.Size);
+    }
+    _entries.Clear();
+}
+
 GPUDeviceWebGPU::GPUDeviceWebGPU(WGPUInstance instance, GPUAdapterWebGPU* adapter)
     : GPUDevice(RendererType::WebGPU, ShaderProfile::WebGPU)
     , Adapter(adapter)
@@ -105,6 +181,7 @@ bool GPUDeviceWebGPU::Init()
     WGPULimits limits = WGPU_LIMITS_INIT;
     if (wgpuAdapterGetLimits(Adapter->Adapter, &limits) == WGPUStatus_Success)
     {
+        MinUniformBufferOffsetAlignment = limits.minUniformBufferOffsetAlignment; // Alignment later used for uniform data suballocations
         Limits.HasDepthClip = features.Contains(WGPUFeatureName_DepthClipControl);
         Limits.HasReadOnlyDepth = true;
         Limits.MaximumTexture1DSize = Math::Min(GPU_MAX_TEXTURE_SIZE, limits.maxTextureDimension1D);
@@ -400,6 +477,7 @@ bool GPUDeviceWebGPU::Init()
 #undef INIT_SAMPLER
 
     // Setup commands processing
+    DataUploader._device = Device; // Device the uploader creates its page buffers on
     Queue = wgpuDeviceGetQueue(Device);
     _mainContext = New<GPUContextWebGPU>(this);
 
@@ -407,6 +485,13 @@ bool GPUDeviceWebGPU::Init()
     return GPUDevice::Init();
 }
 
+void GPUDeviceWebGPU::DrawBegin()
+{
+    GPUDevice::DrawBegin();
+
+    DataUploader.DrawBegin(); // Releases long-unused upload pages and advances the uploader's frame index
+}
+
 GPUDeviceWebGPU::~GPUDeviceWebGPU()
 {
     // Ensure to be disposed
@@ -479,6 +564,7 @@ void GPUDeviceWebGPU::Dispose()
     preDispose();
 
     // Clear device resources
+    DataUploader.ReleaseGPU();
     SAFE_DELETE_GPU_RESOURCES(DefaultSamplers);
     SAFE_DELETE(_mainContext);
     SAFE_DELETE(Adapter);
@@ -556,23 +642,7 @@ GPUSwapChain* GPUDeviceWebGPU::CreateSwapChain(Window* window)
 GPUConstantBuffer* GPUDeviceWebGPU::CreateConstantBuffer(uint32 size, const StringView& name)
 {
     PROFILE_MEM(GraphicsShaders);
-    WGPUBuffer buffer = nullptr;
-    if (size)
-    {
-        WGPUBufferDescriptor desc = WGPU_BUFFER_DESCRIPTOR_INIT;
-#if GPU_ENABLE_RESOURCE_NAMING
-        desc.label = WEBGPU_STR("Uniform");
-#endif
-        desc.size = size;
-        desc.usage = WGPUBufferUsage_CopyDst | WGPUBufferUsage_Uniform;
-        buffer = wgpuDeviceCreateBuffer(Device, &desc);
-        if (buffer == nullptr)
-        {
-            LOG(Error, "Failed to create uniform buffer '{}' of size {} bytes", name, size);
-            return nullptr;
-        }
-    }
-    return New<GPUConstantBufferWebGPU>(this, size, buffer, name);
+    return New<GPUConstantBufferWebGPU>(this, size, name); // No dedicated GPU buffer anymore: data is placed in the shared upload pool on update
 }
 
 #endif
diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.h b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.h
index 325d0166c..8731388b0 100644
--- a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.h
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.h
@@ -12,6 +12,38 @@ class GPUContextWebGPU;
 class GPUAdapterWebGPU;
 class GPUSamplerWebGPU;
 
+/// <summary>
+/// Pool for uploading data to GPU buffers. It manages large buffers and suballocates for multiple small updates, minimizing the number of buffer creations and copies.
+/// </summary>
+class GPUDataUploaderWebGPU
+{
+    friend class GPUDeviceWebGPU;
+private:
+    struct Entry
+    {
+        WGPUBuffer Buffer; // Page buffer, released in DrawBegin (when stale) or ReleaseGPU
+        uint32 Size; // Page size in bytes
+        uint32 ActiveOffset; // First free byte of the page for allocations made during ActiveFrame
+        uint64 ActiveFrame; // Frame index of the last (re)use of this page
+        WGPUBufferUsage Usage; // Usage flags the page was created with (always includes CopyDst)
+    };
+
+    uint64 _frame = 0;
+    WGPUDevice _device = nullptr;
+    Array<Entry> _entries;
+
+public:
+    struct Allocation
+    {
+        WGPUBuffer Buffer = nullptr;
+        uint32 Offset = 0;
+    };
+
+    Allocation Allocate(uint32 size, uint32 alignment = 16, WGPUBufferUsage usage = 0); // Returns a page buffer and byte offset with room for 'size' bytes (null Buffer when page creation fails)
+    void DrawBegin(); // Releases pages unused for over 100 frames and stores Engine::FrameCount as the current frame
+    void ReleaseGPU(); // Releases all pages
+};
+
 /// <summary>
 /// Implementation of Graphics Device for Web GPU backend.
 /// </summary>
@@ -30,6 +62,8 @@ public:
     WGPUDevice Device = nullptr;
     WGPUQueue Queue = nullptr;
     GPUSamplerWebGPU* DefaultSamplers[6] = {};
+    GPUDataUploaderWebGPU DataUploader; // Shared page pool for data uploads, suballocated by GPUContextWebGPU::UpdateCB and UpdateBuffer
+    uint32 MinUniformBufferOffsetAlignment = 1; // Overwritten in Init() with the adapter limit when wgpuAdapterGetLimits succeeds
 
 public:
     // [GPUDeviceDX]
@@ -46,6 +80,7 @@ public:
         return Device;
     }
     bool Init() override;
+    void DrawBegin() override; // Calls the base implementation, then DataUploader.DrawBegin()
     void Dispose() override;
     void WaitForGPU() override;
     bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) override;
diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUShaderWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUShaderWebGPU.cpp
index 0de1478d3..66f537538 100644
--- a/Source/Engine/GraphicsDevice/WebGPU/GPUShaderWebGPU.cpp
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUShaderWebGPU.cpp
@@ -10,26 +10,10 @@
 #include "Engine/GraphicsDevice/Vulkan/Types.h"
 #include "Engine/Serialization/MemoryReadStream.h"
 
-GPUConstantBufferWebGPU::GPUConstantBufferWebGPU(GPUDeviceWebGPU* device, uint32 size, WGPUBuffer buffer, const StringView& name) noexcept
+GPUConstantBufferWebGPU::GPUConstantBufferWebGPU(GPUDeviceWebGPU* device, uint32 size, const StringView& name) noexcept
     : GPUResourceWebGPU(device, name)
 {
     _size = _memoryUsage = size;
-    Buffer = buffer;
-}
-
-GPUConstantBufferWebGPU::~GPUConstantBufferWebGPU()
-{
-    if (Buffer)
-        wgpuBufferRelease(Buffer);
-}
-
-void GPUConstantBufferWebGPU::OnReleaseGPU()
-{
-    if (Buffer)
-    {
-        wgpuBufferRelease(Buffer);
-        Buffer = nullptr;
-    }
 }
 
 GPUShaderProgram* GPUShaderWebGPU::CreateGPUShaderProgram(ShaderStage type, const GPUShaderProgramInitializer& initializer, Span bytecode, MemoryReadStream& stream)
diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUShaderWebGPU.h b/Source/Engine/GraphicsDevice/WebGPU/GPUShaderWebGPU.h
index c50cba515..62aa5b925 100644
--- a/Source/Engine/GraphicsDevice/WebGPU/GPUShaderWebGPU.h
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUShaderWebGPU.h
@@ -14,15 +14,10 @@
 class GPUConstantBufferWebGPU : public GPUResourceWebGPU
 {
 public:
-    GPUConstantBufferWebGPU(GPUDeviceWebGPU* device, uint32 size, WGPUBuffer buffer, const StringView& name) noexcept;
-    ~GPUConstantBufferWebGPU();
+    GPUConstantBufferWebGPU(GPUDeviceWebGPU* device, uint32 size, const StringView& name) noexcept;
 
 public:
-    WGPUBuffer Buffer;
-
-public:
-    // [GPUResourceWebGPU]
-    void OnReleaseGPU() final override;
+    GPUDataUploaderWebGPU::Allocation Allocation; // Upload-pool buffer and offset holding the latest data; assigned by GPUContextWebGPU::UpdateCB (buffer is owned by the pool, not released here)
 };
 
 /// <summary>