Add GPU data upload allocator with shared page pool

This commit is contained in:
Wojtek Figat
2026-02-24 23:18:07 +01:00
parent e41ec4ebfd
commit 9d95bbaa8c
6 changed files with 147 additions and 44 deletions

View File

@@ -41,6 +41,7 @@ GPUContextWebGPU::GPUContextWebGPU(GPUDeviceWebGPU* device)
, _device(device)
{
_vertexBufferNullLayout = WGPU_VERTEX_BUFFER_LAYOUT_INIT;
_minUniformBufferOffsetAlignment = device->MinUniformBufferOffsetAlignment;
}
GPUContextWebGPU::~GPUContextWebGPU()
@@ -309,7 +310,12 @@ void GPUContextWebGPU::UpdateCB(GPUConstantBuffer* cb, const void* data)
const uint32 size = cbWebGPU->GetSize();
if (size != 0)
{
wgpuQueueWriteBuffer(_device->Queue, cbWebGPU->Buffer, 0, data, size);
// Allocate a chunk of memory in a shared page allocator
auto allocation = _device->DataUploader.Allocate(size, _minUniformBufferOffsetAlignment, WGPUBufferUsage_Uniform);
cbWebGPU->Allocation = allocation;
// TODO: consider holding CPU-side staging buffer and copying data to the GPU buffer in a single batch for all uniforms (before flushing the active command encoder)
wgpuQueueWriteBuffer(_device->Queue, allocation.Buffer, allocation.Offset, data, size);
_bindGroupDirty = true;
}
}
@@ -461,9 +467,21 @@ void GPUContextWebGPU::Flush()
/// <summary>
/// Uploads CPU data into a GPU buffer.
/// </summary>
/// <param name="buffer">Destination buffer; must hold at least offset + size bytes.</param>
/// <param name="data">Source data pointer (size bytes are read).</param>
/// <param name="size">Amount of bytes to upload.</param>
/// <param name="offset">Destination offset (in bytes) inside the buffer.</param>
void GPUContextWebGPU::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 size, uint32 offset)
{
    ASSERT(data);
    ASSERT(buffer && buffer->GetSize() >= size + offset);
    auto bufferWebGPU = (GPUBufferWebGPU*)buffer;
    if (bufferWebGPU->IsDynamic())
    {
        // Synchronous upload via shared buffer
        // TODO: test using map/unmap sequence
        // The staging chunk has to hold all uploaded bytes (size, regardless of the destination offset).
        // The page is the source of the buffer-to-buffer copy below, so it must be created with CopySrc usage.
        auto allocation = _device->DataUploader.Allocate(size, 16, WGPUBufferUsage_CopySrc);
        if (allocation.Buffer == nullptr)
            return; // Page creation failed (already logged by the allocator)
        wgpuQueueWriteBuffer(_device->Queue, allocation.Buffer, allocation.Offset, data, size);
        wgpuCommandEncoderCopyBufferToBuffer(Encoder, allocation.Buffer, allocation.Offset, bufferWebGPU->Buffer, offset, size);
    }
    else
    {
        // Efficient upload via queue
        wgpuQueueWriteBuffer(_device->Queue, bufferWebGPU->Buffer, offset, data, size);
    }
}
void GPUContextWebGPU::CopyBuffer(GPUBuffer* dstBuffer, GPUBuffer* srcBuffer, uint32 size, uint32 dstOffset, uint32 srcOffset)

View File

@@ -43,6 +43,7 @@ private:
GPUDeviceWebGPU* _device;
WGPUVertexBufferLayout _vertexBufferNullLayout;
uint32 _minUniformBufferOffsetAlignment;
// State tracking
int32 _renderPassDirty : 1;

View File

@@ -19,6 +19,7 @@
#include "Engine/Core/Collections/Sorting.h"
#endif
#include "Engine/Graphics/PixelFormatExtensions.h"
#include "Engine/Engine/Engine.h"
#include "Engine/Profiler/ProfilerMemory.h"
#include <emscripten/emscripten.h>
@@ -44,6 +45,81 @@ GPUVertexLayoutWebGPU::GPUVertexLayoutWebGPU(GPUDeviceWebGPU* device, const Elem
}
}
/// <summary>
/// Suballocates a chunk of memory from the shared pool of upload pages. Reuses a page already active in the current frame, then recycles a page unused for several frames, and finally creates a new page.
/// </summary>
/// <param name="size">Amount of bytes to allocate.</param>
/// <param name="alignment">Alignment (in bytes) applied to the returned offset inside the page.</param>
/// <param name="usage">Extra buffer usage flags the page must have (CopyDst is always added). Zero selects pages created with CopyDst only.</param>
/// <returns>The page buffer and the offset of the chunk within it. Buffer is null when a new page could not be created.</returns>
GPUDataUploaderWebGPU::Allocation GPUDataUploaderWebGPU::Allocate(uint32 size, uint32 alignment, WGPUBufferUsage usage)
{
    // Page usage matching shared by both lookups: explicit usage needs all requested flags, generic uploads (usage == 0) stick to plain CopyDst pages
    const auto matchesUsage = [usage](const Entry& e)
    {
        return usage ? (e.Usage & usage) == usage : e.Usage == WGPUBufferUsage_CopyDst;
    };

    // Find a free buffer from the current frame
    for (auto& e : _entries)
    {
        uint32 alignedOffset = Math::AlignUp(e.ActiveOffset, alignment);
        if (e.ActiveFrame == _frame && matchesUsage(e) && alignedOffset + size <= e.Size)
        {
            e.ActiveOffset = alignedOffset + size;
            return { e.Buffer, alignedOffset };
        }
    }

    // Find an unused buffer from the old frames
    for (auto& e : _entries)
    {
        // Compare as 'ActiveFrame + 3 < _frame' so the unsigned math doesn't wrap during the first frames (which would recycle pages still in use this frame)
        if (e.ActiveFrame + 3 < _frame && matchesUsage(e) && size <= e.Size)
        {
            e.ActiveOffset = size;
            e.ActiveFrame = _frame;
            return { e.Buffer, 0 };
        }
    }

    // Allocate a new buffer
    {
        WGPUBufferDescriptor desc = WGPU_BUFFER_DESCRIPTOR_INIT;
#if GPU_ENABLE_RESOURCE_NAMING
        if (usage & WGPUBufferUsage_Uniform)
            desc.label = WEBGPU_STR("Upload Uniforms");
        else
            desc.label = WEBGPU_STR("Upload Buffer");
#endif
        desc.size = Math::Max<uint32>(16 * 1024, Math::RoundUpToPowerOf2(size)); // Allocate larger pages for good suballocations
        desc.usage = WGPUBufferUsage_CopyDst | usage;
        WGPUBuffer buffer = wgpuDeviceCreateBuffer(_device, &desc);
        if (buffer == nullptr)
        {
            LOG(Error, "Failed to create buffer of size {} bytes", size);
            return { nullptr, 0 };
        }

        // New page goes to the front with the requested chunk already taken from its start
        _entries.Insert(0, { buffer, (uint32)desc.size, size, _frame, desc.usage });

        // Track the page size in bytes (matches the PROFILE_MEM_DEC done with Entry::Size on release)
        PROFILE_MEM_INC(GraphicsBuffers, desc.size);
        return { buffer, 0 };
    }
}
void GPUDataUploaderWebGPU::DrawBegin()
{
// Free old buffers and recycle unused ones
uint64 frame = Engine::FrameCount;
for (int32 i = _entries.Count() - 1; i >= 0; i--)
{
auto& e = _entries[i];
if (frame - e.ActiveFrame > 100)
{
wgpuBufferRelease(e.Buffer);
PROFILE_MEM_DEC(GraphicsBuffers, e.Size);
_entries.RemoveAt(i);
}
}
_frame = frame;
}
void GPUDataUploaderWebGPU::ReleaseGPU()
{
// Free data
for (auto& e : _entries)
{
wgpuBufferRelease(e.Buffer);
PROFILE_MEM_DEC(GraphicsBuffers, e.Size);
}
_entries.Clear();
}
GPUDeviceWebGPU::GPUDeviceWebGPU(WGPUInstance instance, GPUAdapterWebGPU* adapter)
: GPUDevice(RendererType::WebGPU, ShaderProfile::WebGPU)
, Adapter(adapter)
@@ -105,6 +181,7 @@ bool GPUDeviceWebGPU::Init()
WGPULimits limits = WGPU_LIMITS_INIT;
if (wgpuAdapterGetLimits(Adapter->Adapter, &limits) == WGPUStatus_Success)
{
MinUniformBufferOffsetAlignment = limits.minUniformBufferOffsetAlignment;
Limits.HasDepthClip = features.Contains(WGPUFeatureName_DepthClipControl);
Limits.HasReadOnlyDepth = true;
Limits.MaximumTexture1DSize = Math::Min<int32>(GPU_MAX_TEXTURE_SIZE, limits.maxTextureDimension1D);
@@ -400,6 +477,7 @@ bool GPUDeviceWebGPU::Init()
#undef INIT_SAMPLER
// Setup commands processing
DataUploader._device = Device;
Queue = wgpuDeviceGetQueue(Device);
_mainContext = New<GPUContextWebGPU>(this);
@@ -407,6 +485,13 @@ bool GPUDeviceWebGPU::Init()
return GPUDevice::Init();
}
void GPUDeviceWebGPU::DrawBegin()
{
GPUDevice::DrawBegin();
DataUploader.DrawBegin();
}
GPUDeviceWebGPU::~GPUDeviceWebGPU()
{
// Ensure to be disposed
@@ -479,6 +564,7 @@ void GPUDeviceWebGPU::Dispose()
preDispose();
// Clear device resources
DataUploader.ReleaseGPU();
SAFE_DELETE_GPU_RESOURCES(DefaultSamplers);
SAFE_DELETE(_mainContext);
SAFE_DELETE(Adapter);
@@ -556,23 +642,7 @@ GPUSwapChain* GPUDeviceWebGPU::CreateSwapChain(Window* window)
GPUConstantBuffer* GPUDeviceWebGPU::CreateConstantBuffer(uint32 size, const StringView& name)
{
PROFILE_MEM(GraphicsShaders);
WGPUBuffer buffer = nullptr;
if (size)
{
WGPUBufferDescriptor desc = WGPU_BUFFER_DESCRIPTOR_INIT;
#if GPU_ENABLE_RESOURCE_NAMING
desc.label = WEBGPU_STR("Uniform");
#endif
desc.size = size;
desc.usage = WGPUBufferUsage_CopyDst | WGPUBufferUsage_Uniform;
buffer = wgpuDeviceCreateBuffer(Device, &desc);
if (buffer == nullptr)
{
LOG(Error, "Failed to create uniform buffer '{}' of size {} bytes", name, size);
return nullptr;
}
}
return New<GPUConstantBufferWebGPU>(this, size, buffer, name);
return New<GPUConstantBufferWebGPU>(this, size, name);
}
#endif

View File

@@ -12,6 +12,38 @@ class GPUContextWebGPU;
class GPUAdapterWebGPU;
class GPUSamplerWebGPU;
/// <summary>
/// Pool for uploading data to GPU buffers. It manages large buffers and suballocates for multiple small updates, minimizing the number of buffer creations and copies.
/// </summary>
class GPUDataUploaderWebGPU
{
    friend class GPUDeviceWebGPU;

private:
    /// <summary>
    /// Single page (GPU buffer) of the pool.
    /// </summary>
    struct Entry
    {
        // Page buffer object
        WGPUBuffer Buffer;
        // Page size (in bytes)
        uint32 Size;
        // Offset (in bytes) of the end of the last chunk handed out from this page
        uint32 ActiveOffset;
        // Frame index at which this page was last allocated from
        uint64 ActiveFrame;
        // Usage flags the page buffer was created with
        WGPUBufferUsage Usage;
    };

    // Current frame index (updated in DrawBegin)
    uint64 _frame = 0;
    // Device used to create pages (assigned by GPUDeviceWebGPU), null until then
    WGPUDevice _device = nullptr;
    // Pages owned by the pool
    Array<Entry> _entries;

public:
    /// <summary>
    /// Result of the suballocation: page buffer and the chunk offset within it.
    /// </summary>
    struct Allocation
    {
        WGPUBuffer Buffer = nullptr;
        uint32 Offset = 0;
    };

    /// <summary>
    /// Allocates a chunk of memory from the pool.
    /// </summary>
    /// <param name="size">Amount of bytes to allocate.</param>
    /// <param name="alignment">Alignment (in bytes) of the returned offset.</param>
    /// <param name="usage">Extra buffer usage flags required from the page (CopyDst is always added).</param>
    /// <returns>The allocation. Buffer is null if a new page failed to be created.</returns>
    Allocation Allocate(uint32 size, uint32 alignment = 16, WGPUBufferUsage usage = 0);

    /// <summary>
    /// Called at the beginning of the frame to release long-unused pages and update the current frame index.
    /// </summary>
    void DrawBegin();

    /// <summary>
    /// Releases all pages.
    /// </summary>
    void ReleaseGPU();
};
/// <summary>
/// Implementation of Graphics Device for Web GPU backend.
/// </summary>
@@ -30,6 +62,8 @@ public:
WGPUDevice Device = nullptr;
WGPUQueue Queue = nullptr;
GPUSamplerWebGPU* DefaultSamplers[6] = {};
GPUDataUploaderWebGPU DataUploader;
uint32 MinUniformBufferOffsetAlignment = 1;
public:
// [GPUDeviceDX]
@@ -46,6 +80,7 @@ public:
return Device;
}
bool Init() override;
void DrawBegin() override;
void Dispose() override;
void WaitForGPU() override;
bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) override;

View File

@@ -10,26 +10,10 @@
#include "Engine/GraphicsDevice/Vulkan/Types.h"
#include "Engine/Serialization/MemoryReadStream.h"
GPUConstantBufferWebGPU::GPUConstantBufferWebGPU(GPUDeviceWebGPU* device, uint32 size, WGPUBuffer buffer, const StringView& name) noexcept
GPUConstantBufferWebGPU::GPUConstantBufferWebGPU(GPUDeviceWebGPU* device, uint32 size, const StringView& name) noexcept
: GPUResourceWebGPU(device, name)
{
_size = _memoryUsage = size;
Buffer = buffer;
}
GPUConstantBufferWebGPU::~GPUConstantBufferWebGPU()
{
if (Buffer)
wgpuBufferRelease(Buffer);
}
void GPUConstantBufferWebGPU::OnReleaseGPU()
{
if (Buffer)
{
wgpuBufferRelease(Buffer);
Buffer = nullptr;
}
}
GPUShaderProgram* GPUShaderWebGPU::CreateGPUShaderProgram(ShaderStage type, const GPUShaderProgramInitializer& initializer, Span<byte> bytecode, MemoryReadStream& stream)

View File

@@ -14,15 +14,10 @@
class GPUConstantBufferWebGPU : public GPUResourceWebGPU<GPUConstantBuffer>
{
public:
GPUConstantBufferWebGPU(GPUDeviceWebGPU* device, uint32 size, WGPUBuffer buffer, const StringView& name) noexcept;
~GPUConstantBufferWebGPU();
GPUConstantBufferWebGPU(GPUDeviceWebGPU* device, uint32 size, const StringView& name) noexcept;
public:
WGPUBuffer Buffer;
public:
// [GPUResourceWebGPU]
void OnReleaseGPU() final override;
GPUDataUploaderWebGPU::Allocation Allocation;
};
/// <summary>