Add GPU data upload allocator with shared page pool
This commit is contained in:
@@ -41,6 +41,7 @@ GPUContextWebGPU::GPUContextWebGPU(GPUDeviceWebGPU* device)
|
||||
, _device(device)
|
||||
{
|
||||
_vertexBufferNullLayout = WGPU_VERTEX_BUFFER_LAYOUT_INIT;
|
||||
_minUniformBufferOffsetAlignment = device->MinUniformBufferOffsetAlignment;
|
||||
}
|
||||
|
||||
GPUContextWebGPU::~GPUContextWebGPU()
|
||||
@@ -309,7 +310,12 @@ void GPUContextWebGPU::UpdateCB(GPUConstantBuffer* cb, const void* data)
|
||||
const uint32 size = cbWebGPU->GetSize();
|
||||
if (size != 0)
|
||||
{
|
||||
wgpuQueueWriteBuffer(_device->Queue, cbWebGPU->Buffer, 0, data, size);
|
||||
// Allocate a chunk of memory in a shared page allocator
|
||||
auto allocation = _device->DataUploader.Allocate(size, _minUniformBufferOffsetAlignment, WGPUBufferUsage_Uniform);
|
||||
cbWebGPU->Allocation = allocation;
|
||||
// TODO: consider holding CPU-side staging buffer and copying data to the GPU buffer in a single batch for all uniforms (before flushing the active command encoder)
|
||||
wgpuQueueWriteBuffer(_device->Queue, allocation.Buffer, allocation.Offset, data, size);
|
||||
_bindGroupDirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -461,9 +467,21 @@ void GPUContextWebGPU::Flush()
|
||||
// Uploads 'size' bytes from 'data' into 'buffer', starting at byte 'offset' of the destination.
// Dynamic buffers are updated through a staging chunk taken from the device's shared upload allocator,
// followed by a buffer-to-buffer copy recorded on Encoder. Other buffers are written directly via the queue.
void GPUContextWebGPU::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 size, uint32 offset)
{
    ASSERT(data);
    ASSERT(buffer && buffer->GetSize() >= size + offset);
    auto bufferWebGPU = (GPUBufferWebGPU*)buffer;
    if (bufferWebGPU->IsDynamic())
    {
        // Synchronous upload via shared buffer
        // TODO: test using map/unmap sequence
        // The staging chunk has to hold the whole payload ('size' bytes); the destination offset does not shrink it.
        // It is used as the source of a buffer-to-buffer copy, so it is requested with CopySrc usage.
        auto allocation = _device->DataUploader.Allocate(size, 16, WGPUBufferUsage_CopySrc);
        if (allocation.Buffer == nullptr)
            return; // Allocate returns a null buffer when page creation fails
        wgpuQueueWriteBuffer(_device->Queue, allocation.Buffer, allocation.Offset, data, size);
        wgpuCommandEncoderCopyBufferToBuffer(Encoder, allocation.Buffer, allocation.Offset, bufferWebGPU->Buffer, offset, size);
    }
    else
    {
        // Efficient upload via queue
        wgpuQueueWriteBuffer(_device->Queue, bufferWebGPU->Buffer, offset, data, size);
    }
}
|
||||
|
||||
void GPUContextWebGPU::CopyBuffer(GPUBuffer* dstBuffer, GPUBuffer* srcBuffer, uint32 size, uint32 dstOffset, uint32 srcOffset)
|
||||
|
||||
@@ -43,6 +43,7 @@ private:
|
||||
|
||||
GPUDeviceWebGPU* _device;
|
||||
WGPUVertexBufferLayout _vertexBufferNullLayout;
|
||||
uint32 _minUniformBufferOffsetAlignment;
|
||||
|
||||
// State tracking
|
||||
int32 _renderPassDirty : 1;
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
#include "Engine/Core/Collections/Sorting.h"
|
||||
#endif
|
||||
#include "Engine/Graphics/PixelFormatExtensions.h"
|
||||
#include "Engine/Engine/Engine.h"
|
||||
#include "Engine/Profiler/ProfilerMemory.h"
|
||||
#include <emscripten/emscripten.h>
|
||||
|
||||
@@ -44,6 +45,81 @@ GPUVertexLayoutWebGPU::GPUVertexLayoutWebGPU(GPUDeviceWebGPU* device, const Elem
|
||||
}
|
||||
}
|
||||
|
||||
// Suballocates 'size' bytes for a data upload and returns the page buffer together with the byte offset inside it.
//   size      - number of bytes to reserve.
//   alignment - alignment applied to the offset when suballocating from a page already used this frame.
//   usage     - extra buffer usage flags required from the page (CopyDst is always added to new pages).
//               When 0, only pages whose usage is exactly CopyDst are shared within the current frame.
// Lookup order: a page already used in the current frame with enough space left, then a page that has been idle
// for more than 3 frames (restarted from offset 0), and finally a newly created page.
// Returns { nullptr, 0 } if a new page could not be created.
GPUDataUploaderWebGPU::Allocation GPUDataUploaderWebGPU::Allocate(uint32 size, uint32 alignment, WGPUBufferUsage usage)
{
    // Find a free buffer from the current frame
    for (auto& e : _entries)
    {
        if (e.ActiveFrame != _frame)
            continue;
        const bool usageMatches = usage ? (e.Usage & usage) == usage : e.Usage == WGPUBufferUsage_CopyDst;
        if (!usageMatches)
            continue;
        const uint32 alignedOffset = Math::AlignUp(e.ActiveOffset, alignment);
        // Same test as 'alignedOffset + size <= e.Size' but it cannot wrap around in 32 bits
        if (alignedOffset <= e.Size && size <= e.Size - alignedOffset)
        {
            e.ActiveOffset = alignedOffset + size;
            return { e.Buffer, alignedOffset };
        }
    }

    // Find an unused buffer from the old frames
    for (auto& e : _entries)
    {
        // Written as an addition because '_frame - 3' wraps around for the first frames (unsigned counter),
        // which would allow recycling pages that are still in use
        if (e.ActiveFrame + 3 < _frame && (e.Usage & usage) == usage && size <= e.Size)
        {
            e.ActiveOffset = size;
            e.ActiveFrame = _frame;
            return { e.Buffer, 0 };
        }
    }

    // Allocate a new buffer
    const uint32 pageSize = Math::Max<uint32>(16 * 1024, Math::RoundUpToPowerOf2(size)); // Allocate larger pages for good suballocations
    WGPUBufferDescriptor desc = WGPU_BUFFER_DESCRIPTOR_INIT;
#if GPU_ENABLE_RESOURCE_NAMING
    if (usage & WGPUBufferUsage_Uniform)
        desc.label = WEBGPU_STR("Upload Uniforms");
    else
        desc.label = WEBGPU_STR("Upload Buffer");
#endif
    desc.size = pageSize;
    desc.usage = WGPUBufferUsage_CopyDst | usage;
    WGPUBuffer buffer = wgpuDeviceCreateBuffer(_device, &desc);
    if (buffer == nullptr)
    {
        LOG(Error, "Failed to create buffer of size {} bytes", pageSize);
        return { nullptr, 0 };
    }
    _entries.Insert(0, { buffer, pageSize, size, _frame, desc.usage });
    // Track the page size so it pairs with the PROFILE_MEM_DEC(GraphicsBuffers, e.Size) calls on release
    PROFILE_MEM_INC(GraphicsBuffers, pageSize);
    return { buffer, 0 };
}
|
||||
|
||||
void GPUDataUploaderWebGPU::DrawBegin()
|
||||
{
|
||||
// Free old buffers and recycle unused ones
|
||||
uint64 frame = Engine::FrameCount;
|
||||
for (int32 i = _entries.Count() - 1; i >= 0; i--)
|
||||
{
|
||||
auto& e = _entries[i];
|
||||
if (frame - e.ActiveFrame > 100)
|
||||
{
|
||||
wgpuBufferRelease(e.Buffer);
|
||||
PROFILE_MEM_DEC(GraphicsBuffers, e.Size);
|
||||
_entries.RemoveAt(i);
|
||||
}
|
||||
}
|
||||
_frame = frame;
|
||||
}
|
||||
|
||||
void GPUDataUploaderWebGPU::ReleaseGPU()
|
||||
{
|
||||
// Free data
|
||||
for (auto& e : _entries)
|
||||
{
|
||||
wgpuBufferRelease(e.Buffer);
|
||||
PROFILE_MEM_DEC(GraphicsBuffers, e.Size);
|
||||
}
|
||||
_entries.Clear();
|
||||
}
|
||||
|
||||
GPUDeviceWebGPU::GPUDeviceWebGPU(WGPUInstance instance, GPUAdapterWebGPU* adapter)
|
||||
: GPUDevice(RendererType::WebGPU, ShaderProfile::WebGPU)
|
||||
, Adapter(adapter)
|
||||
@@ -105,6 +181,7 @@ bool GPUDeviceWebGPU::Init()
|
||||
WGPULimits limits = WGPU_LIMITS_INIT;
|
||||
if (wgpuAdapterGetLimits(Adapter->Adapter, &limits) == WGPUStatus_Success)
|
||||
{
|
||||
MinUniformBufferOffsetAlignment = limits.minUniformBufferOffsetAlignment;
|
||||
Limits.HasDepthClip = features.Contains(WGPUFeatureName_DepthClipControl);
|
||||
Limits.HasReadOnlyDepth = true;
|
||||
Limits.MaximumTexture1DSize = Math::Min<int32>(GPU_MAX_TEXTURE_SIZE, limits.maxTextureDimension1D);
|
||||
@@ -400,6 +477,7 @@ bool GPUDeviceWebGPU::Init()
|
||||
#undef INIT_SAMPLER
|
||||
|
||||
// Setup commands processing
|
||||
DataUploader._device = Device;
|
||||
Queue = wgpuDeviceGetQueue(Device);
|
||||
_mainContext = New<GPUContextWebGPU>(this);
|
||||
|
||||
@@ -407,6 +485,13 @@ bool GPUDeviceWebGPU::Init()
|
||||
return GPUDevice::Init();
|
||||
}
|
||||
|
||||
void GPUDeviceWebGPU::DrawBegin()
|
||||
{
|
||||
GPUDevice::DrawBegin();
|
||||
|
||||
DataUploader.DrawBegin();
|
||||
}
|
||||
|
||||
GPUDeviceWebGPU::~GPUDeviceWebGPU()
|
||||
{
|
||||
// Ensure to be disposed
|
||||
@@ -479,6 +564,7 @@ void GPUDeviceWebGPU::Dispose()
|
||||
preDispose();
|
||||
|
||||
// Clear device resources
|
||||
DataUploader.ReleaseGPU();
|
||||
SAFE_DELETE_GPU_RESOURCES(DefaultSamplers);
|
||||
SAFE_DELETE(_mainContext);
|
||||
SAFE_DELETE(Adapter);
|
||||
@@ -556,23 +642,7 @@ GPUSwapChain* GPUDeviceWebGPU::CreateSwapChain(Window* window)
|
||||
// Creates a constant buffer object of the given size.
// No GPU buffer is created here: GPUContextWebGPU::UpdateCB takes a chunk from the shared upload allocator
// (DataUploader) on every update and stores it in the constant buffer's Allocation.
GPUConstantBuffer* GPUDeviceWebGPU::CreateConstantBuffer(uint32 size, const StringView& name)
{
    PROFILE_MEM(GraphicsShaders);
    return New<GPUConstantBufferWebGPU>(this, size, name);
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -12,6 +12,38 @@ class GPUContextWebGPU;
|
||||
class GPUAdapterWebGPU;
|
||||
class GPUSamplerWebGPU;
|
||||
|
||||
/// <summary>
|
||||
/// Pool for uploading data to GPU buffers. It manages large buffers and suballocates for multiple small updates, minimizing the number of buffer creations and copies.
|
||||
/// </summary>
|
||||
class GPUDataUploaderWebGPU
|
||||
{
|
||||
friend class GPUDeviceWebGPU;
|
||||
private:
|
||||
struct Entry
|
||||
{
|
||||
WGPUBuffer Buffer;
|
||||
uint32 Size;
|
||||
uint32 ActiveOffset;
|
||||
uint64 ActiveFrame;
|
||||
WGPUBufferUsage Usage;
|
||||
};
|
||||
|
||||
uint64 _frame = 0;
|
||||
WGPUDevice _device;
|
||||
Array<Entry> _entries;
|
||||
|
||||
public:
|
||||
struct Allocation
|
||||
{
|
||||
WGPUBuffer Buffer = nullptr;
|
||||
uint32 Offset = 0;
|
||||
};
|
||||
|
||||
Allocation Allocate(uint32 size, uint32 alignment = 16, WGPUBufferUsage usage = 0);
|
||||
void DrawBegin();
|
||||
void ReleaseGPU();
|
||||
};
|
||||
|
||||
/// <summary>
|
||||
/// Implementation of Graphics Device for Web GPU backend.
|
||||
/// </summary>
|
||||
@@ -30,6 +62,8 @@ public:
|
||||
WGPUDevice Device = nullptr;
|
||||
WGPUQueue Queue = nullptr;
|
||||
GPUSamplerWebGPU* DefaultSamplers[6] = {};
|
||||
GPUDataUploaderWebGPU DataUploader;
|
||||
uint32 MinUniformBufferOffsetAlignment = 1;
|
||||
|
||||
public:
|
||||
// [GPUDeviceDX]
|
||||
@@ -46,6 +80,7 @@ public:
|
||||
return Device;
|
||||
}
|
||||
bool Init() override;
|
||||
void DrawBegin() override;
|
||||
void Dispose() override;
|
||||
void WaitForGPU() override;
|
||||
bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) override;
|
||||
|
||||
@@ -10,26 +10,10 @@
|
||||
#include "Engine/GraphicsDevice/Vulkan/Types.h"
|
||||
#include "Engine/Serialization/MemoryReadStream.h"
|
||||
|
||||
GPUConstantBufferWebGPU::GPUConstantBufferWebGPU(GPUDeviceWebGPU* device, uint32 size, WGPUBuffer buffer, const StringView& name) noexcept
|
||||
GPUConstantBufferWebGPU::GPUConstantBufferWebGPU(GPUDeviceWebGPU* device, uint32 size, const StringView& name) noexcept
|
||||
: GPUResourceWebGPU(device, name)
|
||||
{
|
||||
_size = _memoryUsage = size;
|
||||
Buffer = buffer;
|
||||
}
|
||||
|
||||
GPUConstantBufferWebGPU::~GPUConstantBufferWebGPU()
|
||||
{
|
||||
if (Buffer)
|
||||
wgpuBufferRelease(Buffer);
|
||||
}
|
||||
|
||||
void GPUConstantBufferWebGPU::OnReleaseGPU()
|
||||
{
|
||||
if (Buffer)
|
||||
{
|
||||
wgpuBufferRelease(Buffer);
|
||||
Buffer = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
GPUShaderProgram* GPUShaderWebGPU::CreateGPUShaderProgram(ShaderStage type, const GPUShaderProgramInitializer& initializer, Span<byte> bytecode, MemoryReadStream& stream)
|
||||
|
||||
@@ -14,15 +14,10 @@
|
||||
class GPUConstantBufferWebGPU : public GPUResourceWebGPU<GPUConstantBuffer>
|
||||
{
|
||||
public:
|
||||
GPUConstantBufferWebGPU(GPUDeviceWebGPU* device, uint32 size, WGPUBuffer buffer, const StringView& name) noexcept;
|
||||
~GPUConstantBufferWebGPU();
|
||||
GPUConstantBufferWebGPU(GPUDeviceWebGPU* device, uint32 size, const StringView& name) noexcept;
|
||||
|
||||
public:
|
||||
WGPUBuffer Buffer;
|
||||
|
||||
public:
|
||||
// [GPUResourceWebGPU]
|
||||
void OnReleaseGPU() final override;
|
||||
GPUDataUploaderWebGPU::Allocation Allocation;
|
||||
};
|
||||
|
||||
/// <summary>
|
||||
|
||||
Reference in New Issue
Block a user