From c4f4983f6db77a063a040df56e8c4b8abcc99be7 Mon Sep 17 00:00:00 2001
From: Wojtek Figat
Date: Tue, 3 Mar 2026 09:47:59 +0100
Subject: [PATCH] Add WebGPU bind groups caching

Bind groups are now cached per pipeline state, keyed by layout, entries and
resource view versions, instead of being created on every bind group flush and
released on context flush. Cached bind groups unused for more than 50 frames
are released by a cleanup pass that runs at most once per 100 frames, on both
cache hits and cache misses.

---
 .../WebGPU/GPUContextWebGPU.cpp               |  37 +++---
 .../GraphicsDevice/WebGPU/GPUContextWebGPU.h  |   4 +-
 .../GraphicsDevice/WebGPU/GPUDeviceWebGPU.h   |   1 +
 .../WebGPU/GPUPipelineStateWebGPU.cpp         | 126 +++++++++++++++++-
 .../WebGPU/GPUPipelineStateWebGPU.h           |  29 ++++-
 .../WebGPU/GPUTextureWebGPU.cpp               |   1 +
 6 files changed, 170 insertions(+), 28 deletions(-)

diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp
index 394a8397f..568af4128 100644
--- a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp
@@ -14,6 +14,7 @@
 #include "Engine/Core/Log.h"
 #include "Engine/Core/Math/Viewport.h"
 #include "Engine/Core/Math/Rectangle.h"
+#include "Engine/Engine/Engine.h"
 #include "Engine/Profiler/ProfilerCPU.h"
 #include "Engine/Profiler/RenderStats.h"
 #include "Engine/Graphics/PixelFormatExtensions.h"
@@ -504,10 +505,6 @@ void GPUContextWebGPU::Flush()
         wgpuQueueSubmit(_device->Queue, 1, &commandBuffer);
         wgpuCommandBufferRelease(commandBuffer);
     }
-
-    for (auto e : _unusedBindGroups)
-        wgpuBindGroupRelease(e);
-    _unusedBindGroups.Clear();
 }
 
 void GPUContextWebGPU::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 size, uint32 offset)
@@ -962,20 +959,24 @@ void GPUContextWebGPU::FlushBindGroup()
     _bindGroupDirty = false;
 
     // Each shader stage (Vertex, Pixel) uses a separate bind group
-    WGPUBindGroupDescriptor bindGroupDesc = WGPU_BIND_GROUP_DESCRIPTOR_INIT;
+    GPUPipelineStateWebGPU::BindGroupKey key; // Reused for every group, only the used part gets cleared below
     for (int32 groupIndex = 0; groupIndex < GPUBindGroupsWebGPU::GraphicsMax; groupIndex++)
     {
         auto descriptors = _pipelineState->BindGroupDescriptors[groupIndex];
-        bindGroupDesc.layout = _pipelineState->BindGroupLayouts[groupIndex];
-        if (!descriptors || !bindGroupDesc.layout)
+        key.Layout = _pipelineState->BindGroupLayouts[groupIndex];
+        if (!descriptors || !key.Layout)
             continue;
 
         // Build descriptors for the bind group
         auto entriesCount = descriptors->DescriptorTypesCount;
         _dynamicOffsets.Clear();
-        _bindGroupEntries.Resize(entriesCount);
-        auto entriesPtr = _bindGroupEntries.Get();
+        static_assert(ARRAY_COUNT(key.Entries) == SpirvShaderDescriptorInfo::MaxDescriptors, "Invalid size of bind group entries array.");
+        static_assert(ARRAY_COUNT(key.Versions) == SpirvShaderDescriptorInfo::MaxDescriptors, "Invalid size of bind group versions array.");
+        key.EntriesCount = entriesCount;
+        auto entriesPtr = key.Entries;
+        auto versionsPtr = key.Versions;
         Platform::MemoryClear(entriesPtr, entriesCount * sizeof(WGPUBindGroupEntry));
+        Platform::MemoryClear(versionsPtr, ((entriesCount + 3) & ~0x3) * sizeof(uint8)); // Count rounded up to a multiple of 4
         for (int32 index = 0; index < entriesCount; index++)
         {
             auto& descriptor = descriptors->DescriptorTypes[index];
@@ -999,7 +1000,10 @@ void GPUContextWebGPU::FlushBindGroup()
                 auto view = _shaderResources[descriptor.Slot];
                 auto ptr = view ? (GPUResourceViewPtrWebGPU*)view->GetNativePtr() : nullptr;
                 if (ptr && ptr->TextureView)
+                {
                     entry.textureView = ptr->TextureView->View;
+                    versionsPtr[index] = ptr->Version; // Part of the cache key (bumped when the texture view gets re-created)
+                }
                 if (!entry.textureView)
                 {
                     // Fallback
@@ -1036,7 +1040,11 @@ void GPUContextWebGPU::FlushBindGroup()
                 GPUResourceView* view = _resourceTables[(int32)descriptor.BindingType][descriptor.Slot];
                 auto ptr = view ? (GPUResourceViewPtrWebGPU*)view->GetNativePtr() : nullptr;
                 if (ptr && ptr->BufferView)
+                {
                     entry.buffer = ptr->BufferView->Buffer;
+                    entry.size = ((GPUBufferWebGPU*)view->GetParent())->GetSize();
+                    versionsPtr[index] = ptr->Version;
+                }
                 if (!entry.buffer)
                     entry.buffer = _device->DefaultBuffer; // Fallback
                 break;
@@ -1068,13 +1076,11 @@ void GPUContextWebGPU::FlushBindGroup()
             }
         }
 
-        // Create a bind group
-        bindGroupDesc.entryCount = _bindGroupEntries.Count();
-        bindGroupDesc.entries = entriesPtr;
 #if BUILD_DEBUG
-        for (int32 i = 0; i < bindGroupDesc.entryCount; i++)
+        // Validate
+        for (int32 i = 0; i < entriesCount; i++)
         {
-            auto& e = bindGroupDesc.entries[i];
+            auto& e = entriesPtr[i];
             if ((e.buffer != nullptr) + (e.sampler != nullptr) + (e.textureView != nullptr) != 1)
             {
                 LOG(Error, "Invalid binding in group {} at index {} ({})", groupIndex, i, _pipelineState->GetName());
@@ -1084,10 +1090,9 @@ void GPUContextWebGPU::FlushBindGroup()
             }
         }
 #endif
-        WGPUBindGroup bindGroup = wgpuDeviceCreateBindGroup(_device->Device, &bindGroupDesc);
-        _unusedBindGroups.Add(bindGroup);
 
         // Bind group
+        WGPUBindGroup bindGroup = _pipelineState->GetBindGroup(key);
         wgpuRenderPassEncoderSetBindGroup(_renderPass, groupIndex, bindGroup, _dynamicOffsets.Count(), _dynamicOffsets.Get());
     }
 }
diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h
index 8d03d11da..00f515e47 100644
--- a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h
@@ -43,9 +43,7 @@ private:
     GPUDeviceWebGPU* _device;
     uint32 _minUniformBufferOffsetAlignment;
 
-    Array _bindGroupEntries;
     Array _dynamicOffsets;
-    Array _unusedBindGroups;
 
     // State tracking
     int32 _renderPassDirty : 1;
@@ -70,7 +68,7 @@ private:
     BufferBind _indexBuffer;
     BufferBind _vertexBuffers[GPU_MAX_VB_BINDED];
     GPUPipelineStateWebGPU* _pipelineState;
-    GPUPipelineStateWebGPU::Key _pipelineKey;
+    GPUPipelineStateWebGPU::PipelineKey _pipelineKey;
     Array> _pendingClears;
     GPUResourceView* _shaderResources[GPU_MAX_SR_BINDED];
     GPUResourceView* _storageResources[GPU_MAX_SR_BINDED];
diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.h b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.h
index d928bdcf4..d87957775 100644
--- a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.h
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.h
@@ -132,6 +132,7 @@ struct GPUResourceViewPtrWebGPU
 {
     class GPUBufferViewWebGPU* BufferView;
     class GPUTextureViewWebGPU* TextureView;
+    uint8 Version;
 };
 
 extern GPUDevice* CreateGPUDeviceWebGPU();
diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.cpp
index 7c791dd0e..571837c4b 100644
--- a/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.cpp
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.cpp
@@ -7,11 +7,14 @@
 #include "GPUVertexLayoutWebGPU.h"
 #include "RenderToolsWebGPU.h"
 #include "Engine/Core/Log.h"
+#include "Engine/Engine/Engine.h"
 #include "Engine/Profiler/ProfilerCPU.h"
 #include "Engine/Profiler/ProfilerMemory.h"
 #include "Engine/Graphics/PixelFormatExtensions.h"
+#include "Engine/Utilities/Crc.h"
 
 #define WEBGPU_LOG_PSO 0
+#define WEBGPU_LOG_BIND_GROUPS 0
 
 WGPUCompareFunction ToCompareFunction(ComparisonFunc value)
 {
@@ -139,6 +142,9 @@ void GPUPipelineStateWebGPU::OnReleaseGPU()
 {
     VS = nullptr;
     PS = nullptr;
+    for (auto& e : _bindGroups)
+        wgpuBindGroupRelease(e.Value);
+    _bindGroups.Clear();
     for (auto& e : _pipelines)
         wgpuRenderPipelineRelease(e.Value);
     _pipelines.Clear();
@@ -158,15 +164,29 @@ void GPUPipelineStateWebGPU::OnReleaseGPU()
     Platform::MemoryClear(&BindGroupDescriptors, sizeof(BindGroupDescriptors));
 }
 
-uint32 GetHash(const GPUPipelineStateWebGPU::Key& key)
+uint32 GetHash(const GPUPipelineStateWebGPU::PipelineKey& key)
 {
-    static_assert(sizeof(GPUPipelineStateWebGPU::Key) == sizeof(uint64) * 2, "Invalid PSO key size.");
+    static_assert(sizeof(GPUPipelineStateWebGPU::PipelineKey) == sizeof(uint64) * 2, "Invalid PSO key size.");
     uint32 hash = GetHash(key.Packed[0]);
     CombineHash(hash, GetHash(key.Packed[1]));
     return hash;
 }
 
-WGPURenderPipeline GPUPipelineStateWebGPU::GetPipeline(const Key& key, GPUResourceView* shaderResources[GPU_MAX_SR_BINDED])
+uint32 GetHash(const GPUPipelineStateWebGPU::BindGroupKey& key)
+{
+    return key.Hash; // Precomputed by GetBindGroup
+}
+
+bool GPUPipelineStateWebGPU::BindGroupKey::operator==(const BindGroupKey& other) const
+{
+    return Hash == other.Hash
+        && Layout == other.Layout
+        && EntriesCount == other.EntriesCount
+        && Platform::MemoryCompare(&Entries, &other.Entries, EntriesCount * sizeof(WGPUBindGroupEntry)) == 0
+        && Platform::MemoryCompare(&Versions, &other.Versions, EntriesCount * sizeof(uint8)) == 0;
+}
+
+WGPURenderPipeline GPUPipelineStateWebGPU::GetPipeline(const PipelineKey& key, GPUResourceView* shaderResources[GPU_MAX_SR_BINDED])
 {
     WGPURenderPipeline pipeline;
     if (_pipelines.TryGet(key, pipeline))
@@ -255,10 +275,108 @@ WGPURenderPipeline GPUPipelineStateWebGPU::GetPipeline(const Key& key, GPUResour
 
     // Cache it
     _pipelines.Add(key, pipeline);
-
     return pipeline;
 }
 
+WGPUBindGroup GPUPipelineStateWebGPU::GetBindGroup(BindGroupKey& key)
+{
+    // Generate a hash for the key (only the used EntriesCount entries/versions are hashed)
+    key.LastFrameUsed = Engine::FrameCount;
+    key.Hash = Crc::MemCrc32(&key.Entries, key.EntriesCount * sizeof(WGPUBindGroupEntry));
+    CombineHash(key.Hash, GetHash(key.EntriesCount));
+    CombineHash(key.Hash, GetHash(key.Layout));
+    CombineHash(key.Hash, Crc::MemCrc32(&key.Versions, key.EntriesCount * sizeof(uint8)));
+
+    // Lookup for existing bind group
+    WGPUBindGroup bindGroup = nullptr;
+    auto found = _bindGroups.Find(key);
+    if (found.IsNotEnd())
+    {
+        // Get cached bind group and update the last usage frame
+        bindGroup = found->Value;
+        found->Key.LastFrameUsed = key.LastFrameUsed;
+    }
+
+    // Periodically remove old bind groups (unused for some time).
+    // This runs on cache misses too, otherwise a pipeline that never reuses a bind group would never trim its cache.
+    if (key.LastFrameUsed - _lastFrameBindGroupsGC > 100)
+    {
+        _lastFrameBindGroupsGC = key.LastFrameUsed;
+        int32 freed = 0;
+        for (auto it = _bindGroups.Begin(); it.IsNotEnd(); ++it)
+        {
+            if (key.LastFrameUsed - it->Key.LastFrameUsed > 50)
+            {
+                freed++;
+                wgpuBindGroupRelease(it->Value);
+                _bindGroups.Remove(it);
+            }
+        }
+#if WEBGPU_LOG_BIND_GROUPS
+        if (freed > 0)
+        {
+            LOG(Info, "[WebGPU] Removed {} old entries from '{}'", freed, String(_debugName.Get(), _debugName.Count() - 1));
+        }
+#endif
+    }
+
+    // Cache hit (the entry was refreshed above so the cleanup could not have released it)
+    if (bindGroup)
+        return bindGroup;
+    PROFILE_CPU();
+    PROFILE_MEM(GraphicsCommands);
+#if GPU_ENABLE_RESOURCE_NAMING
+    ZoneText(_debugName.Get(), _debugName.Count() - 1);
+#endif
+#if WEBGPU_LOG_BIND_GROUPS
+    LOG(Info, "[WebGPU] GetBindGroup: '{}', hash: {}", String(_debugName.Get(), _debugName.Count() - 1), key.Hash);
+#endif
+
+    // Build description
+    WGPUBindGroupDescriptor desc = WGPU_BIND_GROUP_DESCRIPTOR_INIT;
+#if GPU_ENABLE_RESOURCE_NAMING
+    desc.label = PipelineDesc.label;
+#endif
+    desc.layout = key.Layout;
+    desc.entryCount = key.EntriesCount;
+    desc.entries = key.Entries;
+
+    // Create object
+    bindGroup = wgpuDeviceCreateBindGroup(_device->Device, &desc);
+    if (!bindGroup)
+    {
+#if GPU_ENABLE_RESOURCE_NAMING
+        LOG(Error, "wgpuDeviceCreateBindGroup failed for {}", String(_debugName.Get(), _debugName.Count() - 1));
+#endif
+        return nullptr;
+    }
+
+#if WEBGPU_LOG_BIND_GROUPS
+    // Debug detection of hash collisions
+    int32 collisions = 0, equalLayout = 0, equalEntries = 0, equalVersions = 0;
+    for (auto& e : _bindGroups)
+    {
+        auto& other = e.Key;
+        if (key.Hash == other.Hash)
+        {
+            collisions++;
+            if (key.Layout == other.Layout)
+                equalLayout++;
+            if (key.EntriesCount == other.EntriesCount && Platform::MemoryCompare(&key.Entries, &other.Entries, key.EntriesCount * sizeof(WGPUBindGroupEntry)) == 0)
+                equalEntries++;
+            if (key.EntriesCount == other.EntriesCount && Platform::MemoryCompare(&key.Versions, &other.Versions, key.EntriesCount * sizeof(uint8)) == 0)
+                equalVersions++;
+        }
+    }
+    if (collisions > 1)
+        LOG(Error, "> Hash collision! {}/{} (capacity: {}), equalLayout: {}, equalEntries: {}, equalVersions: {}", collisions, _bindGroups.Count(), _bindGroups.Capacity(), equalLayout, equalEntries, equalVersions);
+#endif
+
+    // Cache it
+    _bindGroups.Add(key, bindGroup);
+    return bindGroup;
+}
+
 void GPUPipelineStateWebGPU::InitLayout(GPUResourceView* shaderResources[GPU_MAX_SR_BINDED])
 {
 #if WEBGPU_LOG_PSO
diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.h b/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.h
index 261015797..b36ecccb2 100644
--- a/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.h
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.h
@@ -16,7 +16,7 @@ class GPUPipelineStateWebGPU : public GPUResourceWebGPU
 {
 public:
     // Batches render context state for the pipeline state. Used as a key for caching created pipelines.
-    struct Key
+    struct PipelineKey
     {
         union
         {
@@ -31,12 +31,25 @@ public:
             uint64 Packed[2];
         };
 
-        FORCE_INLINE bool operator==(const Key& other) const
+        FORCE_INLINE bool operator==(const PipelineKey& other) const
         {
             return Platform::MemoryCompare(&Packed, &other.Packed, sizeof(Packed)) == 0;
         }
     };
 
+    // Batches bind group description for the pipeline state. Used as a key for caching created bind groups.
+    struct BindGroupKey
+    {
+        uint32 Hash; // Hash of the layout and the used entries/versions, filled in by GetBindGroup
+        WGPUBindGroupLayout Layout; // Layout the bind group gets created with
+        mutable uint64 LastFrameUsed; // Frame of the last use (not hashed nor compared), mutable so it can be refreshed on keys stored in the cache
+        WGPUBindGroupEntry Entries[64]; // Only the first EntriesCount items are hashed, compared and passed to the bind group descriptor
+        uint8 EntriesCount; // Amount of used items in Entries and Versions
+        uint8 Versions[64]; // Versions of descriptors used to differentiate when texture residency gets changed
+        bool operator==(const BindGroupKey& other) const;
+    };
+
 private:
 #if GPU_ENABLE_RESOURCE_NAMING
     DebugName _debugName;
@@ -46,7 +59,9 @@ private:
     WGPUBlendState _blendState;
     WGPUColorTargetState _colorTargets[GPU_MAX_RT_BINDED];
     WGPUVertexBufferLayout _vertexBuffers[GPU_MAX_VB_BINDED];
-    Dictionary _pipelines;
+    Dictionary _pipelines;
+    Dictionary _bindGroups; // Bind groups created for this pipeline state, released in OnReleaseGPU or by the periodic cleanup in GetBindGroup
+    uint64 _lastFrameBindGroupsGC = 0; // Frame of the last cleanup of unused bind groups
 
 public:
     GPUShaderProgramVSWebGPU* VS = nullptr;
@@ -63,7 +78,10 @@ public:
 
 public:
     // Gets the pipeline for the given rendering state. Pipelines are cached and reused for the same key.
-    WGPURenderPipeline GetPipeline(const Key& key, GPUResourceView* shaderResources[GPU_MAX_SR_BINDED]);
+    WGPURenderPipeline GetPipeline(const PipelineKey& key, GPUResourceView* shaderResources[GPU_MAX_SR_BINDED]);
+
+    // Gets the bind group for the given key (unhashed). Bind groups are cached and reused for the same key.
+    WGPUBindGroup GetBindGroup(BindGroupKey& key); // Writes key.Hash and key.LastFrameUsed, returns nullptr if the bind group creation fails
 
 private:
     void InitLayout(GPUResourceView* shaderResources[GPU_MAX_SR_BINDED]);
@@ -78,6 +96,7 @@ protected:
     void OnReleaseGPU() final override;
 };
 
-uint32 GetHash(const GPUPipelineStateWebGPU::Key& key);
+uint32 GetHash(const GPUPipelineStateWebGPU::PipelineKey& key);
+uint32 GetHash(const GPUPipelineStateWebGPU::BindGroupKey& key);
 
 #endif
diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUTextureWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUTextureWebGPU.cpp
index 6b1dfe626..39c4d7aed 100644
--- a/Source/Engine/GraphicsDevice/WebGPU/GPUTextureWebGPU.cpp
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUTextureWebGPU.cpp
@@ -39,6 +39,7 @@ void SetWebGPUTextureViewSampler(GPUTextureView* view, uint32 samplerType)
 
 void GPUTextureViewWebGPU::Create(WGPUTexture texture, const WGPUTextureViewDescriptor& desc)
 {
+    Ptr.Version++; // Changes the bind group cache key of this view so bind groups created for the previous native view are not matched (uint8, wraps around after 256 re-creations)
     if (View)
         wgpuTextureViewRelease(View);
     Texture = texture;