Implement bind groups in WebGPU to provide resources to shaders

This commit is contained in:
Wojtek Figat
2026-02-25 18:22:55 +01:00
parent 9d95bbaa8c
commit b535791c66
9 changed files with 444 additions and 125 deletions

View File

@@ -71,11 +71,13 @@ bool GPUBufferWebGPU::OnInit()
switch (_desc.Usage)
{
case GPUResourceUsage::Default:
if (!_desc.InitData)
bufferDesc.usage |= WGPUBufferUsage_CopyDst;
bufferDesc.usage |= WGPUBufferUsage_CopyDst;
break;
case GPUResourceUsage::Dynamic:
bufferDesc.usage |= WGPUBufferUsage_MapWrite;
if (bufferDesc.usage == 0) // WebGPU doesn't allow to map-write Index/Vertex/Storage buffers
bufferDesc.usage = WGPUBufferUsage_MapWrite;
else
bufferDesc.usage |= WGPUBufferUsage_CopyDst;
break;
case GPUResourceUsage::StagingUpload:
bufferDesc.usage |= WGPUBufferUsage_MapWrite | WGPUBufferUsage_CopySrc;
@@ -88,19 +90,24 @@ bool GPUBufferWebGPU::OnInit()
break;
}
bufferDesc.size = _desc.Size;
bufferDesc.mappedAtCreation = _desc.InitData != nullptr;
bufferDesc.mappedAtCreation = _desc.InitData != nullptr && (bufferDesc.usage & WGPUBufferUsage_MapWrite);
Buffer = wgpuDeviceCreateBuffer(_device->Device, &bufferDesc);
if (!Buffer)
return true;
_memoryUsage = _desc.Size;
Usage = bufferDesc.usage;
// Initialize with a data if provided
if (_desc.InitData)
if (bufferDesc.mappedAtCreation)
{
//wgpuBufferWriteMappedRange(Buffer, 0, _desc.InitData, _desc.Size);
Platform::MemoryCopy(wgpuBufferGetMappedRange(Buffer, 0, _desc.Size), _desc.InitData, _desc.Size);
wgpuBufferUnmap(Buffer);
}
else if (_desc.InitData)
{
wgpuQueueWriteBuffer(_device->Queue, Buffer, 0, _desc.InitData, _desc.Size);
}
// Create view
_view.Set(this, Buffer);

View File

@@ -52,6 +52,8 @@ public:
public:
// Handle to the WebGPU buffer object.
WGPUBuffer Buffer = nullptr;
// Usage flags of the buffer.
WGPUBufferUsage Usage = 0;
public:
// [GPUBuffer]

View File

@@ -11,6 +11,7 @@
#include "GPUSamplerWebGPU.h"
#include "GPUVertexLayoutWebGPU.h"
#include "RenderToolsWebGPU.h"
#include "Engine/Core/Log.h"
#include "Engine/Core/Math/Viewport.h"
#include "Engine/Core/Math/Rectangle.h"
#include "Engine/Profiler/ProfilerCPU.h"
@@ -42,6 +43,20 @@ GPUContextWebGPU::GPUContextWebGPU(GPUDeviceWebGPU* device)
{
_vertexBufferNullLayout = WGPU_VERTEX_BUFFER_LAYOUT_INIT;
_minUniformBufferOffsetAlignment = device->MinUniformBufferOffsetAlignment;
// Setup descriptor handles tables lookup cache
_resourceTables[(int32)SpirvShaderResourceBindingType::INVALID] = nullptr;
_resourceTables[(int32)SpirvShaderResourceBindingType::CB] = nullptr;
_resourceTables[(int32)SpirvShaderResourceBindingType::SAMPLER] = nullptr;
_resourceTables[(int32)SpirvShaderResourceBindingType::SRV] = _shaderResources;
_resourceTables[(int32)SpirvShaderResourceBindingType::UAV] = _storageResources;
#if ENABLE_ASSERTION
_resourceTableSizes[(int32)SpirvShaderResourceBindingType::INVALID] = 0;
_resourceTableSizes[(int32)SpirvShaderResourceBindingType::CB] = GPU_MAX_CB_BINDED;
_resourceTableSizes[(int32)SpirvShaderResourceBindingType::SAMPLER] = GPU_MAX_SAMPLER_BINDED;
_resourceTableSizes[(int32)SpirvShaderResourceBindingType::SRV] = GPU_MAX_SR_BINDED;
_resourceTableSizes[(int32)SpirvShaderResourceBindingType::UAV] = GPU_MAX_UA_BINDED;
#endif
}
GPUContextWebGPU::~GPUContextWebGPU()
@@ -311,7 +326,7 @@ void GPUContextWebGPU::UpdateCB(GPUConstantBuffer* cb, const void* data)
if (size != 0)
{
// Allocate a chunk of memory in a shared page allocator
auto allocation = _device->DataUploader.Allocate(size, _minUniformBufferOffsetAlignment, WGPUBufferUsage_Uniform);
auto allocation = _device->DataUploader.Allocate(size, WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst, _minUniformBufferOffsetAlignment);
cbWebGPU->Allocation = allocation;
// TODO: consider holding CPU-side staging buffer and copying data to the GPU buffer in a single batch for all uniforms (before flushing the active command encoder)
wgpuQueueWriteBuffer(_device->Queue, allocation.Buffer, allocation.Offset, data, size);
@@ -448,10 +463,7 @@ void GPUContextWebGPU::Flush()
// End existing pass (if any)
if (_renderPass)
{
wgpuRenderPassEncoderEnd(_renderPass);
wgpuRenderPassEncoderRelease(_renderPass);
}
EndRenderPass();
// End commands recording
WGPUCommandBufferDescriptor commandBufferDesc = WGPU_COMMAND_BUFFER_DESCRIPTOR_INIT;
@@ -462,6 +474,10 @@ void GPUContextWebGPU::Flush()
wgpuQueueSubmit(_device->Queue, 1, &commandBuffer);
wgpuCommandBufferRelease(commandBuffer);
}
for (auto e : _unusedBindGroups)
wgpuBindGroupRelease(e);
_unusedBindGroups.Clear();
}
void GPUContextWebGPU::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 size, uint32 offset)
@@ -469,11 +485,19 @@ void GPUContextWebGPU::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32
ASSERT(data);
ASSERT(buffer && buffer->GetSize() >= size + offset);
auto bufferWebGPU = (GPUBufferWebGPU*)buffer;
if (bufferWebGPU->IsDynamic())
if (bufferWebGPU->Usage & WGPUBufferUsage_MapWrite)
{
CRASH; // TODO: impl this (map if not mapped yet and memcpy)
}
else if (bufferWebGPU->IsDynamic())
{
// Cannot insert copy commands in encoder during render pass
if (_renderPass)
EndRenderPass();
// Synchronous upload via shared buffer
// TODO: test using map/unmap sequence
auto allocation = _device->DataUploader.Allocate(size - offset);
auto allocation = _device->DataUploader.Allocate(size - offset, WGPUBufferUsage_CopySrc | WGPUBufferUsage_CopyDst);
wgpuQueueWriteBuffer(_device->Queue, allocation.Buffer, allocation.Offset, data, size);
wgpuCommandEncoderCopyBufferToBuffer(Encoder, allocation.Buffer, allocation.Offset, bufferWebGPU->Buffer, offset, size);
}
@@ -486,6 +510,10 @@ void GPUContextWebGPU::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32
void GPUContextWebGPU::CopyBuffer(GPUBuffer* dstBuffer, GPUBuffer* srcBuffer, uint32 size, uint32 dstOffset, uint32 srcOffset)
{
// Cannot insert copy commands in encoder during render pass
if (_renderPass)
EndRenderPass();
ASSERT(dstBuffer && srcBuffer);
auto srcBufferWebGPU = (GPUBufferWebGPU*)srcBuffer;
auto dstBufferWebGPU = (GPUBufferWebGPU*)dstBuffer;
@@ -553,6 +581,10 @@ void GPUContextWebGPU::CopyCounter(GPUBuffer* dstBuffer, uint32 dstOffset, GPUBu
void GPUContextWebGPU::CopyResource(GPUResource* dstResource, GPUResource* srcResource)
{
// Cannot insert copy commands in encoder during render pass
if (_renderPass)
EndRenderPass();
ASSERT(dstResource && srcResource);
auto dstTexture = Cast<GPUTexture>(dstResource);
auto srcTexture = Cast<GPUTexture>(srcResource);
@@ -590,6 +622,10 @@ void GPUContextWebGPU::CopyResource(GPUResource* dstResource, GPUResource* srcRe
void GPUContextWebGPU::CopySubresource(GPUResource* dstResource, uint32 dstSubresource, GPUResource* srcResource, uint32 srcSubresource)
{
// Cannot insert copy commands in encoder during render pass
if (_renderPass)
EndRenderPass();
ASSERT(dstResource && srcResource);
auto dstTexture = Cast<GPUTexture>(dstResource);
auto srcTexture = Cast<GPUTexture>(srcResource);
@@ -640,11 +676,7 @@ void GPUContextWebGPU::ManualClear(const PendingClear& clear)
{
// End existing pass (if any)
if (_renderPass)
{
wgpuRenderPassEncoderEnd(_renderPass);
wgpuRenderPassEncoderRelease(_renderPass);
_renderPass = nullptr;
}
EndRenderPass();
// Clear with a render pass
WGPURenderPassColorAttachment colorAttachment;
@@ -699,101 +731,7 @@ void GPUContextWebGPU::OnDrawCall()
// Check if need to start a new render pass
if (_renderPassDirty)
{
_renderPassDirty = false;
// End existing pass (if any)
if (_renderPass)
{
wgpuRenderPassEncoderEnd(_renderPass);
wgpuRenderPassEncoderRelease(_renderPass);
}
// Start a new render pass
WGPURenderPassColorAttachment colorAttachments[GPU_MAX_RT_BINDED];
WGPURenderPassDepthStencilAttachment depthStencilAttachment = WGPU_RENDER_PASS_DEPTH_STENCIL_ATTACHMENT_INIT;
WGPURenderPassDescriptor renderPassDesc = WGPU_RENDER_PASS_DESCRIPTOR_INIT;
renderPassDesc.colorAttachmentCount = _renderTargetCount;
renderPassDesc.colorAttachments = colorAttachments;
PendingClear clear;
_pipelineKey.MultiSampleCount = 1;
_pipelineKey.RenderTargetCount = _renderTargetCount;
for (int32 i = 0; i < renderPassDesc.colorAttachmentCount; i++)
{
auto& colorAttachment = colorAttachments[i];
colorAttachment = WGPU_RENDER_PASS_COLOR_ATTACHMENT_INIT;
auto renderTarget = _renderTargets[i];
colorAttachment.view = renderTarget->View;
colorAttachment.depthSlice = renderTarget->DepthSlice;
colorAttachment.loadOp = WGPULoadOp_Load;
colorAttachment.storeOp = WGPUStoreOp_Store;
if (FindClear(_depthStencil, clear))
{
colorAttachment.loadOp = WGPULoadOp_Clear;
colorAttachment.clearValue = { clear.RGBA[0], clear.RGBA[1], clear.RGBA[2], clear.RGBA[3] };
}
_pipelineKey.MultiSampleCount = (int32)renderTarget->GetMSAA();
_pipelineKey.RenderTargetFormats[i] = renderTarget->Format;
}
if (_depthStencil)
{
renderPassDesc.depthStencilAttachment = &depthStencilAttachment;
depthStencilAttachment.view = _depthStencil->View;
depthStencilAttachment.depthLoadOp = WGPULoadOp_Load;
depthStencilAttachment.depthStoreOp = _depthStencil->ReadOnly ? WGPUStoreOp_Discard : WGPUStoreOp_Store;
depthStencilAttachment.depthReadOnly = _depthStencil->ReadOnly;
if (_depthStencil->HasStencil)
{
depthStencilAttachment.stencilLoadOp = WGPULoadOp_Load;
depthStencilAttachment.stencilStoreOp = _depthStencil->ReadOnly ? WGPUStoreOp_Discard : WGPUStoreOp_Store;
depthStencilAttachment.depthReadOnly = _depthStencil->ReadOnly;
}
else
{
depthStencilAttachment.stencilClearValue = 0;
depthStencilAttachment.stencilLoadOp = WGPULoadOp_Clear;
depthStencilAttachment.stencilStoreOp = WGPUStoreOp_Discard;
depthStencilAttachment.stencilReadOnly = true;
}
if (FindClear(_depthStencil, clear))
{
depthStencilAttachment.depthLoadOp = WGPULoadOp_Clear;
depthStencilAttachment.depthClearValue = clear.Depth;
if (_depthStencil->HasStencil)
{
depthStencilAttachment.stencilLoadOp = WGPULoadOp_Clear;
depthStencilAttachment.stencilClearValue = clear.Stencil;
}
}
_pipelineKey.DepthStencilFormat = _depthStencil->Format;
}
else
{
_pipelineKey.DepthStencilFormat = WGPUTextureFormat_Undefined;
}
_renderPass = wgpuCommandEncoderBeginRenderPass(Encoder, &renderPassDesc);
ASSERT(_renderPass);
// Discard texture clears (done manually or via render pass)
_pendingClears.Clear();
// Apply pending state
if (_stencilRef != 0)
wgpuRenderPassEncoderSetStencilReference(_renderPass, _stencilRef);
auto scissorRect = _scissorRect;
// TODO: skip calling this if scissorRect is default (0, 0, attachment width, attachment height)
wgpuRenderPassEncoderSetScissorRect(_renderPass, (uint32_t)scissorRect.GetX(), (uint32_t)scissorRect.GetY(), (uint32_t)scissorRect.GetWidth(), (uint32_t)scissorRect.GetHeight());
auto viewport = _viewport;
// TODO: skip calling this if viewport is default (0, 0, attachment width, attachment height, 0, 1)
wgpuRenderPassEncoderSetViewport(_renderPass, viewport.X, viewport.Y, viewport.Width, viewport.Height, viewport.MinDepth, viewport.MaxDepth);
// Auto-dirty pipeline when new render pass starts
if (_pipelineState)
_pipelineDirty = true;
_indexBufferDirty = true;
_vertexBufferDirty = true;
_bindGroupDirty = true;
if (_blendFactorSet)
_blendFactorDirty = true;
FlushRenderPass();
}
// Flush rendering states
@@ -803,6 +741,9 @@ void GPUContextWebGPU::OnDrawCall()
WGPURenderPipeline pipeline = _pipelineState ? _pipelineState->GetPipeline(_pipelineKey) : nullptr;
wgpuRenderPassEncoderSetPipeline(_renderPass, pipeline);
RENDER_STAT_PS_STATE_CHANGE();
// Invalidate bind groups (layout might change)
_bindGroupDirty = true;
}
if (_indexBufferDirty && _indexBuffer.Buffer)
{
@@ -826,10 +767,7 @@ void GPUContextWebGPU::OnDrawCall()
}
if (_bindGroupDirty)
{
_bindGroupDirty = false;
// TODO: bind _samplers
// TODO: bind _constantBuffers
// TODO: bind _shaderResources
FlushBindGroup();
}
}
@@ -838,4 +776,224 @@ void GPUContextWebGPU::OnDispatch(GPUShaderProgramCS* shader)
// TODO: add compute shaders support
}
// Finishes recording of the active render pass and releases its encoder.
// Precondition: _renderPass is non-null (callers check before calling).
void GPUContextWebGPU::EndRenderPass()
{
wgpuRenderPassEncoderEnd(_renderPass);
wgpuRenderPassEncoderRelease(_renderPass);
// Clear the handle so subsequent passes/copies know no pass is active
_renderPass = nullptr;
}
// Begins a new WebGPU render pass for the currently bound render targets and
// depth-stencil view. Ends any active pass first, consumes pending texture
// clears (folded into the pass load-ops), applies cached viewport/scissor/
// stencil-ref state, and marks pipeline/buffer/bind-group state dirty so the
// next draw re-binds everything on the fresh pass encoder.
void GPUContextWebGPU::FlushRenderPass()
{
    _renderPassDirty = false;

    // End existing pass (if any)
    if (_renderPass)
        EndRenderPass();

    // Start a new render pass
    WGPURenderPassColorAttachment colorAttachments[GPU_MAX_RT_BINDED];
    WGPURenderPassDepthStencilAttachment depthStencilAttachment = WGPU_RENDER_PASS_DEPTH_STENCIL_ATTACHMENT_INIT;
    WGPURenderPassDescriptor renderPassDesc = WGPU_RENDER_PASS_DESCRIPTOR_INIT;
    renderPassDesc.colorAttachmentCount = _renderTargetCount;
    renderPassDesc.colorAttachments = colorAttachments;
    PendingClear clear;
    _pipelineKey.MultiSampleCount = 1;
    _pipelineKey.RenderTargetCount = _renderTargetCount;
    for (int32 i = 0; i < renderPassDesc.colorAttachmentCount; i++)
    {
        auto& colorAttachment = colorAttachments[i];
        colorAttachment = WGPU_RENDER_PASS_COLOR_ATTACHMENT_INIT;
        auto renderTarget = _renderTargets[i];
        colorAttachment.view = renderTarget->View;
        colorAttachment.depthSlice = renderTarget->DepthSlice;
        colorAttachment.loadOp = WGPULoadOp_Load;
        colorAttachment.storeOp = WGPUStoreOp_Store;
        // BUGFIX: query the pending clear for this color target (was FindClear(_depthStencil, ...),
        // which dropped color clears and could apply a depth clear's values to the color attachment)
        if (FindClear(renderTarget, clear))
        {
            colorAttachment.loadOp = WGPULoadOp_Clear;
            colorAttachment.clearValue = { clear.RGBA[0], clear.RGBA[1], clear.RGBA[2], clear.RGBA[3] };
        }
        _pipelineKey.MultiSampleCount = (int32)renderTarget->GetMSAA();
        _pipelineKey.RenderTargetFormats[i] = renderTarget->Format;
    }
    if (_depthStencil)
    {
        renderPassDesc.depthStencilAttachment = &depthStencilAttachment;
        depthStencilAttachment.view = _depthStencil->View;
        depthStencilAttachment.depthLoadOp = WGPULoadOp_Load;
        depthStencilAttachment.depthStoreOp = _depthStencil->ReadOnly ? WGPUStoreOp_Discard : WGPUStoreOp_Store;
        depthStencilAttachment.depthReadOnly = _depthStencil->ReadOnly;
        if (_depthStencil->HasStencil)
        {
            depthStencilAttachment.stencilLoadOp = WGPULoadOp_Load;
            depthStencilAttachment.stencilStoreOp = _depthStencil->ReadOnly ? WGPUStoreOp_Discard : WGPUStoreOp_Store;
            // BUGFIX: set the stencil read-only flag (was re-assigning depthReadOnly, leaving
            // stencilReadOnly at its init value for views that have a stencil aspect)
            depthStencilAttachment.stencilReadOnly = _depthStencil->ReadOnly;
        }
        else
        {
            // View has no stencil aspect: mark it read-only and discard any stencil data
            depthStencilAttachment.stencilClearValue = 0;
            depthStencilAttachment.stencilLoadOp = WGPULoadOp_Clear;
            depthStencilAttachment.stencilStoreOp = WGPUStoreOp_Discard;
            depthStencilAttachment.stencilReadOnly = true;
        }
        if (FindClear(_depthStencil, clear))
        {
            depthStencilAttachment.depthLoadOp = WGPULoadOp_Clear;
            depthStencilAttachment.depthClearValue = clear.Depth;
            if (_depthStencil->HasStencil)
            {
                depthStencilAttachment.stencilLoadOp = WGPULoadOp_Clear;
                depthStencilAttachment.stencilClearValue = clear.Stencil;
            }
        }
        _pipelineKey.DepthStencilFormat = _depthStencil->Format;
    }
    else
    {
        _pipelineKey.DepthStencilFormat = WGPUTextureFormat_Undefined;
    }
    _renderPass = wgpuCommandEncoderBeginRenderPass(Encoder, &renderPassDesc);
    ASSERT(_renderPass);

    // Discard texture clears (done manually or via render pass)
    _pendingClears.Clear();

    // Apply pending state
    if (_stencilRef != 0)
        wgpuRenderPassEncoderSetStencilReference(_renderPass, _stencilRef);
    auto scissorRect = _scissorRect;
    // TODO: skip calling this if scissorRect is default (0, 0, attachment width, attachment height)
    wgpuRenderPassEncoderSetScissorRect(_renderPass, (uint32_t)scissorRect.GetX(), (uint32_t)scissorRect.GetY(), (uint32_t)scissorRect.GetWidth(), (uint32_t)scissorRect.GetHeight());
    auto viewport = _viewport;
    // TODO: skip calling this if viewport is default (0, 0, attachment width, attachment height, 0, 1)
    wgpuRenderPassEncoderSetViewport(_renderPass, viewport.X, viewport.Y, viewport.Width, viewport.Height, viewport.MinDepth, viewport.MaxDepth);

    // Auto-dirty pipeline when new render pass starts (encoder state doesn't persist across passes)
    if (_pipelineState)
        _pipelineDirty = true;
    _indexBufferDirty = true;
    _vertexBufferDirty = true;
    _bindGroupDirty = true;
    if (_blendFactorSet)
        _blendFactorDirty = true;
}
// Builds and binds one WGPUBindGroup per active shader stage (vertex, pixel)
// from the currently bound engine resources (samplers, SRVs/UAVs, constant
// buffers), matching the layouts created by the active pipeline state.
// Created groups are kept in _unusedBindGroups and released on Flush().
// Precondition: an active render pass and pipeline state (called from OnDrawCall).
void GPUContextWebGPU::FlushBindGroup()
{
_bindGroupDirty = false;
// Each shader stage (Vertex, Pixel) uses a separate bind group
WGPUBindGroupDescriptor bindGroupDesc = WGPU_BIND_GROUP_DESCRIPTOR_INIT;
for (int32 groupIndex = 0; groupIndex < GPUBindGroupsWebGPU::GraphicsMax; groupIndex++)
{
auto descriptors = _pipelineState->BindGroupDescriptors[groupIndex];
bindGroupDesc.layout = _pipelineState->BindGroupLayouts[groupIndex];
// Skip stages with no resources (eg. pipeline without pixel shader bindings)
if (!descriptors || !bindGroupDesc.layout)
continue;
// Build descriptors for the bind group
auto entriesCount = descriptors->DescriptorTypesCount;
_dynamicOffsets.Clear();
_bindGroupEntries.Resize(entriesCount);
auto entriesPtr = _bindGroupEntries.Get();
Platform::MemoryClear(entriesPtr, entriesCount * sizeof(WGPUBindGroupEntry));
for (int32 index = 0; index < entriesCount; index++)
{
auto& descriptor = descriptors->DescriptorTypes[index];
auto& entry = entriesPtr[index];
entry.binding = descriptor.Binding;
// Default to whole-resource binding; overridden below for uniform buffers
entry.size = WGPU_WHOLE_SIZE;
switch (descriptor.DescriptorType)
{
case VK_DESCRIPTOR_TYPE_SAMPLER:
{
GPUSamplerWebGPU* sampler = _samplers[descriptor.Slot];
if (!sampler)
sampler = _device->DefaultSamplers[0]; // Fallback
entry.sampler = sampler->Sampler;
break;
}
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
{
ASSERT_LOW_LAYER(descriptor.BindingType == SpirvShaderResourceBindingType::SRV);
auto view = _shaderResources[descriptor.Slot];
auto ptr = view ? (GPUResourceViewPtrWebGPU*)view->GetNativePtr() : nullptr;
if (ptr && ptr->TextureView)
entry.textureView = ptr->TextureView->View;
if (!entry.textureView)
{
// Fallback: bind the device's 1x1 default texture so the group stays valid
view = _device->DefaultTexture->View(0);
ptr = (GPUResourceViewPtrWebGPU*)view->GetNativePtr();
entry.textureView = ptr->TextureView->View;
}
break;
}
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
{
// Resolve via the SRV/UAV table selected by the binding type (set up in ctor)
ASSERT(descriptor.Slot < _resourceTableSizes[(int32)descriptor.BindingType]);
GPUResourceView* view = _resourceTables[(int32)descriptor.BindingType][descriptor.Slot];
auto ptr = view ? (GPUResourceViewPtrWebGPU*)view->GetNativePtr() : nullptr;
if (ptr && ptr->BufferView)
entry.buffer = ptr->BufferView->Buffer;
if (!entry.buffer)
{
// Fallback
LOG(Error, "Missing resource {} at slot {} of binding space {}", (int32)descriptor.ResourceType, descriptor.Slot, (int32)descriptor.BindingType);
CRASH; // TODO: add default buffer as fallback (_device->DefaultBuffer)
}
break;
}
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
{
// Dynamic uniform: offset supplied at SetBindGroup time via _dynamicOffsets
// NOTE(review): offsets are appended in entry order — assumes this matches the
// binding-number order WebGPU expects for dynamic offsets; verify against layout
GPUConstantBufferWebGPU* uniform = _constantBuffers[descriptor.Slot];
if (uniform && uniform->Allocation.Buffer)
{
entry.buffer = uniform->Allocation.Buffer;
entry.size = uniform->GetSize();
_dynamicOffsets.Add(uniform->Allocation.Offset);
}
else
CRASH; // TODO: add dummy buffer as fallback
break;
}
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
{
// Static uniform: bake the suballocation offset directly into the entry
GPUConstantBufferWebGPU* uniform = _constantBuffers[descriptor.Slot];
if (uniform && uniform->Allocation.Buffer)
{
entry.buffer = uniform->Allocation.Buffer;
entry.offset = uniform->Allocation.Offset;
entry.size = uniform->GetSize();
}
else
CRASH; // TODO: add dummy buffer as fallback
break;
}
default:
#if GPU_ENABLE_DIAGNOSTICS
LOG(Fatal, "Unknown descriptor type: {} used as {}", (uint32)descriptor.DescriptorType, (uint32)descriptor.BindingType);
#else
CRASH;
#endif
return;
}
}
// Create a bind group
bindGroupDesc.entryCount = _bindGroupEntries.Count();
bindGroupDesc.entries = entriesPtr;
WGPUBindGroup bindGroup = wgpuDeviceCreateBindGroup(_device->Device, &bindGroupDesc);
_unusedBindGroups.Add(bindGroup);
// TODO: cache and release them
// Bind group
wgpuRenderPassEncoderSetBindGroup(_renderPass, groupIndex, bindGroup, _dynamicOffsets.Count(), _dynamicOffsets.Get());
}
}
#endif

View File

@@ -44,6 +44,9 @@ private:
GPUDeviceWebGPU* _device;
WGPUVertexBufferLayout _vertexBufferNullLayout;
uint32 _minUniformBufferOffsetAlignment;
Array<WGPUBindGroupEntry> _bindGroupEntries;
Array<uint32> _dynamicOffsets;
Array<WGPUBindGroup> _unusedBindGroups;
// State tracking
int32 _renderPassDirty : 1;
@@ -86,6 +89,9 @@ private:
void ManualClear(const PendingClear& clear);
void OnDrawCall();
void OnDispatch(GPUShaderProgramCS* shader);
void EndRenderPass();
void FlushRenderPass();
void FlushBindGroup();
public:
// [GPUContext]

View File

@@ -45,13 +45,13 @@ GPUVertexLayoutWebGPU::GPUVertexLayoutWebGPU(GPUDeviceWebGPU* device, const Elem
}
}
GPUDataUploaderWebGPU::Allocation GPUDataUploaderWebGPU::Allocate(uint32 size, uint32 alignment, WGPUBufferUsage usage)
GPUDataUploaderWebGPU::Allocation GPUDataUploaderWebGPU::Allocate(uint32 size, WGPUBufferUsage usage, uint32 alignment)
{
// Find a free buffer from the current frame
for (auto& e : _entries)
{
uint32 alignedOffset = Math::AlignUp(e.ActiveOffset, alignment);
if (e.ActiveFrame == _frame && (usage ? (e.Usage & usage) == usage : e.Usage == WGPUBufferUsage_CopyDst) && alignedOffset + size <= e.Size)
if (e.ActiveFrame == _frame && e.Usage == usage && alignedOffset + size <= e.Size)
{
e.ActiveOffset = alignedOffset + size;
return { e.Buffer, alignedOffset };
@@ -61,7 +61,7 @@ GPUDataUploaderWebGPU::Allocation GPUDataUploaderWebGPU::Allocate(uint32 size, u
// Find an unused buffer from the old frames
for (auto& e : _entries)
{
if (e.ActiveFrame < _frame - 3 && (e.Usage & usage) == usage && size <= e.Size)
if (e.ActiveFrame < _frame - 3 && e.Usage == usage && size <= e.Size)
{
e.ActiveOffset = size;
e.ActiveFrame = _frame;
@@ -79,7 +79,7 @@ GPUDataUploaderWebGPU::Allocation GPUDataUploaderWebGPU::Allocate(uint32 size, u
desc.label = WEBGPU_STR("Upload Buffer");
#endif
desc.size = Math::Max<uint32>(16 * 1024, Math::RoundUpToPowerOf2(size)); // Allocate larger pages for good suballocations
desc.usage = WGPUBufferUsage_CopyDst | usage;
desc.usage = usage;
WGPUBuffer buffer = wgpuDeviceCreateBuffer(_device, &desc);
if (buffer == nullptr)
{
@@ -490,6 +490,14 @@ void GPUDeviceWebGPU::DrawBegin()
GPUDevice::DrawBegin();
DataUploader.DrawBegin();
// Create default texture
if (!DefaultTexture)
{
DefaultTexture = New<GPUTextureWebGPU>(this, TEXT("DefaultTexture"));
DefaultTexture->Init(GPUTextureDescription::New2D(1, 1, PixelFormat::R8G8B8A8_UNorm, GPUTextureFlags::ShaderResource));
DefaultTexture->SetResidentMipLevels(1);
}
}
GPUDeviceWebGPU::~GPUDeviceWebGPU()
@@ -565,6 +573,7 @@ void GPUDeviceWebGPU::Dispose()
// Clear device resources
DataUploader.ReleaseGPU();
SAFE_DELETE_GPU_RESOURCE(DefaultTexture);
SAFE_DELETE_GPU_RESOURCES(DefaultSamplers);
SAFE_DELETE(_mainContext);
SAFE_DELETE(Adapter);

View File

@@ -10,8 +10,26 @@
class GPUContextWebGPU;
class GPUAdapterWebGPU;
class GPUTextureWebGPU;
class GPUSamplerWebGPU;
// Bind group slot indices used per shader stage by the WebGPU backend.
// Graphics pipelines use one group per stage; compute reuses slot 0.
namespace GPUBindGroupsWebGPU
{
enum Stage
{
// Vertex shader stage
Vertex = 0,
// Pixel shader stage
Pixel = 1,
// Graphics pipeline stages count
GraphicsMax,
// Compute pipeline slot
Compute = 0,
// The maximum amount of slots for all stages
Max = GraphicsMax,
};
};
/// <summary>
/// Pool for uploading data to GPU buffers. It manages large buffers and suballocates for multiple small updates, minimizing the number of buffer creations and copies.
/// </summary>
@@ -39,7 +57,7 @@ public:
uint32 Offset = 0;
};
Allocation Allocate(uint32 size, uint32 alignment = 16, WGPUBufferUsage usage = 0);
Allocation Allocate(uint32 size, WGPUBufferUsage usage, uint32 alignment = 16);
void DrawBegin();
void ReleaseGPU();
};
@@ -62,6 +80,7 @@ public:
WGPUDevice Device = nullptr;
WGPUQueue Queue = nullptr;
GPUSamplerWebGPU* DefaultSamplers[6] = {};
GPUTextureWebGPU* DefaultTexture = nullptr;
GPUDataUploaderWebGPU DataUploader;
uint32 MinUniformBufferOffsetAlignment = 1;

View File

@@ -5,7 +5,6 @@
#include "GPUPipelineStateWebGPU.h"
#include "GPUVertexLayoutWebGPU.h"
#include "Engine/Core/Log.h"
#include "Engine/Core/Math/Color32.h"
#include "Engine/Profiler/ProfilerCPU.h"
#include "Engine/Profiler/ProfilerMemory.h"
@@ -138,6 +137,20 @@ void GPUPipelineStateWebGPU::OnReleaseGPU()
for (auto& e : _pipelines)
wgpuRenderPipelineRelease(e.Value);
_pipelines.Clear();
for (auto& e : BindGroupLayouts)
{
if (e)
{
wgpuBindGroupLayoutRelease(e);
e = nullptr;
}
}
if (PipelineDesc.layout)
{
wgpuPipelineLayoutRelease(PipelineDesc.layout);
PipelineDesc.layout = nullptr;
}
Platform::MemoryClear(&BindGroupDescriptors, sizeof(BindGroupDescriptors));
}
uint32 GetHash(const GPUPipelineStateWebGPU::Key& key)
@@ -260,7 +273,8 @@ bool GPUPipelineStateWebGPU::Init(const Description& desc)
writeMask |= WGPUColorWriteMask_Blue;
if (EnumHasAllFlags(desc.BlendMode.RenderTargetWriteMask, BlendingMode::ColorWrite::Alpha))
writeMask |= WGPUColorWriteMask_Alpha;
} for (auto& e : _colorTargets)
}
for (auto& e : _colorTargets)
{
e = WGPU_COLOR_TARGET_STATE_INIT;
if (desc.BlendMode.BlendEnable)
@@ -270,14 +284,116 @@ bool GPUPipelineStateWebGPU::Init(const Description& desc)
// Cache shaders
VS = (GPUShaderProgramVSWebGPU*)desc.VS;
BindGroupDescriptors[GPUBindGroupsWebGPU::Vertex] = &VS->DescriptorInfo;
PipelineDesc.vertex.module = VS->ShaderModule;
PS = (GPUShaderProgramPSWebGPU*)desc.PS;
if (PS)
{
BindGroupDescriptors[GPUBindGroupsWebGPU::Pixel] = &PS->DescriptorInfo;
_fragmentDesc.module = PS->ShaderModule;
}
// TODO: set resources binding into PipelineDesc.layout
// Count the biggest bind group entries (for all shaders) to allocate reused memory
int32 maxEntriesCount = 0;
for (int32 groupIndex = 0; groupIndex < ARRAY_COUNT(BindGroupDescriptors); groupIndex++)
{
auto descriptors = BindGroupDescriptors[groupIndex];
if (descriptors && maxEntriesCount < descriptors->DescriptorTypesCount)
maxEntriesCount = (int32)descriptors->DescriptorTypesCount;
}
Array<WGPUBindGroupLayoutEntry, InlinedAllocation<8>> entries;
entries.Resize(maxEntriesCount);
// Setup bind groups
WGPUBindGroupLayoutEntry* entriesPtr = entries.Get();
for (int32 groupIndex = 0; groupIndex < ARRAY_COUNT(BindGroupDescriptors); groupIndex++)
{
auto descriptors = BindGroupDescriptors[groupIndex];
if (!descriptors || descriptors->DescriptorTypesCount == 0)
continue;
int32 entriesCount = descriptors->DescriptorTypesCount;
Platform::MemoryClear(entries.Get(), sizeof(WGPUBindGroupLayoutEntry) * entriesCount);
auto visibility = groupIndex == 0 ? WGPUShaderStage_Vertex : WGPUShaderStage_Fragment;
for (int32 index = 0; index < entriesCount; index++)
{
auto& descriptor = descriptors->DescriptorTypes[index];
auto& entry = entriesPtr[index];
entry.binding = descriptor.Binding;
entry.bindingArraySize = descriptor.Count;
entry.visibility = visibility;
switch (descriptor.DescriptorType)
{
case VK_DESCRIPTOR_TYPE_SAMPLER:
entry.sampler.type = WGPUSamplerBindingType_Undefined;
break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
entry.texture.sampleType = WGPUTextureSampleType_Undefined;
switch (descriptor.ResourceType)
{
case SpirvShaderResourceType::Texture1D:
entry.texture.viewDimension = WGPUTextureViewDimension_1D;
break;
case SpirvShaderResourceType::Texture2D:
entry.texture.viewDimension = WGPUTextureViewDimension_2D;
break;
case SpirvShaderResourceType::Texture3D:
entry.texture.viewDimension = WGPUTextureViewDimension_3D;
break;
case SpirvShaderResourceType::TextureCube:
entry.texture.viewDimension = WGPUTextureViewDimension_Cube;
break;
case SpirvShaderResourceType::Texture1DArray:
CRASH; // Not supported TODO: add error at compile time (in ShaderCompilerWebGPU::Write)
break;
case SpirvShaderResourceType::Texture2DArray:
entry.texture.viewDimension = WGPUTextureViewDimension_2DArray;
break;
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
entry.buffer.hasDynamicOffset = true;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
if (descriptor.BindingType == SpirvShaderResourceBindingType::SRV)
entry.buffer.type = WGPUBufferBindingType_ReadOnlyStorage;
else
entry.buffer.type = WGPUBufferBindingType_Storage;
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
entry.buffer.hasDynamicOffset = true;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
entry.buffer.type = WGPUBufferBindingType_Uniform;
break;
default:
#if GPU_ENABLE_DIAGNOSTICS
LOG(Fatal, "Unknown descriptor type: {} used as {} in '{}'", (uint32)descriptor.DescriptorType, (uint32)descriptor.BindingType, String(_debugName.Get(), _debugName.Count() - 1));
#else
CRASH;
#endif
return true;
}
}
// Create a bind group layout
WGPUBindGroupLayoutDescriptor bindGroupLayoutDesc = WGPU_BIND_GROUP_LAYOUT_DESCRIPTOR_INIT;
bindGroupLayoutDesc.entryCount = entriesCount;
bindGroupLayoutDesc.entries = entriesPtr;
BindGroupLayouts[groupIndex] = wgpuDeviceCreateBindGroupLayout(_device->Device, &bindGroupLayoutDesc);
}
// Create the pipeline layout
WGPUPipelineLayoutDescriptor layoutDesc = WGPU_PIPELINE_LAYOUT_DESCRIPTOR_INIT;
#if GPU_ENABLE_RESOURCE_NAMING
layoutDesc.label = PipelineDesc.label;
#endif
layoutDesc.bindGroupLayoutCount = GPUBindGroupsWebGPU::GraphicsMax;
layoutDesc.bindGroupLayouts = BindGroupLayouts;
PipelineDesc.layout = wgpuDeviceCreatePipelineLayout(_device->Device, &layoutDesc);
if (!PipelineDesc.layout)
{
LOG(Error, "wgpuDeviceCreatePipelineLayout failed");
return true;
}
_memoryUsage = 1;
return GPUPipelineState::Init(desc);

View File

@@ -53,6 +53,8 @@ public:
GPUShaderProgramVSWebGPU* VS = nullptr;
GPUShaderProgramPSWebGPU* PS = nullptr;
WGPURenderPipelineDescriptor PipelineDesc;
WGPUBindGroupLayout BindGroupLayouts[GPUBindGroupsWebGPU::GraphicsMax] = {};
SpirvShaderDescriptorInfo* BindGroupDescriptors[GPUBindGroupsWebGPU::GraphicsMax] = {};
public:
GPUPipelineStateWebGPU(GPUDeviceWebGPU* device)

View File

@@ -24,7 +24,7 @@ WGPUCompareFunction ToCompareFunction(GPUSamplerCompareFunction value)
switch (value)
{
case GPUSamplerCompareFunction::Never:
return WGPUCompareFunction_Never;
return WGPUCompareFunction_Undefined; // Disabled comparision
case GPUSamplerCompareFunction::Less:
return WGPUCompareFunction_Less;
default: