Add Compute shaders support to WebGPU

2026-03-04 21:27:42 +01:00
parent 5fb9cf3be1
commit 377d5c00aa
20 changed files with 657 additions and 449 deletions
--- a/Content/Editor/MaterialTemplates/GPUParticles.shader
+++ b/Content/Editor/MaterialTemplates/GPUParticles.shader
@@ -67,7 +67,7 @@ float Rand(inout uint seed)
 float3 ReprojectPosition(float2 uv, float rawDepth)
 {
 	uv = uv * float2(2.0, -2.0) + float2(-1.0, 1.0);
-	float4 pos = mul(float4(uv.x, uv.y, rawDepth, 1.0f), InvViewProjectionMatrix);
+	float4 pos = PROJECT_POINT(float4(uv.x, uv.y, rawDepth, 1.0f), InvViewProjectionMatrix);
 	return pos.xyz / pos.w;
 }

@@ -158,7 +158,7 @@ void SpawnParticle(Context context)
@4}

 // Main entry point for the particles simulation and spawning
-META_CS(true, FEATURE_LEVEL_SM5)
+META_CS(true, AUTO)
 [numthreads(THREAD_GROUP_SIZE, 1, 1)]
 void CS_Main(uint3 dispatchThreadId : SV_DispatchThreadID)
 {
--- a/Content/Shaders/BitonicSort.flax
+++ b/Content/Shaders/BitonicSort.flax
--- a/Content/Shaders/GPUParticlesSorting.flax
+++ b/Content/Shaders/GPUParticlesSorting.flax
--- a/Content/Shaders/Histogram.flax
+++ b/Content/Shaders/Histogram.flax
--- a/Source/Engine/Graphics/RenderTools.cpp
+++ b/Source/Engine/Graphics/RenderTools.cpp
@@ -274,7 +274,7 @@ FeatureLevel RenderTools::GetFeatureLevel(ShaderProfile profile)
    case ShaderProfile::GLSL_410:
    case ShaderProfile::Unknown:
    case ShaderProfile::WebGPU:
-        return FeatureLevel::ES2;
+        return FeatureLevel::ES3_1;
    default:
        return FeatureLevel::ES2;
    }
@@ -293,6 +293,8 @@ ShaderProfileFeatures RenderTools::GetShaderProfileFeatures(ShaderProfile profil
        return ShaderProfileFeatures::ComputeShaders | ShaderProfileFeatures::GeometryShaders;
    case ShaderProfile::DirectX_SM4:
        return ShaderProfileFeatures::GeometryShaders;
+    case ShaderProfile::WebGPU:
+        return ShaderProfileFeatures::ComputeShaders;
    default:
        return ShaderProfileFeatures::None;
    }
--- a/Source/Engine/GraphicsDevice/Vulkan/GPUPipelineStateVulkan.cpp
+++ b/Source/Engine/GraphicsDevice/Vulkan/GPUPipelineStateVulkan.cpp
@@ -10,6 +10,7 @@
 #include "Engine/Core/Log.h"
 #include "Engine/Core/Types/Pair.h"
 #include "Engine/Profiler/ProfilerCPU.h"
+#include "Engine/Profiler/ProfilerMemory.h"
 #include "Engine/Graphics/PixelFormatExtensions.h"

 static VkStencilOp ToVulkanStencilOp(const StencilOperation value)
@@ -91,6 +92,7 @@ ComputePipelineStateVulkan* GPUShaderProgramCSVulkan::GetOrCreateState()
    if (_pipelineState)
        return _pipelineState;
    PROFILE_CPU();
+    PROFILE_MEM(GraphicsShaders);
    ZoneText(*_name, _name.Length());

    // Create pipeline layout
@@ -224,6 +226,7 @@ VkPipeline GPUPipelineStateVulkan::GetState(RenderPassVulkan* renderPass, GPUVer
        return pipeline;
    }
    PROFILE_CPU();
+    PROFILE_MEM(GraphicsShaders);
 #if !BUILD_RELEASE
    DebugName name;
    GetDebugName(name);
--- a/Source/Engine/GraphicsDevice/WebGPU/GPUBufferWebGPU.cpp
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUBufferWebGPU.cpp
@@ -72,6 +72,8 @@ bool GPUBufferWebGPU::OnInit()
    {
    case GPUResourceUsage::Default:
        bufferDesc.usage |= WGPUBufferUsage_CopyDst;
+        if (IsUnorderedAccess())
+            bufferDesc.usage |= WGPUBufferUsage_CopySrc; // eg. GPU particles copy particle counter between buffers
        break;
    case GPUResourceUsage::Dynamic:
        if (bufferDesc.usage == 0) // WebGPU doesn't allow to map-write Index/Vertex/Storage buffers
--- a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp
@@ -368,8 +368,9 @@ void GPUContextWebGPU::UpdateCB(GPUConstantBuffer* cb, const void* data)

 void GPUContextWebGPU::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCountX, uint32 threadGroupCountY, uint32 threadGroupCountZ)
 {
-    OnDispatch(shader);
-    MISSING_CODE("GPUContextWebGPU::Dispatch");
+    auto computePass = OnDispatch(shader); // Begins a new compute pass with the shader pipeline and bind group already set
+    wgpuComputePassEncoderDispatchWorkgroups(computePass, threadGroupCountX, threadGroupCountY, threadGroupCountZ);
+    EndComputePass(computePass); // Ends the pass and releases the encoder (one compute pass per dispatch)
    RENDER_STAT_DISPATCH_CALL();
 }

@@ -377,8 +378,9 @@ void GPUContextWebGPU::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* b
 {
    ASSERT(bufferForArgs && EnumHasAnyFlags(bufferForArgs->GetFlags(), GPUBufferFlags::Argument));
    auto bufferForArgsWebGPU = (GPUBufferWebGPU*)bufferForArgs;
-    OnDispatch(shader);
-    MISSING_CODE("GPUContextWebGPU::Dispatch");
+    auto computePass = OnDispatch(shader);
+    wgpuComputePassEncoderDispatchWorkgroupsIndirect(computePass, bufferForArgsWebGPU->Buffer, offsetForArgs);
+    EndComputePass(computePass);
    RENDER_STAT_DISPATCH_CALL();
 }

@@ -865,7 +867,7 @@ void GPUContextWebGPU::OnDrawCall()
    if (_pipelineDirty)
    {
        _pipelineDirty = false;
-        WGPURenderPipeline pipeline = _pipelineState ? _pipelineState->GetPipeline(_pipelineKey, _shaderResources) : nullptr;
+        WGPURenderPipeline pipeline = _pipelineState ? _pipelineState->GetPipeline(_pipelineKey, { _shaderResources }) : nullptr;
        wgpuRenderPassEncoderSetPipeline(_renderPass, pipeline);
        RENDER_STAT_PS_STATE_CHANGE();

@@ -898,9 +900,38 @@ void GPUContextWebGPU::OnDrawCall()
    }
 }

-void GPUContextWebGPU::OnDispatch(GPUShaderProgramCS* shader)
+// Prepares a compute dispatch: ends the active render pass (if any), flushes pending state, begins a new compute pass and binds the shader pipeline with its bind group (group 0).
+// Returns the compute pass encoder that the caller records the dispatch into and then hands over to EndComputePass.
+WGPUComputePassEncoder GPUContextWebGPU::OnDispatch(GPUShaderProgramCS* shader)
 {
-    // TODO: add compute shaders support
+    // End existing render pass (if any)
+    if (_renderPass)
+        EndRenderPass();
+
+    // Flush pending clears
+    FlushState();
+
+    // Start a new compute pass
+    WGPUComputePassDescriptor computePassDesc = WGPU_COMPUTE_PASS_DESCRIPTOR_INIT;
+    FlushTimestamps(1);
+    if (_pendingTimestampWrites.HasItems())
+        computePassDesc.timestampWrites = &_pendingTimestampWrites.Last();
+    auto computePass = wgpuCommandEncoderBeginComputePass(Encoder, &computePassDesc);
+    _pendingTimestampWrites.Clear(); // Cleared only after the pass has been created because the descriptor points into this array
+    ASSERT(computePass);
+
+    // Set pipeline (this also outputs the bind group layout into the key)
+    GPUPipelineStateWebGPU::BindGroupKey key;
+    auto shaderWebGPU = (GPUShaderProgramCSWebGPU*)shader;
+    WGPUComputePipeline pipeline = shaderWebGPU->GetPipeline(_device->Device, { _shaderResources }, key.Layout);
+    wgpuComputePassEncoderSetPipeline(computePass, pipeline);
+
+    // Set bind group
+    uint32 dynamicOffsets[DynamicOffsetsMax];
+    uint32 dynamicOffsetsCount = 0;
+    BuildBindGroup(0, shaderWebGPU->DescriptorInfo, key, dynamicOffsets, dynamicOffsetsCount);
+    WGPUBindGroup bindGroup = shaderWebGPU->GetBindGroup(_device->Device, key);
+    wgpuComputePassEncoderSetBindGroup(computePass, 0, bindGroup, dynamicOffsetsCount, dynamicOffsets);
+
+    return computePass;
 }

 void GPUContextWebGPU::EndRenderPass()
@@ -910,6 +941,13 @@ void GPUContextWebGPU::EndRenderPass()
    _renderPass = nullptr;
 }

+// Ends recording of the given compute pass and releases the encoder handle (the caller must not use the handle afterwards).
+void GPUContextWebGPU::EndComputePass(WGPUComputePassEncoder computePass)
+{
+    wgpuComputePassEncoderEnd(computePass);
+    wgpuComputePassEncoderRelease(computePass);
+}
+
 void GPUContextWebGPU::FlushRenderPass()
 {
    _renderPassDirty = false;
@@ -1033,138 +1071,17 @@ void GPUContextWebGPU::FlushBindGroup()

    // Each shader stage (Vertex, Pixel) uses a separate bind group
    GPUPipelineStateWebGPU::BindGroupKey key;
-    for (int32 groupIndex = 0; groupIndex < GPUBindGroupsWebGPU::GraphicsMax; groupIndex++)
+    uint32 dynamicOffsets[DynamicOffsetsMax];
+    for (uint32 groupIndex = 0; groupIndex < GPUBindGroupsWebGPU::GraphicsMax; groupIndex++)
    {
        auto descriptors = _pipelineState->BindGroupDescriptors[groupIndex];
        key.Layout = _pipelineState->BindGroupLayouts[groupIndex];
        if (!descriptors || !key.Layout)
            continue;

-        // Build descriptors for the bind group
-        auto entriesCount = descriptors->DescriptorTypesCount;
-        uint32 dynamicOffsets[4];
+        // Build descriptors
        uint32 dynamicOffsetsCount = 0;
-        static_assert(ARRAY_COUNT(key.Entries) == SpirvShaderDescriptorInfo::MaxDescriptors, "Invalid size of bind group entries array.");
-        static_assert(ARRAY_COUNT(key.Versions) == SpirvShaderDescriptorInfo::MaxDescriptors, "Invalid size of bind group versions array.");
-        key.EntriesCount = entriesCount;
-        auto entriesPtr = key.Entries;
-        auto versionsPtr = key.Versions;
-        Platform::MemoryClear(entriesPtr, entriesCount * sizeof(WGPUBindGroupEntry));
-        Platform::MemoryClear(versionsPtr, ((entriesCount + 3) & ~0x3) * sizeof(uint8));
-        for (int32 index = 0; index < entriesCount; index++)
-        {
-            auto& descriptor = descriptors->DescriptorTypes[index];
-            auto& entry = entriesPtr[index];
-            entry.binding = descriptor.Binding;
-            entry.size = WGPU_WHOLE_SIZE;
-            switch (descriptor.DescriptorType)
-            {
-            case VK_DESCRIPTOR_TYPE_SAMPLER:
-            {
-                GPUSamplerWebGPU* sampler = _samplers[descriptor.Slot];
-                if (!sampler)
-                    sampler = _device->DefaultSamplers[0]; // Fallback
-                entry.sampler = sampler->Sampler;
-                break;
-            }
-            case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
-            case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
-            {
-                ASSERT_LOW_LAYER(descriptor.BindingType == SpirvShaderResourceBindingType::SRV);
-                auto view = _shaderResources[descriptor.Slot];
-                auto ptr = view ? (GPUResourceViewPtrWebGPU*)view->GetNativePtr() : nullptr;
-                if (ptr && ptr->TextureView)
-                {
-                    entry.textureView = ptr->TextureView->View;
-                    versionsPtr[index] = ptr->Version;
-                }
-                if (!entry.textureView)
-                {
-                    // Fallback
-                    auto defaultTexture = _device->DefaultTexture[(int32)descriptor.ResourceType];
-                    if (!defaultTexture)
-                    {
-                        LOG(Error, "Missing default resource {} at slot {} of binding space {}", (int32)descriptor.ResourceType, descriptor.Slot, (int32)descriptor.BindingType);
-                        CRASH;
-                    }
-                    switch (descriptor.ResourceType)
-                    {
-                    case SpirvShaderResourceType::Texture3D:
-                        view = defaultTexture->ViewVolume();
-                        break;
-                    case SpirvShaderResourceType::Texture1DArray:
-                    case SpirvShaderResourceType::Texture2DArray:
-                        view = defaultTexture->ViewArray();
-                        break;
-                    default:
-                        view = defaultTexture->View(0);
-                        break;
-                    }
-                    ptr = (GPUResourceViewPtrWebGPU*)view->GetNativePtr();
-                    entry.textureView = ptr->TextureView->View;
-                }
-                break;
-            }
-            case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-            case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-            case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
-            case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
-            {
-                ASSERT(descriptor.Slot < _resourceTableSizes[(int32)descriptor.BindingType]);
-                GPUResourceView* view = _resourceTables[(int32)descriptor.BindingType][descriptor.Slot];
-                auto ptr = view ? (GPUResourceViewPtrWebGPU*)view->GetNativePtr() : nullptr;
-                if (ptr && ptr->BufferView)
-                {
-                    entry.buffer = ptr->BufferView->Buffer;
-                    entry.size = ((GPUBufferWebGPU*)view->GetParent())->GetSize();
-                    versionsPtr[index] = (uint64)ptr->Version;
-                }
-                if (!entry.buffer)
-                    entry.buffer = _device->DefaultBuffer; // Fallback
-                break;
-            }
-            case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
-            case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
-            {
-                GPUConstantBufferWebGPU* uniform = _constantBuffers[descriptor.Slot];
-                if (uniform && uniform->Allocation.Buffer)
-                {
-                    entry.buffer = uniform->Allocation.Buffer;
-                    entry.size = uniform->AllocationSize;
-                    if (descriptor.DescriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
-                        entry.offset = uniform->Allocation.Offset;
-                    else
-                        dynamicOffsets[dynamicOffsetsCount++] = uniform->Allocation.Offset;
-                }
-                else
-                    LOG(Fatal, "Missing constant buffer at slot {}", descriptor.Slot);
-                break;
-            }
-            default:
-#if GPU_ENABLE_DIAGNOSTICS
-                LOG(Fatal, "Unknown descriptor type: {} used as {}", (uint32)descriptor.DescriptorType, (uint32)descriptor.BindingType);
-#else
-                CRASH;
-#endif
-                return;
-            }
-        }
-
-#if BUILD_DEBUG
-        // Validate
-        for (int32 i = 0; i < entriesCount; i++)
-        {
-            auto& e = entriesPtr[i];
-            if ((e.buffer != nullptr) + (e.sampler != nullptr) + (e.textureView != nullptr) != 1)
-            {
-                LOG(Error, "Invalid binding in group {} at index {} ({})", groupIndex, i, _pipelineState->GetName());
-                LOG(Error, " > sampler: {}", (uint32)e.sampler);
-                LOG(Error, " > textureView: {}", (uint32)e.textureView);
-                LOG(Error, " > buffer: {}", (uint32)e.buffer);
-            }
-        }
-        ASSERT(dynamicOffsetsCount <= ARRAY_COUNT(dynamicOffsets));
-#endif
+        BuildBindGroup(groupIndex, *descriptors, key, dynamicOffsets, dynamicOffsetsCount);

        // Bind group
        WGPUBindGroup bindGroup = _pipelineState->GetBindGroup(key);
@@ -1197,4 +1114,131 @@ void GPUContextWebGPU::FlushTimestamps(int32 skipLast)
    }
 }

+// Fills the bind group key (entries and resource versions) for the given descriptors using the resources currently bound to this context.
+// Offsets of dynamic uniform buffers are appended to dynamicOffsets (dynamicOffsetsCount is advanced, not reset).
+// Missing samplers, textures and buffers fall back to the device defaults; a missing constant buffer is logged as fatal.
+// Note: key.Layout and key.Hash are not modified here; groupIndex is used for diagnostics only.
+void GPUContextWebGPU::BuildBindGroup(uint32 groupIndex, const SpirvShaderDescriptorInfo& descriptors, GPUPipelineStateWebGPU::BindGroupKey& key, uint32 dynamicOffsets[DynamicOffsetsMax], uint32& dynamicOffsetsCount)
+{
+    // Build descriptors for the bind group
+    auto entriesCount = descriptors.DescriptorTypesCount;
+    static_assert(ARRAY_COUNT(key.Entries) == SpirvShaderDescriptorInfo::MaxDescriptors, "Invalid size of bind group entries array.");
+    static_assert(ARRAY_COUNT(key.Versions) == SpirvShaderDescriptorInfo::MaxDescriptors, "Invalid size of bind group versions array.");
+    key.EntriesCount = entriesCount;
+    auto entriesPtr = key.Entries;
+    auto versionsPtr = key.Versions;
+    Platform::MemoryClear(entriesPtr, entriesCount * sizeof(WGPUBindGroupEntry));
+    Platform::MemoryClear(versionsPtr, ((entriesCount + 3) & ~0x3) * sizeof(uint8)); // Size rounded up to a multiple of 4 bytes
+    for (int32 index = 0; index < entriesCount; index++)
+    {
+        auto& descriptor = descriptors.DescriptorTypes[index];
+        auto& entry = entriesPtr[index];
+        entry.binding = descriptor.Binding;
+        entry.size = WGPU_WHOLE_SIZE;
+        switch (descriptor.DescriptorType)
+        {
+        case VK_DESCRIPTOR_TYPE_SAMPLER:
+        {
+            GPUSamplerWebGPU* sampler = _samplers[descriptor.Slot];
+            if (!sampler)
+                sampler = _device->DefaultSamplers[0]; // Fallback
+            entry.sampler = sampler->Sampler;
+            break;
+        }
+        case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+        case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+        {
+            ASSERT_LOW_LAYER(descriptor.BindingType == SpirvShaderResourceBindingType::SRV);
+            auto view = _shaderResources[descriptor.Slot];
+            auto ptr = view ? (GPUResourceViewPtrWebGPU*)view->GetNativePtr() : nullptr;
+            if (ptr && ptr->TextureView)
+            {
+                entry.textureView = ptr->TextureView->View;
+                versionsPtr[index] = ptr->Version;
+            }
+            if (!entry.textureView)
+            {
+                // Fallback
+                auto defaultTexture = _device->DefaultTexture[(int32)descriptor.ResourceType];
+                if (!defaultTexture)
+                {
+                    LOG(Error, "Missing default resource {} at slot {} of binding space {}", (int32)descriptor.ResourceType, descriptor.Slot, (int32)descriptor.BindingType);
+                    CRASH;
+                }
+                switch (descriptor.ResourceType)
+                {
+                case SpirvShaderResourceType::Texture3D:
+                    view = defaultTexture->ViewVolume();
+                    break;
+                case SpirvShaderResourceType::Texture1DArray:
+                case SpirvShaderResourceType::Texture2DArray:
+                    view = defaultTexture->ViewArray();
+                    break;
+                default:
+                    view = defaultTexture->View(0);
+                    break;
+                }
+                ptr = (GPUResourceViewPtrWebGPU*)view->GetNativePtr();
+                entry.textureView = ptr->TextureView->View;
+            }
+            break;
+        }
+        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+        case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+        case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+        {
+            ASSERT(descriptor.Slot < _resourceTableSizes[(int32)descriptor.BindingType]);
+            GPUResourceView* view = _resourceTables[(int32)descriptor.BindingType][descriptor.Slot];
+            auto ptr = view ? (GPUResourceViewPtrWebGPU*)view->GetNativePtr() : nullptr;
+            if (ptr && ptr->BufferView)
+            {
+                entry.buffer = ptr->BufferView->Buffer;
+                entry.size = ((GPUBufferWebGPU*)view->GetParent())->GetSize();
+                versionsPtr[index] = (uint64)ptr->Version;
+            }
+            if (!entry.buffer)
+                entry.buffer = _device->DefaultBuffer; // Fallback
+            break;
+        }
+        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+        {
+            GPUConstantBufferWebGPU* uniform = _constantBuffers[descriptor.Slot];
+            if (uniform && uniform->Allocation.Buffer)
+            {
+                entry.buffer = uniform->Allocation.Buffer;
+                entry.size = uniform->AllocationSize;
+                // Static uniform buffers bake the offset into the entry, dynamic ones pass it when the bind group is set
+                if (descriptor.DescriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
+                    entry.offset = uniform->Allocation.Offset;
+                else
+                    dynamicOffsets[dynamicOffsetsCount++] = uniform->Allocation.Offset;
+            }
+            else
+                LOG(Fatal, "Missing constant buffer at slot {}", descriptor.Slot);
+            break;
+        }
+        default:
+#if GPU_ENABLE_DIAGNOSTICS
+            LOG(Fatal, "Unknown descriptor type: {} used as {}", (uint32)descriptor.DescriptorType, (uint32)descriptor.BindingType);
+#else
+            CRASH;
+#endif
+            return;
+        }
+    }
+
+#if BUILD_DEBUG
+    // Validate (each entry must reference exactly one of: buffer, sampler, texture view)
+    for (int32 i = 0; i < entriesCount; i++)
+    {
+        auto& e = entriesPtr[i];
+        if ((e.buffer != nullptr) + (e.sampler != nullptr) + (e.textureView != nullptr) != 1)
+        {
+            // This is also called for compute dispatches where no graphics pipeline state may be bound, so never dereference it blindly
+            if (_pipelineState)
+            {
+                LOG(Error, "Invalid binding in group {} at index {} ({})", groupIndex, i, _pipelineState->GetName());
+            }
+            else
+            {
+                LOG(Error, "Invalid binding in group {} at index {}", groupIndex, i);
+            }
+            LOG(Error, " > sampler: {}", (uint32)e.sampler);
+            LOG(Error, " > textureView: {}", (uint32)e.textureView);
+            LOG(Error, " > buffer: {}", (uint32)e.buffer);
+        }
+    }
+    ASSERT(dynamicOffsetsCount <= DynamicOffsetsMax);
+#endif
+}
+
 #endif
--- a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h
@@ -93,11 +93,14 @@ private:
    bool FindClear(const GPUTextureViewWebGPU* view, PendingClear& clear);
    void ManualClear(const PendingClear& clear);
    void OnDrawCall();
-    void OnDispatch(GPUShaderProgramCS* shader);
+    WGPUComputePassEncoder OnDispatch(GPUShaderProgramCS* shader);
    void EndRenderPass();
+    void EndComputePass(WGPUComputePassEncoder computePass);
    void FlushRenderPass();
    void FlushBindGroup();
    void FlushTimestamps(int32 skipLast = 0);
+    constexpr static int32 DynamicOffsetsMax = 4;
+    void BuildBindGroup(uint32 groupIndex, const SpirvShaderDescriptorInfo& descriptors, GPUPipelineStateWebGPU::BindGroupKey& key, uint32 dynamicOffsets[DynamicOffsetsMax], uint32& dynamicOffsetsCount);

 public:
    // [GPUContext]
--- a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp
@@ -294,6 +294,14 @@ bool GPUDeviceWebGPU::Init()
    {
        MinUniformBufferOffsetAlignment = limits.minUniformBufferOffsetAlignment;
        TimestampQuery = features.Contains(WGPUFeatureName_TimestampQuery);
+        Limits.HasCompute =
+            limits.maxStorageBuffersPerShaderStage >= GPU_MAX_UA_BINDED &&
+            limits.maxStorageTexturesPerShaderStage >= GPU_MAX_UA_BINDED &&
+            limits.maxComputeWorkgroupsPerDimension >= GPU_MAX_CS_DISPATCH_THREAD_GROUPS &&
+            limits.maxComputeWorkgroupSizeX >= 1024 &&
+            limits.maxComputeWorkgroupSizeY >= 256 &&
+            limits.maxComputeWorkgroupSizeZ >= 8 &&
+            limits.maxBufferSize >= 64 * 1024 * 1024; // 64MB
        Limits.HasInstancing = true;
        Limits.HasDrawIndirect = true;
        Limits.HasDepthAsSRV = true;
--- a/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.cpp
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.cpp
@@ -142,6 +142,229 @@ WGPUBlendComponent ToBlendComponent(BlendingMode::Operation blendOp, BlendingMod
    return result;
 }

+typedef Array<WGPUBindGroupLayoutEntry, InlinedAllocation<16>> BindGroupEntries;
+
+WGPUBindGroupLayout CreateBindGroupLayout(WGPUDevice device, const GPUContextBindingsWebGPU& bindings, int32 groupIndex, const SpirvShaderDescriptorInfo& descriptors, BindGroupEntries& entries, const StringAnsiView& debugName, bool log, bool compute = false) // Fills one layout entry per descriptor and creates the bind group layout (nullptr if there are no descriptors)
+{
+    int32 entriesCount = descriptors.DescriptorTypesCount;
+    if (entriesCount == 0)
+        return nullptr; // Nothing to bind for this group
+    auto entriesPtr = entries.Get();
+    ASSERT_LOW_LAYER(entries.Count() >= entriesCount); // The caller has to pre-size the entries array
+    Platform::MemoryClear(entries.Get(), sizeof(WGPUBindGroupLayoutEntry) * entriesCount);
+    auto visibility = compute ? WGPUShaderStage_Compute : (groupIndex == 0 ? WGPUShaderStage_Vertex : WGPUShaderStage_Fragment); // Graphics: group 0 is visible to the vertex stage, any other group to the fragment stage
+#if WEBGPU_LOG_PSO
+    if (log)
+        LOG(Info, " > group {} - {}", groupIndex, compute ? TEXT("Compute") : (groupIndex == 0 ? TEXT("Vertex") : TEXT("Fragment")));
+    const Char* samplerType = TEXT("?");
+#endif
+    for (int32 index = 0; index < entriesCount; index++)
+    {
+        auto& descriptor = descriptors.DescriptorTypes[index];
+        auto& entry = entriesPtr[index];
+        entry.binding = descriptor.Binding;
+        entry.bindingArraySize = descriptor.Count;
+        entry.visibility = visibility;
+        switch (descriptor.DescriptorType)
+        {
+        case VK_DESCRIPTOR_TYPE_SAMPLER:
+            entry.sampler.type = WGPUSamplerBindingType_Undefined;
+            if (descriptor.Slot == 4 || descriptor.Slot == 5) // Hack for ShadowSampler and ShadowSamplerLinear (this could get binded samplers table just like for shaderResources)
+                entry.sampler.type = WGPUSamplerBindingType_Comparison;
+#if WEBGPU_LOG_PSO
+            switch (entry.sampler.type)
+            {
+            case WGPUSamplerBindingType_BindingNotUsed:
+                samplerType = TEXT("BindingNotUsed");
+                break;
+            case WGPUSamplerBindingType_Undefined:
+                samplerType = TEXT("Undefined");
+                break;
+            case WGPUSamplerBindingType_Filtering:
+                samplerType = TEXT("Filtering");
+                break;
+            case WGPUSamplerBindingType_NonFiltering:
+                samplerType = TEXT("NonFiltering");
+                break;
+            case WGPUSamplerBindingType_Comparison:
+                samplerType = TEXT("Comparison");
+                break;
+            }
+            if (log)
+                LOG(Info, "   > [{}] sampler ({})", entry.binding, samplerType);
+#endif
+            break;
+        case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+            entry.texture.sampleType = WGPUTextureSampleType_Undefined; // Used when no view is bound at this slot
+            if (bindings.ShaderResources[descriptor.Slot])
+            {
+                // Hack to use the sample type directly from the view which allows to fix incorrect Depth Buffer reading that allows only manual Load when UnfilterableFloat is used (see SAMPLE_RT_DEPTH)
+                auto ptr = (GPUResourceViewPtrWebGPU*)bindings.ShaderResources[descriptor.Slot]->GetNativePtr();
+                if (ptr && ptr->TextureView)
+                    entry.texture.sampleType = ptr->TextureView->SampleType;
+            }
+#if WEBGPU_LOG_PSO
+            if (log)
+            {
+                switch (entry.texture.sampleType)
+                {
+                case WGPUTextureSampleType_BindingNotUsed:
+                    samplerType = TEXT("BindingNotUsed");
+                    break;
+                case WGPUTextureSampleType_Undefined:
+                    samplerType = TEXT("Undefined");
+                    break;
+                case WGPUTextureSampleType_Float:
+                    samplerType = TEXT("Float");
+                    break;
+                case WGPUTextureSampleType_UnfilterableFloat:
+                    samplerType = TEXT("UnfilterableFloat");
+                    break;
+                case WGPUTextureSampleType_Depth:
+                    samplerType = TEXT("Depth");
+                    break;
+                case WGPUTextureSampleType_Sint:
+                    samplerType = TEXT("Sint");
+                    break;
+                case WGPUTextureSampleType_Uint:
+                    samplerType = TEXT("Uint");
+                    break;
+                }
+                switch (descriptor.ResourceType)
+                {
+                case SpirvShaderResourceType::Texture1D:
+                    LOG(Info, "   > [{}] texture 1D ({})", entry.binding, samplerType);
+                    break;
+                case SpirvShaderResourceType::Texture2D:
+                    LOG(Info, "   > [{}] texture 2D ({})", entry.binding, samplerType);
+                    break;
+                case SpirvShaderResourceType::Texture3D:
+                    LOG(Info, "   > [{}] texture 3D ({})", entry.binding, samplerType);
+                    break;
+                case SpirvShaderResourceType::TextureCube:
+                    LOG(Info, "   > [{}] texture Cube ({})", entry.binding, samplerType);
+                    break;
+                case SpirvShaderResourceType::Texture2DArray:
+                    LOG(Info, "   > [{}] texture 2D array ({})", entry.binding, samplerType);
+                    break;
+                }
+            }
+#endif
+            switch (descriptor.ResourceType)
+            {
+            case SpirvShaderResourceType::Texture1D:
+                entry.texture.viewDimension = WGPUTextureViewDimension_1D;
+                break;
+            case SpirvShaderResourceType::Texture2D:
+                entry.texture.viewDimension = WGPUTextureViewDimension_2D;
+                break;
+            case SpirvShaderResourceType::Texture3D:
+                entry.texture.viewDimension = WGPUTextureViewDimension_3D;
+                break;
+            case SpirvShaderResourceType::TextureCube:
+                entry.texture.viewDimension = WGPUTextureViewDimension_Cube;
+                break;
+            case SpirvShaderResourceType::Texture1DArray:
+                CRASH; // Not supported TODO: add error at compile time (in ShaderCompilerWebGPU::Write)
+                break;
+            case SpirvShaderResourceType::Texture2DArray:
+                entry.texture.viewDimension = WGPUTextureViewDimension_2DArray;
+                break;
+            }
+            break;
+        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+            entry.buffer.hasDynamicOffset = true; // Intentional fallthrough into the non-dynamic storage buffer case
+        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+            if (descriptor.BindingType == SpirvShaderResourceBindingType::SRV)
+                entry.buffer.type = WGPUBufferBindingType_ReadOnlyStorage;
+            else
+                entry.buffer.type = WGPUBufferBindingType_Storage;
+#if WEBGPU_LOG_PSO
+            if (log)
+                LOG(Info, "   > [{}] storage buffer (read-only = {}, dynamic = {})", entry.binding, entry.buffer.type == WGPUBufferBindingType_ReadOnlyStorage, entry.buffer.hasDynamicOffset);
+#endif
+            break;
+        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+            entry.buffer.hasDynamicOffset = true; // Intentional fallthrough into the non-dynamic uniform buffer case
+        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+            entry.buffer.type = WGPUBufferBindingType_Uniform;
+#if WEBGPU_LOG_PSO
+            if (log)
+                LOG(Info, "   > [{}] uniform buffer (dynamic = {})", entry.binding, entry.buffer.hasDynamicOffset);
+#endif
+            break;
+        default:
+#if GPU_ENABLE_DIAGNOSTICS
+            LOG(Fatal, "Unknown descriptor type: {} used as {} in '{}'", (uint32)descriptor.DescriptorType, (uint32)descriptor.BindingType, String(debugName));
+#else
+            CRASH;
+#endif
+            return nullptr; // NOTE(review): other descriptor types (eg. storage images) end up here - confirm that is intended for compute shaders
+        }
+    }
+
+    // Create a bind group layout
+    WGPUBindGroupLayoutDescriptor bindGroupLayoutDesc = WGPU_BIND_GROUP_LAYOUT_DESCRIPTOR_INIT;
+    bindGroupLayoutDesc.entryCount = entriesCount;
+    bindGroupLayoutDesc.entries = entriesPtr;
+    return wgpuDeviceCreateBindGroupLayout(device, &bindGroupLayoutDesc);
+}
+
+// Gets the compute pipeline of this shader, creating it (together with its bind group layout) on the first use.
+// The bind group layout is additionally returned via resultBindGroupLayout. Returns nullptr if the pipeline creation fails.
+WGPUComputePipeline GPUShaderProgramCSWebGPU::GetPipeline(WGPUDevice device, const GPUContextBindingsWebGPU& bindings, WGPUBindGroupLayout& resultBindGroupLayout)
+{
+    resultBindGroupLayout = _bindGroupLayout;
+    if (_pipeline)
+        return _pipeline;
+    PROFILE_CPU();
+    ZoneText(*_name, _name.Length());
+#if WEBGPU_LOG_PSO
+#ifdef WEBGPU_LOG_PSO_NAME
+    const bool log = _name.Contains(WEBGPU_LOG_PSO_NAME);
+#else
+    const bool log = true;
+#endif
+    if (log)
+        LOG(Info, "[WebGPU] GetPipeline: '{}'", String(_name));
+#else
+    const bool log = false; // Still required as an argument of CreateBindGroupLayout when PSO logging is compiled out
+#endif
+
+    // Create layout bind group
+    BindGroupEntries entries;
+    entries.Resize(DescriptorInfo.DescriptorTypesCount);
+    _bindGroupLayout = CreateBindGroupLayout(device, bindings, 0, DescriptorInfo, entries, _name, log, true);
+    resultBindGroupLayout = _bindGroupLayout;
+
+    // Create the pipeline layout
+    WGPUPipelineLayoutDescriptor layoutDesc = WGPU_PIPELINE_LAYOUT_DESCRIPTOR_INIT;
+#if GPU_ENABLE_RESOURCE_NAMING
+    layoutDesc.label = { _name.Get(), (size_t)_name.Length() };
+#endif
+    layoutDesc.bindGroupLayoutCount = 1;
+    layoutDesc.bindGroupLayouts = &_bindGroupLayout;
+    auto layout = wgpuDeviceCreatePipelineLayout(device, &layoutDesc);
+    if (!layout)
+    {
+        LOG(Error, "wgpuDeviceCreatePipelineLayout failed");
+        return nullptr;
+    }
+
+    // Create pipeline
+    WGPUComputePipelineDescriptor desc = WGPU_COMPUTE_PIPELINE_DESCRIPTOR_INIT;
+#if GPU_ENABLE_RESOURCE_NAMING
+    desc.label = layoutDesc.label;
+#endif
+    desc.layout = layout;
+    desc.compute.module = ShaderModule;
+    _pipeline = wgpuDeviceCreateComputePipeline(device, &desc);
+    wgpuPipelineLayoutRelease(layout); // Drop the local reference (WebGPU objects are reference-counted), otherwise the layout handle leaks
+    if (!_pipeline)
+    {
+#if GPU_ENABLE_RESOURCE_NAMING
+        LOG(Error, "wgpuDeviceCreateComputePipeline failed for {}", String(_name));
+#endif
+    }
+
+    return _pipeline;
+}
+
 void GPUPipelineStateWebGPU::OnReleaseGPU()
 {
    VS = nullptr;
@@ -176,12 +399,12 @@ uint32 GetHash(const GPUPipelineStateWebGPU::PipelineKey& key)
    return hash;
 }

-uint32 GetHash(const GPUPipelineStateWebGPU::BindGroupKey& key)
+uint32 GetHash(const GPUBindGroupKeyWebGPU& key)
 {
    return key.Hash;
 }

-bool GPUPipelineStateWebGPU::BindGroupKey::operator==(const BindGroupKey& other) const
+bool GPUBindGroupKeyWebGPU::operator==(const GPUBindGroupKeyWebGPU& other) const
 {
    return Hash == other.Hash
        && Layout == other.Layout
@@ -190,28 +413,132 @@ bool GPUPipelineStateWebGPU::BindGroupKey::operator==(const BindGroupKey& other)
        && Platform::MemoryCompare(&Versions, &other.Versions, EntriesCount * sizeof(uint8)) == 0;
 }

-WGPURenderPipeline GPUPipelineStateWebGPU::GetPipeline(const PipelineKey& key, GPUResourceView* shaderResources[GPU_MAX_SR_BINDED])
+WGPUBindGroup GPUBindGroupCacheWebGPU::Get(WGPUDevice device, GPUBindGroupKeyWebGPU& key, const StringAnsiView& debugName, uint64 gcFrames) // Returns the cached bind group matching the key or creates and caches a new one (null when the creation fails)
 {
-    WGPURenderPipeline pipeline;
-    if (_pipelines.TryGet(key, pipeline))
-        return pipeline;
-    PROFILE_CPU();
-    PROFILE_MEM(GraphicsCommands);
-#if GPU_ENABLE_RESOURCE_NAMING
-    ZoneText(_debugName.Get(), _debugName.Count() - 1);
-#endif
-#if WEBGPU_LOG_PSO
-    LOG(Info, "[WebGPU] GetPipeline: '{}'", String(_debugName.Get(), _debugName.Count() - 1));
+#if WEBGPU_LOG_BIND_GROUPS
 #ifdef WEBGPU_LOG_PSO_NAME
-    const bool log = StringAnsiView(_debugName.Get(), _debugName.Count() - 1).Contains(WEBGPU_LOG_PSO_NAME);
+    const bool log = debugName.Contains(WEBGPU_LOG_PSO_NAME);
 #else
    const bool log = true;
 #endif
 #endif

+    // Stamp the key with the current frame and generate its hash from the entries, the entries count, the layout and the descriptor versions (the key is passed in unhashed)
+    key.LastFrameUsed = Engine::FrameCount;
+    key.Hash = Crc::MemCrc32(&key.Entries, key.EntriesCount * sizeof(WGPUBindGroupEntry));
+    CombineHash(key.Hash, GetHash(key.EntriesCount));
+    CombineHash(key.Hash, GetHash(key.Layout));
+    CombineHash(key.Hash, Crc::MemCrc32(&key.Versions, key.EntriesCount * sizeof(uint8)));
+
+    // Lookup for existing bind group
+    WGPUBindGroup bindGroup;
+    auto found = _bindGroups.Find(key);
+    if (found.IsNotEnd())
+    {
+        // Get cached bind group and update the last usage frame (so the cleanup below does not treat it as stale)
+        bindGroup = found->Value;
+        found->Key.LastFrameUsed = key.LastFrameUsed;
+
+        // Periodically remove old bind groups (unused for some time); runs only on a cache hit and at most once per gcFrames * 2 frames
+        if (key.LastFrameUsed - _lastFrameBindGroupsGC > gcFrames * 2)
+        {
+            _lastFrameBindGroupsGC = key.LastFrameUsed;
+            int32 freed = 0; // Number of released bind groups (used by the logging below)
+            for (auto it = _bindGroups.Begin(); it.IsNotEnd(); ++it)
+            {
+                if (key.LastFrameUsed - it->Key.LastFrameUsed > gcFrames) // Not used for more than gcFrames frames
+                {
+                    freed++;
+                    wgpuBindGroupRelease(it->Value);
+                    _bindGroups.Remove(it);
+                }
+            }
+#if WEBGPU_LOG_BIND_GROUPS
+            if (freed > 0 && log)
+                LOG(Info, "[WebGPU] Removed {} old entries from '{}'", freed, String(debugName));
+#endif
+        }
+
+        return bindGroup;
+    }
+    PROFILE_CPU();
+    PROFILE_MEM(GraphicsShaders);
+#if GPU_ENABLE_RESOURCE_NAMING
+    ZoneText(debugName.Get(), debugName.Length());
+#endif
+#if WEBGPU_LOG_BIND_GROUPS
+    if (log)
+        LOG(Info, "[WebGPU] GetBindGroup: '{}', hash: {}", String(debugName), key.Hash);
+#endif
+
+    // Build description of the new bind group directly from the key (layout and entries)
+    WGPUBindGroupDescriptor desc = WGPU_BIND_GROUP_DESCRIPTOR_INIT;
+#if GPU_ENABLE_RESOURCE_NAMING
+    desc.label = { debugName.Get(), (size_t)debugName.Length() };
+#endif
+    desc.layout = key.Layout;
+    desc.entryCount = key.EntriesCount;
+    desc.entries = key.Entries;
+
+    // Create object (a failed creation is not cached; the error is logged only when resource naming is enabled)
+    bindGroup = wgpuDeviceCreateBindGroup(device, &desc);
+    if (!bindGroup)
+    {
+#if GPU_ENABLE_RESOURCE_NAMING
+        LOG(Error, "wgpuDeviceCreateBindGroup failed for {}", String(debugName));
+#endif
+        return nullptr;
+    }
+
+#if WEBGPU_LOG_BIND_GROUPS
+    // Debug detection of hash collisions (counts cached keys that share the hash of the new key and reports which parts of them are equal)
+    int32 collisions = 0, equalLayout = 0, equalEntries = 0, equalVersions = 0;
+    for (auto& e : _bindGroups)
+    {
+        auto& other = e.Key;
+        if (key.Hash == other.Hash)
+        {
+            collisions++;
+            if (key.Layout == other.Layout)
+                equalLayout++;
+            if (key.EntriesCount == other.EntriesCount && Platform::MemoryCompare(&key.Entries, &other.Entries, key.EntriesCount * sizeof(WGPUBindGroupEntry)) == 0)
+                equalEntries++;
+            if (key.EntriesCount == other.EntriesCount && Platform::MemoryCompare(&key.Versions, &other.Versions, key.EntriesCount * sizeof(uint8)) == 0)
+                equalVersions++;
+        }
+    }
+    if (collisions > 1 && log) // NOTE(review): the new key is not added to the cache yet, so a single match already looks like a collision - confirm whether '> 0' was intended
+        LOG(Error, "> Hash collision! {}/{} (capacity: {}), equalLayout: {}, equalEntries: {}, equalVersions: {}", collisions, _bindGroups.Count(), _bindGroups.Capacity(), equalLayout, equalEntries, equalVersions);
+#endif
+
+    // Cache it (the key is stored together with its hash and last usage frame)
+    _bindGroups.Add(key, bindGroup);
+    return bindGroup;
+}
+
+WGPURenderPipeline GPUPipelineStateWebGPU::GetPipeline(const PipelineKey& key, const GPUContextBindingsWebGPU& bindings)
+{
+    WGPURenderPipeline pipeline;
+    if (_pipelines.TryGet(key, pipeline))
+        return pipeline;
+    PROFILE_CPU();
+    PROFILE_MEM(GraphicsShaders);
+#if GPU_ENABLE_RESOURCE_NAMING
+    ZoneText(_debugName.Get(), _debugName.Count() - 1);
+#endif
+#if WEBGPU_LOG_PSO
+#ifdef WEBGPU_LOG_PSO_NAME
+    const bool log = StringAnsiView(_debugName.Get(), _debugName.Count() - 1).Contains(WEBGPU_LOG_PSO_NAME);
+#else
+    const bool log = true;
+#endif
+    if (log)
+        LOG(Info, "[WebGPU] GetPipeline: '{}'", String(_debugName.Get(), _debugName.Count() - 1));
+#endif
+
    // Lazy-init layout (cannot do it during Init as texture samplers that access eg. depth need to explicitly use UnfilterableFloat)
    if (!PipelineDesc.layout)
-        InitLayout(shaderResources);
+        InitLayout(bindings);

    // Build final pipeline description
    _depthStencilDesc.format = (WGPUTextureFormat)key.DepthStencilFormat;
@@ -295,107 +622,16 @@ WGPURenderPipeline GPUPipelineStateWebGPU::GetPipeline(const PipelineKey& key, G
    return pipeline;
 }

-WGPUBindGroup GPUPipelineStateWebGPU::GetBindGroup(BindGroupKey& key)
+void GPUPipelineStateWebGPU::InitLayout(const GPUContextBindingsWebGPU& bindings)
 {
-    // Generate a hash for the key
-    key.LastFrameUsed = Engine::FrameCount;
-    key.Hash = Crc::MemCrc32(&key.Entries, key.EntriesCount * sizeof(WGPUBindGroupEntry));
-    CombineHash(key.Hash, GetHash(key.EntriesCount));
-    CombineHash(key.Hash, GetHash(key.Layout));
-    CombineHash(key.Hash, Crc::MemCrc32(&key.Versions, key.EntriesCount * sizeof(uint8)));
-
-    // Lookup for existing bind group
-    WGPUBindGroup bindGroup;
-    auto found = _bindGroups.Find(key);
-    if (found.IsNotEnd())
-    {
-        // Get cached bind group and update the last usage frame
-        bindGroup = found->Value;
-        found->Key.LastFrameUsed = key.LastFrameUsed;
-
-        // Periodically remove old bind groups (unused for some time)
-        if (key.LastFrameUsed - _lastFrameBindGroupsGC > 100)
-        {
-            _lastFrameBindGroupsGC = key.LastFrameUsed;
-            int32 freed = 0;
-            for (auto it = _bindGroups.Begin(); it.IsNotEnd(); ++it)
-            {
-                if (key.LastFrameUsed - it->Key.LastFrameUsed > 50)
-                {
-                    freed++;
-                    wgpuBindGroupRelease(it->Value);
-                    _bindGroups.Remove(it);
-                }
-            }
-#if WEBGPU_LOG_BIND_GROUPS
-            if (freed > 0)
-            {
-                LOG(Info, "[WebGPU] Removed {} old entries from '{}'", freed, String(_debugName.Get(), _debugName.Count() - 1));
-            }
-#endif
-        }
-
-        return bindGroup;
-    }
-    PROFILE_CPU();
-    PROFILE_MEM(GraphicsCommands);
 #if GPU_ENABLE_RESOURCE_NAMING
-    ZoneText(_debugName.Get(), _debugName.Count() - 1);
+    StringAnsiView debugName(_debugName.Get(), _debugName.Count() - 1);
+#else
+    StringAnsiView debugName;
 #endif
-#if WEBGPU_LOG_BIND_GROUPS
-    LOG(Info, "[WebGPU] GetBindGroup: '{}', hash: {}", String(_debugName.Get(), _debugName.Count() - 1), key.Hash);
-#endif
-
-    // Build description
-    WGPUBindGroupDescriptor desc = WGPU_BIND_GROUP_DESCRIPTOR_INIT;
-#if GPU_ENABLE_RESOURCE_NAMING
-    desc.label = PipelineDesc.label;
-#endif
-    desc.layout = key.Layout;
-    desc.entryCount = key.EntriesCount;
-    desc.entries = key.Entries;
-
-    // Create object
-    bindGroup = wgpuDeviceCreateBindGroup(_device->Device, &desc);
-    if (!bindGroup)
-    {
-#if GPU_ENABLE_RESOURCE_NAMING
-        LOG(Error, "wgpuDeviceCreateBindGroup failed for {}", String(_debugName.Get(), _debugName.Count() - 1));
-#endif
-        return nullptr;
-    }
-
-#if WEBGPU_LOG_BIND_GROUPS
-    // Debug detection of hash collisions
-    int32 collisions = 0, equalLayout = 0, equalEntries = 0, equalVersions = 0;
-    for (auto& e : _bindGroups)
-    {
-        auto& other = e.Key;
-        if (key.Hash == other.Hash)
-        {
-            collisions++;
-            if (key.Layout == other.Layout)
-                equalLayout++;
-            if (key.EntriesCount == other.EntriesCount && Platform::MemoryCompare(&key.Entries, &other.Entries, key.EntriesCount * sizeof(WGPUBindGroupEntry)) == 0)
-                equalEntries++;
-            if (key.EntriesCount == other.EntriesCount && Platform::MemoryCompare(&key.Versions, &other.Versions, key.EntriesCount * sizeof(uint8)) == 0)
-                equalVersions++;
-        }
-    }
-    if (collisions > 1)
-        LOG(Error, "> Hash collision! {}/{} (capacity: {}), equalLayout: {}, equalEntries: {}, equalVersions: {}", collisions, _bindGroups.Count(), _bindGroups.Capacity(), equalLayout, equalEntries, equalVersions);
-#endif
-
-    // Cache it
-    _bindGroups.Add(key, bindGroup);
-    return bindGroup;
-}
-
-void GPUPipelineStateWebGPU::InitLayout(GPUResourceView* shaderResources[GPU_MAX_SR_BINDED])
-{
 #if WEBGPU_LOG_PSO
 #ifdef WEBGPU_LOG_PSO_NAME
-    const bool log = StringAnsiView(_debugName.Get(), _debugName.Count() - 1).Contains(WEBGPU_LOG_PSO_NAME);
+    const bool log = debugName.Contains(WEBGPU_LOG_PSO_NAME);
 #else
    const bool log = true;
 #endif
@@ -409,175 +645,15 @@ void GPUPipelineStateWebGPU::InitLayout(GPUResourceView* shaderResources[GPU_MAX
        if (descriptors && maxEntriesCount < descriptors->DescriptorTypesCount)
            maxEntriesCount = (int32)descriptors->DescriptorTypesCount;
    }
-    Array<WGPUBindGroupLayoutEntry, InlinedAllocation<8>> entries;
+    BindGroupEntries entries;
    entries.Resize(maxEntriesCount);

    // Setup bind groups
-    WGPUBindGroupLayoutEntry* entriesPtr = entries.Get();
    for (int32 groupIndex = 0; groupIndex < ARRAY_COUNT(BindGroupDescriptors); groupIndex++)
    {
        auto descriptors = BindGroupDescriptors[groupIndex];
-        if (!descriptors || descriptors->DescriptorTypesCount == 0)
-            continue;
-
-        int32 entriesCount = descriptors->DescriptorTypesCount;
-        Platform::MemoryClear(entries.Get(), sizeof(WGPUBindGroupLayoutEntry) * entriesCount);
-        auto visibility = groupIndex == 0 ? WGPUShaderStage_Vertex : WGPUShaderStage_Fragment;
-#if WEBGPU_LOG_PSO
-        if (log)
-            LOG(Info, " > group {} - {}", groupIndex, groupIndex == 0 ? TEXT("Vertex") : TEXT("Fragment"));
-        const Char* samplerType = TEXT("?");
-#endif
-        for (int32 index = 0; index < entriesCount; index++)
-        {
-            auto& descriptor = descriptors->DescriptorTypes[index];
-            auto& entry = entriesPtr[index];
-            entry.binding = descriptor.Binding;
-            entry.bindingArraySize = descriptor.Count;
-            entry.visibility = visibility;
-            switch (descriptor.DescriptorType)
-            {
-            case VK_DESCRIPTOR_TYPE_SAMPLER:
-                entry.sampler.type = WGPUSamplerBindingType_Undefined;
-                if (descriptor.Slot == 4 || descriptor.Slot == 5) // Hack for ShadowSampler and ShadowSamplerLinear (this could get binded samplers table just like for shaderResources)
-                    entry.sampler.type = WGPUSamplerBindingType_Comparison;
-#if WEBGPU_LOG_PSO
-                switch (entry.sampler.type)
-                {
-                case WGPUSamplerBindingType_BindingNotUsed:
-                    samplerType = TEXT("BindingNotUsed");
-                    break;
-                case WGPUSamplerBindingType_Undefined:
-                    samplerType = TEXT("Undefined");
-                    break;
-                case WGPUSamplerBindingType_Filtering:
-                    samplerType = TEXT("Filtering");
-                    break;
-                case WGPUSamplerBindingType_NonFiltering:
-                    samplerType = TEXT("NonFiltering");
-                    break;
-                case WGPUSamplerBindingType_Comparison:
-                    samplerType = TEXT("Comparison");
-                    break;
-                }
-                if (log)
-                    LOG(Info, "   > [{}] sampler ({})", entry.binding, samplerType);
-#endif
-                break;
-            case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
-                entry.texture.sampleType = WGPUTextureSampleType_Undefined;
-                if (shaderResources[descriptor.Slot])
-                {
-                    // Hack to use the sample type directly from the view which allows to fix incorrect Depth Buffer reading that allows only manual Load when UnfilterableFloat is used (see SAMPLE_RT_DEPTH)
-                    auto ptr = (GPUResourceViewPtrWebGPU*)shaderResources[descriptor.Slot]->GetNativePtr();
-                    if (ptr && ptr->TextureView)
-                        entry.texture.sampleType = ptr->TextureView->SampleType;
-                }
-#if WEBGPU_LOG_PSO
-                if (log)
-                {
-                    switch (entry.texture.sampleType)
-                    {
-                    case WGPUTextureSampleType_BindingNotUsed:
-                        samplerType = TEXT("BindingNotUsed");
-                        break;
-                    case WGPUTextureSampleType_Undefined:
-                        samplerType = TEXT("Undefined");
-                        break;
-                    case WGPUTextureSampleType_Float:
-                        samplerType = TEXT("Float");
-                        break;
-                    case WGPUTextureSampleType_UnfilterableFloat:
-                        samplerType = TEXT("UnfilterableFloat");
-                        break;
-                    case WGPUTextureSampleType_Depth:
-                        samplerType = TEXT("Depth");
-                        break;
-                    case WGPUTextureSampleType_Sint:
-                        samplerType = TEXT("Sint");
-                        break;
-                    case WGPUTextureSampleType_Uint:
-                        samplerType = TEXT("Uint");
-                        break;
-                    }
-                    switch (descriptor.ResourceType)
-                    {
-                    case SpirvShaderResourceType::Texture1D:
-                        LOG(Info, "   > [{}] texture 1D ({})", entry.binding, samplerType);
-                        break;
-                    case SpirvShaderResourceType::Texture2D:
-                        LOG(Info, "   > [{}] texture 2D ({})", entry.binding, samplerType);
-                        break;
-                    case SpirvShaderResourceType::Texture3D:
-                        LOG(Info, "   > [{}] texture 3D ({})", entry.binding, samplerType);
-                        break;
-                    case SpirvShaderResourceType::TextureCube:
-                        LOG(Info, "   > [{}] texture Cube ({})", entry.binding, samplerType);
-                        break;
-                    case SpirvShaderResourceType::Texture2DArray:
-                        LOG(Info, "   > [{}] texture 2D array ({})", entry.binding, samplerType);
-                        break;
-                    }
-                }
-#endif
-                switch (descriptor.ResourceType)
-                {
-                case SpirvShaderResourceType::Texture1D:
-                    entry.texture.viewDimension = WGPUTextureViewDimension_1D;
-                    break;
-                case SpirvShaderResourceType::Texture2D:
-                    entry.texture.viewDimension = WGPUTextureViewDimension_2D;
-                    break;
-                case SpirvShaderResourceType::Texture3D:
-                    entry.texture.viewDimension = WGPUTextureViewDimension_3D;
-                    break;
-                case SpirvShaderResourceType::TextureCube:
-                    entry.texture.viewDimension = WGPUTextureViewDimension_Cube;
-                    break;
-                case SpirvShaderResourceType::Texture1DArray:
-                    CRASH; // Not supported TODO: add error at compile time (in ShaderCompilerWebGPU::Write)
-                    break;
-                case SpirvShaderResourceType::Texture2DArray:
-                    entry.texture.viewDimension = WGPUTextureViewDimension_2DArray;
-                    break;
-                }
-                break;
-            case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-                entry.buffer.hasDynamicOffset = true;
-            case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-                if (descriptor.BindingType == SpirvShaderResourceBindingType::SRV)
-                    entry.buffer.type = WGPUBufferBindingType_ReadOnlyStorage;
-                else
-                    entry.buffer.type = WGPUBufferBindingType_Storage;
-#if WEBGPU_LOG_PSO
-                if (log)
-                    LOG(Info, "   > [{}] storage buffer (read-only = {}, dynamic = {})", entry.binding, entry.buffer.type == WGPUBufferBindingType_ReadOnlyStorage, entry.buffer.hasDynamicOffset);
-#endif
-                break;
-            case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
-                entry.buffer.hasDynamicOffset = true;
-            case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
-                entry.buffer.type = WGPUBufferBindingType_Uniform;
-#if WEBGPU_LOG_PSO
-                if (log)
-                    LOG(Info, "   > [{}] uniform buffer (dynamic = {})", entry.binding, entry.buffer.hasDynamicOffset);
-#endif
-                break;
-            default:
-#if GPU_ENABLE_DIAGNOSTICS
-                LOG(Fatal, "Unknown descriptor type: {} used as {} in '{}'", (uint32)descriptor.DescriptorType, (uint32)descriptor.BindingType, String(_debugName.Get(), _debugName.Count() - 1));
-#else
-                CRASH;
-#endif
-                return;
-            }
-        }
-
-        // Create a bind group layout
-        WGPUBindGroupLayoutDescriptor bindGroupLayoutDesc = WGPU_BIND_GROUP_LAYOUT_DESCRIPTOR_INIT;
-        bindGroupLayoutDesc.entryCount = entriesCount;
-        bindGroupLayoutDesc.entries = entriesPtr;
-        BindGroupLayouts[groupIndex] = wgpuDeviceCreateBindGroupLayout(_device->Device, &bindGroupLayoutDesc);
+        if (descriptors)
+            BindGroupLayouts[groupIndex] = CreateBindGroupLayout(_device->Device, bindings, groupIndex, *descriptors, entries, debugName, log);
    }

    // Create the pipeline layout
@@ -591,7 +667,6 @@ void GPUPipelineStateWebGPU::InitLayout(GPUResourceView* shaderResources[GPU_MAX
    if (!PipelineDesc.layout)
    {
        LOG(Error, "wgpuDeviceCreatePipelineLayout failed");
-        return;
    }
 }

--- a/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.h
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.h
@@ -38,17 +38,7 @@ public:
    };

    // Batches bind group description for the pipeline state. Used as a key for caching created bind groups.
-    struct BindGroupKey
-    {
-        uint32 Hash;
-        WGPUBindGroupLayout Layout;
-        mutable uint64 LastFrameUsed;
-        WGPUBindGroupEntry Entries[64];
-        uint8 EntriesCount;
-        uint8 Versions[64]; // Versions of descriptors used to differentiate when texture residency gets changed
-
-        bool operator==(const BindGroupKey& other) const;
-    };
+    typedef GPUBindGroupKeyWebGPU BindGroupKey;

 private:
 #if GPU_ENABLE_RESOURCE_NAMING
@@ -61,7 +51,7 @@ private:
    WGPUVertexBufferLayout _vertexBuffers[GPU_MAX_VB_BINDED];
    Dictionary<PipelineKey, WGPURenderPipeline> _pipelines;
    Dictionary<BindGroupKey, WGPUBindGroup> _bindGroups;
-    uint64 _lastFrameBindGroupsGC = 0;
+    GPUBindGroupCacheWebGPU _bindGroupCache;

 public:
    GPUShaderProgramVSWebGPU* VS = nullptr;
@@ -78,13 +68,21 @@ public:

 public:
    // Gets the pipeline for the given rendering state. Pipelines are cached and reused for the same key.
-    WGPURenderPipeline GetPipeline(const PipelineKey& key, GPUResourceView* shaderResources[GPU_MAX_SR_BINDED]);
+    WGPURenderPipeline GetPipeline(const PipelineKey& key, const GPUContextBindingsWebGPU& bindings);

    // Gets the bind group for the given key (unhashed). Bind groups are cached and reused for the same key.
-    WGPUBindGroup GetBindGroup(BindGroupKey& desc);
+    FORCE_INLINE WGPUBindGroup GetBindGroup(BindGroupKey& key) // Forwards to the bind group cache of this pipeline state, using the default cleanup period of the cache
+    {
+#if GPU_ENABLE_RESOURCE_NAMING
+        StringAnsiView debugName(_debugName.Get(), _debugName.Count() - 1); // Presumably Count() - 1 skips the null terminator stored in _debugName - TODO confirm
+#else
+        StringAnsiView debugName; // No debug name is available when resource naming is disabled, so an empty view is passed
+#endif
+        return _bindGroupCache.Get(_device->Device, key, debugName);
+    }

 private:
-    void InitLayout(GPUResourceView* shaderResources[GPU_MAX_SR_BINDED]);
+    void InitLayout(const GPUContextBindingsWebGPU& bindings);

 public:
    // [GPUPipelineState]
@@ -97,6 +95,6 @@ protected:
 };

 uint32 GetHash(const GPUPipelineStateWebGPU::PipelineKey& key);
-uint32 GetHash(const GPUPipelineStateWebGPU::BindGroupKey& key);
+uint32 GetHash(const GPUBindGroupKeyWebGPU& key);

 #endif
--- a/Source/Engine/GraphicsDevice/WebGPU/GPUShaderProgramWebGPU.h
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUShaderProgramWebGPU.h
@@ -8,6 +8,42 @@
 #include "Engine/GraphicsDevice/Vulkan/Types.h"
 #include <webgpu/webgpu.h>

+/// <summary>
+/// Bundle of the state currently bound to the Web GPU context. Passed to the pipeline creation so the bind group layout can be built for the actual bindings (used to properly handle different texture layouts or samplers).
+/// </summary>
+struct GPUContextBindingsWebGPU
+{
+    GPUResourceView** ShaderResources; // Table of the bound shader resource views [GPU_MAX_SR_BINDED]; presumably not owned by this struct and individual slots may be null - TODO confirm
+};
+
+/// <summary>
+/// Batch of bind group descriptions for the layout. Used as a key for caching created bind groups. The caller fills Layout, Entries, EntriesCount and Versions, while Hash and LastFrameUsed are written by GPUBindGroupCacheWebGPU::Get.
+/// </summary>
+struct GPUBindGroupKeyWebGPU
+{
+    uint32 Hash; // Combined hash of the entries, entries count, layout and versions
+    WGPUBindGroupLayout Layout; // Layout the bind group is created for
+    mutable uint64 LastFrameUsed; // Frame number of the last usage; mutable so it can be refreshed on a key stored in the cache
+    WGPUBindGroupEntry Entries[64]; // Only the first EntriesCount items are hashed and used to create the bind group
+    uint8 EntriesCount; // Number of used items in Entries and Versions
+    uint8 Versions[64]; // Versions of descriptors used to differentiate when texture residency gets changed
+
+    bool operator==(const GPUBindGroupKeyWebGPU& other) const;
+};
+
+/// <summary>
+/// Reusable utility for caching bind group objects. Handles reusing bind groups for the same key and releasing them when they are not used for a long time (based on the frame number). The cache owns the created bind groups and releases the remaining ones when it gets destroyed.
+/// </summary>
+struct GPUBindGroupCacheWebGPU
+{
+private:
+    uint64 _lastFrameBindGroupsGC = 0; // Frame number of the last cleanup pass over the cached bind groups
+    Dictionary<GPUBindGroupKeyWebGPU, WGPUBindGroup> _bindGroups; // TODO: try using LRU cache
+
+public:
+    GPUBindGroupCacheWebGPU() = default;
+
+    // The cache owns the bind group handles, so a copy would release every handle twice
+    GPUBindGroupCacheWebGPU(const GPUBindGroupCacheWebGPU&) = delete;
+    GPUBindGroupCacheWebGPU& operator=(const GPUBindGroupCacheWebGPU&) = delete;
+
+    ~GPUBindGroupCacheWebGPU()
+    {
+        // Release the bind groups that are still cached (otherwise only the periodic cleanup inside Get would ever free them)
+        for (auto& e : _bindGroups)
+            wgpuBindGroupRelease(e.Value);
+    }
+
+public:
+    // Gets the bind group for the given key (unhashed). Bind groups are cached and reused for the same key. Cached bind groups that were not used for more than gcFrames frames get released during the periodic cleanup.
+    WGPUBindGroup Get(WGPUDevice device, GPUBindGroupKeyWebGPU& key, const StringAnsiView& debugName, uint64 gcFrames = 50);
+};
+
 /// <summary>
 /// Shaders base class for Web GPU backend.
 /// </summary>
@@ -69,4 +105,39 @@ public:
    }
 };

+/// <summary>
+/// Compute Shader for Web GPU backend. Owns the compute pipeline and the bind group layout (both created on the first GetPipeline call) and a cache of the bind groups used with it.
+/// </summary>
+class GPUShaderProgramCSWebGPU : public GPUShaderProgramWebGPU<GPUShaderProgramCS>
+{
+private:
+    WGPUComputePipeline _pipeline = nullptr; // Created by GetPipeline, released in the destructor
+    WGPUBindGroupLayout _bindGroupLayout = nullptr; // Created by GetPipeline, released in the destructor
+    GPUBindGroupCacheWebGPU _bindGroupCache; // Bind groups created for this shader via GetBindGroup
+
+public:
+    GPUShaderProgramCSWebGPU(const GPUShaderProgramInitializer& initializer, const SpirvShaderDescriptorInfo& descriptorInfo, WGPUShaderModule shaderModule)
+        : GPUShaderProgramWebGPU(initializer, descriptorInfo, shaderModule)
+    {
+    }
+
+    ~GPUShaderProgramCSWebGPU()
+    {
+        if (_bindGroupLayout) // Null when GetPipeline was never called
+            wgpuBindGroupLayoutRelease(_bindGroupLayout);
+        if (_pipeline) // Null when the pipeline was never created
+            wgpuComputePipelineRelease(_pipeline);
+    }
+
+public:
+    // Gets the compute pipeline (created on the first call and cached). Outputs the bind group layout used by the pipeline via resultBindGroupLayout. Returns null if the creation fails.
+    WGPUComputePipeline GetPipeline(WGPUDevice device, const GPUContextBindingsWebGPU& bindings, WGPUBindGroupLayout& resultBindGroupLayout);
+
+    // Gets the bind group for the given key (unhashed). Bind groups are cached and reused for the same key.
+    FORCE_INLINE WGPUBindGroup GetBindGroup(WGPUDevice device, GPUBindGroupKeyWebGPU& key)
+    {
+        return _bindGroupCache.Get(device, key, _name, 60 * 60); // gcFrames = 3600: unused bind groups are kept much longer than with the default of 50 frames
+    }
+};
+
 #endif
--- a/Source/Engine/GraphicsDevice/WebGPU/GPUShaderWebGPU.cpp
+++ b/Source/Engine/GraphicsDevice/WebGPU/GPUShaderWebGPU.cpp
@@ -79,10 +79,11 @@ GPUShaderProgram* GPUShaderWebGPU::CreateGPUShaderProgram(ShaderStage type, cons
        break;
    }
    case ShaderStage::Pixel:
-    {
        shader = New<GPUShaderProgramPSWebGPU>(initializer, header->DescriptorInfo, shaderModule);
        break;
-    }
+    case ShaderStage::Compute:
+        shader = New<GPUShaderProgramCSWebGPU>(initializer, header->DescriptorInfo, shaderModule);
+        break;
    }
    return shader;
 }
--- a/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.ParticleModules.cpp
+++ b/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.ParticleModules.cpp
@@ -843,10 +843,10 @@ void ParticleEmitterGPUGenerator::ProcessModule(Node* node)
                "	{{\n"
                "		// Collision (depth)\n"
                "		float3 nextPos = {0} + {1} * DeltaTime;\n"
-                "		nextPos = mul(float4(nextPos, 1), WorldMatrix).xyz;\n" // TODO: don't transform by WorldMatrix if particle system uses World Space simulation
+                "		nextPos = PROJECT_POINT(float4(nextPos, 1), WorldMatrix).xyz;\n" // TODO: don't transform by WorldMatrix if particle system uses World Space simulation

-                "		float3 viewPos = mul(float4(nextPos, 1), ViewMatrix);\n"
-                "		float4 projPos = mul(float4(nextPos, 1), ViewProjectionMatrix);\n"
+                "		float3 viewPos = PROJECT_POINT(float4(nextPos, 1), ViewMatrix);\n"
+                "		float4 projPos = PROJECT_POINT(float4(nextPos, 1), ViewProjectionMatrix);\n"
                "		projPos.xyz /= projPos.w;\n"
                "		if (all(abs(projPos.xy) < 1.0f))\n"
                "		{{\n"
@@ -871,8 +871,8 @@ void ParticleEmitterGPUGenerator::ProcessModule(Node* node)
                "				viewPos.z = linearDepth;\n"
                "				\n"

-                "				{0} = mul(float4(viewPos, 1), InvViewMatrix).xyz;\n" // TODO: don't transform by WorldMatrix if particle system uses World Space simulation
-                "				{0} = mul(float4({0}, 1), InvWorldMatrix).xyz;\n" // TODO: don't transform by WorldMatrix if particle system uses World Space simulation
+                "				{0} = PROJECT_POINT(float4(viewPos, 1), InvViewMatrix).xyz;\n" // TODO: don't transform by WorldMatrix if particle system uses World Space simulation
+                "				{0} = PROJECT_POINT(float4({0}, 1), InvWorldMatrix).xyz;\n" // TODO: don't transform by WorldMatrix if particle system uses World Space simulation
                COLLISION_LOGIC()

                "		}}\n"
--- a/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.h
+++ b/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.h
@@ -5,7 +5,7 @@
 /// <summary>
 /// Current GPU particles emitter shader version.
 /// </summary>
-#define PARTICLE_GPU_GRAPH_VERSION 11
+#define PARTICLE_GPU_GRAPH_VERSION 12

 #if COMPILE_WITH_PARTICLE_GPU_GRAPH

--- a/Source/Shaders/BitonicSort.shader
+++ b/Source/Shaders/BitonicSort.shader
@@ -54,7 +54,7 @@ bool ShouldSwap(float a, float b)

 RWByteAddressBuffer IndirectArgsBuffer : register(u0);

-META_CS(true, FEATURE_LEVEL_SM5)
+META_CS(true, AUTO)
 [numthreads(22, 1, 1)]
 void CS_IndirectArgs(uint groupIndex : SV_GroupIndex)
 {
@@ -129,7 +129,7 @@ void StoreItem(uint element, uint count)

 #ifdef _CS_PreSort

-META_CS(true, FEATURE_LEVEL_SM5)
+META_CS(true, AUTO)
 META_PERMUTATION_1(THREAD_GROUP_SIZE=1024)
 META_PERMUTATION_1(THREAD_GROUP_SIZE=64)
 [numthreads(THREAD_GROUP_SIZE, 1, 1)]
@@ -177,7 +177,7 @@ void CS_PreSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex)

 #ifdef _CS_InnerSort

-META_CS(true, FEATURE_LEVEL_SM5)
+META_CS(true, AUTO)
 [numthreads(THREAD_GROUP_SIZE, 1, 1)]
 void CS_InnerSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex)
 {
@@ -222,7 +222,7 @@ void CS_InnerSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex)
 RWBuffer<uint> SortedIndices : register(u0);
 RWBuffer<float> SortingKeys : register(u1);

-META_CS(true, FEATURE_LEVEL_SM5)
+META_CS(true, AUTO)
 [numthreads(1024, 1, 1)]
 void CS_OuterSort(uint3 dispatchThreadId : SV_DispatchThreadID)
 {
--- a/Source/Shaders/Common.hlsl
+++ b/Source/Shaders/Common.hlsl
@@ -59,7 +59,7 @@
 #else
 #define CAN_USE_GATHER 0
 #endif
-#if FEATURE_LEVEL >= FEATURE_LEVEL_SM5
+#if FEATURE_LEVEL >= FEATURE_LEVEL_SM5 || defined(WGSL)
 #define CAN_USE_COMPUTE_SHADER 1
 #else
 #define CAN_USE_COMPUTE_SHADER 0
@@ -79,6 +79,7 @@
 // Alias read-only Buffer binded as shader resource into StructuredBuffer to be used as storage on WebGPU (not supported)
 #define CAN_USE_TYPED_BUFFER_LOADS 0
 #define Buffer StructuredBuffer
+#define RWBuffer RWStructuredBuffer

 // Hack matrix multiplication order for WebGPU (row-major vs column-major bug?)
 #define PROJECT_POINT(p, m) mul(m, p)
--- a/Source/Shaders/GPUParticlesSorting.shader
+++ b/Source/Shaders/GPUParticlesSorting.shader
@@ -35,7 +35,7 @@ float3 GetParticleVec3(uint particleIndex, int offset)
 }

 // Sorting keys generation shader
-META_CS(true, FEATURE_LEVEL_SM5)
+META_CS(true, AUTO)
 META_PERMUTATION_1(SORT_MODE=0)
 META_PERMUTATION_1(SORT_MODE=1)
 META_PERMUTATION_1(SORT_MODE=2)
--- a/Source/Shaders/Histogram.shader
+++ b/Source/Shaders/Histogram.shader
@@ -20,7 +20,7 @@ META_CB_END
 RWStructuredBuffer<uint> HistogramBuffer : register(u0);

 // Clears the histogram
-META_CS(true, FEATURE_LEVEL_SM5)
+META_CS(true, AUTO)
 [numthreads(THREADGROUP_SIZE_X, 1, 1)]
 void CS_ClearHistogram(uint dispatchThreadId : SV_DispatchThreadID)
 {
@@ -44,7 +44,7 @@ float ComputeHistogramPositionFromLuminance(float luminance)
 groupshared uint SharedHistogram[HISTOGRAM_SIZE];

 // Generates the histogram
-META_CS(true, FEATURE_LEVEL_SM5)
+META_CS(true, AUTO)
 [numthreads(THREADGROUP_SIZE_X, THREADGROUP_SIZE_Y, 1)]
 void CS_GenerateHistogram(uint3 dispatchThreadId : SV_DispatchThreadID, uint3 groupThreadId : SV_GroupThreadID)
 {