Add Compute shaders support to WebGPU

This commit is contained in:
Wojtek Figat
2026-03-04 21:27:42 +01:00
parent 5fb9cf3be1
commit 377d5c00aa
20 changed files with 657 additions and 449 deletions

View File

@@ -67,7 +67,7 @@ float Rand(inout uint seed)
float3 ReprojectPosition(float2 uv, float rawDepth)
{
uv = uv * float2(2.0, -2.0) + float2(-1.0, 1.0);
float4 pos = mul(float4(uv.x, uv.y, rawDepth, 1.0f), InvViewProjectionMatrix);
float4 pos = PROJECT_POINT(float4(uv.x, uv.y, rawDepth, 1.0f), InvViewProjectionMatrix);
return pos.xyz / pos.w;
}
@@ -158,7 +158,7 @@ void SpawnParticle(Context context)
@4}
// Main entry point for the particles simulation and spawning
META_CS(true, FEATURE_LEVEL_SM5)
META_CS(true, AUTO)
[numthreads(THREAD_GROUP_SIZE, 1, 1)]
void CS_Main(uint3 dispatchThreadId : SV_DispatchThreadID)
{

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -274,7 +274,7 @@ FeatureLevel RenderTools::GetFeatureLevel(ShaderProfile profile)
case ShaderProfile::GLSL_410:
case ShaderProfile::Unknown:
case ShaderProfile::WebGPU:
return FeatureLevel::ES2;
return FeatureLevel::ES3_1;
default:
return FeatureLevel::ES2;
}
@@ -293,6 +293,8 @@ ShaderProfileFeatures RenderTools::GetShaderProfileFeatures(ShaderProfile profil
return ShaderProfileFeatures::ComputeShaders | ShaderProfileFeatures::GeometryShaders;
case ShaderProfile::DirectX_SM4:
return ShaderProfileFeatures::GeometryShaders;
case ShaderProfile::WebGPU:
return ShaderProfileFeatures::ComputeShaders;
default:
return ShaderProfileFeatures::None;
}

View File

@@ -10,6 +10,7 @@
#include "Engine/Core/Log.h"
#include "Engine/Core/Types/Pair.h"
#include "Engine/Profiler/ProfilerCPU.h"
#include "Engine/Profiler/ProfilerMemory.h"
#include "Engine/Graphics/PixelFormatExtensions.h"
static VkStencilOp ToVulkanStencilOp(const StencilOperation value)
@@ -91,6 +92,7 @@ ComputePipelineStateVulkan* GPUShaderProgramCSVulkan::GetOrCreateState()
if (_pipelineState)
return _pipelineState;
PROFILE_CPU();
PROFILE_MEM(GraphicsShaders);
ZoneText(*_name, _name.Length());
// Create pipeline layout
@@ -224,6 +226,7 @@ VkPipeline GPUPipelineStateVulkan::GetState(RenderPassVulkan* renderPass, GPUVer
return pipeline;
}
PROFILE_CPU();
PROFILE_MEM(GraphicsShaders);
#if !BUILD_RELEASE
DebugName name;
GetDebugName(name);

View File

@@ -72,6 +72,8 @@ bool GPUBufferWebGPU::OnInit()
{
case GPUResourceUsage::Default:
bufferDesc.usage |= WGPUBufferUsage_CopyDst;
if (IsUnorderedAccess())
bufferDesc.usage |= WGPUBufferUsage_CopySrc; // eg. GPU particles copy particle counter between buffers
break;
case GPUResourceUsage::Dynamic:
if (bufferDesc.usage == 0) // WebGPU doesn't allow to map-write Index/Vertex/Storage buffers

View File

@@ -368,8 +368,9 @@ void GPUContextWebGPU::UpdateCB(GPUConstantBuffer* cb, const void* data)
void GPUContextWebGPU::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCountX, uint32 threadGroupCountY, uint32 threadGroupCountZ)
{
OnDispatch(shader);
MISSING_CODE("GPUContextWebGPU::Dispatch");
auto computePass = OnDispatch(shader);
wgpuComputePassEncoderDispatchWorkgroups(computePass, threadGroupCountX, threadGroupCountY, threadGroupCountZ);
EndComputePass(computePass);
RENDER_STAT_DISPATCH_CALL();
}
@@ -377,8 +378,9 @@ void GPUContextWebGPU::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* b
{
ASSERT(bufferForArgs && EnumHasAnyFlags(bufferForArgs->GetFlags(), GPUBufferFlags::Argument));
auto bufferForArgsWebGPU = (GPUBufferWebGPU*)bufferForArgs;
OnDispatch(shader);
MISSING_CODE("GPUContextWebGPU::Dispatch");
auto computePass = OnDispatch(shader);
wgpuComputePassEncoderDispatchWorkgroupsIndirect(computePass, bufferForArgsWebGPU->Buffer, offsetForArgs);
EndComputePass(computePass);
RENDER_STAT_DISPATCH_CALL();
}
@@ -865,7 +867,7 @@ void GPUContextWebGPU::OnDrawCall()
if (_pipelineDirty)
{
_pipelineDirty = false;
WGPURenderPipeline pipeline = _pipelineState ? _pipelineState->GetPipeline(_pipelineKey, _shaderResources) : nullptr;
WGPURenderPipeline pipeline = _pipelineState ? _pipelineState->GetPipeline(_pipelineKey, { _shaderResources }) : nullptr;
wgpuRenderPassEncoderSetPipeline(_renderPass, pipeline);
RENDER_STAT_PS_STATE_CHANGE();
@@ -898,9 +900,38 @@ void GPUContextWebGPU::OnDrawCall()
}
}
void GPUContextWebGPU::OnDispatch(GPUShaderProgramCS* shader)
WGPUComputePassEncoder GPUContextWebGPU::OnDispatch(GPUShaderProgramCS* shader)
{
// TODO: add compute shaders support
// End existing render pass (if any)
if (_renderPass)
EndRenderPass();
// Flush pending clears
FlushState();
// Start a new compute pass
WGPUComputePassDescriptor computePassDesc = WGPU_COMPUTE_PASS_DESCRIPTOR_INIT;
FlushTimestamps(1);
if (_pendingTimestampWrites.HasItems())
computePassDesc.timestampWrites = &_pendingTimestampWrites.Last();
_pendingTimestampWrites.Clear();
auto computePass = wgpuCommandEncoderBeginComputePass(Encoder, &computePassDesc);
ASSERT(computePass);
// Set pipeline
GPUPipelineStateWebGPU::BindGroupKey key;
auto shaderWebGPU = (GPUShaderProgramCSWebGPU*)shader;
WGPUComputePipeline pipeline = shaderWebGPU->GetPipeline(_device->Device, { _shaderResources }, key.Layout);
wgpuComputePassEncoderSetPipeline(computePass, pipeline);
// Set bind group
uint32 dynamicOffsets[DynamicOffsetsMax];
uint32 dynamicOffsetsCount = 0;
BuildBindGroup(0, shaderWebGPU->DescriptorInfo, key, dynamicOffsets, dynamicOffsetsCount);
WGPUBindGroup bindGroup = shaderWebGPU->GetBindGroup(_device->Device, key);
wgpuComputePassEncoderSetBindGroup(computePass, 0, bindGroup, dynamicOffsetsCount, dynamicOffsets);
return computePass;
}
void GPUContextWebGPU::EndRenderPass()
@@ -910,6 +941,13 @@ void GPUContextWebGPU::EndRenderPass()
_renderPass = nullptr;
}
// Ends recording of the given compute pass and releases its encoder handle.
// The handle is taken by value, so the caller's copy is not cleared here and must not be used after this call.
void GPUContextWebGPU::EndComputePass(WGPUComputePassEncoder computePass)
{
    // Finish the pass
    wgpuComputePassEncoderEnd(computePass);

    // Release the encoder handle
    wgpuComputePassEncoderRelease(computePass);
}
void GPUContextWebGPU::FlushRenderPass()
{
_renderPassDirty = false;
@@ -1033,138 +1071,17 @@ void GPUContextWebGPU::FlushBindGroup()
// Each shader stage (Vertex, Pixel) uses a separate bind group
GPUPipelineStateWebGPU::BindGroupKey key;
for (int32 groupIndex = 0; groupIndex < GPUBindGroupsWebGPU::GraphicsMax; groupIndex++)
uint32 dynamicOffsets[DynamicOffsetsMax];
for (uint32 groupIndex = 0; groupIndex < GPUBindGroupsWebGPU::GraphicsMax; groupIndex++)
{
auto descriptors = _pipelineState->BindGroupDescriptors[groupIndex];
key.Layout = _pipelineState->BindGroupLayouts[groupIndex];
if (!descriptors || !key.Layout)
continue;
// Build descriptors for the bind group
auto entriesCount = descriptors->DescriptorTypesCount;
uint32 dynamicOffsets[4];
// Build descriptors
uint32 dynamicOffsetsCount = 0;
static_assert(ARRAY_COUNT(key.Entries) == SpirvShaderDescriptorInfo::MaxDescriptors, "Invalid size of bind group entries array.");
static_assert(ARRAY_COUNT(key.Versions) == SpirvShaderDescriptorInfo::MaxDescriptors, "Invalid size of bind group versions array.");
key.EntriesCount = entriesCount;
auto entriesPtr = key.Entries;
auto versionsPtr = key.Versions;
Platform::MemoryClear(entriesPtr, entriesCount * sizeof(WGPUBindGroupEntry));
Platform::MemoryClear(versionsPtr, ((entriesCount + 3) & ~0x3) * sizeof(uint8));
for (int32 index = 0; index < entriesCount; index++)
{
auto& descriptor = descriptors->DescriptorTypes[index];
auto& entry = entriesPtr[index];
entry.binding = descriptor.Binding;
entry.size = WGPU_WHOLE_SIZE;
switch (descriptor.DescriptorType)
{
case VK_DESCRIPTOR_TYPE_SAMPLER:
{
GPUSamplerWebGPU* sampler = _samplers[descriptor.Slot];
if (!sampler)
sampler = _device->DefaultSamplers[0]; // Fallback
entry.sampler = sampler->Sampler;
break;
}
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
{
ASSERT_LOW_LAYER(descriptor.BindingType == SpirvShaderResourceBindingType::SRV);
auto view = _shaderResources[descriptor.Slot];
auto ptr = view ? (GPUResourceViewPtrWebGPU*)view->GetNativePtr() : nullptr;
if (ptr && ptr->TextureView)
{
entry.textureView = ptr->TextureView->View;
versionsPtr[index] = ptr->Version;
}
if (!entry.textureView)
{
// Fallback
auto defaultTexture = _device->DefaultTexture[(int32)descriptor.ResourceType];
if (!defaultTexture)
{
LOG(Error, "Missing default resource {} at slot {} of binding space {}", (int32)descriptor.ResourceType, descriptor.Slot, (int32)descriptor.BindingType);
CRASH;
}
switch (descriptor.ResourceType)
{
case SpirvShaderResourceType::Texture3D:
view = defaultTexture->ViewVolume();
break;
case SpirvShaderResourceType::Texture1DArray:
case SpirvShaderResourceType::Texture2DArray:
view = defaultTexture->ViewArray();
break;
default:
view = defaultTexture->View(0);
break;
}
ptr = (GPUResourceViewPtrWebGPU*)view->GetNativePtr();
entry.textureView = ptr->TextureView->View;
}
break;
}
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
{
ASSERT(descriptor.Slot < _resourceTableSizes[(int32)descriptor.BindingType]);
GPUResourceView* view = _resourceTables[(int32)descriptor.BindingType][descriptor.Slot];
auto ptr = view ? (GPUResourceViewPtrWebGPU*)view->GetNativePtr() : nullptr;
if (ptr && ptr->BufferView)
{
entry.buffer = ptr->BufferView->Buffer;
entry.size = ((GPUBufferWebGPU*)view->GetParent())->GetSize();
versionsPtr[index] = (uint64)ptr->Version;
}
if (!entry.buffer)
entry.buffer = _device->DefaultBuffer; // Fallback
break;
}
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
{
GPUConstantBufferWebGPU* uniform = _constantBuffers[descriptor.Slot];
if (uniform && uniform->Allocation.Buffer)
{
entry.buffer = uniform->Allocation.Buffer;
entry.size = uniform->AllocationSize;
if (descriptor.DescriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
entry.offset = uniform->Allocation.Offset;
else
dynamicOffsets[dynamicOffsetsCount++] = uniform->Allocation.Offset;
}
else
LOG(Fatal, "Missing constant buffer at slot {}", descriptor.Slot);
break;
}
default:
#if GPU_ENABLE_DIAGNOSTICS
LOG(Fatal, "Unknown descriptor type: {} used as {}", (uint32)descriptor.DescriptorType, (uint32)descriptor.BindingType);
#else
CRASH;
#endif
return;
}
}
#if BUILD_DEBUG
// Validate
for (int32 i = 0; i < entriesCount; i++)
{
auto& e = entriesPtr[i];
if ((e.buffer != nullptr) + (e.sampler != nullptr) + (e.textureView != nullptr) != 1)
{
LOG(Error, "Invalid binding in group {} at index {} ({})", groupIndex, i, _pipelineState->GetName());
LOG(Error, " > sampler: {}", (uint32)e.sampler);
LOG(Error, " > textureView: {}", (uint32)e.textureView);
LOG(Error, " > buffer: {}", (uint32)e.buffer);
}
}
ASSERT(dynamicOffsetsCount <= ARRAY_COUNT(dynamicOffsets));
#endif
BuildBindGroup(groupIndex, *descriptors, key, dynamicOffsets, dynamicOffsetsCount);
// Bind group
WGPUBindGroup bindGroup = _pipelineState->GetBindGroup(key);
@@ -1197,4 +1114,131 @@ void GPUContextWebGPU::FlushTimestamps(int32 skipLast)
}
}
// Fills the bind group key (entries and resource versions) for a single bind group using resources currently bound to the context.
// Reads the bound samplers, shader resources, resource tables and constant buffers; missing textures, buffers and samplers are replaced with device defaults.
// Shared by graphics (FlushBindGroup) and compute (OnDispatch) paths, thus it must not assume that a graphics pipeline state is bound.
// @param groupIndex The index of the bind group (used for diagnostics only).
// @param descriptors The shader descriptors that define the bind group contents.
// @param key The bind group key to fill (EntriesCount, Entries and Versions are overwritten; Layout is left untouched).
// @param dynamicOffsets The output array for offsets of dynamic uniform buffers.
// @param dynamicOffsetsCount The amount of used dynamic offsets. New offsets are appended (counter is not reset here).
void GPUContextWebGPU::BuildBindGroup(uint32 groupIndex, const SpirvShaderDescriptorInfo& descriptors, GPUPipelineStateWebGPU::BindGroupKey& key, uint32 dynamicOffsets[DynamicOffsetsMax], uint32& dynamicOffsetsCount)
{
    // Build descriptors for the bind group
    auto entriesCount = descriptors.DescriptorTypesCount;
    static_assert(ARRAY_COUNT(key.Entries) == SpirvShaderDescriptorInfo::MaxDescriptors, "Invalid size of bind group entries array.");
    static_assert(ARRAY_COUNT(key.Versions) == SpirvShaderDescriptorInfo::MaxDescriptors, "Invalid size of bind group versions array.");
    key.EntriesCount = entriesCount;
    auto entriesPtr = key.Entries;
    auto versionsPtr = key.Versions;
    Platform::MemoryClear(entriesPtr, entriesCount * sizeof(WGPUBindGroupEntry));
    Platform::MemoryClear(versionsPtr, ((entriesCount + 3) & ~0x3) * sizeof(uint8)); // Clear in multiples of 4 bytes
    for (int32 index = 0; index < entriesCount; index++)
    {
        auto& descriptor = descriptors.DescriptorTypes[index];
        auto& entry = entriesPtr[index];
        entry.binding = descriptor.Binding;
        entry.size = WGPU_WHOLE_SIZE;
        switch (descriptor.DescriptorType)
        {
        case VK_DESCRIPTOR_TYPE_SAMPLER:
        {
            GPUSamplerWebGPU* sampler = _samplers[descriptor.Slot];
            if (!sampler)
                sampler = _device->DefaultSamplers[0]; // Fallback
            entry.sampler = sampler->Sampler;
            break;
        }
        case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
        case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
        {
            ASSERT_LOW_LAYER(descriptor.BindingType == SpirvShaderResourceBindingType::SRV);
            auto view = _shaderResources[descriptor.Slot];
            auto ptr = view ? (GPUResourceViewPtrWebGPU*)view->GetNativePtr() : nullptr;
            if (ptr && ptr->TextureView)
            {
                entry.textureView = ptr->TextureView->View;
                versionsPtr[index] = ptr->Version;
            }
            if (!entry.textureView)
            {
                // Fallback
                auto defaultTexture = _device->DefaultTexture[(int32)descriptor.ResourceType];
                if (!defaultTexture)
                {
                    LOG(Error, "Missing default resource {} at slot {} of binding space {}", (int32)descriptor.ResourceType, descriptor.Slot, (int32)descriptor.BindingType);
                    CRASH;
                }
                switch (descriptor.ResourceType)
                {
                case SpirvShaderResourceType::Texture3D:
                    view = defaultTexture->ViewVolume();
                    break;
                case SpirvShaderResourceType::Texture1DArray:
                case SpirvShaderResourceType::Texture2DArray:
                    view = defaultTexture->ViewArray();
                    break;
                default:
                    view = defaultTexture->View(0);
                    break;
                }
                ptr = (GPUResourceViewPtrWebGPU*)view->GetNativePtr();
                entry.textureView = ptr->TextureView->View;
            }
            break;
        }
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
        case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
        case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
        {
            ASSERT(descriptor.Slot < _resourceTableSizes[(int32)descriptor.BindingType]);
            GPUResourceView* view = _resourceTables[(int32)descriptor.BindingType][descriptor.Slot];
            auto ptr = view ? (GPUResourceViewPtrWebGPU*)view->GetNativePtr() : nullptr;
            if (ptr && ptr->BufferView)
            {
                entry.buffer = ptr->BufferView->Buffer;
                entry.size = ((GPUBufferWebGPU*)view->GetParent())->GetSize();
                versionsPtr[index] = (uint64)ptr->Version;
            }
            if (!entry.buffer)
                entry.buffer = _device->DefaultBuffer; // Fallback
            break;
        }
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
        {
            GPUConstantBufferWebGPU* uniform = _constantBuffers[descriptor.Slot];
            if (uniform && uniform->Allocation.Buffer)
            {
                entry.buffer = uniform->Allocation.Buffer;
                entry.size = uniform->AllocationSize;
                if (descriptor.DescriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
                    entry.offset = uniform->Allocation.Offset;
                else
                {
                    // Validate before the write so the caller-provided array is never overflown
                    ASSERT(dynamicOffsetsCount < (uint32)DynamicOffsetsMax);
                    dynamicOffsets[dynamicOffsetsCount++] = uniform->Allocation.Offset;
                }
            }
            else
                LOG(Fatal, "Missing constant buffer at slot {}", descriptor.Slot);
            break;
        }
        default:
#if GPU_ENABLE_DIAGNOSTICS
            LOG(Fatal, "Unknown descriptor type: {} used as {}", (uint32)descriptor.DescriptorType, (uint32)descriptor.BindingType);
#else
            CRASH;
#endif
            return;
        }
    }

#if BUILD_DEBUG
    // Validate (each entry has to reference exactly one resource)
    for (int32 i = 0; i < entriesCount; i++)
    {
        auto& e = entriesPtr[i];
        if ((e.buffer != nullptr) + (e.sampler != nullptr) + (e.textureView != nullptr) != 1)
        {
            // Graphics pipeline state might be missing when building a bind group for the compute shader
            if (_pipelineState)
            {
                LOG(Error, "Invalid binding in group {} at index {} ({})", groupIndex, i, _pipelineState->GetName());
            }
            else
            {
                LOG(Error, "Invalid binding in group {} at index {}", groupIndex, i);
            }
            LOG(Error, " > sampler: {}", (uint32)e.sampler);
            LOG(Error, " > textureView: {}", (uint32)e.textureView);
            LOG(Error, " > buffer: {}", (uint32)e.buffer);
        }
    }
    ASSERT(dynamicOffsetsCount <= DynamicOffsetsMax);
#endif
}
#endif

View File

@@ -93,11 +93,14 @@ private:
bool FindClear(const GPUTextureViewWebGPU* view, PendingClear& clear);
void ManualClear(const PendingClear& clear);
void OnDrawCall();
void OnDispatch(GPUShaderProgramCS* shader);
WGPUComputePassEncoder OnDispatch(GPUShaderProgramCS* shader);
void EndRenderPass();
void EndComputePass(WGPUComputePassEncoder computePass);
void FlushRenderPass();
void FlushBindGroup();
void FlushTimestamps(int32 skipLast = 0);
constexpr static int32 DynamicOffsetsMax = 4;
void BuildBindGroup(uint32 groupIndex, const SpirvShaderDescriptorInfo& descriptors, GPUPipelineStateWebGPU::BindGroupKey& key, uint32 dynamicOffsets[DynamicOffsetsMax], uint32& dynamicOffsetsCount);
public:
// [GPUContext]

View File

@@ -294,6 +294,14 @@ bool GPUDeviceWebGPU::Init()
{
MinUniformBufferOffsetAlignment = limits.minUniformBufferOffsetAlignment;
TimestampQuery = features.Contains(WGPUFeatureName_TimestampQuery);
Limits.HasCompute =
limits.maxStorageBuffersPerShaderStage >= GPU_MAX_UA_BINDED &&
limits.maxStorageTexturesPerShaderStage >= GPU_MAX_UA_BINDED &&
limits.maxComputeWorkgroupsPerDimension >= GPU_MAX_CS_DISPATCH_THREAD_GROUPS &&
limits.maxComputeWorkgroupSizeX >= 1024 &&
limits.maxComputeWorkgroupSizeY >= 256 &&
limits.maxComputeWorkgroupSizeZ >= 8 &&
limits.maxBufferSize >= 64 * 1024 * 1024; // 64MB
Limits.HasInstancing = true;
Limits.HasDrawIndirect = true;
Limits.HasDepthAsSRV = true;

View File

@@ -142,6 +142,229 @@ WGPUBlendComponent ToBlendComponent(BlendingMode::Operation blendOp, BlendingMod
return result;
}
typedef Array<WGPUBindGroupLayoutEntry, InlinedAllocation<16>> BindGroupEntries;
// Creates a WebGPU bind group layout that matches the shader descriptors of a single bind group.
// @param device The WebGPU device used to create the layout.
// @param bindings The context bindings; currently bound shader resources are inspected to pick the sample type of sampled textures.
// @param groupIndex The bind group index. For graphics, it selects the stage visibility (0 is Vertex, anything else is Fragment).
// @param descriptors The shader descriptors to convert into the layout entries.
// @param entries The scratch array for layout entries. Has to contain at least descriptors.DescriptorTypesCount items (contents are overwritten).
// @param debugName The name used when reporting an unknown descriptor type.
// @param log True to log the layout contents (has effect only when WEBGPU_LOG_PSO is enabled).
// @param compute True to make all entries visible to the Compute stage (groupIndex is then not used for visibility).
// @returns The created layout, or null when there are no descriptors or an unknown descriptor type was found.
WGPUBindGroupLayout CreateBindGroupLayout(WGPUDevice device, const GPUContextBindingsWebGPU& bindings, int32 groupIndex, const SpirvShaderDescriptorInfo& descriptors, BindGroupEntries& entries, const StringAnsiView& debugName, bool log, bool compute = false)
{
int32 entriesCount = descriptors.DescriptorTypesCount;
// Nothing to bind, so no layout is needed
if (entriesCount == 0)
return nullptr;
auto entriesPtr = entries.Get();
ASSERT_LOW_LAYER(entries.Count() >= entriesCount);
// Zero the entries so any field that is not set below stays at 0
Platform::MemoryClear(entries.Get(), sizeof(WGPUBindGroupLayoutEntry) * entriesCount);
// All entries in the group share the same single stage visibility
auto visibility = compute ? WGPUShaderStage_Compute : (groupIndex == 0 ? WGPUShaderStage_Vertex : WGPUShaderStage_Fragment);
#if WEBGPU_LOG_PSO
if (log)
LOG(Info, " > group {} - {}", groupIndex, compute ? TEXT("Compute") : (groupIndex == 0 ? TEXT("Vertex") : TEXT("Fragment")));
// Text reused for both sampler binding type and texture sample type when logging
const Char* samplerType = TEXT("?");
#endif
for (int32 index = 0; index < entriesCount; index++)
{
auto& descriptor = descriptors.DescriptorTypes[index];
auto& entry = entriesPtr[index];
entry.binding = descriptor.Binding;
entry.bindingArraySize = descriptor.Count;
entry.visibility = visibility;
switch (descriptor.DescriptorType)
{
case VK_DESCRIPTOR_TYPE_SAMPLER:
entry.sampler.type = WGPUSamplerBindingType_Undefined;
if (descriptor.Slot == 4 || descriptor.Slot == 5) // Hack for ShadowSampler and ShadowSamplerLinear (this could get the bound samplers table just like for shaderResources)
entry.sampler.type = WGPUSamplerBindingType_Comparison;
#if WEBGPU_LOG_PSO
switch (entry.sampler.type)
{
case WGPUSamplerBindingType_BindingNotUsed:
samplerType = TEXT("BindingNotUsed");
break;
case WGPUSamplerBindingType_Undefined:
samplerType = TEXT("Undefined");
break;
case WGPUSamplerBindingType_Filtering:
samplerType = TEXT("Filtering");
break;
case WGPUSamplerBindingType_NonFiltering:
samplerType = TEXT("NonFiltering");
break;
case WGPUSamplerBindingType_Comparison:
samplerType = TEXT("Comparison");
break;
}
if (log)
LOG(Info, " > [{}] sampler ({})", entry.binding, samplerType);
#endif
break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
entry.texture.sampleType = WGPUTextureSampleType_Undefined;
if (bindings.ShaderResources[descriptor.Slot])
{
// Hack: take the sample type directly from the currently bound view. This fixes incorrect Depth Buffer reading, which allows only manual Load when UnfilterableFloat is used (see SAMPLE_RT_DEPTH)
auto ptr = (GPUResourceViewPtrWebGPU*)bindings.ShaderResources[descriptor.Slot]->GetNativePtr();
if (ptr && ptr->TextureView)
entry.texture.sampleType = ptr->TextureView->SampleType;
}
#if WEBGPU_LOG_PSO
if (log)
{
switch (entry.texture.sampleType)
{
case WGPUTextureSampleType_BindingNotUsed:
samplerType = TEXT("BindingNotUsed");
break;
case WGPUTextureSampleType_Undefined:
samplerType = TEXT("Undefined");
break;
case WGPUTextureSampleType_Float:
samplerType = TEXT("Float");
break;
case WGPUTextureSampleType_UnfilterableFloat:
samplerType = TEXT("UnfilterableFloat");
break;
case WGPUTextureSampleType_Depth:
samplerType = TEXT("Depth");
break;
case WGPUTextureSampleType_Sint:
samplerType = TEXT("Sint");
break;
case WGPUTextureSampleType_Uint:
samplerType = TEXT("Uint");
break;
}
switch (descriptor.ResourceType)
{
case SpirvShaderResourceType::Texture1D:
LOG(Info, " > [{}] texture 1D ({})", entry.binding, samplerType);
break;
case SpirvShaderResourceType::Texture2D:
LOG(Info, " > [{}] texture 2D ({})", entry.binding, samplerType);
break;
case SpirvShaderResourceType::Texture3D:
LOG(Info, " > [{}] texture 3D ({})", entry.binding, samplerType);
break;
case SpirvShaderResourceType::TextureCube:
LOG(Info, " > [{}] texture Cube ({})", entry.binding, samplerType);
break;
case SpirvShaderResourceType::Texture2DArray:
LOG(Info, " > [{}] texture 2D array ({})", entry.binding, samplerType);
break;
}
}
#endif
// Map the shader resource type onto the view dimension (types not listed here leave the zeroed value)
switch (descriptor.ResourceType)
{
case SpirvShaderResourceType::Texture1D:
entry.texture.viewDimension = WGPUTextureViewDimension_1D;
break;
case SpirvShaderResourceType::Texture2D:
entry.texture.viewDimension = WGPUTextureViewDimension_2D;
break;
case SpirvShaderResourceType::Texture3D:
entry.texture.viewDimension = WGPUTextureViewDimension_3D;
break;
case SpirvShaderResourceType::TextureCube:
entry.texture.viewDimension = WGPUTextureViewDimension_Cube;
break;
case SpirvShaderResourceType::Texture1DArray:
CRASH; // Not supported TODO: add error at compile time (in ShaderCompilerWebGPU::Write)
break;
case SpirvShaderResourceType::Texture2DArray:
entry.texture.viewDimension = WGPUTextureViewDimension_2DArray;
break;
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
entry.buffer.hasDynamicOffset = true;
// Intentional fall-through: dynamic variant shares the rest of the setup with the regular storage buffer
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
// Buffers bound as SRV are read-only for the shader, any other binding type gets read-write storage access
if (descriptor.BindingType == SpirvShaderResourceBindingType::SRV)
entry.buffer.type = WGPUBufferBindingType_ReadOnlyStorage;
else
entry.buffer.type = WGPUBufferBindingType_Storage;
#if WEBGPU_LOG_PSO
if (log)
LOG(Info, " > [{}] storage buffer (read-only = {}, dynamic = {})", entry.binding, entry.buffer.type == WGPUBufferBindingType_ReadOnlyStorage, entry.buffer.hasDynamicOffset);
#endif
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
entry.buffer.hasDynamicOffset = true;
// Intentional fall-through: dynamic variant shares the rest of the setup with the regular uniform buffer
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
entry.buffer.type = WGPUBufferBindingType_Uniform;
#if WEBGPU_LOG_PSO
if (log)
LOG(Info, " > [{}] uniform buffer (dynamic = {})", entry.binding, entry.buffer.hasDynamicOffset);
#endif
break;
default:
// NOTE(review): storage images and texel buffers are not handled here and end up in this branch - confirm that the shader compiler never emits them for WebGPU
#if GPU_ENABLE_DIAGNOSTICS
LOG(Fatal, "Unknown descriptor type: {} used as {} in '{}'", (uint32)descriptor.DescriptorType, (uint32)descriptor.BindingType, String(debugName));
#else
CRASH;
#endif
return nullptr;
}
}
// Create a bind group layout
WGPUBindGroupLayoutDescriptor bindGroupLayoutDesc = WGPU_BIND_GROUP_LAYOUT_DESCRIPTOR_INIT;
bindGroupLayoutDesc.entryCount = entriesCount;
bindGroupLayoutDesc.entries = entriesPtr;
return wgpuDeviceCreateBindGroupLayout(device, &bindGroupLayoutDesc);
}
// Gets the compute pipeline of this shader. Creates it (together with the bind group layout) on the first use and caches it.
// @param device The WebGPU device used to create the objects.
// @param bindings The context bindings passed to the bind group layout creation.
// @param resultBindGroupLayout Receives the bind group layout of the shader (group 0). Can be null if the shader has no descriptors.
// @returns The compute pipeline, or null if the pipeline layout or the pipeline creation failed.
WGPUComputePipeline GPUShaderProgramCSWebGPU::GetPipeline(WGPUDevice device, const GPUContextBindingsWebGPU& bindings, WGPUBindGroupLayout& resultBindGroupLayout)
{
    resultBindGroupLayout = _bindGroupLayout;
    if (_pipeline)
        return _pipeline;
    PROFILE_CPU();
    ZoneText(*_name, _name.Length());
#if WEBGPU_LOG_PSO
#ifdef WEBGPU_LOG_PSO_NAME
    const bool log = _name.Contains(WEBGPU_LOG_PSO_NAME);
#else
    const bool log = true;
#endif
    if (log)
        LOG(Info, "[WebGPU] GetPipeline: '{}'", String(_name));
#else
    // Keep the flag defined when logging is compiled out as it's passed to the layout creation below
    const bool log = false;
#endif

    // Create layout bind group
    BindGroupEntries entries;
    entries.Resize(DescriptorInfo.DescriptorTypesCount);
    _bindGroupLayout = CreateBindGroupLayout(device, bindings, 0, DescriptorInfo, entries, _name, log, true);
    resultBindGroupLayout = _bindGroupLayout;

    // Create the pipeline layout
    WGPUPipelineLayoutDescriptor layoutDesc = WGPU_PIPELINE_LAYOUT_DESCRIPTOR_INIT;
#if GPU_ENABLE_RESOURCE_NAMING
    layoutDesc.label = { _name.Get(), (size_t)_name.Length() };
#endif
    layoutDesc.bindGroupLayoutCount = 1;
    layoutDesc.bindGroupLayouts = &_bindGroupLayout;
    auto layout = wgpuDeviceCreatePipelineLayout(device, &layoutDesc);
    if (!layout)
    {
        LOG(Error, "wgpuDeviceCreatePipelineLayout failed");
        return nullptr;
    }

    // Create pipeline
    WGPUComputePipelineDescriptor desc = WGPU_COMPUTE_PIPELINE_DESCRIPTOR_INIT;
#if GPU_ENABLE_RESOURCE_NAMING
    desc.label = layoutDesc.label;
#endif
    desc.layout = layout;
    desc.compute.module = ShaderModule;
    _pipeline = wgpuDeviceCreateComputePipeline(device, &desc);

    // Pipeline layout is not used after this point (WebGPU objects are ref-counted so the pipeline holds what it needs)
    wgpuPipelineLayoutRelease(layout);

    if (!_pipeline)
    {
        // Always report the failure (shader name is available regardless of the resource naming option)
        LOG(Error, "wgpuDeviceCreateComputePipeline failed for {}", String(_name));
    }
    return _pipeline;
}
void GPUPipelineStateWebGPU::OnReleaseGPU()
{
VS = nullptr;
@@ -176,12 +399,12 @@ uint32 GetHash(const GPUPipelineStateWebGPU::PipelineKey& key)
return hash;
}
uint32 GetHash(const GPUPipelineStateWebGPU::BindGroupKey& key)
uint32 GetHash(const GPUBindGroupKeyWebGPU& key)
{
return key.Hash;
}
bool GPUPipelineStateWebGPU::BindGroupKey::operator==(const BindGroupKey& other) const
bool GPUBindGroupKeyWebGPU::operator==(const GPUBindGroupKeyWebGPU& other) const
{
return Hash == other.Hash
&& Layout == other.Layout
@@ -190,28 +413,132 @@ bool GPUPipelineStateWebGPU::BindGroupKey::operator==(const BindGroupKey& other)
&& Platform::MemoryCompare(&Versions, &other.Versions, EntriesCount * sizeof(uint8)) == 0;
}
WGPURenderPipeline GPUPipelineStateWebGPU::GetPipeline(const PipelineKey& key, GPUResourceView* shaderResources[GPU_MAX_SR_BINDED])
WGPUBindGroup GPUBindGroupCacheWebGPU::Get(WGPUDevice device, GPUBindGroupKeyWebGPU& key, const StringAnsiView& debugName, uint64 gcFrames)
{
WGPURenderPipeline pipeline;
if (_pipelines.TryGet(key, pipeline))
return pipeline;
PROFILE_CPU();
PROFILE_MEM(GraphicsCommands);
#if GPU_ENABLE_RESOURCE_NAMING
ZoneText(_debugName.Get(), _debugName.Count() - 1);
#endif
#if WEBGPU_LOG_PSO
LOG(Info, "[WebGPU] GetPipeline: '{}'", String(_debugName.Get(), _debugName.Count() - 1));
#if WEBGPU_LOG_BIND_GROUPS
#ifdef WEBGPU_LOG_PSO_NAME
const bool log = StringAnsiView(_debugName.Get(), _debugName.Count() - 1).Contains(WEBGPU_LOG_PSO_NAME);
const bool log = debugName.Contains(WEBGPU_LOG_PSO_NAME);
#else
const bool log = true;
#endif
#endif
// Generate a hash for the key
key.LastFrameUsed = Engine::FrameCount;
key.Hash = Crc::MemCrc32(&key.Entries, key.EntriesCount * sizeof(WGPUBindGroupEntry));
CombineHash(key.Hash, GetHash(key.EntriesCount));
CombineHash(key.Hash, GetHash(key.Layout));
CombineHash(key.Hash, Crc::MemCrc32(&key.Versions, key.EntriesCount * sizeof(uint8)));
// Lookup for existing bind group
WGPUBindGroup bindGroup;
auto found = _bindGroups.Find(key);
if (found.IsNotEnd())
{
// Get cached bind group and update the last usage frame
bindGroup = found->Value;
found->Key.LastFrameUsed = key.LastFrameUsed;
// Periodically remove old bind groups (unused for some time)
if (key.LastFrameUsed - _lastFrameBindGroupsGC > gcFrames * 2)
{
_lastFrameBindGroupsGC = key.LastFrameUsed;
int32 freed = 0;
for (auto it = _bindGroups.Begin(); it.IsNotEnd(); ++it)
{
if (key.LastFrameUsed - it->Key.LastFrameUsed > gcFrames)
{
freed++;
wgpuBindGroupRelease(it->Value);
_bindGroups.Remove(it);
}
}
#if WEBGPU_LOG_BIND_GROUPS
if (freed > 0 && log)
LOG(Info, "[WebGPU] Removed {} old entries from '{}'", freed, String(debugName));
#endif
}
return bindGroup;
}
PROFILE_CPU();
PROFILE_MEM(GraphicsShaders);
#if GPU_ENABLE_RESOURCE_NAMING
ZoneText(debugName.Get(), debugName.Length());
#endif
#if WEBGPU_LOG_BIND_GROUPS
if (log)
LOG(Info, "[WebGPU] GetBindGroup: '{}', hash: {}", String(debugName), key.Hash);
#endif
// Build description
WGPUBindGroupDescriptor desc = WGPU_BIND_GROUP_DESCRIPTOR_INIT;
#if GPU_ENABLE_RESOURCE_NAMING
desc.label = { debugName.Get(), (size_t)debugName.Length() };
#endif
desc.layout = key.Layout;
desc.entryCount = key.EntriesCount;
desc.entries = key.Entries;
// Create object
bindGroup = wgpuDeviceCreateBindGroup(device, &desc);
if (!bindGroup)
{
#if GPU_ENABLE_RESOURCE_NAMING
LOG(Error, "wgpuDeviceCreateBindGroup failed for {}", String(debugName));
#endif
return nullptr;
}
#if WEBGPU_LOG_BIND_GROUPS
// Debug detection of hash collisions
int32 collisions = 0, equalLayout = 0, equalEntries = 0, equalVersions = 0;
for (auto& e : _bindGroups)
{
auto& other = e.Key;
if (key.Hash == other.Hash)
{
collisions++;
if (key.Layout == other.Layout)
equalLayout++;
if (key.EntriesCount == other.EntriesCount && Platform::MemoryCompare(&key.Entries, &other.Entries, key.EntriesCount * sizeof(WGPUBindGroupEntry)) == 0)
equalEntries++;
if (key.EntriesCount == other.EntriesCount && Platform::MemoryCompare(&key.Versions, &other.Versions, key.EntriesCount * sizeof(uint8)) == 0)
equalVersions++;
}
}
if (collisions > 1 && log)
LOG(Error, "> Hash collision! {}/{} (capacity: {}), equalLayout: {}, equalEntries: {}, equalVersions: {}", collisions, _bindGroups.Count(), _bindGroups.Capacity(), equalLayout, equalEntries, equalVersions);
#endif
// Cache it
_bindGroups.Add(key, bindGroup);
return bindGroup;
}
WGPURenderPipeline GPUPipelineStateWebGPU::GetPipeline(const PipelineKey& key, const GPUContextBindingsWebGPU& bindings)
{
WGPURenderPipeline pipeline;
if (_pipelines.TryGet(key, pipeline))
return pipeline;
PROFILE_CPU();
PROFILE_MEM(GraphicsShaders);
#if GPU_ENABLE_RESOURCE_NAMING
ZoneText(_debugName.Get(), _debugName.Count() - 1);
#endif
#if WEBGPU_LOG_PSO
#ifdef WEBGPU_LOG_PSO_NAME
const bool log = StringAnsiView(_debugName.Get(), _debugName.Count() - 1).Contains(WEBGPU_LOG_PSO_NAME);
#else
const bool log = true;
#endif
if (log)
LOG(Info, "[WebGPU] GetPipeline: '{}'", String(_debugName.Get(), _debugName.Count() - 1));
#endif
// Lazy-init layout (cannot do it during Init as texture samplers that access eg. depth need to explicitly use UnfilterableFloat)
if (!PipelineDesc.layout)
InitLayout(shaderResources);
InitLayout(bindings);
// Build final pipeline description
_depthStencilDesc.format = (WGPUTextureFormat)key.DepthStencilFormat;
@@ -295,107 +622,16 @@ WGPURenderPipeline GPUPipelineStateWebGPU::GetPipeline(const PipelineKey& key, G
return pipeline;
}
WGPUBindGroup GPUPipelineStateWebGPU::GetBindGroup(BindGroupKey& key)
void GPUPipelineStateWebGPU::InitLayout(const GPUContextBindingsWebGPU& bindings)
{
// Generate a hash for the key
key.LastFrameUsed = Engine::FrameCount;
key.Hash = Crc::MemCrc32(&key.Entries, key.EntriesCount * sizeof(WGPUBindGroupEntry));
CombineHash(key.Hash, GetHash(key.EntriesCount));
CombineHash(key.Hash, GetHash(key.Layout));
CombineHash(key.Hash, Crc::MemCrc32(&key.Versions, key.EntriesCount * sizeof(uint8)));
// Lookup for existing bind group
WGPUBindGroup bindGroup;
auto found = _bindGroups.Find(key);
if (found.IsNotEnd())
{
// Get cached bind group and update the last usage frame
bindGroup = found->Value;
found->Key.LastFrameUsed = key.LastFrameUsed;
// Periodically remove old bind groups (unused for some time)
if (key.LastFrameUsed - _lastFrameBindGroupsGC > 100)
{
_lastFrameBindGroupsGC = key.LastFrameUsed;
int32 freed = 0;
for (auto it = _bindGroups.Begin(); it.IsNotEnd(); ++it)
{
if (key.LastFrameUsed - it->Key.LastFrameUsed > 50)
{
freed++;
wgpuBindGroupRelease(it->Value);
_bindGroups.Remove(it);
}
}
#if WEBGPU_LOG_BIND_GROUPS
if (freed > 0)
{
LOG(Info, "[WebGPU] Removed {} old entries from '{}'", freed, String(_debugName.Get(), _debugName.Count() - 1));
}
#endif
}
return bindGroup;
}
PROFILE_CPU();
PROFILE_MEM(GraphicsCommands);
#if GPU_ENABLE_RESOURCE_NAMING
ZoneText(_debugName.Get(), _debugName.Count() - 1);
StringAnsiView debugName(_debugName.Get(), _debugName.Count() - 1);
#else
StringAnsiView debugName;
#endif
#if WEBGPU_LOG_BIND_GROUPS
LOG(Info, "[WebGPU] GetBindGroup: '{}', hash: {}", String(_debugName.Get(), _debugName.Count() - 1), key.Hash);
#endif
// Build description
WGPUBindGroupDescriptor desc = WGPU_BIND_GROUP_DESCRIPTOR_INIT;
#if GPU_ENABLE_RESOURCE_NAMING
desc.label = PipelineDesc.label;
#endif
desc.layout = key.Layout;
desc.entryCount = key.EntriesCount;
desc.entries = key.Entries;
// Create object
bindGroup = wgpuDeviceCreateBindGroup(_device->Device, &desc);
if (!bindGroup)
{
#if GPU_ENABLE_RESOURCE_NAMING
LOG(Error, "wgpuDeviceCreateBindGroup failed for {}", String(_debugName.Get(), _debugName.Count() - 1));
#endif
return nullptr;
}
#if WEBGPU_LOG_BIND_GROUPS
// Debug detection of hash collisions
int32 collisions = 0, equalLayout = 0, equalEntries = 0, equalVersions = 0;
for (auto& e : _bindGroups)
{
auto& other = e.Key;
if (key.Hash == other.Hash)
{
collisions++;
if (key.Layout == other.Layout)
equalLayout++;
if (key.EntriesCount == other.EntriesCount && Platform::MemoryCompare(&key.Entries, &other.Entries, key.EntriesCount * sizeof(WGPUBindGroupEntry)) == 0)
equalEntries++;
if (key.EntriesCount == other.EntriesCount && Platform::MemoryCompare(&key.Versions, &other.Versions, key.EntriesCount * sizeof(uint8)) == 0)
equalVersions++;
}
}
if (collisions > 1)
LOG(Error, "> Hash collision! {}/{} (capacity: {}), equalLayout: {}, equalEntries: {}, equalVersions: {}", collisions, _bindGroups.Count(), _bindGroups.Capacity(), equalLayout, equalEntries, equalVersions);
#endif
// Cache it
_bindGroups.Add(key, bindGroup);
return bindGroup;
}
void GPUPipelineStateWebGPU::InitLayout(GPUResourceView* shaderResources[GPU_MAX_SR_BINDED])
{
#if WEBGPU_LOG_PSO
#ifdef WEBGPU_LOG_PSO_NAME
const bool log = StringAnsiView(_debugName.Get(), _debugName.Count() - 1).Contains(WEBGPU_LOG_PSO_NAME);
const bool log = debugName.Contains(WEBGPU_LOG_PSO_NAME);
#else
const bool log = true;
#endif
@@ -409,175 +645,15 @@ void GPUPipelineStateWebGPU::InitLayout(GPUResourceView* shaderResources[GPU_MAX
if (descriptors && maxEntriesCount < descriptors->DescriptorTypesCount)
maxEntriesCount = (int32)descriptors->DescriptorTypesCount;
}
Array<WGPUBindGroupLayoutEntry, InlinedAllocation<8>> entries;
BindGroupEntries entries;
entries.Resize(maxEntriesCount);
// Setup bind groups
WGPUBindGroupLayoutEntry* entriesPtr = entries.Get();
for (int32 groupIndex = 0; groupIndex < ARRAY_COUNT(BindGroupDescriptors); groupIndex++)
{
auto descriptors = BindGroupDescriptors[groupIndex];
if (!descriptors || descriptors->DescriptorTypesCount == 0)
continue;
int32 entriesCount = descriptors->DescriptorTypesCount;
Platform::MemoryClear(entries.Get(), sizeof(WGPUBindGroupLayoutEntry) * entriesCount);
auto visibility = groupIndex == 0 ? WGPUShaderStage_Vertex : WGPUShaderStage_Fragment;
#if WEBGPU_LOG_PSO
if (log)
LOG(Info, " > group {} - {}", groupIndex, groupIndex == 0 ? TEXT("Vertex") : TEXT("Fragment"));
const Char* samplerType = TEXT("?");
#endif
for (int32 index = 0; index < entriesCount; index++)
{
auto& descriptor = descriptors->DescriptorTypes[index];
auto& entry = entriesPtr[index];
entry.binding = descriptor.Binding;
entry.bindingArraySize = descriptor.Count;
entry.visibility = visibility;
switch (descriptor.DescriptorType)
{
case VK_DESCRIPTOR_TYPE_SAMPLER:
entry.sampler.type = WGPUSamplerBindingType_Undefined;
if (descriptor.Slot == 4 || descriptor.Slot == 5) // Hack for ShadowSampler and ShadowSamplerLinear (this could get binded samplers table just like for shaderResources)
entry.sampler.type = WGPUSamplerBindingType_Comparison;
#if WEBGPU_LOG_PSO
switch (entry.sampler.type)
{
case WGPUSamplerBindingType_BindingNotUsed:
samplerType = TEXT("BindingNotUsed");
break;
case WGPUSamplerBindingType_Undefined:
samplerType = TEXT("Undefined");
break;
case WGPUSamplerBindingType_Filtering:
samplerType = TEXT("Filtering");
break;
case WGPUSamplerBindingType_NonFiltering:
samplerType = TEXT("NonFiltering");
break;
case WGPUSamplerBindingType_Comparison:
samplerType = TEXT("Comparison");
break;
}
if (log)
LOG(Info, " > [{}] sampler ({})", entry.binding, samplerType);
#endif
break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
entry.texture.sampleType = WGPUTextureSampleType_Undefined;
if (shaderResources[descriptor.Slot])
{
// Hack to use the sample type directly from the view which allows to fix incorrect Depth Buffer reading that allows only manual Load when UnfilterableFloat is used (see SAMPLE_RT_DEPTH)
auto ptr = (GPUResourceViewPtrWebGPU*)shaderResources[descriptor.Slot]->GetNativePtr();
if (ptr && ptr->TextureView)
entry.texture.sampleType = ptr->TextureView->SampleType;
}
#if WEBGPU_LOG_PSO
if (log)
{
switch (entry.texture.sampleType)
{
case WGPUTextureSampleType_BindingNotUsed:
samplerType = TEXT("BindingNotUsed");
break;
case WGPUTextureSampleType_Undefined:
samplerType = TEXT("Undefined");
break;
case WGPUTextureSampleType_Float:
samplerType = TEXT("Float");
break;
case WGPUTextureSampleType_UnfilterableFloat:
samplerType = TEXT("UnfilterableFloat");
break;
case WGPUTextureSampleType_Depth:
samplerType = TEXT("Depth");
break;
case WGPUTextureSampleType_Sint:
samplerType = TEXT("Sint");
break;
case WGPUTextureSampleType_Uint:
samplerType = TEXT("Uint");
break;
}
switch (descriptor.ResourceType)
{
case SpirvShaderResourceType::Texture1D:
LOG(Info, " > [{}] texture 1D ({})", entry.binding, samplerType);
break;
case SpirvShaderResourceType::Texture2D:
LOG(Info, " > [{}] texture 2D ({})", entry.binding, samplerType);
break;
case SpirvShaderResourceType::Texture3D:
LOG(Info, " > [{}] texture 3D ({})", entry.binding, samplerType);
break;
case SpirvShaderResourceType::TextureCube:
LOG(Info, " > [{}] texture Cube ({})", entry.binding, samplerType);
break;
case SpirvShaderResourceType::Texture2DArray:
LOG(Info, " > [{}] texture 2D array ({})", entry.binding, samplerType);
break;
}
}
#endif
switch (descriptor.ResourceType)
{
case SpirvShaderResourceType::Texture1D:
entry.texture.viewDimension = WGPUTextureViewDimension_1D;
break;
case SpirvShaderResourceType::Texture2D:
entry.texture.viewDimension = WGPUTextureViewDimension_2D;
break;
case SpirvShaderResourceType::Texture3D:
entry.texture.viewDimension = WGPUTextureViewDimension_3D;
break;
case SpirvShaderResourceType::TextureCube:
entry.texture.viewDimension = WGPUTextureViewDimension_Cube;
break;
case SpirvShaderResourceType::Texture1DArray:
CRASH; // Not supported TODO: add error at compile time (in ShaderCompilerWebGPU::Write)
break;
case SpirvShaderResourceType::Texture2DArray:
entry.texture.viewDimension = WGPUTextureViewDimension_2DArray;
break;
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
entry.buffer.hasDynamicOffset = true;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
if (descriptor.BindingType == SpirvShaderResourceBindingType::SRV)
entry.buffer.type = WGPUBufferBindingType_ReadOnlyStorage;
else
entry.buffer.type = WGPUBufferBindingType_Storage;
#if WEBGPU_LOG_PSO
if (log)
LOG(Info, " > [{}] storage buffer (read-only = {}, dynamic = {})", entry.binding, entry.buffer.type == WGPUBufferBindingType_ReadOnlyStorage, entry.buffer.hasDynamicOffset);
#endif
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
entry.buffer.hasDynamicOffset = true;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
entry.buffer.type = WGPUBufferBindingType_Uniform;
#if WEBGPU_LOG_PSO
if (log)
LOG(Info, " > [{}] uniform buffer (dynamic = {})", entry.binding, entry.buffer.hasDynamicOffset);
#endif
break;
default:
#if GPU_ENABLE_DIAGNOSTICS
LOG(Fatal, "Unknown descriptor type: {} used as {} in '{}'", (uint32)descriptor.DescriptorType, (uint32)descriptor.BindingType, String(_debugName.Get(), _debugName.Count() - 1));
#else
CRASH;
#endif
return;
}
}
// Create a bind group layout
WGPUBindGroupLayoutDescriptor bindGroupLayoutDesc = WGPU_BIND_GROUP_LAYOUT_DESCRIPTOR_INIT;
bindGroupLayoutDesc.entryCount = entriesCount;
bindGroupLayoutDesc.entries = entriesPtr;
BindGroupLayouts[groupIndex] = wgpuDeviceCreateBindGroupLayout(_device->Device, &bindGroupLayoutDesc);
if (descriptors)
BindGroupLayouts[groupIndex] = CreateBindGroupLayout(_device->Device, bindings, groupIndex, *descriptors, entries, debugName, log);
}
// Create the pipeline layout
@@ -591,7 +667,6 @@ void GPUPipelineStateWebGPU::InitLayout(GPUResourceView* shaderResources[GPU_MAX
if (!PipelineDesc.layout)
{
LOG(Error, "wgpuDeviceCreatePipelineLayout failed");
return;
}
}

View File

@@ -38,17 +38,7 @@ public:
};
// Batches bind group description for the pipeline state. Used as a key for caching created bind groups.
struct BindGroupKey
{
uint32 Hash;
WGPUBindGroupLayout Layout;
mutable uint64 LastFrameUsed;
WGPUBindGroupEntry Entries[64];
uint8 EntriesCount;
uint8 Versions[64]; // Versions of descriptors used to differentiate when texture residency gets changed
bool operator==(const BindGroupKey& other) const;
};
typedef GPUBindGroupKeyWebGPU BindGroupKey;
private:
#if GPU_ENABLE_RESOURCE_NAMING
@@ -61,7 +51,7 @@ private:
WGPUVertexBufferLayout _vertexBuffers[GPU_MAX_VB_BINDED];
Dictionary<PipelineKey, WGPURenderPipeline> _pipelines;
Dictionary<BindGroupKey, WGPUBindGroup> _bindGroups;
uint64 _lastFrameBindGroupsGC = 0;
GPUBindGroupCacheWebGPU _bindGroupCache;
public:
GPUShaderProgramVSWebGPU* VS = nullptr;
@@ -78,13 +68,21 @@ public:
public:
// Gets the pipeline for the given rendering state. Pipelines are cached and reused for the same key.
WGPURenderPipeline GetPipeline(const PipelineKey& key, GPUResourceView* shaderResources[GPU_MAX_SR_BINDED]);
WGPURenderPipeline GetPipeline(const PipelineKey& key, const GPUContextBindingsWebGPU& bindings);
// Gets the bind group for the given key (unhashed). Bind groups are cached and reused for the same key.
WGPUBindGroup GetBindGroup(BindGroupKey& desc);
FORCE_INLINE WGPUBindGroup GetBindGroup(BindGroupKey& key)
{
    // The label handed to the cache stays empty unless resource naming is compiled in.
    StringAnsiView debugName;
#if GPU_ENABLE_RESOURCE_NAMING
    // The stored name is passed with its last character excluded from the length (Count() - 1).
    debugName = StringAnsiView(_debugName.Get(), _debugName.Count() - 1);
#endif
    // Lookup/creation is delegated to the per-pipeline bind group cache.
    return _bindGroupCache.Get(_device->Device, key, debugName);
}
private:
void InitLayout(GPUResourceView* shaderResources[GPU_MAX_SR_BINDED]);
void InitLayout(const GPUContextBindingsWebGPU& bindings);
public:
// [GPUPipelineState]
@@ -97,6 +95,6 @@ protected:
};
uint32 GetHash(const GPUPipelineStateWebGPU::PipelineKey& key);
uint32 GetHash(const GPUPipelineStateWebGPU::BindGroupKey& key);
uint32 GetHash(const GPUBindGroupKeyWebGPU& key);
#endif

View File

@@ -8,6 +8,42 @@
#include "Engine/GraphicsDevice/Vulkan/Types.h"
#include <webgpu/webgpu.h>
/// <summary>
/// Bundle of the state currently bound to the Web GPU context (used to properly handle different texture layouts or samplers when building bind group layout).
/// </summary>
struct GPUContextBindingsWebGPU
{
// Pointer to the table of bound shader resource views; the trailing note gives the table length.
// NOTE(review): this struct declares no cleanup, so the table is presumably owned by the context - confirm.
GPUResourceView** ShaderResources; // [GPU_MAX_SR_BINDED]
};
/// <summary>
/// Batch of bind group descriptions for the layout. Used as a key for caching created bind groups.
/// </summary>
struct GPUBindGroupKeyWebGPU
{
// Hash of the key contents. The GetBindGroup helpers describe incoming keys as "unhashed", so this is presumably filled in by the cache lookup rather than by the caller - confirm.
uint32 Hash;
// Bind group layout the entries are described against.
WGPUBindGroupLayout Layout;
// Frame number of the last use; mutable so it can be refreshed through a const key (e.g. one stored inside a dictionary).
mutable uint64 LastFrameUsed;
// Fixed-capacity storage for the bind group entries (at most 64).
WGPUBindGroupEntry Entries[64];
// Number of used slots in Entries/Versions (presumably always <= 64 - TODO confirm callers enforce it).
uint8 EntriesCount;
uint8 Versions[64]; // Versions of descriptors used to differentiate when texture residency gets changed

// Equality used for cache lookups; defined out of line.
bool operator==(const GPUBindGroupKeyWebGPU& other) const;
};
/// <summary>
/// Reusable utility for caching bind group objects. Handles reusing bind groups for the same key and releasing them when they are not used for a long time (based on the frame number).
/// </summary>
// NOTE(review): no destructor is declared in this struct; confirm where the cached bind groups get released when the owner is destroyed.
struct GPUBindGroupCacheWebGPU
{
private:
// Frame number bookkeeping for the periodic cleanup of stale entries (presumably the frame of the last cleanup pass - confirm against Get).
uint64 _lastFrameBindGroupsGC = 0;
// Created bind groups indexed by their description key.
Dictionary<GPUBindGroupKeyWebGPU, WGPUBindGroup> _bindGroups; // TODO: try using LRU cache
public:
// Gets the bind group for the given key, reusing a cached one when available (see summary above).
// device: device passed for creating bind groups; key: taken by non-const reference (presumably hashed in place - confirm); debugName: label used for diagnostics.
// gcFrames: defaults to 50; presumably the number of frames an entry may stay unused before it is released - TODO confirm against the implementation.
WGPUBindGroup Get(WGPUDevice device, GPUBindGroupKeyWebGPU& key, const StringAnsiView& debugName, uint64 gcFrames = 50);
};
/// <summary>
/// Shaders base class for Web GPU backend.
/// </summary>
@@ -69,4 +105,39 @@ public:
}
};
/// <summary>
/// Compute Shader for Web GPU backend.
/// </summary>
// NOTE(review): the class releases raw WebGPU handles in its destructor but declares no copy/move operations; confirm a base class forbids copying, otherwise a copy would release the same handles twice.
class GPUShaderProgramCSWebGPU : public GPUShaderProgramWebGPU<GPUShaderProgramCS>
{
private:
// Compute pipeline handle; null until set (released in the destructor when non-null).
WGPUComputePipeline _pipeline = nullptr;
// Bind group layout handle; null until set (released in the destructor when non-null).
WGPUBindGroupLayout _bindGroupLayout = nullptr;
// Cache of bind groups created for this shader.
GPUBindGroupCacheWebGPU _bindGroupCache;
public:
// Forwards all arguments to the shared Web GPU shader program base.
GPUShaderProgramCSWebGPU(const GPUShaderProgramInitializer& initializer, const SpirvShaderDescriptorInfo& descriptorInfo, WGPUShaderModule shaderModule)
: GPUShaderProgramWebGPU(initializer, descriptorInfo, shaderModule)
{
}
// Releases the bind group layout and the pipeline if they were created.
~GPUShaderProgramCSWebGPU()
{
if (_bindGroupLayout)
wgpuBindGroupLayoutRelease(_bindGroupLayout);
if (_pipeline)
wgpuComputePipelineRelease(_pipeline);
}
public:
// Gets the pipeline.
// resultBindGroupLayout is an output reference (presumably receives the bind group layout used by the pipeline - confirm against the implementation).
WGPUComputePipeline GetPipeline(WGPUDevice device, const GPUContextBindingsWebGPU& bindings, WGPUBindGroupLayout& resultBindGroupLayout);
// Gets the bind group for the given key (unhashed). Bind groups are cached and reused for the same key.
FORCE_INLINE WGPUBindGroup GetBindGroup(WGPUDevice device, GPUBindGroupKeyWebGPU& key)
{
// Uses the shader name as the debug label and passes gcFrames = 60 * 60 (3600) instead of the cache default of 50.
return _bindGroupCache.Get(device, key, _name, 60 * 60);
}
};
#endif

View File

@@ -79,10 +79,11 @@ GPUShaderProgram* GPUShaderWebGPU::CreateGPUShaderProgram(ShaderStage type, cons
break;
}
case ShaderStage::Pixel:
{
shader = New<GPUShaderProgramPSWebGPU>(initializer, header->DescriptorInfo, shaderModule);
break;
}
case ShaderStage::Compute:
shader = New<GPUShaderProgramCSWebGPU>(initializer, header->DescriptorInfo, shaderModule);
break;
}
return shader;
}

View File

@@ -843,10 +843,10 @@ void ParticleEmitterGPUGenerator::ProcessModule(Node* node)
" {{\n"
" // Collision (depth)\n"
" float3 nextPos = {0} + {1} * DeltaTime;\n"
" nextPos = mul(float4(nextPos, 1), WorldMatrix).xyz;\n" // TODO: don't transform by WorldMatrix if particle system uses World Space simulation
" nextPos = PROJECT_POINT(float4(nextPos, 1), WorldMatrix).xyz;\n" // TODO: don't transform by WorldMatrix if particle system uses World Space simulation
" float3 viewPos = mul(float4(nextPos, 1), ViewMatrix);\n"
" float4 projPos = mul(float4(nextPos, 1), ViewProjectionMatrix);\n"
" float3 viewPos = PROJECT_POINT(float4(nextPos, 1), ViewMatrix);\n"
" float4 projPos = PROJECT_POINT(float4(nextPos, 1), ViewProjectionMatrix);\n"
" projPos.xyz /= projPos.w;\n"
" if (all(abs(projPos.xy) < 1.0f))\n"
" {{\n"
@@ -871,8 +871,8 @@ void ParticleEmitterGPUGenerator::ProcessModule(Node* node)
" viewPos.z = linearDepth;\n"
" \n"
" {0} = mul(float4(viewPos, 1), InvViewMatrix).xyz;\n" // TODO: don't transform by WorldMatrix if particle system uses World Space simulation
" {0} = mul(float4({0}, 1), InvWorldMatrix).xyz;\n" // TODO: don't transform by WorldMatrix if particle system uses World Space simulation
" {0} = PROJECT_POINT(float4(viewPos, 1), InvViewMatrix).xyz;\n" // TODO: don't transform by WorldMatrix if particle system uses World Space simulation
" {0} = PROJECT_POINT(float4({0}, 1), InvWorldMatrix).xyz;\n" // TODO: don't transform by WorldMatrix if particle system uses World Space simulation
COLLISION_LOGIC()
" }}\n"

View File

@@ -5,7 +5,7 @@
/// <summary>
/// Current GPU particles emitter shader version.
/// </summary>
#define PARTICLE_GPU_GRAPH_VERSION 11
#define PARTICLE_GPU_GRAPH_VERSION 12
#if COMPILE_WITH_PARTICLE_GPU_GRAPH

View File

@@ -54,7 +54,7 @@ bool ShouldSwap(float a, float b)
RWByteAddressBuffer IndirectArgsBuffer : register(u0);
META_CS(true, FEATURE_LEVEL_SM5)
META_CS(true, AUTO)
[numthreads(22, 1, 1)]
void CS_IndirectArgs(uint groupIndex : SV_GroupIndex)
{
@@ -129,7 +129,7 @@ void StoreItem(uint element, uint count)
#ifdef _CS_PreSort
META_CS(true, FEATURE_LEVEL_SM5)
META_CS(true, AUTO)
META_PERMUTATION_1(THREAD_GROUP_SIZE=1024)
META_PERMUTATION_1(THREAD_GROUP_SIZE=64)
[numthreads(THREAD_GROUP_SIZE, 1, 1)]
@@ -177,7 +177,7 @@ void CS_PreSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex)
#ifdef _CS_InnerSort
META_CS(true, FEATURE_LEVEL_SM5)
META_CS(true, AUTO)
[numthreads(THREAD_GROUP_SIZE, 1, 1)]
void CS_InnerSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex)
{
@@ -222,7 +222,7 @@ void CS_InnerSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex)
RWBuffer<uint> SortedIndices : register(u0);
RWBuffer<float> SortingKeys : register(u1);
META_CS(true, FEATURE_LEVEL_SM5)
META_CS(true, AUTO)
[numthreads(1024, 1, 1)]
void CS_OuterSort(uint3 dispatchThreadId : SV_DispatchThreadID)
{

View File

@@ -59,7 +59,7 @@
#else
#define CAN_USE_GATHER 0
#endif
#if FEATURE_LEVEL >= FEATURE_LEVEL_SM5
#if FEATURE_LEVEL >= FEATURE_LEVEL_SM5 || defined(WGSL)
#define CAN_USE_COMPUTE_SHADER 1
#else
#define CAN_USE_COMPUTE_SHADER 0
@@ -79,6 +79,7 @@
// Alias a read-only Buffer bound as a shader resource to StructuredBuffer so it is used as storage on WebGPU (not supported)
#define CAN_USE_TYPED_BUFFER_LOADS 0
#define Buffer StructuredBuffer
#define RWBuffer RWStructuredBuffer
// Hack matrix multiplication order for WebGPU (row-major vs column-major bug?)
#define PROJECT_POINT(p, m) mul(m, p)

View File

@@ -35,7 +35,7 @@ float3 GetParticleVec3(uint particleIndex, int offset)
}
// Sorting keys generation shader
META_CS(true, FEATURE_LEVEL_SM5)
META_CS(true, AUTO)
META_PERMUTATION_1(SORT_MODE=0)
META_PERMUTATION_1(SORT_MODE=1)
META_PERMUTATION_1(SORT_MODE=2)

View File

@@ -20,7 +20,7 @@ META_CB_END
RWStructuredBuffer<uint> HistogramBuffer : register(u0);
// Clears the histogram
META_CS(true, FEATURE_LEVEL_SM5)
META_CS(true, AUTO)
[numthreads(THREADGROUP_SIZE_X, 1, 1)]
void CS_ClearHistogram(uint dispatchThreadId : SV_DispatchThreadID)
{
@@ -44,7 +44,7 @@ float ComputeHistogramPositionFromLuminance(float luminance)
groupshared uint SharedHistogram[HISTOGRAM_SIZE];
// Generates the histogram
META_CS(true, FEATURE_LEVEL_SM5)
META_CS(true, AUTO)
[numthreads(THREADGROUP_SIZE_X, THREADGROUP_SIZE_Y, 1)]
void CS_GenerateHistogram(uint3 dispatchThreadId : SV_DispatchThreadID, uint3 groupThreadId : SV_GroupThreadID)
{