// Reconstructs a position from a screen UV and the raw depth value by transforming it with InvViewProjectionMatrix.
float3 ReprojectPosition(float2 uv, float rawDepth)
{
    // Scale and bias the UV (x2 - 1 with the Y axis flipped) before building the point to transform
    float2 clipXY = uv * float2(2.0, -2.0) + float2(-1.0, 1.0);
    float4 clipPoint = float4(clipXY.x, clipXY.y, rawDepth, 1.0f);
    float4 pos = PROJECT_POINT(clipPoint, InvViewProjectionMatrix);

    // Homogeneous divide
    return pos.xyz / pos.w;
}
https://git-lfs.github.com/spec/v1 -oid sha256:c0f6f453083d99451586af7591f17275413c431281847ad8c267eb9b4c8d9136 -size 2792 +oid sha256:cf20a5d44194de2da7368860ed22ec2966eabccfc28ccc5b7bcdcb67db65910a +size 2758 diff --git a/Source/Engine/Graphics/RenderTools.cpp b/Source/Engine/Graphics/RenderTools.cpp index d658b902c..94226d4b4 100644 --- a/Source/Engine/Graphics/RenderTools.cpp +++ b/Source/Engine/Graphics/RenderTools.cpp @@ -274,7 +274,7 @@ FeatureLevel RenderTools::GetFeatureLevel(ShaderProfile profile) case ShaderProfile::GLSL_410: case ShaderProfile::Unknown: case ShaderProfile::WebGPU: - return FeatureLevel::ES2; + return FeatureLevel::ES3_1; default: return FeatureLevel::ES2; } @@ -293,6 +293,8 @@ ShaderProfileFeatures RenderTools::GetShaderProfileFeatures(ShaderProfile profil return ShaderProfileFeatures::ComputeShaders | ShaderProfileFeatures::GeometryShaders; case ShaderProfile::DirectX_SM4: return ShaderProfileFeatures::GeometryShaders; + case ShaderProfile::WebGPU: + return ShaderProfileFeatures::ComputeShaders; default: return ShaderProfileFeatures::None; } diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUPipelineStateVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUPipelineStateVulkan.cpp index e16a9f76e..ec28b4c56 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUPipelineStateVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUPipelineStateVulkan.cpp @@ -10,6 +10,7 @@ #include "Engine/Core/Log.h" #include "Engine/Core/Types/Pair.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Graphics/PixelFormatExtensions.h" static VkStencilOp ToVulkanStencilOp(const StencilOperation value) @@ -91,6 +92,7 @@ ComputePipelineStateVulkan* GPUShaderProgramCSVulkan::GetOrCreateState() if (_pipelineState) return _pipelineState; PROFILE_CPU(); + PROFILE_MEM(GraphicsShaders); ZoneText(*_name, _name.Length()); // Create pipeline layout @@ -224,6 +226,7 @@ VkPipeline 
GPUPipelineStateVulkan::GetState(RenderPassVulkan* renderPass, GPUVer return pipeline; } PROFILE_CPU(); + PROFILE_MEM(GraphicsShaders); #if !BUILD_RELEASE DebugName name; GetDebugName(name); diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUBufferWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUBufferWebGPU.cpp index 5b172474e..6219ba38e 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUBufferWebGPU.cpp +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUBufferWebGPU.cpp @@ -72,6 +72,8 @@ bool GPUBufferWebGPU::OnInit() { case GPUResourceUsage::Default: bufferDesc.usage |= WGPUBufferUsage_CopyDst; + if (IsUnorderedAccess()) + bufferDesc.usage |= WGPUBufferUsage_CopySrc; // eg. GPU particles copy particle counter between buffers break; case GPUResourceUsage::Dynamic: if (bufferDesc.usage == 0) // WebGPU doesn't allow to map-write Index/Vertex/Storage buffers diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp index bf99dfb0c..367b939c8 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp @@ -368,8 +368,9 @@ void GPUContextWebGPU::UpdateCB(GPUConstantBuffer* cb, const void* data) void GPUContextWebGPU::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCountX, uint32 threadGroupCountY, uint32 threadGroupCountZ) { - OnDispatch(shader); - MISSING_CODE("GPUContextWebGPU::Dispatch"); + auto computePass = OnDispatch(shader); + wgpuComputePassEncoderDispatchWorkgroups(computePass, threadGroupCountX, threadGroupCountY, threadGroupCountZ); + EndComputePass(computePass); RENDER_STAT_DISPATCH_CALL(); } @@ -377,8 +378,9 @@ void GPUContextWebGPU::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* b { ASSERT(bufferForArgs && EnumHasAnyFlags(bufferForArgs->GetFlags(), GPUBufferFlags::Argument)); auto bufferForArgsWebGPU = (GPUBufferWebGPU*)bufferForArgs; - OnDispatch(shader); - 
MISSING_CODE("GPUContextWebGPU::Dispatch"); + auto computePass = OnDispatch(shader); + wgpuComputePassEncoderDispatchWorkgroupsIndirect(computePass, bufferForArgsWebGPU->Buffer, offsetForArgs); + EndComputePass(computePass); RENDER_STAT_DISPATCH_CALL(); } @@ -865,7 +867,7 @@ void GPUContextWebGPU::OnDrawCall() if (_pipelineDirty) { _pipelineDirty = false; - WGPURenderPipeline pipeline = _pipelineState ? _pipelineState->GetPipeline(_pipelineKey, _shaderResources) : nullptr; + WGPURenderPipeline pipeline = _pipelineState ? _pipelineState->GetPipeline(_pipelineKey, { _shaderResources }) : nullptr; wgpuRenderPassEncoderSetPipeline(_renderPass, pipeline); RENDER_STAT_PS_STATE_CHANGE(); @@ -898,9 +900,38 @@ void GPUContextWebGPU::OnDrawCall() } } -void GPUContextWebGPU::OnDispatch(GPUShaderProgramCS* shader) +WGPUComputePassEncoder GPUContextWebGPU::OnDispatch(GPUShaderProgramCS* shader) { - // TODO: add compute shaders support + // End existing render pass (if any) + if (_renderPass) + EndRenderPass(); + + // Flush pending clears + FlushState(); + + // Start a new compute pass + WGPUComputePassDescriptor computePassDesc = WGPU_COMPUTE_PASS_DESCRIPTOR_INIT; + FlushTimestamps(1); + if (_pendingTimestampWrites.HasItems()) + computePassDesc.timestampWrites = &_pendingTimestampWrites.Last(); + _pendingTimestampWrites.Clear(); + auto computePass = wgpuCommandEncoderBeginComputePass(Encoder, &computePassDesc); + ASSERT(computePass); + + // Set pipeline + GPUPipelineStateWebGPU::BindGroupKey key; + auto shaderWebGPU = (GPUShaderProgramCSWebGPU*)shader; + WGPUComputePipeline pipeline = shaderWebGPU->GetPipeline(_device->Device, { _shaderResources }, key.Layout); + wgpuComputePassEncoderSetPipeline(computePass, pipeline); + + // Set bind group + uint32 dynamicOffsets[DynamicOffsetsMax]; + uint32 dynamicOffsetsCount = 0; + BuildBindGroup(0, shaderWebGPU->DescriptorInfo, key, dynamicOffsets, dynamicOffsetsCount); + WGPUBindGroup bindGroup = 
shaderWebGPU->GetBindGroup(_device->Device, key); + wgpuComputePassEncoderSetBindGroup(computePass, 0, bindGroup, dynamicOffsetsCount, dynamicOffsets); + + return computePass; } void GPUContextWebGPU::EndRenderPass() @@ -910,6 +941,13 @@ void GPUContextWebGPU::EndRenderPass() _renderPass = nullptr; } +void GPUContextWebGPU::EndComputePass(WGPUComputePassEncoder computePass) +{ + wgpuComputePassEncoderEnd(computePass); + wgpuComputePassEncoderRelease(computePass); + computePass = nullptr; +} + void GPUContextWebGPU::FlushRenderPass() { _renderPassDirty = false; @@ -1033,138 +1071,17 @@ void GPUContextWebGPU::FlushBindGroup() // Each shader stage (Vertex, Pixel) uses a separate bind group GPUPipelineStateWebGPU::BindGroupKey key; - for (int32 groupIndex = 0; groupIndex < GPUBindGroupsWebGPU::GraphicsMax; groupIndex++) + uint32 dynamicOffsets[DynamicOffsetsMax]; + for (uint32 groupIndex = 0; groupIndex < GPUBindGroupsWebGPU::GraphicsMax; groupIndex++) { auto descriptors = _pipelineState->BindGroupDescriptors[groupIndex]; key.Layout = _pipelineState->BindGroupLayouts[groupIndex]; if (!descriptors || !key.Layout) continue; - // Build descriptors for the bind group - auto entriesCount = descriptors->DescriptorTypesCount; - uint32 dynamicOffsets[4]; + // Build descriptors uint32 dynamicOffsetsCount = 0; - static_assert(ARRAY_COUNT(key.Entries) == SpirvShaderDescriptorInfo::MaxDescriptors, "Invalid size of bind group entries array."); - static_assert(ARRAY_COUNT(key.Versions) == SpirvShaderDescriptorInfo::MaxDescriptors, "Invalid size of bind group versions array."); - key.EntriesCount = entriesCount; - auto entriesPtr = key.Entries; - auto versionsPtr = key.Versions; - Platform::MemoryClear(entriesPtr, entriesCount * sizeof(WGPUBindGroupEntry)); - Platform::MemoryClear(versionsPtr, ((entriesCount + 3) & ~0x3) * sizeof(uint8)); - for (int32 index = 0; index < entriesCount; index++) - { - auto& descriptor = descriptors->DescriptorTypes[index]; - auto& entry = 
entriesPtr[index]; - entry.binding = descriptor.Binding; - entry.size = WGPU_WHOLE_SIZE; - switch (descriptor.DescriptorType) - { - case VK_DESCRIPTOR_TYPE_SAMPLER: - { - GPUSamplerWebGPU* sampler = _samplers[descriptor.Slot]; - if (!sampler) - sampler = _device->DefaultSamplers[0]; // Fallback - entry.sampler = sampler->Sampler; - break; - } - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - { - ASSERT_LOW_LAYER(descriptor.BindingType == SpirvShaderResourceBindingType::SRV); - auto view = _shaderResources[descriptor.Slot]; - auto ptr = view ? (GPUResourceViewPtrWebGPU*)view->GetNativePtr() : nullptr; - if (ptr && ptr->TextureView) - { - entry.textureView = ptr->TextureView->View; - versionsPtr[index] = ptr->Version; - } - if (!entry.textureView) - { - // Fallback - auto defaultTexture = _device->DefaultTexture[(int32)descriptor.ResourceType]; - if (!defaultTexture) - { - LOG(Error, "Missing default resource {} at slot {} of binding space {}", (int32)descriptor.ResourceType, descriptor.Slot, (int32)descriptor.BindingType); - CRASH; - } - switch (descriptor.ResourceType) - { - case SpirvShaderResourceType::Texture3D: - view = defaultTexture->ViewVolume(); - break; - case SpirvShaderResourceType::Texture1DArray: - case SpirvShaderResourceType::Texture2DArray: - view = defaultTexture->ViewArray(); - break; - default: - view = defaultTexture->View(0); - break; - } - ptr = (GPUResourceViewPtrWebGPU*)view->GetNativePtr(); - entry.textureView = ptr->TextureView->View; - } - break; - } - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - { - ASSERT(descriptor.Slot < _resourceTableSizes[(int32)descriptor.BindingType]); - GPUResourceView* view = _resourceTables[(int32)descriptor.BindingType][descriptor.Slot]; - auto ptr = view ? 
(GPUResourceViewPtrWebGPU*)view->GetNativePtr() : nullptr; - if (ptr && ptr->BufferView) - { - entry.buffer = ptr->BufferView->Buffer; - entry.size = ((GPUBufferWebGPU*)view->GetParent())->GetSize(); - versionsPtr[index] = (uint64)ptr->Version; - } - if (!entry.buffer) - entry.buffer = _device->DefaultBuffer; // Fallback - break; - } - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - { - GPUConstantBufferWebGPU* uniform = _constantBuffers[descriptor.Slot]; - if (uniform && uniform->Allocation.Buffer) - { - entry.buffer = uniform->Allocation.Buffer; - entry.size = uniform->AllocationSize; - if (descriptor.DescriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) - entry.offset = uniform->Allocation.Offset; - else - dynamicOffsets[dynamicOffsetsCount++] = uniform->Allocation.Offset; - } - else - LOG(Fatal, "Missing constant buffer at slot {}", descriptor.Slot); - break; - } - default: -#if GPU_ENABLE_DIAGNOSTICS - LOG(Fatal, "Unknown descriptor type: {} used as {}", (uint32)descriptor.DescriptorType, (uint32)descriptor.BindingType); -#else - CRASH; -#endif - return; - } - } - -#if BUILD_DEBUG - // Validate - for (int32 i = 0; i < entriesCount; i++) - { - auto& e = entriesPtr[i]; - if ((e.buffer != nullptr) + (e.sampler != nullptr) + (e.textureView != nullptr) != 1) - { - LOG(Error, "Invalid binding in group {} at index {} ({})", groupIndex, i, _pipelineState->GetName()); - LOG(Error, " > sampler: {}", (uint32)e.sampler); - LOG(Error, " > textureView: {}", (uint32)e.textureView); - LOG(Error, " > buffer: {}", (uint32)e.buffer); - } - } - ASSERT(dynamicOffsetsCount <= ARRAY_COUNT(dynamicOffsets)); -#endif + BuildBindGroup(groupIndex, *descriptors, key, dynamicOffsets, dynamicOffsetsCount); // Bind group WGPUBindGroup bindGroup = _pipelineState->GetBindGroup(key); @@ -1197,4 +1114,131 @@ void GPUContextWebGPU::FlushTimestamps(int32 skipLast) } } +void GPUContextWebGPU::BuildBindGroup(uint32 groupIndex, const 
SpirvShaderDescriptorInfo& descriptors, GPUPipelineStateWebGPU::BindGroupKey& key, uint32 dynamicOffsets[DynamicOffsetsMax], uint32& dynamicOffsetsCount) +{ + // Build descriptors for the bind group + auto entriesCount = descriptors.DescriptorTypesCount; + static_assert(ARRAY_COUNT(key.Entries) == SpirvShaderDescriptorInfo::MaxDescriptors, "Invalid size of bind group entries array."); + static_assert(ARRAY_COUNT(key.Versions) == SpirvShaderDescriptorInfo::MaxDescriptors, "Invalid size of bind group versions array."); + key.EntriesCount = entriesCount; + auto entriesPtr = key.Entries; + auto versionsPtr = key.Versions; + Platform::MemoryClear(entriesPtr, entriesCount * sizeof(WGPUBindGroupEntry)); + Platform::MemoryClear(versionsPtr, ((entriesCount + 3) & ~0x3) * sizeof(uint8)); + for (int32 index = 0; index < entriesCount; index++) + { + auto& descriptor = descriptors.DescriptorTypes[index]; + auto& entry = entriesPtr[index]; + entry.binding = descriptor.Binding; + entry.size = WGPU_WHOLE_SIZE; + switch (descriptor.DescriptorType) + { + case VK_DESCRIPTOR_TYPE_SAMPLER: + { + GPUSamplerWebGPU* sampler = _samplers[descriptor.Slot]; + if (!sampler) + sampler = _device->DefaultSamplers[0]; // Fallback + entry.sampler = sampler->Sampler; + break; + } + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + { + ASSERT_LOW_LAYER(descriptor.BindingType == SpirvShaderResourceBindingType::SRV); + auto view = _shaderResources[descriptor.Slot]; + auto ptr = view ? 
(GPUResourceViewPtrWebGPU*)view->GetNativePtr() : nullptr; + if (ptr && ptr->TextureView) + { + entry.textureView = ptr->TextureView->View; + versionsPtr[index] = ptr->Version; + } + if (!entry.textureView) + { + // Fallback + auto defaultTexture = _device->DefaultTexture[(int32)descriptor.ResourceType]; + if (!defaultTexture) + { + LOG(Error, "Missing default resource {} at slot {} of binding space {}", (int32)descriptor.ResourceType, descriptor.Slot, (int32)descriptor.BindingType); + CRASH; + } + switch (descriptor.ResourceType) + { + case SpirvShaderResourceType::Texture3D: + view = defaultTexture->ViewVolume(); + break; + case SpirvShaderResourceType::Texture1DArray: + case SpirvShaderResourceType::Texture2DArray: + view = defaultTexture->ViewArray(); + break; + default: + view = defaultTexture->View(0); + break; + } + ptr = (GPUResourceViewPtrWebGPU*)view->GetNativePtr(); + entry.textureView = ptr->TextureView->View; + } + break; + } + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + { + ASSERT(descriptor.Slot < _resourceTableSizes[(int32)descriptor.BindingType]); + GPUResourceView* view = _resourceTables[(int32)descriptor.BindingType][descriptor.Slot]; + auto ptr = view ? 
(GPUResourceViewPtrWebGPU*)view->GetNativePtr() : nullptr; + if (ptr && ptr->BufferView) + { + entry.buffer = ptr->BufferView->Buffer; + entry.size = ((GPUBufferWebGPU*)view->GetParent())->GetSize(); + versionsPtr[index] = (uint64)ptr->Version; + } + if (!entry.buffer) + entry.buffer = _device->DefaultBuffer; // Fallback + break; + } + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + { + GPUConstantBufferWebGPU* uniform = _constantBuffers[descriptor.Slot]; + if (uniform && uniform->Allocation.Buffer) + { + entry.buffer = uniform->Allocation.Buffer; + entry.size = uniform->AllocationSize; + if (descriptor.DescriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) + entry.offset = uniform->Allocation.Offset; + else + dynamicOffsets[dynamicOffsetsCount++] = uniform->Allocation.Offset; + } + else + LOG(Fatal, "Missing constant buffer at slot {}", descriptor.Slot); + break; + } + default: +#if GPU_ENABLE_DIAGNOSTICS + LOG(Fatal, "Unknown descriptor type: {} used as {}", (uint32)descriptor.DescriptorType, (uint32)descriptor.BindingType); +#else + CRASH; +#endif + return; + } + } + +#if BUILD_DEBUG + // Validate + for (int32 i = 0; i < entriesCount; i++) + { + auto& e = entriesPtr[i]; + if ((e.buffer != nullptr) + (e.sampler != nullptr) + (e.textureView != nullptr) != 1) + { + LOG(Error, "Invalid binding in group {} at index {} ({})", groupIndex, i, _pipelineState->GetName()); + LOG(Error, " > sampler: {}", (uint32)e.sampler); + LOG(Error, " > textureView: {}", (uint32)e.textureView); + LOG(Error, " > buffer: {}", (uint32)e.buffer); + } + } + ASSERT(dynamicOffsetsCount <= DynamicOffsetsMax); +#endif +} + #endif diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h index 140d96618..694e78c34 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h @@ -93,11 +93,14 @@ private: bool FindClear(const 
GPUTextureViewWebGPU* view, PendingClear& clear); void ManualClear(const PendingClear& clear); void OnDrawCall(); - void OnDispatch(GPUShaderProgramCS* shader); + WGPUComputePassEncoder OnDispatch(GPUShaderProgramCS* shader); void EndRenderPass(); + void EndComputePass(WGPUComputePassEncoder computePass); void FlushRenderPass(); void FlushBindGroup(); void FlushTimestamps(int32 skipLast = 0); + constexpr static int32 DynamicOffsetsMax = 4; + void BuildBindGroup(uint32 groupIndex, const SpirvShaderDescriptorInfo& descriptors, GPUPipelineStateWebGPU::BindGroupKey& key, uint32 dynamicOffsets[DynamicOffsetsMax], uint32& dynamicOffsetsCount); public: // [GPUContext] diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp index 585a7a4fa..57f9023d8 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp @@ -294,6 +294,14 @@ bool GPUDeviceWebGPU::Init() { MinUniformBufferOffsetAlignment = limits.minUniformBufferOffsetAlignment; TimestampQuery = features.Contains(WGPUFeatureName_TimestampQuery); + Limits.HasCompute = + limits.maxStorageBuffersPerShaderStage >= GPU_MAX_UA_BINDED && + limits.maxStorageTexturesPerShaderStage >= GPU_MAX_UA_BINDED && + limits.maxComputeWorkgroupsPerDimension >= GPU_MAX_CS_DISPATCH_THREAD_GROUPS && + limits.maxComputeWorkgroupSizeX >= 1024 && + limits.maxComputeWorkgroupSizeY >= 256 && + limits.maxComputeWorkgroupSizeZ >= 8 && + limits.maxBufferSize >= 64 * 1024 * 1024; // 64MB Limits.HasInstancing = true; Limits.HasDrawIndirect = true; Limits.HasDepthAsSRV = true; diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.cpp index 112368cd1..467a76ef3 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.cpp +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.cpp @@ -142,6 +142,229 @@ 
WGPUBlendComponent ToBlendComponent(BlendingMode::Operation blendOp, BlendingMod return result; } +typedef Array> BindGroupEntries; + +WGPUBindGroupLayout CreateBindGroupLayout(WGPUDevice device, const GPUContextBindingsWebGPU& bindings, int32 groupIndex, const SpirvShaderDescriptorInfo& descriptors, BindGroupEntries& entries, const StringAnsiView& debugName, bool log, bool compute = false) +{ + int32 entriesCount = descriptors.DescriptorTypesCount; + if (entriesCount == 0) + return nullptr; + auto entriesPtr = entries.Get(); + ASSERT_LOW_LAYER(entries.Count() >= entriesCount); + Platform::MemoryClear(entries.Get(), sizeof(WGPUBindGroupLayoutEntry) * entriesCount); + auto visibility = compute ? WGPUShaderStage_Compute : (groupIndex == 0 ? WGPUShaderStage_Vertex : WGPUShaderStage_Fragment); +#if WEBGPU_LOG_PSO + if (log) + LOG(Info, " > group {} - {}", groupIndex, compute ? TEXT("Compute") : (groupIndex == 0 ? TEXT("Vertex") : TEXT("Fragment"))); + const Char* samplerType = TEXT("?"); +#endif + for (int32 index = 0; index < entriesCount; index++) + { + auto& descriptor = descriptors.DescriptorTypes[index]; + auto& entry = entriesPtr[index]; + entry.binding = descriptor.Binding; + entry.bindingArraySize = descriptor.Count; + entry.visibility = visibility; + switch (descriptor.DescriptorType) + { + case VK_DESCRIPTOR_TYPE_SAMPLER: + entry.sampler.type = WGPUSamplerBindingType_Undefined; + if (descriptor.Slot == 4 || descriptor.Slot == 5) // Hack for ShadowSampler and ShadowSamplerLinear (this could get binded samplers table just like for shaderResources) + entry.sampler.type = WGPUSamplerBindingType_Comparison; +#if WEBGPU_LOG_PSO + switch (entry.sampler.type) + { + case WGPUSamplerBindingType_BindingNotUsed: + samplerType = TEXT("BindingNotUsed"); + break; + case WGPUSamplerBindingType_Undefined: + samplerType = TEXT("Undefined"); + break; + case WGPUSamplerBindingType_Filtering: + samplerType = TEXT("Filtering"); + break; + case WGPUSamplerBindingType_NonFiltering: 
+ samplerType = TEXT("NonFiltering"); + break; + case WGPUSamplerBindingType_Comparison: + samplerType = TEXT("Comparison"); + break; + } + if (log) + LOG(Info, " > [{}] sampler ({})", entry.binding, samplerType); +#endif + break; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + entry.texture.sampleType = WGPUTextureSampleType_Undefined; + if (bindings.ShaderResources[descriptor.Slot]) + { + // Hack to use the sample type directly from the view which allows to fix incorrect Depth Buffer reading that allows only manual Load when UnfilterableFloat is used (see SAMPLE_RT_DEPTH) + auto ptr = (GPUResourceViewPtrWebGPU*)bindings.ShaderResources[descriptor.Slot]->GetNativePtr(); + if (ptr && ptr->TextureView) + entry.texture.sampleType = ptr->TextureView->SampleType; + } +#if WEBGPU_LOG_PSO + if (log) + { + switch (entry.texture.sampleType) + { + case WGPUTextureSampleType_BindingNotUsed: + samplerType = TEXT("BindingNotUsed"); + break; + case WGPUTextureSampleType_Undefined: + samplerType = TEXT("Undefined"); + break; + case WGPUTextureSampleType_Float: + samplerType = TEXT("Float"); + break; + case WGPUTextureSampleType_UnfilterableFloat: + samplerType = TEXT("UnfilterableFloat"); + break; + case WGPUTextureSampleType_Depth: + samplerType = TEXT("Depth"); + break; + case WGPUTextureSampleType_Sint: + samplerType = TEXT("Sint"); + break; + case WGPUTextureSampleType_Uint: + samplerType = TEXT("Uint"); + break; + } + switch (descriptor.ResourceType) + { + case SpirvShaderResourceType::Texture1D: + LOG(Info, " > [{}] texture 1D ({})", entry.binding, samplerType); + break; + case SpirvShaderResourceType::Texture2D: + LOG(Info, " > [{}] texture 2D ({})", entry.binding, samplerType); + break; + case SpirvShaderResourceType::Texture3D: + LOG(Info, " > [{}] texture 3D ({})", entry.binding, samplerType); + break; + case SpirvShaderResourceType::TextureCube: + LOG(Info, " > [{}] texture Cube ({})", entry.binding, samplerType); + break; + case SpirvShaderResourceType::Texture2DArray: + 
LOG(Info, " > [{}] texture 2D array ({})", entry.binding, samplerType); + break; + } + } +#endif + switch (descriptor.ResourceType) + { + case SpirvShaderResourceType::Texture1D: + entry.texture.viewDimension = WGPUTextureViewDimension_1D; + break; + case SpirvShaderResourceType::Texture2D: + entry.texture.viewDimension = WGPUTextureViewDimension_2D; + break; + case SpirvShaderResourceType::Texture3D: + entry.texture.viewDimension = WGPUTextureViewDimension_3D; + break; + case SpirvShaderResourceType::TextureCube: + entry.texture.viewDimension = WGPUTextureViewDimension_Cube; + break; + case SpirvShaderResourceType::Texture1DArray: + CRASH; // Not supported TODO: add error at compile time (in ShaderCompilerWebGPU::Write) + break; + case SpirvShaderResourceType::Texture2DArray: + entry.texture.viewDimension = WGPUTextureViewDimension_2DArray; + break; + } + break; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + entry.buffer.hasDynamicOffset = true; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + if (descriptor.BindingType == SpirvShaderResourceBindingType::SRV) + entry.buffer.type = WGPUBufferBindingType_ReadOnlyStorage; + else + entry.buffer.type = WGPUBufferBindingType_Storage; +#if WEBGPU_LOG_PSO + if (log) + LOG(Info, " > [{}] storage buffer (read-only = {}, dynamic = {})", entry.binding, entry.buffer.type == WGPUBufferBindingType_ReadOnlyStorage, entry.buffer.hasDynamicOffset); +#endif + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + entry.buffer.hasDynamicOffset = true; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + entry.buffer.type = WGPUBufferBindingType_Uniform; +#if WEBGPU_LOG_PSO + if (log) + LOG(Info, " > [{}] uniform buffer (dynamic = {})", entry.binding, entry.buffer.hasDynamicOffset); +#endif + break; + default: +#if GPU_ENABLE_DIAGNOSTICS + LOG(Fatal, "Unknown descriptor type: {} used as {} in '{}'", (uint32)descriptor.DescriptorType, (uint32)descriptor.BindingType, String(debugName)); +#else + CRASH; +#endif + return nullptr; + } + } + + 
// Create a bind group layout + WGPUBindGroupLayoutDescriptor bindGroupLayoutDesc = WGPU_BIND_GROUP_LAYOUT_DESCRIPTOR_INIT; + bindGroupLayoutDesc.entryCount = entriesCount; + bindGroupLayoutDesc.entries = entriesPtr; + return wgpuDeviceCreateBindGroupLayout(device, &bindGroupLayoutDesc); +} + +WGPUComputePipeline GPUShaderProgramCSWebGPU::GetPipeline(WGPUDevice device, const GPUContextBindingsWebGPU& bindings, WGPUBindGroupLayout& resultBindGroupLayout) +{ + resultBindGroupLayout = _bindGroupLayout; + if (_pipeline) + return _pipeline; + PROFILE_CPU(); + ZoneText(*_name, _name.Length()); +#if WEBGPU_LOG_PSO +#ifdef WEBGPU_LOG_PSO_NAME + const bool log = _name.Contains(WEBGPU_LOG_PSO_NAME); +#else + const bool log = true; +#endif + if (log) + LOG(Info, "[WebGPU] GetPipeline: '{}'", String(_name)); +#endif + + // Create layout bind group + BindGroupEntries entries; + entries.Resize(DescriptorInfo.DescriptorTypesCount); + _bindGroupLayout = CreateBindGroupLayout(device, bindings, 0, DescriptorInfo, entries, _name, log, true); + resultBindGroupLayout = _bindGroupLayout; + + // Create the pipeline layout + WGPUPipelineLayoutDescriptor layoutDesc = WGPU_PIPELINE_LAYOUT_DESCRIPTOR_INIT; +#if GPU_ENABLE_RESOURCE_NAMING + layoutDesc.label = { _name.Get(), (size_t)_name.Length() }; +#endif + layoutDesc.bindGroupLayoutCount = 1; + layoutDesc.bindGroupLayouts = &_bindGroupLayout; + auto layout = wgpuDeviceCreatePipelineLayout(device, &layoutDesc); + if (!layout) + { + LOG(Error, "wgpuDeviceCreatePipelineLayout failed"); + return nullptr; + } + + // Create pipeline + WGPUComputePipelineDescriptor desc = WGPU_COMPUTE_PIPELINE_DESCRIPTOR_INIT; +#if GPU_ENABLE_RESOURCE_NAMING + desc.label = layoutDesc.label; +#endif + desc.layout = layout; + desc.compute.module = ShaderModule; + _pipeline = wgpuDeviceCreateComputePipeline(device , &desc); + if (!_pipeline) + { +#if GPU_ENABLE_RESOURCE_NAMING + LOG(Error, "wgpuDeviceCreateComputePipeline failed for {}", String(_name)); +#endif + } 
+ + return _pipeline; +} + void GPUPipelineStateWebGPU::OnReleaseGPU() { VS = nullptr; @@ -176,12 +399,12 @@ uint32 GetHash(const GPUPipelineStateWebGPU::PipelineKey& key) return hash; } -uint32 GetHash(const GPUPipelineStateWebGPU::BindGroupKey& key) +uint32 GetHash(const GPUBindGroupKeyWebGPU& key) { return key.Hash; } -bool GPUPipelineStateWebGPU::BindGroupKey::operator==(const BindGroupKey& other) const +bool GPUBindGroupKeyWebGPU::operator==(const GPUBindGroupKeyWebGPU& other) const { return Hash == other.Hash && Layout == other.Layout @@ -190,28 +413,132 @@ bool GPUPipelineStateWebGPU::BindGroupKey::operator==(const BindGroupKey& other) && Platform::MemoryCompare(&Versions, &other.Versions, EntriesCount * sizeof(uint8)) == 0; } -WGPURenderPipeline GPUPipelineStateWebGPU::GetPipeline(const PipelineKey& key, GPUResourceView* shaderResources[GPU_MAX_SR_BINDED]) +WGPUBindGroup GPUBindGroupCacheWebGPU::Get(WGPUDevice device, GPUBindGroupKeyWebGPU& key, const StringAnsiView& debugName, uint64 gcFrames) { - WGPURenderPipeline pipeline; - if (_pipelines.TryGet(key, pipeline)) - return pipeline; - PROFILE_CPU(); - PROFILE_MEM(GraphicsCommands); -#if GPU_ENABLE_RESOURCE_NAMING - ZoneText(_debugName.Get(), _debugName.Count() - 1); -#endif -#if WEBGPU_LOG_PSO - LOG(Info, "[WebGPU] GetPipeline: '{}'", String(_debugName.Get(), _debugName.Count() - 1)); +#if WEBGPU_LOG_BIND_GROUPS #ifdef WEBGPU_LOG_PSO_NAME - const bool log = StringAnsiView(_debugName.Get(), _debugName.Count() - 1).Contains(WEBGPU_LOG_PSO_NAME); + const bool log = debugName.Contains(WEBGPU_LOG_PSO_NAME); #else const bool log = true; #endif #endif + // Generate a hash for the key + key.LastFrameUsed = Engine::FrameCount; + key.Hash = Crc::MemCrc32(&key.Entries, key.EntriesCount * sizeof(WGPUBindGroupEntry)); + CombineHash(key.Hash, GetHash(key.EntriesCount)); + CombineHash(key.Hash, GetHash(key.Layout)); + CombineHash(key.Hash, Crc::MemCrc32(&key.Versions, key.EntriesCount * sizeof(uint8))); + + // Lookup 
for existing bind group + WGPUBindGroup bindGroup; + auto found = _bindGroups.Find(key); + if (found.IsNotEnd()) + { + // Get cached bind group and update the last usage frame + bindGroup = found->Value; + found->Key.LastFrameUsed = key.LastFrameUsed; + + // Periodically remove old bind groups (unused for some time) + if (key.LastFrameUsed - _lastFrameBindGroupsGC > gcFrames * 2) + { + _lastFrameBindGroupsGC = key.LastFrameUsed; + int32 freed = 0; + for (auto it = _bindGroups.Begin(); it.IsNotEnd(); ++it) + { + if (key.LastFrameUsed - it->Key.LastFrameUsed > gcFrames) + { + freed++; + wgpuBindGroupRelease(it->Value); + _bindGroups.Remove(it); + } + } +#if WEBGPU_LOG_BIND_GROUPS + if (freed > 0 && log) + LOG(Info, "[WebGPU] Removed {} old entries from '{}'", freed, String(debugName)); +#endif + } + + return bindGroup; + } + PROFILE_CPU(); + PROFILE_MEM(GraphicsShaders); +#if GPU_ENABLE_RESOURCE_NAMING + ZoneText(debugName.Get(), debugName.Length()); +#endif +#if WEBGPU_LOG_BIND_GROUPS + if (log) + LOG(Info, "[WebGPU] GetBindGroup: '{}', hash: {}", String(debugName), key.Hash); +#endif + + // Build description + WGPUBindGroupDescriptor desc = WGPU_BIND_GROUP_DESCRIPTOR_INIT; +#if GPU_ENABLE_RESOURCE_NAMING + desc.label = { debugName.Get(), (size_t)debugName.Length() }; +#endif + desc.layout = key.Layout; + desc.entryCount = key.EntriesCount; + desc.entries = key.Entries; + + // Create object + bindGroup = wgpuDeviceCreateBindGroup(device, &desc); + if (!bindGroup) + { +#if GPU_ENABLE_RESOURCE_NAMING + LOG(Error, "wgpuDeviceCreateBindGroup failed for {}", String(debugName)); +#endif + return nullptr; + } + +#if WEBGPU_LOG_BIND_GROUPS + // Debug detection of hash collisions + int32 collisions = 0, equalLayout = 0, equalEntries = 0, equalVersions = 0; + for (auto& e : _bindGroups) + { + auto& other = e.Key; + if (key.Hash == other.Hash) + { + collisions++; + if (key.Layout == other.Layout) + equalLayout++; + if (key.EntriesCount == other.EntriesCount && 
Platform::MemoryCompare(&key.Entries, &other.Entries, key.EntriesCount * sizeof(WGPUBindGroupEntry)) == 0) + equalEntries++; + if (key.EntriesCount == other.EntriesCount && Platform::MemoryCompare(&key.Versions, &other.Versions, key.EntriesCount * sizeof(uint8)) == 0) + equalVersions++; + } + } + if (collisions > 1 && log) + LOG(Error, "> Hash collision! {}/{} (capacity: {}), equalLayout: {}, equalEntries: {}, equalVersions: {}", collisions, _bindGroups.Count(), _bindGroups.Capacity(), equalLayout, equalEntries, equalVersions); +#endif + + // Cache it + _bindGroups.Add(key, bindGroup); + return bindGroup; +} + +WGPURenderPipeline GPUPipelineStateWebGPU::GetPipeline(const PipelineKey& key, const GPUContextBindingsWebGPU& bindings) +{ + WGPURenderPipeline pipeline; + if (_pipelines.TryGet(key, pipeline)) + return pipeline; + PROFILE_CPU(); + PROFILE_MEM(GraphicsShaders); +#if GPU_ENABLE_RESOURCE_NAMING + ZoneText(_debugName.Get(), _debugName.Count() - 1); +#endif +#if WEBGPU_LOG_PSO +#ifdef WEBGPU_LOG_PSO_NAME + const bool log = StringAnsiView(_debugName.Get(), _debugName.Count() - 1).Contains(WEBGPU_LOG_PSO_NAME); +#else + const bool log = true; +#endif + if (log) + LOG(Info, "[WebGPU] GetPipeline: '{}'", String(_debugName.Get(), _debugName.Count() - 1)); +#endif + // Lazy-init layout (cannot do it during Init as texture samplers that access eg. 
depth need to explicitly use UnfilterableFloat) if (!PipelineDesc.layout) - InitLayout(shaderResources); + InitLayout(bindings); // Build final pipeline description _depthStencilDesc.format = (WGPUTextureFormat)key.DepthStencilFormat; @@ -295,107 +622,16 @@ WGPURenderPipeline GPUPipelineStateWebGPU::GetPipeline(const PipelineKey& key, G return pipeline; } -WGPUBindGroup GPUPipelineStateWebGPU::GetBindGroup(BindGroupKey& key) +void GPUPipelineStateWebGPU::InitLayout(const GPUContextBindingsWebGPU& bindings) { - // Generate a hash for the key - key.LastFrameUsed = Engine::FrameCount; - key.Hash = Crc::MemCrc32(&key.Entries, key.EntriesCount * sizeof(WGPUBindGroupEntry)); - CombineHash(key.Hash, GetHash(key.EntriesCount)); - CombineHash(key.Hash, GetHash(key.Layout)); - CombineHash(key.Hash, Crc::MemCrc32(&key.Versions, key.EntriesCount * sizeof(uint8))); - - // Lookup for existing bind group - WGPUBindGroup bindGroup; - auto found = _bindGroups.Find(key); - if (found.IsNotEnd()) - { - // Get cached bind group and update the last usage frame - bindGroup = found->Value; - found->Key.LastFrameUsed = key.LastFrameUsed; - - // Periodically remove old bind groups (unused for some time) - if (key.LastFrameUsed - _lastFrameBindGroupsGC > 100) - { - _lastFrameBindGroupsGC = key.LastFrameUsed; - int32 freed = 0; - for (auto it = _bindGroups.Begin(); it.IsNotEnd(); ++it) - { - if (key.LastFrameUsed - it->Key.LastFrameUsed > 50) - { - freed++; - wgpuBindGroupRelease(it->Value); - _bindGroups.Remove(it); - } - } -#if WEBGPU_LOG_BIND_GROUPS - if (freed > 0) - { - LOG(Info, "[WebGPU] Removed {} old entries from '{}'", freed, String(_debugName.Get(), _debugName.Count() - 1)); - } -#endif - } - - return bindGroup; - } - PROFILE_CPU(); - PROFILE_MEM(GraphicsCommands); #if GPU_ENABLE_RESOURCE_NAMING - ZoneText(_debugName.Get(), _debugName.Count() - 1); + StringAnsiView debugName(_debugName.Get(), _debugName.Count() - 1); +#else + StringAnsiView debugName; #endif -#if 
WEBGPU_LOG_BIND_GROUPS - LOG(Info, "[WebGPU] GetBindGroup: '{}', hash: {}", String(_debugName.Get(), _debugName.Count() - 1), key.Hash); -#endif - - // Build description - WGPUBindGroupDescriptor desc = WGPU_BIND_GROUP_DESCRIPTOR_INIT; -#if GPU_ENABLE_RESOURCE_NAMING - desc.label = PipelineDesc.label; -#endif - desc.layout = key.Layout; - desc.entryCount = key.EntriesCount; - desc.entries = key.Entries; - - // Create object - bindGroup = wgpuDeviceCreateBindGroup(_device->Device, &desc); - if (!bindGroup) - { -#if GPU_ENABLE_RESOURCE_NAMING - LOG(Error, "wgpuDeviceCreateBindGroup failed for {}", String(_debugName.Get(), _debugName.Count() - 1)); -#endif - return nullptr; - } - -#if WEBGPU_LOG_BIND_GROUPS - // Debug detection of hash collisions - int32 collisions = 0, equalLayout = 0, equalEntries = 0, equalVersions = 0; - for (auto& e : _bindGroups) - { - auto& other = e.Key; - if (key.Hash == other.Hash) - { - collisions++; - if (key.Layout == other.Layout) - equalLayout++; - if (key.EntriesCount == other.EntriesCount && Platform::MemoryCompare(&key.Entries, &other.Entries, key.EntriesCount * sizeof(WGPUBindGroupEntry)) == 0) - equalEntries++; - if (key.EntriesCount == other.EntriesCount && Platform::MemoryCompare(&key.Versions, &other.Versions, key.EntriesCount * sizeof(uint8)) == 0) - equalVersions++; - } - } - if (collisions > 1) - LOG(Error, "> Hash collision! 
{}/{} (capacity: {}), equalLayout: {}, equalEntries: {}, equalVersions: {}", collisions, _bindGroups.Count(), _bindGroups.Capacity(), equalLayout, equalEntries, equalVersions); -#endif - - // Cache it - _bindGroups.Add(key, bindGroup); - return bindGroup; -} - -void GPUPipelineStateWebGPU::InitLayout(GPUResourceView* shaderResources[GPU_MAX_SR_BINDED]) -{ #if WEBGPU_LOG_PSO #ifdef WEBGPU_LOG_PSO_NAME - const bool log = StringAnsiView(_debugName.Get(), _debugName.Count() - 1).Contains(WEBGPU_LOG_PSO_NAME); + const bool log = debugName.Contains(WEBGPU_LOG_PSO_NAME); #else const bool log = true; #endif @@ -409,175 +645,15 @@ void GPUPipelineStateWebGPU::InitLayout(GPUResourceView* shaderResources[GPU_MAX if (descriptors && maxEntriesCount < descriptors->DescriptorTypesCount) maxEntriesCount = (int32)descriptors->DescriptorTypesCount; } - Array> entries; + BindGroupEntries entries; entries.Resize(maxEntriesCount); // Setup bind groups - WGPUBindGroupLayoutEntry* entriesPtr = entries.Get(); for (int32 groupIndex = 0; groupIndex < ARRAY_COUNT(BindGroupDescriptors); groupIndex++) { auto descriptors = BindGroupDescriptors[groupIndex]; - if (!descriptors || descriptors->DescriptorTypesCount == 0) - continue; - - int32 entriesCount = descriptors->DescriptorTypesCount; - Platform::MemoryClear(entries.Get(), sizeof(WGPUBindGroupLayoutEntry) * entriesCount); - auto visibility = groupIndex == 0 ? WGPUShaderStage_Vertex : WGPUShaderStage_Fragment; -#if WEBGPU_LOG_PSO - if (log) - LOG(Info, " > group {} - {}", groupIndex, groupIndex == 0 ? 
TEXT("Vertex") : TEXT("Fragment")); - const Char* samplerType = TEXT("?"); -#endif - for (int32 index = 0; index < entriesCount; index++) - { - auto& descriptor = descriptors->DescriptorTypes[index]; - auto& entry = entriesPtr[index]; - entry.binding = descriptor.Binding; - entry.bindingArraySize = descriptor.Count; - entry.visibility = visibility; - switch (descriptor.DescriptorType) - { - case VK_DESCRIPTOR_TYPE_SAMPLER: - entry.sampler.type = WGPUSamplerBindingType_Undefined; - if (descriptor.Slot == 4 || descriptor.Slot == 5) // Hack for ShadowSampler and ShadowSamplerLinear (this could get binded samplers table just like for shaderResources) - entry.sampler.type = WGPUSamplerBindingType_Comparison; -#if WEBGPU_LOG_PSO - switch (entry.sampler.type) - { - case WGPUSamplerBindingType_BindingNotUsed: - samplerType = TEXT("BindingNotUsed"); - break; - case WGPUSamplerBindingType_Undefined: - samplerType = TEXT("Undefined"); - break; - case WGPUSamplerBindingType_Filtering: - samplerType = TEXT("Filtering"); - break; - case WGPUSamplerBindingType_NonFiltering: - samplerType = TEXT("NonFiltering"); - break; - case WGPUSamplerBindingType_Comparison: - samplerType = TEXT("Comparison"); - break; - } - if (log) - LOG(Info, " > [{}] sampler ({})", entry.binding, samplerType); -#endif - break; - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - entry.texture.sampleType = WGPUTextureSampleType_Undefined; - if (shaderResources[descriptor.Slot]) - { - // Hack to use the sample type directly from the view which allows to fix incorrect Depth Buffer reading that allows only manual Load when UnfilterableFloat is used (see SAMPLE_RT_DEPTH) - auto ptr = (GPUResourceViewPtrWebGPU*)shaderResources[descriptor.Slot]->GetNativePtr(); - if (ptr && ptr->TextureView) - entry.texture.sampleType = ptr->TextureView->SampleType; - } -#if WEBGPU_LOG_PSO - if (log) - { - switch (entry.texture.sampleType) - { - case WGPUTextureSampleType_BindingNotUsed: - samplerType = TEXT("BindingNotUsed"); - break; - 
case WGPUTextureSampleType_Undefined: - samplerType = TEXT("Undefined"); - break; - case WGPUTextureSampleType_Float: - samplerType = TEXT("Float"); - break; - case WGPUTextureSampleType_UnfilterableFloat: - samplerType = TEXT("UnfilterableFloat"); - break; - case WGPUTextureSampleType_Depth: - samplerType = TEXT("Depth"); - break; - case WGPUTextureSampleType_Sint: - samplerType = TEXT("Sint"); - break; - case WGPUTextureSampleType_Uint: - samplerType = TEXT("Uint"); - break; - } - switch (descriptor.ResourceType) - { - case SpirvShaderResourceType::Texture1D: - LOG(Info, " > [{}] texture 1D ({})", entry.binding, samplerType); - break; - case SpirvShaderResourceType::Texture2D: - LOG(Info, " > [{}] texture 2D ({})", entry.binding, samplerType); - break; - case SpirvShaderResourceType::Texture3D: - LOG(Info, " > [{}] texture 3D ({})", entry.binding, samplerType); - break; - case SpirvShaderResourceType::TextureCube: - LOG(Info, " > [{}] texture Cube ({})", entry.binding, samplerType); - break; - case SpirvShaderResourceType::Texture2DArray: - LOG(Info, " > [{}] texture 2D array ({})", entry.binding, samplerType); - break; - } - } -#endif - switch (descriptor.ResourceType) - { - case SpirvShaderResourceType::Texture1D: - entry.texture.viewDimension = WGPUTextureViewDimension_1D; - break; - case SpirvShaderResourceType::Texture2D: - entry.texture.viewDimension = WGPUTextureViewDimension_2D; - break; - case SpirvShaderResourceType::Texture3D: - entry.texture.viewDimension = WGPUTextureViewDimension_3D; - break; - case SpirvShaderResourceType::TextureCube: - entry.texture.viewDimension = WGPUTextureViewDimension_Cube; - break; - case SpirvShaderResourceType::Texture1DArray: - CRASH; // Not supported TODO: add error at compile time (in ShaderCompilerWebGPU::Write) - break; - case SpirvShaderResourceType::Texture2DArray: - entry.texture.viewDimension = WGPUTextureViewDimension_2DArray; - break; - } - break; - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - 
entry.buffer.hasDynamicOffset = true; - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - if (descriptor.BindingType == SpirvShaderResourceBindingType::SRV) - entry.buffer.type = WGPUBufferBindingType_ReadOnlyStorage; - else - entry.buffer.type = WGPUBufferBindingType_Storage; -#if WEBGPU_LOG_PSO - if (log) - LOG(Info, " > [{}] storage buffer (read-only = {}, dynamic = {})", entry.binding, entry.buffer.type == WGPUBufferBindingType_ReadOnlyStorage, entry.buffer.hasDynamicOffset); -#endif - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - entry.buffer.hasDynamicOffset = true; - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - entry.buffer.type = WGPUBufferBindingType_Uniform; -#if WEBGPU_LOG_PSO - if (log) - LOG(Info, " > [{}] uniform buffer (dynamic = {})", entry.binding, entry.buffer.hasDynamicOffset); -#endif - break; - default: -#if GPU_ENABLE_DIAGNOSTICS - LOG(Fatal, "Unknown descriptor type: {} used as {} in '{}'", (uint32)descriptor.DescriptorType, (uint32)descriptor.BindingType, String(_debugName.Get(), _debugName.Count() - 1)); -#else - CRASH; -#endif - return; - } - } - - // Create a bind group layout - WGPUBindGroupLayoutDescriptor bindGroupLayoutDesc = WGPU_BIND_GROUP_LAYOUT_DESCRIPTOR_INIT; - bindGroupLayoutDesc.entryCount = entriesCount; - bindGroupLayoutDesc.entries = entriesPtr; - BindGroupLayouts[groupIndex] = wgpuDeviceCreateBindGroupLayout(_device->Device, &bindGroupLayoutDesc); + if (descriptors) + BindGroupLayouts[groupIndex] = CreateBindGroupLayout(_device->Device, bindings, groupIndex, *descriptors, entries, debugName, log); } // Create the pipeline layout @@ -591,7 +667,6 @@ void GPUPipelineStateWebGPU::InitLayout(GPUResourceView* shaderResources[GPU_MAX if (!PipelineDesc.layout) { LOG(Error, "wgpuDeviceCreatePipelineLayout failed"); - return; } } diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.h b/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.h index b36ecccb2..bafa40d40 100644 --- 
a/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.h +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.h @@ -38,17 +38,7 @@ public: }; // Batches bind group description for the pipeline state. Used as a key for caching created bind groups. - struct BindGroupKey - { - uint32 Hash; - WGPUBindGroupLayout Layout; - mutable uint64 LastFrameUsed; - WGPUBindGroupEntry Entries[64]; - uint8 EntriesCount; - uint8 Versions[64]; // Versions of descriptors used to differentiate when texture residency gets changed - - bool operator==(const BindGroupKey& other) const; - }; + typedef GPUBindGroupKeyWebGPU BindGroupKey; private: #if GPU_ENABLE_RESOURCE_NAMING @@ -61,7 +51,7 @@ private: WGPUVertexBufferLayout _vertexBuffers[GPU_MAX_VB_BINDED]; Dictionary _pipelines; Dictionary _bindGroups; - uint64 _lastFrameBindGroupsGC = 0; + GPUBindGroupCacheWebGPU _bindGroupCache; public: GPUShaderProgramVSWebGPU* VS = nullptr; @@ -78,13 +68,21 @@ public: public: // Gets the pipeline for the given rendering state. Pipelines are cached and reused for the same key. - WGPURenderPipeline GetPipeline(const PipelineKey& key, GPUResourceView* shaderResources[GPU_MAX_SR_BINDED]); + WGPURenderPipeline GetPipeline(const PipelineKey& key, const GPUContextBindingsWebGPU& bindings); // Gets the bind group for the given key (unhashed). Bind groups are cached and reused for the same key. 
- WGPUBindGroup GetBindGroup(BindGroupKey& desc); + FORCE_INLINE WGPUBindGroup GetBindGroup(BindGroupKey& key) + { +#if GPU_ENABLE_RESOURCE_NAMING + StringAnsiView debugName(_debugName.Get(), _debugName.Count() - 1); +#else + StringAnsiView debugName; +#endif + return _bindGroupCache.Get(_device->Device, key, debugName); + } private: - void InitLayout(GPUResourceView* shaderResources[GPU_MAX_SR_BINDED]); + void InitLayout(const GPUContextBindingsWebGPU& bindings); public: // [GPUPipelineState] @@ -97,6 +95,6 @@ protected: }; uint32 GetHash(const GPUPipelineStateWebGPU::PipelineKey& key); -uint32 GetHash(const GPUPipelineStateWebGPU::BindGroupKey& key); +uint32 GetHash(const GPUBindGroupKeyWebGPU& key); #endif diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUShaderProgramWebGPU.h b/Source/Engine/GraphicsDevice/WebGPU/GPUShaderProgramWebGPU.h index b20d9b617..8d1fba61e 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUShaderProgramWebGPU.h +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUShaderProgramWebGPU.h @@ -8,6 +8,42 @@ #include "Engine/GraphicsDevice/Vulkan/Types.h" #include +/// +/// Bundle of the current bound state to the Web GPU context (used to properly handle different texture layouts or samplers when building bind group layout). +/// +struct GPUContextBindingsWebGPU +{ + GPUResourceView** ShaderResources; // [GPU_MAX_SR_BINDED] +}; + +/// +/// Batch of bind group descriptions for the layout. Used as a key for caching created bind groups. +/// +struct GPUBindGroupKeyWebGPU +{ + uint32 Hash; + WGPUBindGroupLayout Layout; + mutable uint64 LastFrameUsed; + WGPUBindGroupEntry Entries[64]; + uint8 EntriesCount; + uint8 Versions[64]; // Versions of descriptors used to differentiate when texture residency gets changed + + bool operator==(const GPUBindGroupKeyWebGPU& other) const; +}; + +/// +/// Reusable utility for caching bind group objects. 
Handles reusing bind groups for the same key and releasing them when they are not used for a long time (based on the frame number). +/// +struct GPUBindGroupCacheWebGPU +{ +private: + uint64 _lastFrameBindGroupsGC = 0; + Dictionary _bindGroups; // TODO: try using LRU cache + +public: + WGPUBindGroup Get(WGPUDevice device, GPUBindGroupKeyWebGPU& key, const StringAnsiView& debugName, uint64 gcFrames = 50); +}; + /// /// Shaders base class for Web GPU backend. /// @@ -69,4 +105,39 @@ public: } }; +/// +/// Compute Shader for Web GPU backend. +/// +class GPUShaderProgramCSWebGPU : public GPUShaderProgramWebGPU +{ +private: + WGPUComputePipeline _pipeline = nullptr; + WGPUBindGroupLayout _bindGroupLayout = nullptr; + GPUBindGroupCacheWebGPU _bindGroupCache; + +public: + GPUShaderProgramCSWebGPU(const GPUShaderProgramInitializer& initializer, const SpirvShaderDescriptorInfo& descriptorInfo, WGPUShaderModule shaderModule) + : GPUShaderProgramWebGPU(initializer, descriptorInfo, shaderModule) + { + } + + ~GPUShaderProgramCSWebGPU() + { + if (_bindGroupLayout) + wgpuBindGroupLayoutRelease(_bindGroupLayout); + if (_pipeline) + wgpuComputePipelineRelease(_pipeline); + } + +public: + // Gets the pipeline. + WGPUComputePipeline GetPipeline(WGPUDevice device, const GPUContextBindingsWebGPU& bindings, WGPUBindGroupLayout& resultBindGroupLayout); + + // Gets the bind group for the given key (unhashed). Bind groups are cached and reused for the same key. 
+ FORCE_INLINE WGPUBindGroup GetBindGroup(WGPUDevice device, GPUBindGroupKeyWebGPU& key) + { + return _bindGroupCache.Get(device, key, _name, 60 * 60); + } +}; + #endif diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUShaderWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUShaderWebGPU.cpp index 1619d16b6..c3fcf0baa 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUShaderWebGPU.cpp +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUShaderWebGPU.cpp @@ -79,10 +79,11 @@ GPUShaderProgram* GPUShaderWebGPU::CreateGPUShaderProgram(ShaderStage type, cons break; } case ShaderStage::Pixel: - { shader = New(initializer, header->DescriptorInfo, shaderModule); break; - } + case ShaderStage::Compute: + shader = New(initializer, header->DescriptorInfo, shaderModule); + break; } return shader; } diff --git a/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.ParticleModules.cpp b/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.ParticleModules.cpp index 321312476..331e69c15 100644 --- a/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.ParticleModules.cpp +++ b/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.ParticleModules.cpp @@ -843,10 +843,10 @@ void ParticleEmitterGPUGenerator::ProcessModule(Node* node) " {{\n" " // Collision (depth)\n" " float3 nextPos = {0} + {1} * DeltaTime;\n" - " nextPos = mul(float4(nextPos, 1), WorldMatrix).xyz;\n" // TODO: don't transform by WorldMatrix if particle system uses World Space simulation + " nextPos = PROJECT_POINT(float4(nextPos, 1), WorldMatrix).xyz;\n" // TODO: don't transform by WorldMatrix if particle system uses World Space simulation - " float3 viewPos = mul(float4(nextPos, 1), ViewMatrix);\n" - " float4 projPos = mul(float4(nextPos, 1), ViewProjectionMatrix);\n" + " float3 viewPos = PROJECT_POINT(float4(nextPos, 1), ViewMatrix);\n" + " float4 projPos = PROJECT_POINT(float4(nextPos, 1), ViewProjectionMatrix);\n" " projPos.xyz /= projPos.w;\n" " if (all(abs(projPos.xy) < 1.0f))\n" " {{\n" @@ 
-871,8 +871,8 @@ void ParticleEmitterGPUGenerator::ProcessModule(Node* node) " viewPos.z = linearDepth;\n" " \n" - " {0} = mul(float4(viewPos, 1), InvViewMatrix).xyz;\n" // TODO: don't transform by WorldMatrix if particle system uses World Space simulation - " {0} = mul(float4({0}, 1), InvWorldMatrix).xyz;\n" // TODO: don't transform by WorldMatrix if particle system uses World Space simulation + " {0} = PROJECT_POINT(float4(viewPos, 1), InvViewMatrix).xyz;\n" // TODO: don't transform by WorldMatrix if particle system uses World Space simulation + " {0} = PROJECT_POINT(float4({0}, 1), InvWorldMatrix).xyz;\n" // TODO: don't transform by WorldMatrix if particle system uses World Space simulation COLLISION_LOGIC() " }}\n" diff --git a/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.h b/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.h index e4971a7a6..360f61592 100644 --- a/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.h +++ b/Source/Engine/Particles/Graph/GPU/ParticleEmitterGraph.GPU.h @@ -5,7 +5,7 @@ /// /// Current GPU particles emitter shader version. 
/// -#define PARTICLE_GPU_GRAPH_VERSION 11 +#define PARTICLE_GPU_GRAPH_VERSION 12 #if COMPILE_WITH_PARTICLE_GPU_GRAPH diff --git a/Source/Shaders/BitonicSort.shader b/Source/Shaders/BitonicSort.shader index 6538ff7ff..129f58de7 100644 --- a/Source/Shaders/BitonicSort.shader +++ b/Source/Shaders/BitonicSort.shader @@ -54,7 +54,7 @@ bool ShouldSwap(float a, float b) RWByteAddressBuffer IndirectArgsBuffer : register(u0); -META_CS(true, FEATURE_LEVEL_SM5) +META_CS(true, AUTO) [numthreads(22, 1, 1)] void CS_IndirectArgs(uint groupIndex : SV_GroupIndex) { @@ -129,7 +129,7 @@ void StoreItem(uint element, uint count) #ifdef _CS_PreSort -META_CS(true, FEATURE_LEVEL_SM5) +META_CS(true, AUTO) META_PERMUTATION_1(THREAD_GROUP_SIZE=1024) META_PERMUTATION_1(THREAD_GROUP_SIZE=64) [numthreads(THREAD_GROUP_SIZE, 1, 1)] @@ -177,7 +177,7 @@ void CS_PreSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex) #ifdef _CS_InnerSort -META_CS(true, FEATURE_LEVEL_SM5) +META_CS(true, AUTO) [numthreads(THREAD_GROUP_SIZE, 1, 1)] void CS_InnerSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex) { @@ -222,7 +222,7 @@ void CS_InnerSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex) RWBuffer SortedIndices : register(u0); RWBuffer SortingKeys : register(u1); -META_CS(true, FEATURE_LEVEL_SM5) +META_CS(true, AUTO) [numthreads(1024, 1, 1)] void CS_OuterSort(uint3 dispatchThreadId : SV_DispatchThreadID) { diff --git a/Source/Shaders/Common.hlsl b/Source/Shaders/Common.hlsl index c678259d1..f28c087fc 100644 --- a/Source/Shaders/Common.hlsl +++ b/Source/Shaders/Common.hlsl @@ -59,7 +59,7 @@ #else #define CAN_USE_GATHER 0 #endif -#if FEATURE_LEVEL >= FEATURE_LEVEL_SM5 +#if FEATURE_LEVEL >= FEATURE_LEVEL_SM5 || defined(WGSL) #define CAN_USE_COMPUTE_SHADER 1 #else #define CAN_USE_COMPUTE_SHADER 0 @@ -79,6 +79,7 @@ // Alias read-only Buffer binded as shader resource into StructuredBuffer to be used as storage on WebGPU (not supported) #define CAN_USE_TYPED_BUFFER_LOADS 0 
#define Buffer StructuredBuffer +#define RWBuffer RWStructuredBuffer // Hack matrix multiplication order for WebGPU (row-major vs column-major bug?) #define PROJECT_POINT(p, m) mul(m, p) diff --git a/Source/Shaders/GPUParticlesSorting.shader b/Source/Shaders/GPUParticlesSorting.shader index 113096421..67ec94a71 100644 --- a/Source/Shaders/GPUParticlesSorting.shader +++ b/Source/Shaders/GPUParticlesSorting.shader @@ -35,7 +35,7 @@ float3 GetParticleVec3(uint particleIndex, int offset) } // Sorting keys generation shader -META_CS(true, FEATURE_LEVEL_SM5) +META_CS(true, AUTO) META_PERMUTATION_1(SORT_MODE=0) META_PERMUTATION_1(SORT_MODE=1) META_PERMUTATION_1(SORT_MODE=2) diff --git a/Source/Shaders/Histogram.shader b/Source/Shaders/Histogram.shader index ab1fa1850..5fb281c72 100644 --- a/Source/Shaders/Histogram.shader +++ b/Source/Shaders/Histogram.shader @@ -20,7 +20,7 @@ META_CB_END RWStructuredBuffer HistogramBuffer : register(u0); // Clears the histogram -META_CS(true, FEATURE_LEVEL_SM5) +META_CS(true, AUTO) [numthreads(THREADGROUP_SIZE_X, 1, 1)] void CS_ClearHistogram(uint dispatchThreadId : SV_DispatchThreadID) { @@ -44,7 +44,7 @@ float ComputeHistogramPositionFromLuminance(float luminance) groupshared uint SharedHistogram[HISTOGRAM_SIZE]; // Generates the histogram -META_CS(true, FEATURE_LEVEL_SM5) +META_CS(true, AUTO) [numthreads(THREADGROUP_SIZE_X, THREADGROUP_SIZE_Y, 1)] void CS_GenerateHistogram(uint3 dispatchThreadId : SV_DispatchThreadID, uint3 groupThreadId : SV_GroupThreadID) {