diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUBufferWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUBufferWebGPU.cpp index 57c657531..eeded8e92 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUBufferWebGPU.cpp +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUBufferWebGPU.cpp @@ -71,11 +71,13 @@ bool GPUBufferWebGPU::OnInit() switch (_desc.Usage) { case GPUResourceUsage::Default: - if (!_desc.InitData) - bufferDesc.usage |= WGPUBufferUsage_CopyDst; + bufferDesc.usage |= WGPUBufferUsage_CopyDst; break; case GPUResourceUsage::Dynamic: - bufferDesc.usage |= WGPUBufferUsage_MapWrite; + if (bufferDesc.usage == 0) // WebGPU doesn't allow to map-write Index/Vertex/Storage buffers + bufferDesc.usage = WGPUBufferUsage_MapWrite; + else + bufferDesc.usage |= WGPUBufferUsage_CopyDst; break; case GPUResourceUsage::StagingUpload: bufferDesc.usage |= WGPUBufferUsage_MapWrite | WGPUBufferUsage_CopySrc; @@ -88,19 +90,24 @@ bool GPUBufferWebGPU::OnInit() break; } bufferDesc.size = _desc.Size; - bufferDesc.mappedAtCreation = _desc.InitData != nullptr; + bufferDesc.mappedAtCreation = _desc.InitData != nullptr && (bufferDesc.usage & WGPUBufferUsage_MapWrite); Buffer = wgpuDeviceCreateBuffer(_device->Device, &bufferDesc); if (!Buffer) return true; _memoryUsage = _desc.Size; + Usage = bufferDesc.usage; // Initialize with a data if provided - if (_desc.InitData) + if (bufferDesc.mappedAtCreation) { //wgpuBufferWriteMappedRange(Buffer, 0, _desc.InitData, _desc.Size); Platform::MemoryCopy(wgpuBufferGetMappedRange(Buffer, 0, _desc.Size), _desc.InitData, _desc.Size); wgpuBufferUnmap(Buffer); } + else if (_desc.InitData) + { + wgpuQueueWriteBuffer(_device->Queue, Buffer, 0, _desc.InitData, _desc.Size); + } // Create view _view.Set(this, Buffer); diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUBufferWebGPU.h b/Source/Engine/GraphicsDevice/WebGPU/GPUBufferWebGPU.h index 951c96335..8020d1167 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUBufferWebGPU.h +++ 
b/Source/Engine/GraphicsDevice/WebGPU/GPUBufferWebGPU.h @@ -52,6 +52,8 @@ public: public: // Handle to the WebGPU buffer object. WGPUBuffer Buffer = nullptr; + // Usage flags of the buffer. + WGPUBufferUsage Usage = 0; public: // [GPUBuffer] diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp index 2ec86e47a..fec3cc243 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp @@ -11,6 +11,7 @@ #include "GPUSamplerWebGPU.h" #include "GPUVertexLayoutWebGPU.h" #include "RenderToolsWebGPU.h" +#include "Engine/Core/Log.h" #include "Engine/Core/Math/Viewport.h" #include "Engine/Core/Math/Rectangle.h" #include "Engine/Profiler/ProfilerCPU.h" @@ -42,6 +43,20 @@ GPUContextWebGPU::GPUContextWebGPU(GPUDeviceWebGPU* device) { _vertexBufferNullLayout = WGPU_VERTEX_BUFFER_LAYOUT_INIT; _minUniformBufferOffsetAlignment = device->MinUniformBufferOffsetAlignment; + + // Setup descriptor handles tables lookup cache + _resourceTables[(int32)SpirvShaderResourceBindingType::INVALID] = nullptr; + _resourceTables[(int32)SpirvShaderResourceBindingType::CB] = nullptr; + _resourceTables[(int32)SpirvShaderResourceBindingType::SAMPLER] = nullptr; + _resourceTables[(int32)SpirvShaderResourceBindingType::SRV] = _shaderResources; + _resourceTables[(int32)SpirvShaderResourceBindingType::UAV] = _storageResources; +#if ENABLE_ASSERTION + _resourceTableSizes[(int32)SpirvShaderResourceBindingType::INVALID] = 0; + _resourceTableSizes[(int32)SpirvShaderResourceBindingType::CB] = GPU_MAX_CB_BINDED; + _resourceTableSizes[(int32)SpirvShaderResourceBindingType::SAMPLER] = GPU_MAX_SAMPLER_BINDED; + _resourceTableSizes[(int32)SpirvShaderResourceBindingType::SRV] = GPU_MAX_SR_BINDED; + _resourceTableSizes[(int32)SpirvShaderResourceBindingType::UAV] = GPU_MAX_UA_BINDED; +#endif } GPUContextWebGPU::~GPUContextWebGPU() @@ -311,7 +326,7 @@ void 
GPUContextWebGPU::UpdateCB(GPUConstantBuffer* cb, const void* data) if (size != 0) { // Allocate a chunk of memory in a shared page allocator - auto allocation = _device->DataUploader.Allocate(size, _minUniformBufferOffsetAlignment, WGPUBufferUsage_Uniform); + auto allocation = _device->DataUploader.Allocate(size, WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst, _minUniformBufferOffsetAlignment); cbWebGPU->Allocation = allocation; // TODO: consider holding CPU-side staging buffer and copying data to the GPU buffer in a single batch for all uniforms (before flushing the active command encoder) wgpuQueueWriteBuffer(_device->Queue, allocation.Buffer, allocation.Offset, data, size); @@ -448,10 +463,7 @@ void GPUContextWebGPU::Flush() // End existing pass (if any) if (_renderPass) - { - wgpuRenderPassEncoderEnd(_renderPass); - wgpuRenderPassEncoderRelease(_renderPass); - } + EndRenderPass(); // End commands recording WGPUCommandBufferDescriptor commandBufferDesc = WGPU_COMMAND_BUFFER_DESCRIPTOR_INIT; @@ -462,6 +474,10 @@ void GPUContextWebGPU::Flush() wgpuQueueSubmit(_device->Queue, 1, &commandBuffer); wgpuCommandBufferRelease(commandBuffer); } + + for (auto e : _unusedBindGroups) + wgpuBindGroupRelease(e); + _unusedBindGroups.Clear(); } void GPUContextWebGPU::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 size, uint32 offset) @@ -469,11 +485,19 @@ void GPUContextWebGPU::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 ASSERT(data); ASSERT(buffer && buffer->GetSize() >= size + offset); auto bufferWebGPU = (GPUBufferWebGPU*)buffer; - if (bufferWebGPU->IsDynamic()) + if (bufferWebGPU->Usage & WGPUBufferUsage_MapWrite) { + CRASH; // TODO: impl this (map if not mapped yet and memcpy) + } + else if (bufferWebGPU->IsDynamic()) + { + // Cannot insert copy commands in encoder during render pass + if (_renderPass) + EndRenderPass(); + // Synchronous upload via shared buffer // TODO: test using map/unmap sequence - auto allocation = 
_device->DataUploader.Allocate(size - offset); + auto allocation = _device->DataUploader.Allocate(size - offset, WGPUBufferUsage_CopySrc | WGPUBufferUsage_CopyDst); wgpuQueueWriteBuffer(_device->Queue, allocation.Buffer, allocation.Offset, data, size); wgpuCommandEncoderCopyBufferToBuffer(Encoder, allocation.Buffer, allocation.Offset, bufferWebGPU->Buffer, offset, size); } @@ -486,6 +510,10 @@ void GPUContextWebGPU::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32 void GPUContextWebGPU::CopyBuffer(GPUBuffer* dstBuffer, GPUBuffer* srcBuffer, uint32 size, uint32 dstOffset, uint32 srcOffset) { + // Cannot insert copy commands in encoder during render pass + if (_renderPass) + EndRenderPass(); + ASSERT(dstBuffer && srcBuffer); auto srcBufferWebGPU = (GPUBufferWebGPU*)srcBuffer; auto dstBufferWebGPU = (GPUBufferWebGPU*)dstBuffer; @@ -553,6 +581,10 @@ void GPUContextWebGPU::CopyCounter(GPUBuffer* dstBuffer, uint32 dstOffset, GPUBu void GPUContextWebGPU::CopyResource(GPUResource* dstResource, GPUResource* srcResource) { + // Cannot insert copy commands in encoder during render pass + if (_renderPass) + EndRenderPass(); + ASSERT(dstResource && srcResource); auto dstTexture = Cast(dstResource); auto srcTexture = Cast(srcResource); @@ -590,6 +622,10 @@ void GPUContextWebGPU::CopyResource(GPUResource* dstResource, GPUResource* srcRe void GPUContextWebGPU::CopySubresource(GPUResource* dstResource, uint32 dstSubresource, GPUResource* srcResource, uint32 srcSubresource) { + // Cannot insert copy commands in encoder during render pass + if (_renderPass) + EndRenderPass(); + ASSERT(dstResource && srcResource); auto dstTexture = Cast(dstResource); auto srcTexture = Cast(srcResource); @@ -640,11 +676,7 @@ void GPUContextWebGPU::ManualClear(const PendingClear& clear) { // End existing pass (if any) if (_renderPass) - { - wgpuRenderPassEncoderEnd(_renderPass); - wgpuRenderPassEncoderRelease(_renderPass); - _renderPass = nullptr; - } + EndRenderPass(); // Clear with a render 
pass WGPURenderPassColorAttachment colorAttachment; @@ -699,101 +731,7 @@ void GPUContextWebGPU::OnDrawCall() // Check if need to start a new render pass if (_renderPassDirty) { - _renderPassDirty = false; - - // End existing pass (if any) - if (_renderPass) - { - wgpuRenderPassEncoderEnd(_renderPass); - wgpuRenderPassEncoderRelease(_renderPass); - } - - // Start a new render pass - WGPURenderPassColorAttachment colorAttachments[GPU_MAX_RT_BINDED]; - WGPURenderPassDepthStencilAttachment depthStencilAttachment = WGPU_RENDER_PASS_DEPTH_STENCIL_ATTACHMENT_INIT; - WGPURenderPassDescriptor renderPassDesc = WGPU_RENDER_PASS_DESCRIPTOR_INIT; - renderPassDesc.colorAttachmentCount = _renderTargetCount; - renderPassDesc.colorAttachments = colorAttachments; - PendingClear clear; - _pipelineKey.MultiSampleCount = 1; - _pipelineKey.RenderTargetCount = _renderTargetCount; - for (int32 i = 0; i < renderPassDesc.colorAttachmentCount; i++) - { - auto& colorAttachment = colorAttachments[i]; - colorAttachment = WGPU_RENDER_PASS_COLOR_ATTACHMENT_INIT; - auto renderTarget = _renderTargets[i]; - colorAttachment.view = renderTarget->View; - colorAttachment.depthSlice = renderTarget->DepthSlice; - colorAttachment.loadOp = WGPULoadOp_Load; - colorAttachment.storeOp = WGPUStoreOp_Store; - if (FindClear(_depthStencil, clear)) - { - colorAttachment.loadOp = WGPULoadOp_Clear; - colorAttachment.clearValue = { clear.RGBA[0], clear.RGBA[1], clear.RGBA[2], clear.RGBA[3] }; - } - _pipelineKey.MultiSampleCount = (int32)renderTarget->GetMSAA(); - _pipelineKey.RenderTargetFormats[i] = renderTarget->Format; - } - if (_depthStencil) - { - renderPassDesc.depthStencilAttachment = &depthStencilAttachment; - depthStencilAttachment.view = _depthStencil->View; - depthStencilAttachment.depthLoadOp = WGPULoadOp_Load; - depthStencilAttachment.depthStoreOp = _depthStencil->ReadOnly ? 
WGPUStoreOp_Discard : WGPUStoreOp_Store; - depthStencilAttachment.depthReadOnly = _depthStencil->ReadOnly; - if (_depthStencil->HasStencil) - { - depthStencilAttachment.stencilLoadOp = WGPULoadOp_Load; - depthStencilAttachment.stencilStoreOp = _depthStencil->ReadOnly ? WGPUStoreOp_Discard : WGPUStoreOp_Store; - depthStencilAttachment.depthReadOnly = _depthStencil->ReadOnly; - } - else - { - depthStencilAttachment.stencilClearValue = 0; - depthStencilAttachment.stencilLoadOp = WGPULoadOp_Clear; - depthStencilAttachment.stencilStoreOp = WGPUStoreOp_Discard; - depthStencilAttachment.stencilReadOnly = true; - } - if (FindClear(_depthStencil, clear)) - { - depthStencilAttachment.depthLoadOp = WGPULoadOp_Clear; - depthStencilAttachment.depthClearValue = clear.Depth; - if (_depthStencil->HasStencil) - { - depthStencilAttachment.stencilLoadOp = WGPULoadOp_Clear; - depthStencilAttachment.stencilClearValue = clear.Stencil; - } - } - _pipelineKey.DepthStencilFormat = _depthStencil->Format; - } - else - { - _pipelineKey.DepthStencilFormat = WGPUTextureFormat_Undefined; - } - _renderPass = wgpuCommandEncoderBeginRenderPass(Encoder, &renderPassDesc); - ASSERT(_renderPass); - - // Discard texture clears (done manually or via render pass) - _pendingClears.Clear(); - - // Apply pending state - if (_stencilRef != 0) - wgpuRenderPassEncoderSetStencilReference(_renderPass, _stencilRef); - auto scissorRect = _scissorRect; - // TODO: skip calling this if scissorRect is default (0, 0, attachment width, attachment height) - wgpuRenderPassEncoderSetScissorRect(_renderPass, (uint32_t)scissorRect.GetX(), (uint32_t)scissorRect.GetY(), (uint32_t)scissorRect.GetWidth(), (uint32_t)scissorRect.GetHeight()); - auto viewport = _viewport; - // TODO: skip calling this if viewport is default (0, 0, attachment width, attachment height, 0, 1) - wgpuRenderPassEncoderSetViewport(_renderPass, viewport.X, viewport.Y, viewport.Width, viewport.Height, viewport.MinDepth, viewport.MaxDepth); - - // Auto-dirty 
pipeline when new render pass starts - if (_pipelineState) - _pipelineDirty = true; - _indexBufferDirty = true; - _vertexBufferDirty = true; - _bindGroupDirty = true; - if (_blendFactorSet) - _blendFactorDirty = true; + FlushRenderPass(); } // Flush rendering states @@ -803,6 +741,9 @@ void GPUContextWebGPU::OnDrawCall() WGPURenderPipeline pipeline = _pipelineState ? _pipelineState->GetPipeline(_pipelineKey) : nullptr; wgpuRenderPassEncoderSetPipeline(_renderPass, pipeline); RENDER_STAT_PS_STATE_CHANGE(); + + // Invalidate bind groups (layout might change) + _bindGroupDirty = true; } if (_indexBufferDirty && _indexBuffer.Buffer) { @@ -826,10 +767,7 @@ void GPUContextWebGPU::OnDrawCall() } if (_bindGroupDirty) { - _bindGroupDirty = false; - // TODO: bind _samplers - // TODO: bind _constantBuffers - // TODO: bind _shaderResources + FlushBindGroup(); } } @@ -838,4 +776,224 @@ void GPUContextWebGPU::OnDispatch(GPUShaderProgramCS* shader) // TODO: add compute shaders support } +void GPUContextWebGPU::EndRenderPass() +{ + wgpuRenderPassEncoderEnd(_renderPass); + wgpuRenderPassEncoderRelease(_renderPass); + _renderPass = nullptr; +} + +void GPUContextWebGPU::FlushRenderPass() +{ + _renderPassDirty = false; + + // End existing pass (if any) + if (_renderPass) + EndRenderPass(); + + // Start a new render pass + WGPURenderPassColorAttachment colorAttachments[GPU_MAX_RT_BINDED]; + WGPURenderPassDepthStencilAttachment depthStencilAttachment = WGPU_RENDER_PASS_DEPTH_STENCIL_ATTACHMENT_INIT; + WGPURenderPassDescriptor renderPassDesc = WGPU_RENDER_PASS_DESCRIPTOR_INIT; + renderPassDesc.colorAttachmentCount = _renderTargetCount; + renderPassDesc.colorAttachments = colorAttachments; + PendingClear clear; + _pipelineKey.MultiSampleCount = 1; + _pipelineKey.RenderTargetCount = _renderTargetCount; + for (int32 i = 0; i < renderPassDesc.colorAttachmentCount; i++) + { + auto& colorAttachment = colorAttachments[i]; + colorAttachment = WGPU_RENDER_PASS_COLOR_ATTACHMENT_INIT; + auto 
renderTarget = _renderTargets[i]; + colorAttachment.view = renderTarget->View; + colorAttachment.depthSlice = renderTarget->DepthSlice; + colorAttachment.loadOp = WGPULoadOp_Load; + colorAttachment.storeOp = WGPUStoreOp_Store; + if (FindClear(_depthStencil, clear)) + { + colorAttachment.loadOp = WGPULoadOp_Clear; + colorAttachment.clearValue = { clear.RGBA[0], clear.RGBA[1], clear.RGBA[2], clear.RGBA[3] }; + } + _pipelineKey.MultiSampleCount = (int32)renderTarget->GetMSAA(); + _pipelineKey.RenderTargetFormats[i] = renderTarget->Format; + } + if (_depthStencil) + { + renderPassDesc.depthStencilAttachment = &depthStencilAttachment; + depthStencilAttachment.view = _depthStencil->View; + depthStencilAttachment.depthLoadOp = WGPULoadOp_Load; + depthStencilAttachment.depthStoreOp = _depthStencil->ReadOnly ? WGPUStoreOp_Discard : WGPUStoreOp_Store; + depthStencilAttachment.depthReadOnly = _depthStencil->ReadOnly; + if (_depthStencil->HasStencil) + { + depthStencilAttachment.stencilLoadOp = WGPULoadOp_Load; + depthStencilAttachment.stencilStoreOp = _depthStencil->ReadOnly ? 
WGPUStoreOp_Discard : WGPUStoreOp_Store; + depthStencilAttachment.depthReadOnly = _depthStencil->ReadOnly; + } + else + { + depthStencilAttachment.stencilClearValue = 0; + depthStencilAttachment.stencilLoadOp = WGPULoadOp_Clear; + depthStencilAttachment.stencilStoreOp = WGPUStoreOp_Discard; + depthStencilAttachment.stencilReadOnly = true; + } + if (FindClear(_depthStencil, clear)) + { + depthStencilAttachment.depthLoadOp = WGPULoadOp_Clear; + depthStencilAttachment.depthClearValue = clear.Depth; + if (_depthStencil->HasStencil) + { + depthStencilAttachment.stencilLoadOp = WGPULoadOp_Clear; + depthStencilAttachment.stencilClearValue = clear.Stencil; + } + } + _pipelineKey.DepthStencilFormat = _depthStencil->Format; + } + else + { + _pipelineKey.DepthStencilFormat = WGPUTextureFormat_Undefined; + } + _renderPass = wgpuCommandEncoderBeginRenderPass(Encoder, &renderPassDesc); + ASSERT(_renderPass); + + // Discard texture clears (done manually or via render pass) + _pendingClears.Clear(); + + // Apply pending state + if (_stencilRef != 0) + wgpuRenderPassEncoderSetStencilReference(_renderPass, _stencilRef); + auto scissorRect = _scissorRect; + // TODO: skip calling this if scissorRect is default (0, 0, attachment width, attachment height) + wgpuRenderPassEncoderSetScissorRect(_renderPass, (uint32_t)scissorRect.GetX(), (uint32_t)scissorRect.GetY(), (uint32_t)scissorRect.GetWidth(), (uint32_t)scissorRect.GetHeight()); + auto viewport = _viewport; + // TODO: skip calling this if viewport is default (0, 0, attachment width, attachment height, 0, 1) + wgpuRenderPassEncoderSetViewport(_renderPass, viewport.X, viewport.Y, viewport.Width, viewport.Height, viewport.MinDepth, viewport.MaxDepth); + + // Auto-dirty pipeline when new render pass starts + if (_pipelineState) + _pipelineDirty = true; + _indexBufferDirty = true; + _vertexBufferDirty = true; + _bindGroupDirty = true; + if (_blendFactorSet) + _blendFactorDirty = true; +} + +void GPUContextWebGPU::FlushBindGroup() +{ + 
_bindGroupDirty = false; + + // Each shader stage (Vertex, Pixel) uses a separate bind group + WGPUBindGroupDescriptor bindGroupDesc = WGPU_BIND_GROUP_DESCRIPTOR_INIT; + for (int32 groupIndex = 0; groupIndex < GPUBindGroupsWebGPU::GraphicsMax; groupIndex++) + { + auto descriptors = _pipelineState->BindGroupDescriptors[groupIndex]; + bindGroupDesc.layout = _pipelineState->BindGroupLayouts[groupIndex]; + if (!descriptors || !bindGroupDesc.layout) + continue; + + // Build descriptors for the bind group + auto entriesCount = descriptors->DescriptorTypesCount; + _dynamicOffsets.Clear(); + _bindGroupEntries.Resize(entriesCount); + auto entriesPtr = _bindGroupEntries.Get(); + Platform::MemoryClear(entriesPtr, entriesCount * sizeof(WGPUBindGroupEntry)); + for (int32 index = 0; index < entriesCount; index++) + { + auto& descriptor = descriptors->DescriptorTypes[index]; + auto& entry = entriesPtr[index]; + entry.binding = descriptor.Binding; + entry.size = WGPU_WHOLE_SIZE; + switch (descriptor.DescriptorType) + { + case VK_DESCRIPTOR_TYPE_SAMPLER: + { + GPUSamplerWebGPU* sampler = _samplers[descriptor.Slot]; + if (!sampler) + sampler = _device->DefaultSamplers[0]; // Fallback + entry.sampler = sampler->Sampler; + break; + } + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + { + ASSERT_LOW_LAYER(descriptor.BindingType == SpirvShaderResourceBindingType::SRV); + auto view = _shaderResources[descriptor.Slot]; + auto ptr = view ? 
(GPUResourceViewPtrWebGPU*)view->GetNativePtr() : nullptr; + if (ptr && ptr->TextureView) + entry.textureView = ptr->TextureView->View; + if (!entry.textureView) + { + // Fallback + view = _device->DefaultTexture->View(0); + ptr = (GPUResourceViewPtrWebGPU*)view->GetNativePtr(); + entry.textureView = ptr->TextureView->View; + } + break; + } + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + { + ASSERT(descriptor.Slot < _resourceTableSizes[(int32)descriptor.BindingType]); + GPUResourceView* view = _resourceTables[(int32)descriptor.BindingType][descriptor.Slot]; + auto ptr = view ? (GPUResourceViewPtrWebGPU*)view->GetNativePtr() : nullptr; + if (ptr && ptr->BufferView) + entry.buffer = ptr->BufferView->Buffer; + if (!entry.buffer) + { + // Fallback + LOG(Error, "Missing resource {} at slot {} of binding space {}", (int32)descriptor.ResourceType, descriptor.Slot, (int32)descriptor.BindingType); + CRASH; // TODO: add default buffer as fallback (_device->DefaultBuffer) + } + break; + } + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + { + GPUConstantBufferWebGPU* uniform = _constantBuffers[descriptor.Slot]; + if (uniform && uniform->Allocation.Buffer) + { + entry.buffer = uniform->Allocation.Buffer; + entry.size = uniform->GetSize(); + _dynamicOffsets.Add(uniform->Allocation.Offset); + } + else + CRASH; // TODO: add dummy buffer as fallback + break; + } + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + { + GPUConstantBufferWebGPU* uniform = _constantBuffers[descriptor.Slot]; + if (uniform && uniform->Allocation.Buffer) + { + entry.buffer = uniform->Allocation.Buffer; + entry.offset = uniform->Allocation.Offset; + entry.size = uniform->GetSize(); + } + else + CRASH; // TODO: add dummy buffer as fallback + break; + } + default: +#if GPU_ENABLE_DIAGNOSTICS + LOG(Fatal, "Unknown descriptor type: {} used as {}", 
(uint32)descriptor.DescriptorType, (uint32)descriptor.BindingType); +#else + CRASH; +#endif + return; + } + } + + // Create a bind group + bindGroupDesc.entryCount = _bindGroupEntries.Count(); + bindGroupDesc.entries = entriesPtr; + WGPUBindGroup bindGroup = wgpuDeviceCreateBindGroup(_device->Device, &bindGroupDesc); + _unusedBindGroups.Add(bindGroup); + // TODO: cache and release them + + // Bind group + wgpuRenderPassEncoderSetBindGroup(_renderPass, groupIndex, bindGroup, _dynamicOffsets.Count(), _dynamicOffsets.Get()); + } +} + #endif diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h index a9e8be54f..c13c8e8c6 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h @@ -44,6 +44,9 @@ private: GPUDeviceWebGPU* _device; WGPUVertexBufferLayout _vertexBufferNullLayout; uint32 _minUniformBufferOffsetAlignment; + Array _bindGroupEntries; + Array _dynamicOffsets; + Array _unusedBindGroups; // State tracking int32 _renderPassDirty : 1; @@ -86,6 +89,9 @@ private: void ManualClear(const PendingClear& clear); void OnDrawCall(); void OnDispatch(GPUShaderProgramCS* shader); + void EndRenderPass(); + void FlushRenderPass(); + void FlushBindGroup(); public: // [GPUContext] diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp index 27e0e9e59..a150a6270 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp @@ -45,13 +45,13 @@ GPUVertexLayoutWebGPU::GPUVertexLayoutWebGPU(GPUDeviceWebGPU* device, const Elem } } -GPUDataUploaderWebGPU::Allocation GPUDataUploaderWebGPU::Allocate(uint32 size, uint32 alignment, WGPUBufferUsage usage) +GPUDataUploaderWebGPU::Allocation GPUDataUploaderWebGPU::Allocate(uint32 size, WGPUBufferUsage usage, uint32 alignment) { // Find a free buffer from the current 
frame for (auto& e : _entries) { uint32 alignedOffset = Math::AlignUp(e.ActiveOffset, alignment); - if (e.ActiveFrame == _frame && (usage ? (e.Usage & usage) == usage : e.Usage == WGPUBufferUsage_CopyDst) && alignedOffset + size <= e.Size) + if (e.ActiveFrame == _frame && e.Usage == usage && alignedOffset + size <= e.Size) { e.ActiveOffset = alignedOffset + size; return { e.Buffer, alignedOffset }; @@ -61,7 +61,7 @@ GPUDataUploaderWebGPU::Allocation GPUDataUploaderWebGPU::Allocate(uint32 size, u // Find an unused buffer from the old frames for (auto& e : _entries) { - if (e.ActiveFrame < _frame - 3 && (e.Usage & usage) == usage && size <= e.Size) + if (e.ActiveFrame < _frame - 3 && e.Usage == usage && size <= e.Size) { e.ActiveOffset = size; e.ActiveFrame = _frame; @@ -79,7 +79,7 @@ GPUDataUploaderWebGPU::Allocation GPUDataUploaderWebGPU::Allocate(uint32 size, u desc.label = WEBGPU_STR("Upload Buffer"); #endif desc.size = Math::Max(16 * 1024, Math::RoundUpToPowerOf2(size)); // Allocate larger pages for good suballocations - desc.usage = WGPUBufferUsage_CopyDst | usage; + desc.usage = usage; WGPUBuffer buffer = wgpuDeviceCreateBuffer(_device, &desc); if (buffer == nullptr) { @@ -490,6 +490,14 @@ void GPUDeviceWebGPU::DrawBegin() GPUDevice::DrawBegin(); DataUploader.DrawBegin(); + + // Create default texture + if (!DefaultTexture) + { + DefaultTexture = New(this, TEXT("DefaultTexture")); + DefaultTexture->Init(GPUTextureDescription::New2D(1, 1, PixelFormat::R8G8B8A8_UNorm, GPUTextureFlags::ShaderResource)); + DefaultTexture->SetResidentMipLevels(1); + } } GPUDeviceWebGPU::~GPUDeviceWebGPU() @@ -565,6 +573,7 @@ void GPUDeviceWebGPU::Dispose() // Clear device resources DataUploader.ReleaseGPU(); + SAFE_DELETE_GPU_RESOURCE(DefaultTexture); SAFE_DELETE_GPU_RESOURCES(DefaultSamplers); SAFE_DELETE(_mainContext); SAFE_DELETE(Adapter); diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.h b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.h index 
8731388b0..5959d1dda 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.h +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.h @@ -10,8 +10,26 @@ class GPUContextWebGPU; class GPUAdapterWebGPU; +class GPUTextureWebGPU; class GPUSamplerWebGPU; +namespace GPUBindGroupsWebGPU +{ + enum Stage + { + // Vertex shader stage + Vertex = 0, + // Pixel shader stage + Pixel = 1, + // Graphics pipeline stages count + GraphicsMax, + // Compute pipeline slot + Compute = 0, + // The maximum amount of slots for all stages + Max = GraphicsMax, + }; +}; + /// /// Pool for uploading data to GPU buffers. It manages large buffers and suballocates for multiple small updates, minimizing the number of buffer creations and copies. /// @@ -39,7 +57,7 @@ public: uint32 Offset = 0; }; - Allocation Allocate(uint32 size, uint32 alignment = 16, WGPUBufferUsage usage = 0); + Allocation Allocate(uint32 size, WGPUBufferUsage usage, uint32 alignment = 16); void DrawBegin(); void ReleaseGPU(); }; @@ -62,6 +80,7 @@ public: WGPUDevice Device = nullptr; WGPUQueue Queue = nullptr; GPUSamplerWebGPU* DefaultSamplers[6] = {}; + GPUTextureWebGPU* DefaultTexture = nullptr; GPUDataUploaderWebGPU DataUploader; uint32 MinUniformBufferOffsetAlignment = 1; diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.cpp index a1efeda23..70230f18f 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.cpp +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.cpp @@ -5,7 +5,6 @@ #include "GPUPipelineStateWebGPU.h" #include "GPUVertexLayoutWebGPU.h" #include "Engine/Core/Log.h" -#include "Engine/Core/Math/Color32.h" #include "Engine/Profiler/ProfilerCPU.h" #include "Engine/Profiler/ProfilerMemory.h" @@ -138,6 +137,20 @@ void GPUPipelineStateWebGPU::OnReleaseGPU() for (auto& e : _pipelines) wgpuRenderPipelineRelease(e.Value); _pipelines.Clear(); + for (auto& e : BindGroupLayouts) + { + if (e) + { 
+ wgpuBindGroupLayoutRelease(e); + e = nullptr; + } + } + if (PipelineDesc.layout) + { + wgpuPipelineLayoutRelease(PipelineDesc.layout); + PipelineDesc.layout = nullptr; + } + Platform::MemoryClear(&BindGroupDescriptors, sizeof(BindGroupDescriptors)); } uint32 GetHash(const GPUPipelineStateWebGPU::Key& key) @@ -260,7 +273,8 @@ bool GPUPipelineStateWebGPU::Init(const Description& desc) writeMask |= WGPUColorWriteMask_Blue; if (EnumHasAllFlags(desc.BlendMode.RenderTargetWriteMask, BlendingMode::ColorWrite::Alpha)) writeMask |= WGPUColorWriteMask_Alpha; - } for (auto& e : _colorTargets) + } + for (auto& e : _colorTargets) { e = WGPU_COLOR_TARGET_STATE_INIT; if (desc.BlendMode.BlendEnable) @@ -270,14 +284,116 @@ bool GPUPipelineStateWebGPU::Init(const Description& desc) // Cache shaders VS = (GPUShaderProgramVSWebGPU*)desc.VS; + BindGroupDescriptors[GPUBindGroupsWebGPU::Vertex] = &VS->DescriptorInfo; PipelineDesc.vertex.module = VS->ShaderModule; PS = (GPUShaderProgramPSWebGPU*)desc.PS; if (PS) { + BindGroupDescriptors[GPUBindGroupsWebGPU::Pixel] = &PS->DescriptorInfo; _fragmentDesc.module = PS->ShaderModule; } - // TODO: set resources binding into PipelineDesc.layout + // Count the biggest bind group entries (for all shaders) to allocate reused memory + int32 maxEntriesCount = 0; + for (int32 groupIndex = 0; groupIndex < ARRAY_COUNT(BindGroupDescriptors); groupIndex++) + { + auto descriptors = BindGroupDescriptors[groupIndex]; + if (descriptors && maxEntriesCount < descriptors->DescriptorTypesCount) + maxEntriesCount = (int32)descriptors->DescriptorTypesCount; + } + Array> entries; + entries.Resize(maxEntriesCount); + + // Setup bind groups + WGPUBindGroupLayoutEntry* entriesPtr = entries.Get(); + for (int32 groupIndex = 0; groupIndex < ARRAY_COUNT(BindGroupDescriptors); groupIndex++) + { + auto descriptors = BindGroupDescriptors[groupIndex]; + if (!descriptors || descriptors->DescriptorTypesCount == 0) + continue; + + int32 entriesCount = 
descriptors->DescriptorTypesCount; + Platform::MemoryClear(entries.Get(), sizeof(WGPUBindGroupLayoutEntry) * entriesCount); + auto visibility = groupIndex == 0 ? WGPUShaderStage_Vertex : WGPUShaderStage_Fragment; + for (int32 index = 0; index < entriesCount; index++) + { + auto& descriptor = descriptors->DescriptorTypes[index]; + auto& entry = entriesPtr[index]; + entry.binding = descriptor.Binding; + entry.bindingArraySize = descriptor.Count; + entry.visibility = visibility; + switch (descriptor.DescriptorType) + { + case VK_DESCRIPTOR_TYPE_SAMPLER: + entry.sampler.type = WGPUSamplerBindingType_Undefined; + break; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + entry.texture.sampleType = WGPUTextureSampleType_Undefined; + switch (descriptor.ResourceType) + { + case SpirvShaderResourceType::Texture1D: + entry.texture.viewDimension = WGPUTextureViewDimension_1D; + break; + case SpirvShaderResourceType::Texture2D: + entry.texture.viewDimension = WGPUTextureViewDimension_2D; + break; + case SpirvShaderResourceType::Texture3D: + entry.texture.viewDimension = WGPUTextureViewDimension_3D; + break; + case SpirvShaderResourceType::TextureCube: + entry.texture.viewDimension = WGPUTextureViewDimension_Cube; + break; + case SpirvShaderResourceType::Texture1DArray: + CRASH; // Not supported TODO: add error at compile time (in ShaderCompilerWebGPU::Write) + break; + case SpirvShaderResourceType::Texture2DArray: + entry.texture.viewDimension = WGPUTextureViewDimension_2DArray; + break; + } + break; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + entry.buffer.hasDynamicOffset = true; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + if (descriptor.BindingType == SpirvShaderResourceBindingType::SRV) + entry.buffer.type = WGPUBufferBindingType_ReadOnlyStorage; + else + entry.buffer.type = WGPUBufferBindingType_Storage; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + entry.buffer.hasDynamicOffset = true; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + entry.buffer.type = 
WGPUBufferBindingType_Uniform; + break; + default: +#if GPU_ENABLE_DIAGNOSTICS + LOG(Fatal, "Unknown descriptor type: {} used as {} in '{}'", (uint32)descriptor.DescriptorType, (uint32)descriptor.BindingType, String(_debugName.Get(), _debugName.Count() - 1)); +#else + CRASH; +#endif + return true; + } + } + + // Create a bind group layout + WGPUBindGroupLayoutDescriptor bindGroupLayoutDesc = WGPU_BIND_GROUP_LAYOUT_DESCRIPTOR_INIT; + bindGroupLayoutDesc.entryCount = entriesCount; + bindGroupLayoutDesc.entries = entriesPtr; + BindGroupLayouts[groupIndex] = wgpuDeviceCreateBindGroupLayout(_device->Device, &bindGroupLayoutDesc); + } + + // Create the pipeline layout + WGPUPipelineLayoutDescriptor layoutDesc = WGPU_PIPELINE_LAYOUT_DESCRIPTOR_INIT; +#if GPU_ENABLE_RESOURCE_NAMING + layoutDesc.label = PipelineDesc.label; +#endif + layoutDesc.bindGroupLayoutCount = GPUBindGroupsWebGPU::GraphicsMax; + layoutDesc.bindGroupLayouts = BindGroupLayouts; + PipelineDesc.layout = wgpuDeviceCreatePipelineLayout(_device->Device, &layoutDesc); + if (!PipelineDesc.layout) + { + LOG(Error, "wgpuDeviceCreatePipelineLayout failed"); + return true; + } _memoryUsage = 1; return GPUPipelineState::Init(desc); diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.h b/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.h index d7ca21fec..7a66b1c8a 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.h +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.h @@ -53,6 +53,8 @@ public: GPUShaderProgramVSWebGPU* VS = nullptr; GPUShaderProgramPSWebGPU* PS = nullptr; WGPURenderPipelineDescriptor PipelineDesc; + WGPUBindGroupLayout BindGroupLayouts[GPUBindGroupsWebGPU::GraphicsMax] = {}; + SpirvShaderDescriptorInfo* BindGroupDescriptors[GPUBindGroupsWebGPU::GraphicsMax] = {}; public: GPUPipelineStateWebGPU(GPUDeviceWebGPU* device) diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUSamplerWebGPU.cpp 
b/Source/Engine/GraphicsDevice/WebGPU/GPUSamplerWebGPU.cpp index fbffd9a30..218c35a72 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUSamplerWebGPU.cpp +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUSamplerWebGPU.cpp @@ -24,7 +24,7 @@ WGPUCompareFunction ToCompareFunction(GPUSamplerCompareFunction value) switch (value) { case GPUSamplerCompareFunction::Never: - return WGPUCompareFunction_Never; + return WGPUCompareFunction_Undefined; // Disabled comparison case GPUSamplerCompareFunction::Less: return WGPUCompareFunction_Less; default: