From ad3c2be5109e9e864eb101eb955595d3fd1c0f49 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 3 Mar 2026 21:35:42 +0100 Subject: [PATCH] Add timer and occlusion queries support to WebGPU --- .../DirectX/DX12/GPUDeviceDX12.cpp | 1 + .../GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp | 5 + .../WebGPU/GPUContextWebGPU.cpp | 101 ++++++++- .../GraphicsDevice/WebGPU/GPUContextWebGPU.h | 6 + .../GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp | 201 +++++++++++++++++- .../GraphicsDevice/WebGPU/GPUDeviceWebGPU.h | 62 ++++++ .../GraphicsDevice/WebGPU/IncludeWebGPU.h | 2 + Source/Engine/Profiler/ProfilerGPU.cpp | 1 + .../Engine/Renderer/AmbientOcclusionPass.cpp | 4 +- 9 files changed, 373 insertions(+), 10 deletions(-) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp index ad8d814f5..db0b0555b 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp @@ -985,6 +985,7 @@ GPUQueryDX12 GPUDeviceDX12::AllocQuery(GPUQueryType type) if (heapIndex == QueryHeaps.Count()) { // Allocate a new query heap + PROFILE_MEM(GraphicsCommands); auto heap = New(); int32 size = type == GPUQueryType::Occlusion ? 4096 : 1024; if (heap->Init(this, type, size)) diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp index 6e1164a46..5266a704b 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp @@ -1245,6 +1245,7 @@ int32 GPUDeviceVulkan::GetOrCreateQueryPool(GPUQueryType type) } PROFILE_CPU_NAMED("Create Create Pool"); + PROFILE_MEM(GraphicsCommands); auto pool = New(this, type == GPUQueryType::Occlusion ? 4096 : 1024, type); QueryPools.Add(pool); return QueryPools.Count() - 1; @@ -1257,6 +1258,7 @@ RenderPassVulkan* GPUDeviceVulkan::GetOrCreateRenderPass(RenderTargetLayoutVulka return renderPass; PROFILE_CPU_NAMED("Create Render Pass"); + PROFILE_MEM(GraphicsCommands); renderPass = New(this, layout); _renderPasses.Add(layout, renderPass); return renderPass; @@ -1269,6 +1271,7 @@ FramebufferVulkan* GPUDeviceVulkan::GetOrCreateFramebuffer(FramebufferVulkan::Ke return framebuffer; PROFILE_CPU_NAMED("Create Framebuffer"); + PROFILE_MEM(GraphicsCommands); framebuffer = New(this, key, extent, layers); _framebuffers.Add(key, framebuffer); return framebuffer; @@ -1281,6 +1284,7 @@ PipelineLayoutVulkan* GPUDeviceVulkan::GetOrCreateLayout(DescriptorSetLayoutInfo return layout; PROFILE_CPU_NAMED("Create Pipeline Layout"); + PROFILE_MEM(GraphicsCommands); layout = New(this, key); _layouts.Add(key, layout); return layout; @@ -2237,6 +2241,7 @@ FenceVulkan* FenceManagerVulkan::AllocateFence(bool createSignaled) } else { + PROFILE_MEM(GraphicsCommands); fence = New(); fence->IsSignaled = createSignaled; VkFenceCreateInfo info; diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp index b190d9d86..bf99dfb0c 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp @@ -72,6 +72,7 @@ void GPUContextWebGPU::FrameBegin() GPUContext::FrameBegin(); // Setup + _usedQuerySets = 0; _renderPassDirty = false; _pipelineDirty = false; _bindGroupDirty = false; @@ -424,12 +425,53 @@ void GPUContextWebGPU::DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, ui uint64 GPUContextWebGPU::BeginQuery(GPUQueryType type) { - // TODO: impl timer/occlusion queries - return 0; + auto query = _device->AllocateQuery(type); + if (query.Raw) + { + ASSERT_LOW_LAYER(query.Set < WEBGPU_MAX_QUERY_SETS); + auto set = _device->QuerySets[query.Set]; + if (set->Type == GPUQueryType::Timer) + { + // Put a new timestamp write + WriteTimestamp(set, query.Index); + } + else if (_activeOcclusionQuerySet == query.Set && _renderPass) + { + // Begin occlusion query on the active set + wgpuRenderPassEncoderBeginOcclusionQuery(_renderPass, query.Index); + } + else + { + // Set the next pending occlusion query set to use for the next pass (or frame) + _pendingOcclusionQuerySet = query.Set; + } + + // Mark query set as used (to be resolved on the frame end) + static_assert(sizeof(_usedQuerySets) * 8 >= WEBGPU_MAX_QUERY_SETS, "Not enough bits in flags of used queries set."); + _usedQuerySets |= 1u << query.Set; + + } + return query.Raw; } void GPUContextWebGPU::EndQuery(uint64 queryID) { + if (queryID) + { + GPUQueryWebGPU query; + query.Raw = queryID; + auto set = _device->QuerySets[query.Set]; + if (set->Type == GPUQueryType::Timer) + { + // Put a new timestamp write + WriteTimestamp(set, query.Index + 1); + } + else if (_activeOcclusionQuerySet == query.Set && _renderPass) + { + // End occlusion query on the active set + wgpuRenderPassEncoderEndOcclusionQuery(_renderPass); + } + } } void GPUContextWebGPU::SetViewport(const Viewport& viewport) @@ -496,6 +538,18 @@ void GPUContextWebGPU::Flush() if (_renderPass) EndRenderPass(); + // Flush pending actions + FlushTimestamps(); + _pendingTimestampWrites.Clear(); + + // Resolve used queries + for (uint32 setIndex = 0; setIndex < _device->QuerySetsCount; setIndex++) + { + if (_usedQuerySets & (1u << setIndex)) + _device->QuerySets[setIndex]->Resolve(Encoder); + } + _usedQuerySets = 0; + // End commands recording WGPUCommandBufferDescriptor commandBufferDesc = WGPU_COMMAND_BUFFER_DESCRIPTOR_INIT; WGPUCommandBuffer commandBuffer = wgpuCommandEncoderFinish(Encoder, &commandBufferDesc); @@ -724,6 +778,15 @@ void GPUContextWebGPU::CopySubresource(GPUResource* dstResource, uint32 dstSubre } } +void GPUContextWebGPU::WriteTimestamp(GPUQuerySetWebGPU* set, uint32 index) +{ + WGPUPassTimestampWrites write = WGPU_PASS_TIMESTAMP_WRITES_INIT; + write.querySet = set->Set; + write.beginningOfPassWriteIndex = index; + write.endOfPassWriteIndex = 0; // makePassTimestampWrites doesn't pass undefined properly thus it has to be a valid query (index 0 is left as dummy) + _pendingTimestampWrites.Add(write); +} + bool GPUContextWebGPU::FindClear(const GPUTextureViewWebGPU* view, PendingClear& clear) { for (auto& e : _pendingClears) @@ -928,6 +991,15 @@ void GPUContextWebGPU::FlushRenderPass() { _pipelineKey.DepthStencilFormat = WGPUTextureFormat_Undefined; } + if (_pendingOcclusionQuerySet != _activeOcclusionQuerySet) + { + _activeOcclusionQuerySet = _pendingOcclusionQuerySet; + renderPassDesc.occlusionQuerySet = _device->QuerySets[_activeOcclusionQuerySet]->Set; + } + FlushTimestamps(1); + if (_pendingTimestampWrites.HasItems()) + renderPassDesc.timestampWrites = &_pendingTimestampWrites.Last(); + _pendingTimestampWrites.Clear(); ASSERT(attachmentSize.Packed != 0); _renderPass = wgpuCommandEncoderBeginRenderPass(Encoder, &renderPassDesc); ASSERT(_renderPass); @@ -1100,4 +1172,29 @@ void GPUContextWebGPU::FlushBindGroup() } } +void GPUContextWebGPU::FlushTimestamps(int32 skipLast) +{ + for (int32 i = 0; i < _pendingTimestampWrites.Count() - skipLast; i++) + { + // WebGPU timestamps have very bad API design made for single-file examples, not real game engines so drain writes here with dummy render passes + // Also, webgpu.h wrapper doesn't pass timestampWrites as array but just a single item... + WGPURenderPassDescriptor dummyDesc = WGPU_RENDER_PASS_DESCRIPTOR_INIT; + if (!_device->DefaultRenderTarget) + { + _device->DefaultRenderTarget = (GPUTextureWebGPU*)_device->CreateTexture(TEXT("DefaultRenderTarget")); + _device->DefaultRenderTarget->Init(GPUTextureDescription::New2D(1, 1, PixelFormat::R8G8B8A8_UNorm, GPUTextureFlags::RenderTarget)); + } + WGPURenderPassColorAttachment dummyAttachment = WGPU_RENDER_PASS_COLOR_ATTACHMENT_INIT; + dummyAttachment.view = ((GPUTextureViewWebGPU*)_device->DefaultRenderTarget->View(0))->ViewRender; + dummyAttachment.loadOp = WGPULoadOp_Clear; + dummyAttachment.storeOp = WGPUStoreOp_Discard; + dummyDesc.colorAttachmentCount = 1; + dummyDesc.colorAttachments = &dummyAttachment; + dummyDesc.timestampWrites = &_pendingTimestampWrites[i]; + auto renderPass = wgpuCommandEncoderBeginRenderPass(Encoder, &dummyDesc); + wgpuRenderPassEncoderEnd(renderPass); + wgpuRenderPassEncoderRelease(renderPass); + } +} + #endif diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h index 29c4c8544..140d96618 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.h @@ -43,6 +43,10 @@ private: GPUDeviceWebGPU* _device; uint32 _minUniformBufferOffsetAlignment; + int32 _activeOcclusionQuerySet = -1; + int32 _pendingOcclusionQuerySet = -1; + uint32 _usedQuerySets = 0; + Array _pendingTimestampWrites; // State tracking uint32 _renderPassDirty : 1; @@ -85,6 +89,7 @@ public: WGPUCommandEncoder Encoder = nullptr; private: + void WriteTimestamp(GPUQuerySetWebGPU* set, uint32 index); bool FindClear(const GPUTextureViewWebGPU* view, PendingClear& clear); void ManualClear(const PendingClear& clear); void OnDrawCall(); @@ -92,6 +97,7 @@ private: void EndRenderPass(); void FlushRenderPass(); void FlushBindGroup(); + void FlushTimestamps(int32 skipLast = 0); public: // [GPUContext] diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp index c8bbb1d1c..585a7a4fa 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp @@ -30,6 +30,132 @@ GPUVertexLayoutWebGPU::GPUVertexLayoutWebGPU(GPUDeviceWebGPU* device, const Elem SetElements(elements, explicitOffsets); } +GPUQuerySetWebGPU::GPUQuerySetWebGPU(WGPUDevice device, GPUQueryType type, uint32 count) + : _device(device) + , _count(count) + , Type(type) +{ + // Timer queries use 2 items for begin/end timestamps + ASSERT_LOW_LAYER(count % 2 == 0 || type != GPUQueryType::Timer); + if (type == GPUQueryType::Timer) + _index = 2; // Skip first item in timer queries due to bug in makePassTimestampWrites that cannot pass undefined value properly + + // Create query set + WGPUQuerySetDescriptor desc = WGPU_QUERY_SET_DESCRIPTOR_INIT; + desc.type = type == GPUQueryType::Timer ? WGPUQueryType_Timestamp : WGPUQueryType_Occlusion; + desc.count = count; + Set = wgpuDeviceCreateQuerySet(device, &desc); + ASSERT(Set); + + // Create buffer for queries data + WGPUBufferDescriptor bufferDesc = WGPU_BUFFER_DESCRIPTOR_INIT; + bufferDesc.size = count * sizeof(uint64); + bufferDesc.usage = WGPUBufferUsage_QueryResolve | WGPUBufferUsage_CopySrc; + _queryBuffer = wgpuDeviceCreateBuffer(device, &bufferDesc); + ASSERT(_queryBuffer); + + // Create buffer for reading copied queries data on CPU + bufferDesc.usage = WGPUBufferUsage_MapRead | WGPUBufferUsage_CopyDst; + _readBuffer = wgpuDeviceCreateBuffer(device, &bufferDesc); + ASSERT(_readBuffer); + +#if COMPILE_WITH_PROFILER + _memorySize = bufferDesc.size * 3; // Set + QueryBuffer + ReadBuffer + PROFILE_MEM_INC(GraphicsCommands, _memorySize); +#endif +} + +GPUQuerySetWebGPU::~GPUQuerySetWebGPU() +{ + PROFILE_MEM_DEC(GraphicsCommands, _memorySize); + wgpuBufferDestroy(_readBuffer); + wgpuBufferRelease(_readBuffer); + wgpuBufferDestroy(_queryBuffer); + wgpuBufferRelease(_queryBuffer); + wgpuQuerySetDestroy(Set); + wgpuQuerySetRelease(Set); +} + +bool GPUQuerySetWebGPU::CanAllocate() const +{ + return _index < _count && (_state == Active || _state == Mapped); +} + +uint32 GPUQuerySetWebGPU::Allocate() +{ + if (_state == Mapped) + { + // Start a new batch from the beginning + wgpuBufferUnmap(_readBuffer); + _state = Active; + _index = 2; + _mapped = nullptr; + } + uint32 index = _index; + _index += Type == GPUQueryType::Timer ? 2 : 1; + return index; +} + +void GPUQuerySetWebGPU::Resolve(WGPUCommandEncoder encoder) +{ + ASSERT(_index != 0 && _state == Active); + wgpuCommandEncoderResolveQuerySet(encoder, Set, 0, _index, _queryBuffer, 0); + wgpuCommandEncoderCopyBufferToBuffer(encoder, _queryBuffer, 0, _readBuffer, 0, _index * sizeof(uint64)); + _state = Resolved; +} + +bool GPUQuerySetWebGPU::Read(uint32 index, uint64& result, bool wait) +{ + if (_state == Resolved) + { + // Start mapping the buffer + ASSERT(!wait); // TODO: impl wgpuBufferMapAsync with waiting (see GPUBufferWebGPU::Map) + WGPUBufferMapCallbackInfo callback = WGPU_BUFFER_MAP_CALLBACK_INFO_INIT; + callback.mode = WGPUCallbackMode_AllowSpontaneous; + callback.userdata1 = this; + callback.callback = [](WGPUMapAsyncStatus status, WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) + { + if (status == WGPUMapAsyncStatus_Success) + { + auto set = (GPUQuerySetWebGPU*)userdata1; + set->OnRead(); + } +#if !BUILD_RELEASE + else + { + LOG(Error, "Query Set map failed with status {}, {}", (uint32)status, WEBGPU_TO_STR(message)); + } +#endif + }; + wgpuBufferMapAsync(_readBuffer, WGPUMapMode_Read, 0, _index * sizeof(uint64), callback); + _state = Mapping; + } + else if (_state == Mapped) + { + // Read the results from mapped buffer + if (Type == GPUQueryType::Timer) + { + // Timestamp calculates a difference between two queries (begin/end) in nanoseconds (result is in microseconds) + result = Math::Max(_mapped[index + 1] - _mapped[index], 0ull) / 1000; + } + else + { + // Occlusion outputs number of fragment samples that pass all the tests (scissor, stencil, depth, etc.) + result = _mapped[index]; + } + return true; + } + return false; +} + +void GPUQuerySetWebGPU::OnRead() +{ + // Get mapped buffer pointer + ASSERT(_state == Mapping); + _state = Mapped; + _mapped = (const uint64*)wgpuBufferGetConstMappedRange(_readBuffer, 0, _index * sizeof(uint64)); +} + GPUDataUploaderWebGPU::Allocation GPUDataUploaderWebGPU::Allocate(uint32 size, WGPUBufferUsage usage, uint32 alignment) { // Find a free buffer from the current frame @@ -167,6 +293,7 @@ bool GPUDeviceWebGPU::Init() if (wgpuAdapterGetLimits(Adapter->Adapter, &limits) == WGPUStatus_Success) { MinUniformBufferOffsetAlignment = limits.minUniformBufferOffsetAlignment; + TimestampQuery = features.Contains(WGPUFeatureName_TimestampQuery); Limits.HasInstancing = true; Limits.HasDrawIndirect = true; Limits.HasDepthAsSRV = true; @@ -174,11 +301,11 @@ bool GPUDeviceWebGPU::Init() Limits.HasDepthClip = features.Contains(WGPUFeatureName_DepthClipControl); Limits.HasReadOnlyDepth = true; Limits.MaximumSamplerAnisotropy = 4; - Limits.MaximumTexture1DSize = Math::Min(GPU_MAX_TEXTURE_SIZE, limits.maxTextureDimension1D); - Limits.MaximumTexture2DSize = Math::Min(GPU_MAX_TEXTURE_SIZE, limits.maxTextureDimension2D); - Limits.MaximumTexture3DSize = Math::Min(GPU_MAX_TEXTURE_SIZE, limits.maxTextureDimension3D); - Limits.MaximumMipLevelsCount = Math::Min(GPU_MAX_TEXTURE_MIP_LEVELS, (int32)log2(limits.maxTextureDimension2D)); - Limits.MaximumTexture1DArraySize = Limits.MaximumTexture2DArraySize = Math::Min(GPU_MAX_TEXTURE_ARRAY_SIZE, limits.maxTextureArrayLayers); + Limits.MaximumTexture1DSize = limits.maxTextureDimension1D; + Limits.MaximumTexture2DSize = limits.maxTextureDimension2D; + Limits.MaximumTexture3DSize = limits.maxTextureDimension3D; + Limits.MaximumMipLevelsCount = (int32)log2(limits.maxTextureDimension2D); + Limits.MaximumTexture1DArraySize = Limits.MaximumTexture2DArraySize = limits.maxTextureArrayLayers; if (limits.maxTextureArrayLayers >= 6) Limits.MaximumTextureCubeSize = Limits.MaximumTexture2DSize; @@ -624,7 +751,11 @@ void GPUDeviceWebGPU::Dispose() preDispose(); // Clear device resources + for (int32 i = 0; i < QuerySetsCount; i++) + Delete(QuerySets[i]); + QuerySetsCount = 0; DataUploader.ReleaseGPU(); + SAFE_DELETE_GPU_RESOURCE(DefaultRenderTarget); SAFE_DELETE_GPU_RESOURCES(DefaultTexture); SAFE_DELETE_GPU_RESOURCES(DefaultSamplers); SAFE_DELETE(_mainContext); @@ -653,12 +784,68 @@ void GPUDeviceWebGPU::Dispose() void GPUDeviceWebGPU::WaitForGPU() { + // TODO: this could use onSubmittedWorkDone (assuming any submit has been already done) +} + +GPUQueryWebGPU GPUDeviceWebGPU::AllocateQuery(GPUQueryType type) +{ + // Ignore if device doesn't support timer queries + if (type == GPUQueryType::Timer && !TimestampQuery) + return {}; + + // Get query set with free space + int32 setIndex = 0; + for (; setIndex < QuerySetsCount; setIndex++) + { + auto heap = QuerySets[setIndex]; + if (heap->Type == type && heap->CanAllocate()) + break; + } + if (setIndex == QuerySetsCount) + { + if (setIndex == WEBGPU_MAX_QUERY_SETS) + { +#if !BUILD_RELEASE + static bool SingleTimeLog = true; + if (SingleTimeLog) + { + SingleTimeLog = false; + LOG(Error, "Run out of the query sets capacity."); + } +#endif + return {}; + } + + // Allocate a new query heap + PROFILE_MEM(GraphicsCommands); + uint32 size = type == GPUQueryType::Occlusion ? 4096 : 1024; + auto set = New(Device, type, size); + QuerySets[QuerySetsCount++] = set; + } + + // Allocate query from the set + GPUQueryWebGPU query; + { + static_assert(sizeof(GPUQueryWebGPU) == sizeof(uint64), "Invalid WebGPU query size."); + query.Set = setIndex; + query.Index = QuerySets[setIndex]->Allocate(); + } + return query; } bool GPUDeviceWebGPU::GetQueryResult(uint64 queryID, uint64& result, bool wait) { - // TODO: impl queries - return false; + if (queryID == 0) + { + // Invalid query + result = 0; + return true; + } + + GPUQueryWebGPU query; + query.Raw = queryID; + auto set = QuerySets[query.Set]; + return set->Read(query.Index, result, wait); } GPUTexture* GPUDeviceWebGPU::CreateTexture(const StringView& name) diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.h b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.h index d87957775..5b56126ac 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.h +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.h @@ -30,6 +30,62 @@ namespace GPUBindGroupsWebGPU }; }; +/// +/// GPU query ID packed into 64-bits. +/// +struct GPUQueryWebGPU +{ + union + { + struct + { + uint32 Set; + uint32 Index; + }; + uint64 Raw; + }; +}; + +/// +/// Set of GPU queries allocated in batch with functionality to read results via a separate CPU buffer. +/// +class GPUQuerySetWebGPU +{ +private: + WGPUDevice _device; + uint32 _count; + uint32 _index = 0; + enum States + { + Active, + Resolved, + Mapping, + Mapped, + } _state = Active; +#if COMPILE_WITH_PROFILER + uint64 _memorySize; +#endif + WGPUBuffer _queryBuffer; + WGPUBuffer _readBuffer; + const uint64* _mapped = nullptr; + +public: + const GPUQueryType Type; + WGPUQuerySet Set; + +public: + GPUQuerySetWebGPU(WGPUDevice device, GPUQueryType type, uint32 count); + ~GPUQuerySetWebGPU(); + + bool CanAllocate() const; + uint32 Allocate(); + void Resolve(WGPUCommandEncoder encoder); + bool Read(uint32 index, uint64& result, bool wait); + +private: + void OnRead(); +}; + /// /// Pool for uploading data to GPU buffers. It manages large buffers and suballocates for multiple small updates, minimizing the number of buffer creations and copies. /// @@ -79,11 +135,17 @@ public: WGPUInstance WebGPUInstance; WGPUDevice Device = nullptr; WGPUQueue Queue = nullptr; + GPUTextureWebGPU* DefaultRenderTarget = nullptr; GPUSamplerWebGPU* DefaultSamplers[6] = {}; GPUTextureWebGPU* DefaultTexture[10] = {}; WGPUBuffer DefaultBuffer = nullptr; GPUDataUploaderWebGPU DataUploader; uint32 MinUniformBufferOffsetAlignment = 1; + bool TimestampQuery = false; + uint32 QuerySetsCount = 0; + GPUQuerySetWebGPU* QuerySets[WEBGPU_MAX_QUERY_SETS] = {}; + + GPUQueryWebGPU AllocateQuery(GPUQueryType type); public: // [GPUDeviceDX] diff --git a/Source/Engine/GraphicsDevice/WebGPU/IncludeWebGPU.h b/Source/Engine/GraphicsDevice/WebGPU/IncludeWebGPU.h index a7f3a4ec7..06c3b3d9f 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/IncludeWebGPU.h +++ b/Source/Engine/GraphicsDevice/WebGPU/IncludeWebGPU.h @@ -23,4 +23,6 @@ // Utiltiy macro to get WGPUStringView for a text constant #define WEBGPU_STR(str) { str, ARRAY_COUNT(str) - 1 } +#define WEBGPU_MAX_QUERY_SETS 8 + #endif diff --git a/Source/Engine/Profiler/ProfilerGPU.cpp b/Source/Engine/Profiler/ProfilerGPU.cpp index 1ede560f7..2cf40afb2 100644 --- a/Source/Engine/Profiler/ProfilerGPU.cpp +++ b/Source/Engine/Profiler/ProfilerGPU.cpp @@ -398,6 +398,7 @@ void GraphicsDumping::Print() auto& draw = Items[0]; { // The root item is always the drawing by engine + draw.Time = Math::Max(draw.Time, 0.000001f); if (draw.Count == 1) sb.AppendFormat(TEXT(" Frame time: {} ms ({} FPS)"), Utilities::RoundTo2DecimalPlaces(draw.Time), (int32)(1000.0f / draw.Time)).AppendLine(); else diff --git a/Source/Engine/Renderer/AmbientOcclusionPass.cpp b/Source/Engine/Renderer/AmbientOcclusionPass.cpp index 437b3077d..07a22e8f0 100644 --- a/Source/Engine/Renderer/AmbientOcclusionPass.cpp +++ b/Source/Engine/Renderer/AmbientOcclusionPass.cpp @@ -230,6 +230,9 @@ void AmbientOcclusionPass::Render(RenderContext& renderContext) Math::Min(renderContext.Buffers->GetWidth(), renderContext.Buffers->GetHeight()) < 16 || checkIfSkipPass()) return; + auto device = GPUDevice::Instance; + if (device->Limits.MaximumTexture2DArraySize < 4) + return; PROFILE_GPU_CPU("Ambient Occlusion"); settings = ASSAO_Settings(); @@ -270,7 +273,6 @@ void AmbientOcclusionPass::Render(RenderContext& renderContext) settings.SkipHalfPixels = true; // Cache data - auto device = GPUDevice::Instance; auto context = device->GetMainContext(); int32 m_sizeX = renderContext.Buffers->GetWidth(); int32 m_sizeY = renderContext.Buffers->GetHeight();