From 9ac231c40319fcdbfc7f3a5d9287b87a8c01900b Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 16 Jan 2026 10:40:30 +0100 Subject: [PATCH] Add new GPU Query API that is lightweight and supports occlusion queries --- Source/Engine/Graphics/Enums.h | 18 +++ Source/Engine/Graphics/GPUContext.h | 15 +++ Source/Engine/Graphics/GPUDevice.h | 10 ++ Source/Engine/Graphics/GPUResource.h | 2 +- Source/Engine/Graphics/Graphics.Build.cs | 3 +- .../DirectX/DX11/GPUContextDX11.cpp | 75 +++++++++++ .../DirectX/DX11/GPUContextDX11.h | 2 + .../DirectX/DX11/GPUDeviceDX11.cpp | 96 ++++++++++++++ .../DirectX/DX11/GPUDeviceDX11.h | 35 +++++ .../DirectX/DX12/GPUContextDX12.cpp | 25 ++++ .../DirectX/DX12/GPUContextDX12.h | 2 + .../DirectX/DX12/GPUDeviceDX12.cpp | 78 +++++++++-- .../DirectX/DX12/GPUDeviceDX12.h | 13 +- .../DirectX/DX12/GPUTimerQueryDX12.cpp | 37 +----- .../DirectX/DX12/GPUTimerQueryDX12.h | 4 +- .../DirectX/DX12/QueryHeapDX12.cpp | 108 +++++++--------- .../DirectX/DX12/QueryHeapDX12.h | 121 +++++++++--------- .../GraphicsDevice/Null/GPUContextNull.h | 9 ++ .../GraphicsDevice/Null/GPUDeviceNull.cpp | 5 + .../GraphicsDevice/Null/GPUDeviceNull.h | 4 +- .../GraphicsDevice/Vulkan/CmdBufferVulkan.cpp | 61 ++++++--- .../GraphicsDevice/Vulkan/CmdBufferVulkan.h | 23 ++-- Source/Engine/GraphicsDevice/Vulkan/Config.h | 10 +- .../Vulkan/GPUContextVulkan.cpp | 66 ++++++++++ .../GraphicsDevice/Vulkan/GPUContextVulkan.h | 2 + .../GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp | 114 ++++++++++++++--- .../GraphicsDevice/Vulkan/GPUDeviceVulkan.h | 35 +++-- .../Vulkan/GPUTimerQueryVulkan.cpp | 96 +++++++++++--- .../Vulkan/GPUTimerQueryVulkan.h | 10 ++ .../Vulkan/Mac/MacVulkanPlatform.h | 2 +- .../Vulkan/iOS/iOSVulkanPlatform.h | 2 +- 31 files changed, 829 insertions(+), 254 deletions(-) diff --git a/Source/Engine/Graphics/Enums.h b/Source/Engine/Graphics/Enums.h index 107fe3533..96a9fed7b 100644 --- a/Source/Engine/Graphics/Enums.h +++ b/Source/Engine/Graphics/Enums.h @@ -349,6 +349,24 @@ 
API_ENUM(Attributes="Flags") enum class GPUResourceMapMode DECLARE_ENUM_OPERATORS(GPUResourceMapMode); +/// +/// GPU query types. +/// +enum class GPUQueryType +{ + /// + /// Measures duration of GPU commands execution. Returns time in microseconds (1/1000 ms). + /// + Timer = 0, + + /// + /// Tests object visibility by counting number of pixel samples that are not culled (by depth or stencil tests). + /// + Occlusion = 1, + + MAX +}; + /// /// Primitives types. /// diff --git a/Source/Engine/Graphics/GPUContext.h b/Source/Engine/Graphics/GPUContext.h index 1144d6f49..5d1c3a020 100644 --- a/Source/Engine/Graphics/GPUContext.h +++ b/Source/Engine/Graphics/GPUContext.h @@ -28,6 +28,7 @@ class GPUBufferView; class GPUVertexLayout; struct GPUPass; enum class GPUResourceAccess; +enum class GPUQueryType; // Gets the GPU texture view. Checks if pointer is not null and texture has one or more mip levels loaded. #define GET_TEXTURE_VIEW_SAFE(t) (t && t->ResidentMipLevels() > 0 ? t->View() : nullptr) @@ -554,6 +555,20 @@ public: /// The aligned byte offset for arguments. API_FUNCTION() virtual void DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) = 0; +public: + /// + /// Begins the GPU query that will measure commands until EndQuery. + /// + /// Query type. + /// Unique identifier of the query used to EndQuery and then GetQueryResult to read the query result data. + virtual uint64 BeginQuery(GPUQueryType type) = 0; + + /// + /// Ends the GPU query. Use GPUDevice::GetQueryResult to read the results back. + /// + /// Query identifier returned by BeginQuery. + virtual void EndQuery(uint64 queryID) = 0; + public: /// /// Sets the rendering viewport and scissor rectangle. 
diff --git a/Source/Engine/Graphics/GPUDevice.h b/Source/Engine/Graphics/GPUDevice.h index 8f9393a5b..d424ec67d 100644 --- a/Source/Engine/Graphics/GPUDevice.h +++ b/Source/Engine/Graphics/GPUDevice.h @@ -370,6 +370,16 @@ public: /// virtual void WaitForGPU() = 0; + /// + /// Reads the query result from the GPU. + /// + /// GPU query results are short-lived: once the results become ready in a frame, they won't be available in the next frame, because queries are reused. + /// Query identifier returned by GPUContext::BeginQuery. + /// The output result data of the query. Valid only when function returns true. + /// True to wait for the GPU to finish processing commands so the query data is ready. Otherwise, if the query is incomplete, the function returns false without a result. + /// True if got a valid query result, otherwise false. If called with wait enabled, false means the device failed to read back the query data. + virtual bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) = 0; + public: void AddResource(GPUResource* resource); void RemoveResource(GPUResource* resource); diff --git a/Source/Engine/Graphics/GPUResource.h b/Source/Engine/Graphics/GPUResource.h index d612a846d..1b6178036 100644 --- a/Source/Engine/Graphics/GPUResource.h +++ b/Source/Engine/Graphics/GPUResource.h @@ -32,7 +32,7 @@ API_ENUM() enum class GPUResourceType PipelineState, // GPU binding descriptor Descriptor, - // GPU timer query + // GPU timer or occlusion query Query, // GPU texture sampler Sampler, diff --git a/Source/Engine/Graphics/Graphics.Build.cs b/Source/Engine/Graphics/Graphics.Build.cs index 1c59349ec..51b37144b 100644 --- a/Source/Engine/Graphics/Graphics.Build.cs +++ b/Source/Engine/Graphics/Graphics.Build.cs @@ -40,6 +40,7 @@ public abstract class GraphicsDeviceBaseModule : EngineModule public class Graphics : EngineModule { private static bool _logMissingVulkanSDK; + private static bool _logMissingWindowsSDK; /// public override void Setup(BuildOptions options) @@ 
-59,7 +60,7 @@ public class Graphics : EngineModule if (windowsToolchain != null && windowsToolchain.SDK != Flax.Build.Platforms.WindowsPlatformSDK.v8_1) options.PrivateDependencies.Add("GraphicsDeviceDX12"); else - Log.WarningOnce(string.Format("Building for {0} without Vulkan rendering backend (Vulkan SDK is missing)", options.Platform.Target), ref _logMissingVulkanSDK); + Log.WarningOnce(string.Format("Building for {0} without D3D12 rendering backend (Windows SDK is missing)", options.Platform.Target), ref _logMissingWindowsSDK); break; case TargetPlatform.UWP: options.PrivateDependencies.Add("GraphicsDeviceDX11"); diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp index f623f53b5..92a957ffd 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp @@ -566,6 +566,82 @@ void GPUContextDX11::DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint RENDER_STAT_DRAW_CALL(0, 0); } +uint64 GPUContextDX11::BeginQuery(GPUQueryType type) +{ + // Allocate a pooled query + uint16 queryIndex; + static_assert(ARRAY_COUNT(_device->_readyQueries) == (int32)GPUQueryType::MAX, "Invalid query types count"); + if (_device->_readyQueries[(int32)type].HasItems()) + { + // Use query from cached list + queryIndex = _device->_readyQueries[(int32)type].Pop(); + } + else + { + // Add a new query + queryIndex = _device->_queries.Count(); + auto& query = _device->_queries.AddOne(); + query.Type = type; + D3D11_QUERY_DESC queryDesc; + // Main query must match the requested type: occlusion queries count passed samples, timer queries read the end timestamp + queryDesc.Query = type == GPUQueryType::Occlusion ? D3D11_QUERY_OCCLUSION : D3D11_QUERY_TIMESTAMP; + queryDesc.MiscFlags = 0; + HRESULT hr = _device->GetDevice()->CreateQuery(&queryDesc, &query.Query); + LOG_DIRECTX_RESULT_WITH_RETURN(hr, 0); + if (type == GPUQueryType::Timer) + { + // Timer queries need additional one for begin and end disjoint + hr = _device->GetDevice()->CreateQuery(&queryDesc, &query.TimerBeginQuery); + 
LOG_DIRECTX_RESULT_WITH_RETURN(hr, 0); + queryDesc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT; + hr = _device->GetDevice()->CreateQuery(&queryDesc, &query.DisjointQuery); + LOG_DIRECTX_RESULT_WITH_RETURN(hr, 0); + } + } + static_assert(sizeof(GPUQueryDX11) == sizeof(uint64), "Invalid query size."); + GPUQueryDX11 q = {}; + q.Type = (uint16)type; + q.Index = queryIndex; + q.Padding = 1; // Ensure Raw is never 0, even for the first query + + // Begin query + { + auto& query = _device->_queries[queryIndex]; + ASSERT_LOW_LAYER(query.State == GPUQueryDataDX11::Ready); + ASSERT_LOW_LAYER(query.Type == type); + query.State = GPUQueryDataDX11::Active; + auto context = _device->GetIM(); + if (type == GPUQueryType::Timer) + { + context->Begin(query.DisjointQuery); + context->End(query.TimerBeginQuery); + } + else + { + context->Begin(query.Query); + } + } + + return q.Raw; +} + +void GPUContextDX11::EndQuery(uint64 queryID) +{ + if (!queryID) + return; + + // End query + GPUQueryDX11 q; + q.Raw = queryID; + auto& query = _device->_queries[q.Index]; + auto context = _device->GetIM(); + context->End(query.Query); + if (q.Type == (uint16)GPUQueryType::Timer) + { + context->End(query.DisjointQuery); + } +} + void GPUContextDX11::SetViewport(const Viewport& viewport) { _context->RSSetViewports(1, (D3D11_VIEWPORT*)&viewport); diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h index eee2699df..5e3c14e9e 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h @@ -154,6 +154,8 @@ public: void DrawIndexedInstanced(uint32 indicesCount, uint32 instanceCount, int32 startInstance, int32 startVertex, int32 startIndex) override; void DrawInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override; void DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override; + uint64 BeginQuery(GPUQueryType 
type) override; + void EndQuery(uint64 queryID) override; void SetViewport(const Viewport& viewport) override; void SetScissor(const Rectangle& scissorRect) override; GPUPipelineState* GetState() const override; diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp index b187ed3c7..2d8a0f8c7 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp @@ -175,6 +175,15 @@ GPUVertexLayoutDX11::GPUVertexLayoutDX11(GPUDeviceDX11* device, const Elements& } } +void GPUQueryDataDX11::Release() +{ + SAFE_RELEASE(Query); + SAFE_RELEASE(TimerBeginQuery); + SAFE_RELEASE(DisjointQuery); + Result = 0; + State = Ready; +} + GPUDevice* GPUDeviceDX11::Create() { // Configuration @@ -801,6 +810,11 @@ void GPUDeviceDX11::Dispose() { SAFE_RELEASE(RasterizerStates[i]); } + for (auto& query : _queries) + query.Release(); + _queries.Clear(); + for (auto& e : _readyQueries) + e.Clear(); // Clear DirectX stuff SAFE_DELETE(_mainContext); @@ -877,6 +891,88 @@ void GPUDeviceDX11::DrawEnd() infoQueue->ClearStoredMessages(); } #endif + + // Auto-return finished queries back to the pool + auto* queries = _queries.Get(); + int32 queriesCount = _queries.Count(); + for (int32 i = 0; i < queriesCount; i++) + { + auto& query = queries[i]; + if (query.State == GPUQueryDataDX11::Finished) + { + query.State = GPUQueryDataDX11::Ready; + query.Result = 0; + _readyQueries[(int32)query.Type].Push(i); + } + } +} + +bool GPUDeviceDX11::GetQueryResult(uint64 queryID, uint64& result, bool wait) +{ + if (!queryID) + return false; + + GPUQueryDX11 q; + q.Raw = queryID; + auto& query = _queries[q.Index]; + if (query.State == GPUQueryDataDX11::Finished) + { + // Use resolved result + result = query.Result; + return true; + } + auto context = GetIM(); + +RETRY: + bool hasData; + if (q.Type == (uint16)GPUQueryType::Timer) + { + 
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjointData; + hasData = context->GetData(query.DisjointQuery, &disjointData, sizeof(disjointData), 0) == S_OK; + if (hasData) + { + UINT64 timeBegin = 0, timeEnd = 0; + context->GetData(query.TimerBeginQuery, &timeBegin, sizeof(timeBegin), 0); + context->GetData(query.Query, &timeEnd, sizeof(timeEnd), 0); + + if (disjointData.Disjoint == FALSE) + { + result = timeEnd > timeBegin ? (timeEnd - timeBegin) * 1000000ull / disjointData.Frequency : 0; + } + else + { + result = 0; +#if !BUILD_RELEASE + static bool LogOnce = true; + if (LogOnce) + { + LogOnce = false; + LOG(Warning, "Unreliable GPU timer query detected."); + } +#endif + } + } + } + else + { + hasData = context->GetData(query.Query, &result, sizeof(uint64), 0) == S_OK; + } + + if (!hasData && wait) + { + // Wait until data is ready + Platform::Yield(); + goto RETRY; + } + + if (hasData) + { + // Query has valid data now (until auto-recycle back to pool) + query.State = GPUQueryDataDX11::Finished; + query.Result = result; + } + + return hasData; } GPUTexture* GPUDeviceDX11::CreateTexture(const StringView& name) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.h b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.h index 403a10a4c..9657ebc59 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.h @@ -15,6 +15,38 @@ enum class StencilOperation : byte; class GPUContextDX11; class GPUSwapChainDX11; +/// +/// GPU query ID packed into 64-bits. +/// +struct GPUQueryDX11 +{ + union + { + struct + { + uint16 Type; + uint16 Index; + uint32 Padding; + }; + uint64 Raw; + }; +}; + +/// +/// GPU query data (reusable via pooling). 
+/// +struct GPUQueryDataDX11 +{ + ID3D11Query* Query = nullptr; + ID3D11Query* TimerBeginQuery = nullptr; + ID3D11Query* DisjointQuery = nullptr; + uint64 Result = 0; + enum States { Ready, Active, Finished } State = Ready; + GPUQueryType Type = GPUQueryType::MAX; + + void Release(); +}; + /// /// Implementation of Graphics Device for DirectX 11 backend. /// @@ -60,6 +92,8 @@ private: GPUContextDX11* _mainContext = nullptr; bool _allowTearing = false; GPUBuffer* _dummyVB = nullptr; + Array _queries; + Array _readyQueries[2]; // Timer and Occlusion // Static Samplers ID3D11SamplerState* _samplerLinearClamp = nullptr; @@ -124,6 +158,7 @@ public: void Dispose() override; void WaitForGPU() override; void DrawEnd() override; + bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) override; GPUTexture* CreateTexture(const StringView& name) override; GPUShader* CreateShader(const StringView& name) override; GPUPipelineState* CreatePipelineState() override; diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp index 98143c7c3..eb654bb07 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp @@ -1275,6 +1275,31 @@ void GPUContextDX12::DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint RENDER_STAT_DRAW_CALL(0, 0); } +uint64 GPUContextDX12::BeginQuery(GPUQueryType type) +{ + auto query = _device->AllocQuery(type); + if (query.Raw) + { + auto heap = _device->QueryHeaps[query.Heap]; + if (type == GPUQueryType::Timer) // Timer queries call End twice on different queries to calculate duration between GPU time clocks + _commandList->EndQuery(heap->QueryHeap, heap->QueryType, query.SecondaryElement); + else + _commandList->BeginQuery(heap->QueryHeap, heap->QueryType, query.Element); + } + return query.Raw; +} + +void GPUContextDX12::EndQuery(uint64 queryID) +{ + if (queryID) + { + 
GPUQueryDX12 query; + query.Raw = queryID; + auto heap = _device->QueryHeaps[query.Heap]; + _commandList->EndQuery(heap->QueryHeap, heap->QueryType, query.Element); + } +} + void GPUContextDX12::SetViewport(const Viewport& viewport) { _commandList->RSSetViewports(1, (D3D12_VIEWPORT*)&viewport); diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h index 51f24f4a6..8c13b8ce4 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h @@ -197,6 +197,8 @@ public: void DrawIndexedInstanced(uint32 indicesCount, uint32 instanceCount, int32 startInstance, int32 startVertex, int32 startIndex) override; void DrawInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override; void DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override; + uint64 BeginQuery(GPUQueryType type) override; + void EndQuery(uint64 queryID) override; void SetViewport(const Viewport& viewport) override; void SetScissor(const Rectangle& scissorRect) override; GPUPipelineState* GetState() const override; diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp index 4b9298b6c..447ff14a0 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp @@ -555,7 +555,6 @@ GPUDeviceDX12::GPUDeviceDX12(IDXGIFactory4* dxgiFactory, GPUAdapterDX* adapter) , _commandQueue(nullptr) , _mainContext(nullptr) , UploadBuffer(this) - , TimestampQueryHeap(this, D3D12_QUERY_HEAP_TYPE_TIMESTAMP, DX12_BACK_BUFFER_COUNT * 1024) , Heap_CBV_SRV_UAV(this, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 4 * 1024, false) , Heap_RTV(this, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 1 * 1024, false) , Heap_DSV(this, D3D12_DESCRIPTOR_HEAP_TYPE_DSV, 64, false) @@ -883,9 +882,6 @@ bool GPUDeviceDX12::Init() 
VALIDATE_DIRECTX_CALL(_device->CreateRootSignature(0, signatureBlob->GetBufferPointer(), signatureBlob->GetBufferSize(), IID_PPV_ARGS(&_rootSignature))); } - if (TimestampQueryHeap.Init()) - return true; - // Cached command signatures { DrawIndirectCommandSignature = New(this, 1); @@ -927,8 +923,9 @@ void GPUDeviceDX12::RenderEnd() // Base GPUDeviceDX::RenderEnd(); - // Resolve the timestamp queries - TimestampQueryHeap.EndQueryBatchAndResolveQueryData(_mainContext); + // Resolve the queries + for (auto heap : QueryHeaps) + heap->EndQueryBatchAndResolveQueryData(_mainContext); } GPUDeviceDX12::~GPUDeviceDX12() @@ -957,11 +954,47 @@ ID3D12CommandQueue* GPUDeviceDX12::GetCommandQueueDX12() const return _commandQueue->GetCommandQueue(); } +GPUQueryDX12 GPUDeviceDX12::AllocQuery(GPUQueryType type) +{ + // Get query heap with free space + int32 heapIndex = 0; + int32 count = GPUQueryDX12::GetQueriesCount(type); + for (; heapIndex < QueryHeaps.Count(); heapIndex++) + { + auto heap = QueryHeaps[heapIndex]; + if (heap->Type == type && heap->CanAlloc(count)) + break; + } + if (heapIndex == QueryHeaps.Count()) + { + // Allocate a new query heap + auto heap = New(); + int32 size = type == GPUQueryType::Occlusion ? 
4096 : 1024; + if (heap->Init(this, type, size)) + { + Delete(heap); + return {}; + } + QueryHeaps.Add(heap); + } + + // Alloc query from the heap + GPUQueryDX12 query = {}; + { + static_assert(sizeof(GPUQueryDX12) == sizeof(uint64), "Invalid DX12 query size."); + query.Type = (uint16)type; + query.Heap = heapIndex; + auto heap = QueryHeaps[heapIndex]; + heap->Alloc(query.Element); + if (count == 2) + heap->Alloc(query.SecondaryElement); + } + return query; +} + void GPUDeviceDX12::Dispose() { GPUDeviceLock lock(this); - - // Check if has been disposed already if (_state == DeviceState::Disposed) return; @@ -982,7 +1015,12 @@ void GPUDeviceDX12::Dispose() for (auto& srv : _nullSrv) srv.Release(); _nullUav.Release(); - TimestampQueryHeap.Destroy(); + for (auto* heap : QueryHeaps) + { + heap->Destroy(); + Delete(heap); + } + QueryHeaps.Clear(); DX_SAFE_RELEASE_CHECK(_rootSignature, 0); Heap_CBV_SRV_UAV.ReleaseGPU(); Heap_RTV.ReleaseGPU(); @@ -1012,6 +1050,32 @@ void GPUDeviceDX12::WaitForGPU() _commandQueue->WaitForGPU(); } +bool GPUDeviceDX12::GetQueryResult(uint64 queryID, uint64& result, bool wait) +{ + // Invalid query (eg. AllocQuery failed and returned an empty ID) + if (!queryID) + return false; + GPUQueryDX12 query; + query.Raw = queryID; + auto heap = QueryHeaps[query.Heap]; + int32 count = GPUQueryDX12::GetQueriesCount((GPUQueryType)query.Type); + // When not waiting, every element used by the query must be ready (the secondary element exists only for 2-element timer queries) + if (!wait && (!heap->IsReady(query.Element) || (count == 2 && !heap->IsReady(query.SecondaryElement)))) + return false; + if (query.Type == (uint16)GPUQueryType::Timer) + { + uint64 timestampFrequency = 1; + const uint64 timeBegin = *(uint64*)heap->Resolve(query.SecondaryElement); + const uint64 timeEnd = *(uint64*)heap->Resolve(query.Element, &timestampFrequency); + result = timeEnd > timeBegin && timestampFrequency != 0 ? 
(timeEnd - timeBegin) * 1000000ull / timestampFrequency : 0; + } + else + { + result = *(uint64*)heap->Resolve(query.Element); + } + return true; +} + GPUTexture* GPUDeviceDX12::CreateTexture(const StringView& name) { PROFILE_MEM(GraphicsTextures); diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.h index e9c1cacaa..582af6109 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.h @@ -65,21 +65,13 @@ public: ~GPUDeviceDX12(); public: - /// - /// Data uploading utility via pages. - /// UploadBufferDX12 UploadBuffer; - - /// - /// The timestamp queries heap. - /// - QueryHeapDX12 TimestampQueryHeap; - bool AllowTearing = false; CommandSignatureDX12* DispatchIndirectCommandSignature = nullptr; CommandSignatureDX12* DrawIndexedIndirectCommandSignature = nullptr; CommandSignatureDX12* DrawIndirectCommandSignature = nullptr; GPUBuffer* DummyVB = nullptr; + Array> QueryHeaps; D3D12_CPU_DESCRIPTOR_HANDLE NullSRV(D3D12_SRV_DIMENSION dimension) const; D3D12_CPU_DESCRIPTOR_HANDLE NullUAV() const; @@ -136,6 +128,8 @@ public: return _mainContext; } + GPUQueryDX12 AllocQuery(GPUQueryType type); + public: DescriptorHeapPoolDX12 Heap_CBV_SRV_UAV; @@ -185,6 +179,7 @@ public: void RenderEnd() override; void Dispose() final override; void WaitForGPU() override; + bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) override; GPUTexture* CreateTexture(const StringView& name) override; GPUShader* CreateShader(const StringView& name) override; GPUPipelineState* CreatePipelineState() override; diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTimerQueryDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTimerQueryDX12.cpp index ca19ebc2b..c64f0a3c9 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTimerQueryDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTimerQueryDX12.cpp @@ -20,9 
+20,7 @@ void GPUTimerQueryDX12::OnReleaseGPU() void GPUTimerQueryDX12::Begin() { const auto context = _device->GetMainContextDX12(); - auto& heap = _device->TimestampQueryHeap; - heap.EndQuery(context, _begin); - + _query = context->BeginQuery(GPUQueryType::Timer); _hasResult = false; _endCalled = false; } @@ -31,14 +29,8 @@ void GPUTimerQueryDX12::End() { if (_endCalled) return; - const auto context = _device->GetMainContextDX12(); - auto& heap = _device->TimestampQueryHeap; - heap.EndQuery(context, _end); - - const auto queue = _device->GetCommandQueue()->GetCommandQueue(); - VALIDATE_DIRECTX_CALL(queue->GetTimestampFrequency(&_gpuFrequency)); - + context->EndQuery(_query); _endCalled = true; } @@ -48,33 +40,16 @@ bool GPUTimerQueryDX12::HasResult() return false; if (_hasResult) return true; - - auto& heap = _device->TimestampQueryHeap; - return heap.IsReady(_end) && heap.IsReady(_begin); + uint64 result; + return _device->GetQueryResult(_query, result, false); } float GPUTimerQueryDX12::GetResult() { if (_hasResult) - { return _timeDelta; - } - - const uint64 timeBegin = *(uint64*)_device->TimestampQueryHeap.ResolveQuery(_begin); - const uint64 timeEnd = *(uint64*)_device->TimestampQueryHeap.ResolveQuery(_end); - - // Calculate event duration in milliseconds - if (timeEnd > timeBegin) - { - const uint64 delta = timeEnd - timeBegin; - const double frequency = double(_gpuFrequency); - _timeDelta = static_cast((delta / frequency) * 1000.0); - } - else - { - _timeDelta = 0.0f; - } - + uint64 result; + _timeDelta = _device->GetQueryResult(_query, result, true) ? 
(float)((double)result / 1000.0) : 0.0f; _hasResult = true; return _timeDelta; } diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTimerQueryDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTimerQueryDX12.h index 22e8713d5..d43a366c1 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTimerQueryDX12.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTimerQueryDX12.h @@ -17,9 +17,7 @@ private: bool _hasResult = false; bool _endCalled = false; float _timeDelta = 0.0f; - uint64 _gpuFrequency = 0; - QueryHeapDX12::ElementHandle _begin; - QueryHeapDX12::ElementHandle _end; + uint64 _query = 0; public: diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/QueryHeapDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/QueryHeapDX12.cpp index 5977a9404..5a24c5b4c 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/QueryHeapDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/QueryHeapDX12.cpp @@ -7,42 +7,34 @@ #include "GPUContextDX12.h" #include "../RenderToolsDX.h" -QueryHeapDX12::QueryHeapDX12(GPUDeviceDX12* device, const D3D12_QUERY_HEAP_TYPE& queryHeapType, int32 queryHeapCount) - : _device(device) - , _queryHeap(nullptr) - , _resultBuffer(nullptr) - , _queryHeapType(queryHeapType) - , _currentIndex(0) - , _queryHeapCount(queryHeapCount) +bool QueryHeapDX12::Init(GPUDeviceDX12* device, GPUQueryType type, uint32 size) { - if (queryHeapType == D3D12_QUERY_HEAP_TYPE_OCCLUSION) - { - _resultSize = sizeof(uint64); - _queryType = D3D12_QUERY_TYPE_OCCLUSION; - } - else if (queryHeapType == D3D12_QUERY_HEAP_TYPE_TIMESTAMP) - { - _resultSize = sizeof(uint64); - _queryType = D3D12_QUERY_TYPE_TIMESTAMP; - } - else - { - MISSING_CODE("Not support D3D12 query heap type."); - } -} - -bool QueryHeapDX12::Init() -{ - _resultData.Resize(_resultSize * _queryHeapCount); - // Create the query heap - D3D12_QUERY_HEAP_DESC heapDesc; - heapDesc.Type = _queryHeapType; + Type = type; + _device = device; + _queryHeapCount = size; + D3D12_QUERY_HEAP_DESC heapDesc = 
{}; heapDesc.Count = _queryHeapCount; heapDesc.NodeMask = 0; - HRESULT result = _device->GetDevice()->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&_queryHeap)); + switch (type) + { + case GPUQueryType::Timer: + _resultSize = sizeof(uint64); + QueryType = D3D12_QUERY_TYPE_TIMESTAMP; + heapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP; + break; + case GPUQueryType::Occlusion: + _resultSize = sizeof(uint64); + QueryType = D3D12_QUERY_TYPE_OCCLUSION; + heapDesc.Type = D3D12_QUERY_HEAP_TYPE_OCCLUSION; + break; + case GPUQueryType::MAX: + return true; + } + _resultData.Resize(_resultSize * _queryHeapCount); + HRESULT result = _device->GetDevice()->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&QueryHeap)); LOG_DIRECTX_RESULT_WITH_RETURN(result, true); - DX_SET_DEBUG_NAME(_queryHeap, "Query Heap"); + DX_SET_DEBUG_NAME(QueryHeap, "Query Heap"); // Create the result buffer D3D12_HEAP_PROPERTIES heapProperties; @@ -77,8 +69,8 @@ bool QueryHeapDX12::Init() void QueryHeapDX12::Destroy() { SAFE_RELEASE(_resultBuffer); - SAFE_RELEASE(_queryHeap); - _currentBatch.Clear(); + SAFE_RELEASE(QueryHeap); + _currentBatch = QueryBatch(); _resultData.SetCapacity(0); } @@ -92,45 +84,36 @@ void QueryHeapDX12::EndQueryBatchAndResolveQueryData(GPUContextDX12* context) _currentBatch.Open = false; // Resolve the batch - const int32 offset = _currentBatch.Start * _resultSize; - context->GetCommandList()->ResolveQueryData(_queryHeap, _queryType, _currentBatch.Start, _currentBatch.Count, _resultBuffer, offset); - _currentBatch.Sync = _device->GetCommandQueue()->GetSyncPoint(); + const uint32 offset = _currentBatch.Start * _resultSize; + context->GetCommandList()->ResolveQueryData(QueryHeap, QueryType, _currentBatch.Start, _currentBatch.Count, _resultBuffer, offset); + const auto queue = _device->GetCommandQueue(); + _currentBatch.Sync = queue->GetSyncPoint(); + + // Get GPU clock frequency for timer queries + if (Type == GPUQueryType::Timer) + { + 
VALIDATE_DIRECTX_CALL(queue->GetCommandQueue()->GetTimestampFrequency(&_currentBatch.TimestampFrequency)); + } // Begin a new query batch _batches.Add(_currentBatch); StartQueryBatch(); } -void QueryHeapDX12::AllocQuery(GPUContextDX12* context, ElementHandle& handle) +bool QueryHeapDX12::CanAlloc(int32 count) const +{ + return _currentBatch.Open && _currentIndex + count <= GetQueryHeapCount(); +} + +void QueryHeapDX12::Alloc(ElementHandle& handle) { ASSERT(_currentBatch.Open); - // Check if need to start from the buffer head - if (_currentIndex >= GetQueryHeapCount()) - { - // We're in the middle of a batch, but we're at the end of the heap so split the batch in two - EndQueryBatchAndResolveQueryData(context); - } - // Allocate element into the current batch handle = _currentIndex++; _currentBatch.Count++; } -void QueryHeapDX12::BeginQuery(GPUContextDX12* context, ElementHandle& handle) -{ - AllocQuery(context, handle); - - context->GetCommandList()->BeginQuery(_queryHeap, _queryType, handle); -} - -void QueryHeapDX12::EndQuery(GPUContextDX12* context, ElementHandle& handle) -{ - AllocQuery(context, handle); - - context->GetCommandList()->EndQuery(_queryHeap, _queryType, handle); -} - bool QueryHeapDX12::IsReady(ElementHandle& handle) { // Current batch is not ready (not ended) @@ -150,7 +133,7 @@ bool QueryHeapDX12::IsReady(ElementHandle& handle) return true; } -void* QueryHeapDX12::ResolveQuery(ElementHandle& handle) +void* QueryHeapDX12::Resolve(ElementHandle& handle, uint64* timestampFrequency) { // Prevent queries from the current batch ASSERT(!_currentBatch.ContainsElement(handle)); @@ -192,10 +175,15 @@ void* QueryHeapDX12::ResolveQuery(ElementHandle& handle) + // Cache timestamps frequency for later (read it before the batch entry gets removed from the list) + _timestampFrequency = batch.TimestampFrequency; + // All elements got its results so we can remove this batch _batches.RemoveAt(i); break; } } + if (timestampFrequency) + *timestampFrequency = _timestampFrequency; return _resultData.Get() + handle * 
_resultSize; } @@ -204,7 +192,7 @@ void QueryHeapDX12::StartQueryBatch() ASSERT(!_currentBatch.Open); // Clear the current batch - _currentBatch.Clear(); + _currentBatch = QueryBatch(); // Loop active index on overflow if (_currentIndex >= GetQueryHeapCount()) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/QueryHeapDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/QueryHeapDX12.h index 7e68502e3..e8ab92a73 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/QueryHeapDX12.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/QueryHeapDX12.h @@ -10,6 +10,31 @@ class GPUContextDX12; class GPUBuffer; #include "CommandQueueDX12.h" +#include "Engine/Graphics/Enums.h" + +/// +/// GPU query ID packed into 64-bits. +/// +struct GPUQueryDX12 +{ + union + { + struct + { + uint16 Type; + uint16 Heap; + uint16 Element; + uint16 SecondaryElement; + }; + uint64 Raw; + }; + + static int32 GetQueriesCount(GPUQueryType type) + { + // Timer queries need to know duration via GPU timer queries difference + return type == GPUQueryType::Timer ? 2 : 1; + } +}; /// /// GPU queries heap for DirectX 12 backend. @@ -17,14 +42,12 @@ class GPUBuffer; class QueryHeapDX12 { public: - /// /// The query element handle. /// - typedef int32 ElementHandle; + typedef uint16 ElementHandle; private: - struct QueryBatch { /// @@ -35,71 +58,54 @@ private: /// /// The first element in the batch (inclusive). /// - int32 Start = 0; + uint32 Start = 0; /// /// The amount of elements added to this batch. /// - int32 Count = 0; + uint32 Count = 0; + + /// + /// The GPU clock frequency for timer queries. + /// + uint64 TimestampFrequency = 0; /// /// Is the batch still open for more begin/end queries. /// bool Open = false; - /// - /// Clears this batch. - /// - inline void Clear() - { - Sync = SyncPointDX12(); - Start = 0; - Count = 0; - Open = false; - } - /// /// Checks if this query batch contains a given element contains the element. /// /// The index of the element. 
/// True if element is in this query, otherwise false. - bool ContainsElement(int32 elementIndex) const + bool ContainsElement(uint32 elementIndex) const { return elementIndex >= Start && elementIndex < Start + Count; } }; private: - - GPUDeviceDX12* _device; - ID3D12QueryHeap* _queryHeap; - ID3D12Resource* _resultBuffer; - D3D12_QUERY_TYPE _queryType; - D3D12_QUERY_HEAP_TYPE _queryHeapType; - int32 _currentIndex; - int32 _resultSize; - int32 _queryHeapCount; + GPUDeviceDX12* _device = nullptr; + ID3D12Resource* _resultBuffer = nullptr; + uint32 _currentIndex = 0; + uint32 _resultSize = 0; + uint32 _queryHeapCount = 0; QueryBatch _currentBatch; Array _batches; Array _resultData; + uint64 _timestampFrequency; public: - - /// - /// Initializes a new instance of the class. - /// - /// The device. - /// Type of the query heap. - /// The query heap count. - QueryHeapDX12(GPUDeviceDX12* device, const D3D12_QUERY_HEAP_TYPE& queryHeapType, int32 queryHeapCount); - -public: - /// /// Initializes this instance. /// + /// The device. + /// Type of the query heap. + /// The size of the heap. /// True if failed, otherwise false. - bool Init(); + bool Init(GPUDeviceDX12* device, GPUQueryType type, uint32 size); /// /// Destroys this instance. @@ -107,12 +113,14 @@ public: void Destroy(); public: + GPUQueryType Type; + ID3D12QueryHeap* QueryHeap = nullptr; + D3D12_QUERY_TYPE QueryType = D3D12_QUERY_TYPE_OCCLUSION; /// /// Gets the query heap capacity. /// - /// The queries count. - FORCE_INLINE int32 GetQueryHeapCount() const + FORCE_INLINE uint32 GetQueryHeapCount() const { return _queryHeapCount; } @@ -120,8 +128,7 @@ public: /// /// Gets the size of the result value (in bytes). /// - /// The size of the query result value (in bytes). - FORCE_INLINE int32 GetResultSize() const + FORCE_INLINE uint32 GetResultSize() const { return _resultSize; } @@ -129,40 +136,30 @@ public: /// /// Gets the result buffer (CPU readable via Map/Unmap). /// - /// The query results buffer. 
FORCE_INLINE ID3D12Resource* GetResultBuffer() const { return _resultBuffer; } public: - /// /// Stops tracking the current batch of begin/end query calls that will be resolved together. This implicitly starts a new batch. /// /// The context. void EndQueryBatchAndResolveQueryData(GPUContextDX12* context); + /// + /// Checks if can alloc a new query (without rolling the existing batch). + /// + /// How many elements to allocate? + /// True if can alloc new query within the same batch. + bool CanAlloc(int32 count = 1) const; + /// /// Allocates the query heap element. /// - /// The context. /// The result handle. - void AllocQuery(GPUContextDX12* context, ElementHandle& handle); - - /// - /// Calls BeginQuery on command list for the given query heap slot. - /// - /// The context. - /// The query handle. - void BeginQuery(GPUContextDX12* context, ElementHandle& handle); - - /// - /// Calls EndQuery on command list for the given query heap slot. - /// - /// The context. - /// The query handle. - void EndQuery(GPUContextDX12* context, ElementHandle& handle); + void Alloc(ElementHandle& handle); /// /// Determines whether the specified query handle is ready to read data (command list has been executed by the GPU). @@ -175,11 +172,11 @@ public: /// Resolves the query (or skips if already resolved). /// /// The result handle. + /// The optional pointer to GPU timestamps frequency value to store. /// The pointer to the resolved query data. 
- void* ResolveQuery(ElementHandle& handle); + void* Resolve(ElementHandle& handle, uint64* timestampFrequency = nullptr); private: - /// /// Starts tracking a new batch of begin/end query calls that will be resolved together /// diff --git a/Source/Engine/GraphicsDevice/Null/GPUContextNull.h b/Source/Engine/GraphicsDevice/Null/GPUContextNull.h index 22786c157..a20861174 100644 --- a/Source/Engine/GraphicsDevice/Null/GPUContextNull.h +++ b/Source/Engine/GraphicsDevice/Null/GPUContextNull.h @@ -160,6 +160,15 @@ public: { } + uint64 BeginQuery(GPUQueryType type) override + { + return 0; + } + + void EndQuery(uint64 queryID) override + { + } + void SetViewport(const Viewport& viewport) override { } diff --git a/Source/Engine/GraphicsDevice/Null/GPUDeviceNull.cpp b/Source/Engine/GraphicsDevice/Null/GPUDeviceNull.cpp index a1582102f..3f9733d52 100644 --- a/Source/Engine/GraphicsDevice/Null/GPUDeviceNull.cpp +++ b/Source/Engine/GraphicsDevice/Null/GPUDeviceNull.cpp @@ -144,6 +144,11 @@ void GPUDeviceNull::WaitForGPU() { } +bool GPUDeviceNull::GetQueryResult(uint64 queryID, uint64& result, bool wait) +{ + return false; +} + GPUTexture* GPUDeviceNull::CreateTexture(const StringView& name) { PROFILE_MEM(GraphicsTextures); diff --git a/Source/Engine/GraphicsDevice/Null/GPUDeviceNull.h b/Source/Engine/GraphicsDevice/Null/GPUDeviceNull.h index 4e2d2e93e..29a31a7dd 100644 --- a/Source/Engine/GraphicsDevice/Null/GPUDeviceNull.h +++ b/Source/Engine/GraphicsDevice/Null/GPUDeviceNull.h @@ -20,18 +20,15 @@ class GPUDeviceNull : public GPUDevice friend GPUSwapChainNull; private: - GPUContextNull* _mainContext; GPUAdapterNull* _adapter; public: - static GPUDevice* Create(); GPUDeviceNull(); ~GPUDeviceNull(); public: - // [GPUDevice] GPUContext* GetMainContext() override; GPUAdapter* GetAdapter() const override; @@ -41,6 +38,7 @@ public: void Draw() override; void Dispose() override; void WaitForGPU() override; + bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) 
override; GPUTexture* CreateTexture(const StringView& name) override; GPUShader* CreateShader(const StringView& name) override; GPUPipelineState* CreatePipelineState() override; diff --git a/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.cpp index 36eacccf9..889265187 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.cpp @@ -6,7 +6,7 @@ #include "RenderToolsVulkan.h" #include "QueueVulkan.h" #include "GPUContextVulkan.h" -#if VULKAN_USE_QUERIES +#if VULKAN_USE_TIMER_QUERIES #include "GPUTimerQueryVulkan.h" #endif #include "DescriptorSetVulkan.h" @@ -243,6 +243,7 @@ void CmdBufferPoolVulkan::RefreshFenceStatus(const CmdBufferVulkan* skipCmdBuffe CmdBufferManagerVulkan::CmdBufferManagerVulkan(GPUDeviceVulkan* device, GPUContextVulkan* context) : _device(device) + , _context(context) , _pool(device) , _queue(context->GetQueue()) , _activeCmdBuffer(nullptr) @@ -259,12 +260,28 @@ void CmdBufferManagerVulkan::SubmitActiveCmdBuffer(SemaphoreVulkan* signalSemaph if (_activeCmdBuffer->IsInsideRenderPass()) _activeCmdBuffer->EndRenderPass(); -#if VULKAN_USE_QUERIES - // Pause all active queries - for (int32 i = 0; i < _queriesInProgress.Count(); i++) +#if VULKAN_USE_TIMER_QUERIES && GPU_VULKAN_PAUSE_QUERIES + // Pause all active timer queries + auto queries = _activeTimerQueries.Get(); +#if GPU_VULKAN_QUERY_NEW + for (int32 i = 0; i < _activeTimerQueries.Count(); i++) { - _queriesInProgress.Get()[i]->Interrupt(_activeCmdBuffer); + GPUQueryVulkan query; + query.Raw = queries[i]; + + // End active query to get time from start until submission + auto pool = _device->QueryPools[query.PoolIndex]; + vkCmdWriteTimestamp(_activeCmdBuffer->GetHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, pool->GetHandle(), query.SecondQueryIndex); + pool->MarkQueryAsStarted(query.SecondQueryIndex); + // TODO: somehow handle ending this query properly by 
stopping split query instead + //_context->EndQuery(query.Raw); + + // TODO: reimplement timer queries pause/resume to be more exact? } +#else + for (int32 i = 0; i < _activeTimerQueries.Count(); i++) + queries->Interrupt(_activeCmdBuffer); +#endif #endif _activeCmdBuffer->End(); @@ -317,27 +334,37 @@ void CmdBufferManagerVulkan::PrepareForNewActiveCommandBuffer() _activeCmdBuffer->Begin(); -#if VULKAN_USE_QUERIES - // Resume any paused queries with the new command buffer - for (int32 i = 0; i < _queriesInProgress.Count(); i++) +#if VULKAN_USE_TIMER_QUERIES && GPU_VULKAN_PAUSE_QUERIES + // Resume any paused timer queries with the new command buffer + auto queries = _activeTimerQueries.Get(); +#if GPU_VULKAN_QUERY_NEW + for (int32 i = 0; i < _activeTimerQueries.Count(); i++) { - _queriesInProgress.Get()[i]->Resume(_activeCmdBuffer); + GPUQueryVulkan query; + query.Raw = queries[i]; + //_activeTimerQueries.Get()[i]->Resume(_activeCmdBuffer); + } +#else + for (int32 i = 0; i < _activeTimerQueries.Count(); i++) + { + queries->Resume(_activeCmdBuffer); } #endif +#endif } -void CmdBufferManagerVulkan::OnQueryBegin(GPUTimerQueryVulkan* query) +#if GPU_VULKAN_QUERY_NEW && GPU_VULKAN_PAUSE_QUERIES + +void CmdBufferManagerVulkan::OnTimerQueryBegin(QueryType query) { -#if VULKAN_USE_QUERIES - _queriesInProgress.Add(query); -#endif + _activeTimerQueries.Add(query); } -void CmdBufferManagerVulkan::OnQueryEnd(GPUTimerQueryVulkan* query) +void CmdBufferManagerVulkan::OnTimerQueryEnd(QueryType query) { -#if VULKAN_USE_QUERIES - _queriesInProgress.Remove(query); -#endif + _activeTimerQueries.Remove(query); } #endif + +#endif diff --git a/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.h index 7cb3ee104..7ac1ef885 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.h +++ b/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.h @@ -168,10 +168,18 @@ class CmdBufferManagerVulkan { private: GPUDeviceVulkan* 
_device; + GPUContextVulkan* _context; CmdBufferPoolVulkan _pool; QueueVulkan* _queue; CmdBufferVulkan* _activeCmdBuffer; - Array _queriesInProgress; +#if VULKAN_USE_TIMER_QUERIES && GPU_VULKAN_PAUSE_QUERIES +#if GPU_VULKAN_QUERY_NEW + typedef uint64 QueryType; +#else + typedef GPUTimerQueryVulkan* QueryType; +#endif + Array _activeTimerQueries; +#endif public: CmdBufferManagerVulkan(GPUDeviceVulkan* device, GPUContextVulkan* context); @@ -192,11 +200,6 @@ public: return _activeCmdBuffer != nullptr; } - FORCE_INLINE bool HasQueriesInProgress() const - { - return _queriesInProgress.Count() != 0; - } - FORCE_INLINE CmdBufferVulkan* GetCmdBuffer() { if (!_activeCmdBuffer) @@ -207,14 +210,16 @@ public: public: void SubmitActiveCmdBuffer(SemaphoreVulkan* signalSemaphore = nullptr); void WaitForCmdBuffer(CmdBufferVulkan* cmdBuffer, float timeInSecondsToWait = 1.0f); - void RefreshFenceStatus(CmdBufferVulkan* skipCmdBuffer = nullptr) + void RefreshFenceStatus(const CmdBufferVulkan* skipCmdBuffer = nullptr) { _pool.RefreshFenceStatus(skipCmdBuffer); } void PrepareForNewActiveCommandBuffer(); - void OnQueryBegin(GPUTimerQueryVulkan* query); - void OnQueryEnd(GPUTimerQueryVulkan* query); +#if VULKAN_USE_TIMER_QUERIES && GPU_VULKAN_PAUSE_QUERIES + void OnTimerQueryBegin(QueryType query); + void OnTimerQueryEnd(QueryType query); +#endif }; #endif diff --git a/Source/Engine/GraphicsDevice/Vulkan/Config.h b/Source/Engine/GraphicsDevice/Vulkan/Config.h index 1f30c301a..7a4129a5c 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/Config.h +++ b/Source/Engine/GraphicsDevice/Vulkan/Config.h @@ -45,8 +45,14 @@ #endif #endif -#ifndef VULKAN_USE_QUERIES -#define VULKAN_USE_QUERIES 1 +#ifndef VULKAN_USE_TIMER_QUERIES +#define VULKAN_USE_TIMER_QUERIES 1 #endif +// Toggles GPUTimerQueryVulkan to use BeginQuery/EndQuery via GPuContext rather than old custom implementation +#define GPU_VULKAN_QUERY_NEW 1 + +// Toggles pausing and resuming all GPU timer queries when command buffer is being 
flushed (for more exact timings) +#define GPU_VULKAN_PAUSE_QUERIES 0 + #endif diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp index 979ccc0f8..cd11ef534 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp @@ -1300,6 +1300,72 @@ void GPUContextVulkan::DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, ui RENDER_STAT_DRAW_CALL(0, 0); } +uint64 GPUContextVulkan::BeginQuery(GPUQueryType type) +{ + // Check if timer queries are supported + if (type == GPUQueryType::Timer && _device->PhysicalDeviceLimits.timestampComputeAndGraphics != VK_TRUE) + return 0; + + // Allocate query + auto poolIndex = _device->GetOrCreateQueryPool(type); + auto pool = _device->QueryPools[poolIndex]; + uint32 index = 0; + const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer(); + if (!pool->AcquireQuery(cmdBuffer, index)) + return 0; + GPUQueryVulkan query; + query.PoolIndex = (uint16)poolIndex; + query.QueryIndex = (uint16)index; + query.SecondQueryIndex = 0; + query.Dummy = 1; // Ensure Raw is never 0, even for the first query + + // Begin query + switch (type) + { + case GPUQueryType::Timer: + // Timer queries need 2 slots (begin + end) + pool->AcquireQuery(cmdBuffer, index); + query.SecondQueryIndex = (uint16)index; + + vkCmdWriteTimestamp(cmdBuffer->GetHandle(), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, pool->GetHandle(), query.QueryIndex); +#if GPU_VULKAN_PAUSE_QUERIES + _cmdBufferManager->OnTimerQueryBegin(query.Raw); +#endif + break; + case GPUQueryType::Occlusion: + vkCmdBeginQuery(cmdBuffer->GetHandle(), pool->GetHandle(), query.QueryIndex, 0); + break; + } + pool->MarkQueryAsStarted(query.QueryIndex); + + return query.Raw; +} + +void GPUContextVulkan::EndQuery(uint64 queryID) +{ + if (!queryID) + return; + GPUQueryVulkan query; + query.Raw = queryID; + auto pool = _device->QueryPools[query.PoolIndex]; + + // End query + const 
auto cmdBuffer = _cmdBufferManager->GetCmdBuffer(); + switch (pool->Type) + { + case GPUQueryType::Timer: + vkCmdWriteTimestamp(cmdBuffer->GetHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, pool->GetHandle(), query.SecondQueryIndex); + pool->MarkQueryAsStarted(query.SecondQueryIndex); +#if GPU_VULKAN_PAUSE_QUERIES + _cmdBufferManager->OnTimerQueryEnd(query.Raw); +#endif + break; + case GPUQueryType::Occlusion: + vkCmdEndQuery(cmdBuffer->GetHandle(), pool->GetHandle(), query.QueryIndex); + break; + } +} + void GPUContextVulkan::SetViewport(const Viewport& viewport) { vkCmdSetViewport(_cmdBufferManager->GetCmdBuffer()->GetHandle(), 0, 1, (VkViewport*)&viewport); diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h index fa94aa139..f77e42eb7 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h @@ -189,6 +189,8 @@ public: void DrawIndexedInstanced(uint32 indicesCount, uint32 instanceCount, int32 startInstance, int32 startVertex, int32 startIndex) override; void DrawInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override; void DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override; + uint64 BeginQuery(GPUQueryType type) override; + void EndQuery(uint64 queryID) override; void SetViewport(const Viewport& viewport) override; void SetScissor(const Rectangle& scissorRect) override; GPUPipelineState* GetState() const override; diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp index bc33d9d86..03467defd 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp @@ -627,14 +627,14 @@ RenderPassVulkan::~RenderPassVulkan() Device->DeferredDeletionQueue.EnqueueResource(DeferredDeletionQueueVulkan::Type::RenderPass, Handle); } 
-QueryPoolVulkan::QueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, VkQueryType type) +QueryPoolVulkan::QueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, GPUQueryType type) : _device(device) , _handle(VK_NULL_HANDLE) - , _type(type) + , Type(type) { VkQueryPoolCreateInfo createInfo; RenderToolsVulkan::ZeroStruct(createInfo, VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO); - createInfo.queryType = type; + createInfo.queryType = type == GPUQueryType::Occlusion ? VK_QUERY_TYPE_OCCLUSION : VK_QUERY_TYPE_TIMESTAMP; createInfo.queryCount = capacity; VALIDATE_VULKAN_RESULT(vkCreateQueryPool(device->Device, &createInfo, nullptr, &_handle)); @@ -667,7 +667,7 @@ void QueryPoolVulkan::Reset(CmdBufferVulkan* cmdBuffer) #endif -BufferedQueryPoolVulkan::BufferedQueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, VkQueryType type) +BufferedQueryPoolVulkan::BufferedQueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, GPUQueryType type) : QueryPoolVulkan(device, capacity, type) , _lastBeginIndex(0) { @@ -720,6 +720,16 @@ void BufferedQueryPoolVulkan::ReleaseQuery(uint32 queryIndex) _lastBeginIndex = (uint32)queryIndex; } } + if (_usedQueryBits[word] == 0) + { + // Check if pool got empty and reset the pointer back to start + for (int32 wordIndex = 0; wordIndex < _usedQueryBits.Count(); wordIndex++) + { + if (_usedQueryBits[wordIndex]) + return; + } + _lastBeginIndex = 0; + } } void BufferedQueryPoolVulkan::MarkQueryAsStarted(uint32 queryIndex) @@ -729,7 +739,7 @@ void BufferedQueryPoolVulkan::MarkQueryAsStarted(uint32 queryIndex) _startedQueryBits[word] = _startedQueryBits[word] | bit; } -bool BufferedQueryPoolVulkan::GetResults(GPUContextVulkan* context, uint32 index, uint64& result) +bool BufferedQueryPoolVulkan::GetResults(uint32 index, uint64& result) { const uint64 bit = (uint64)(index % 64); const uint64 bitMask = (uint64)1 << bit; @@ -1228,22 +1238,20 @@ GPUDeviceVulkan::~GPUDeviceVulkan() GPUDeviceVulkan::Dispose(); } -BufferedQueryPoolVulkan* 
GPUDeviceVulkan::FindAvailableQueryPool(VkQueryType queryType) +int32 GPUDeviceVulkan::GetOrCreateQueryPool(GPUQueryType type) { - auto& pools = queryType == VK_QUERY_TYPE_OCCLUSION ? OcclusionQueryPools : TimestampQueryPools; - - // Try to use pool with available space inside - for (int32 i = 0; i < pools.Count(); i++) + auto pools = QueryPools.Get(); + for (int32 i = 0; i < QueryPools.Count(); i++) { - auto pool = pools.Get()[i]; - if (pool->HasRoom()) - return pool; + auto pool = pools[i]; + if (pool->Type == type && pool->HasRoom()) + return i; } - // Create new pool - const auto pool = New(this, queryType == VK_QUERY_TYPE_OCCLUSION ? 4096 : 1024, queryType); - pools.Add(pool); - return pool; + PROFILE_CPU_NAMED("Create Create Pool"); + auto pool = New(this, type == GPUQueryType::Occlusion ? 4096 : 1024, type); + QueryPools.Add(pool); + return QueryPools.Count() - 1; } RenderPassVulkan* GPUDeviceVulkan::GetOrCreateRenderPass(RenderTargetLayoutVulkan& layout) @@ -1752,6 +1760,10 @@ bool GPUDeviceVulkan::Init() limits.MaximumTexture3DSize = PhysicalDeviceLimits.maxImageDimension3D; limits.MaximumTextureCubeSize = PhysicalDeviceLimits.maxImageDimensionCube; limits.MaximumSamplerAnisotropy = PhysicalDeviceLimits.maxSamplerAnisotropy; + if (PhysicalDeviceLimits.timestampComputeAndGraphics != VK_TRUE) + { + LOG(Warning, "Timer Queries are unsupported by this device"); + } for (int32 i = 0; i < static_cast(PixelFormat::MAX); i++) { @@ -1982,6 +1994,16 @@ void GPUDeviceVulkan::DrawBegin() // Base GPUDevice::DrawBegin(); + // Put back used queries to the pool + for (auto& query : QueriesToRelease) + { + auto pool = QueryPools[query.PoolIndex]; + pool->ReleaseQuery(query.QueryIndex); + if (pool->Type == GPUQueryType::Timer) + pool->ReleaseQuery(query.SecondQueryIndex); + } + QueriesToRelease.Clear(); + // Flush resources DeferredDeletionQueue.ReleaseResources(); DescriptorPoolsManager->GC(); @@ -2022,8 +2044,7 @@ void GPUDeviceVulkan::Dispose() _layouts.ClearDelete(); 
HelperResources.Dispose(); UploadBuffer.Dispose(); - TimestampQueryPools.ClearDelete(); - OcclusionQueryPools.ClearDelete(); + QueryPools.ClearDelete(); SAFE_DELETE_GPU_RESOURCE(UniformBufferUploader); Delete(DescriptorPoolsManager); SAFE_DELETE(MainContext); @@ -2084,6 +2105,61 @@ void GPUDeviceVulkan::WaitForGPU() } } +bool GPUDeviceVulkan::GetQueryResult(uint64 queryID, uint64& result, bool wait) +{ + if (!queryID) + return false; + GPUQueryVulkan query; + query.Raw = queryID; + auto pool = QueryPools[query.PoolIndex]; + +RETRY: + bool hasData; + uint64 resultSecondary; + switch (pool->Type) + { + case GPUQueryType::Timer: + hasData = pool->GetResults(query.QueryIndex, result) && pool->GetResults(query.SecondQueryIndex, resultSecondary); +#if VULKAN_USE_TIMER_QUERIES && GPU_VULKAN_PAUSE_QUERIES + if (hasData) + { + // Check if dependant queries have completed (timer queries can be split when active command buffer get submitted) + // TODO: impl this + } +#endif + if (hasData) + { + if (resultSecondary >= result) + { + // Convert GPU timestamps to nanoseconds and then to microseconds + double nanoseconds = double(resultSecondary - result) * double(PhysicalDeviceLimits.timestampPeriod); + result = (uint64)(nanoseconds * 0.001); + } + else + result = 0; + } + break; + case GPUQueryType::Occlusion: + hasData = pool->GetResults(query.QueryIndex, result); + break; + } + + if (!hasData && wait) + { + // Wait until data is ready + Platform::Yield(); + goto RETRY; + } + + if (hasData) + { + // Auto-release query on the next frame + QueriesToRelease.Add(query); + } + + return hasData; +} + GPUTexture* GPUDeviceVulkan::CreateTexture(const StringView& name) { PROFILE_MEM(GraphicsTextures); diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h index 09fa93f3e..7dd8ef0ab 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h @@ -28,6 
+28,24 @@ class GPUDeviceVulkan; class UniformBufferUploaderVulkan; class DescriptorPoolsManagerVulkan; +/// +/// GPU query ID packed into 64-bits. +/// +struct GPUQueryVulkan +{ + union + { + struct + { + uint16 PoolIndex; + uint16 QueryIndex; + uint16 SecondQueryIndex; + uint16 Dummy; + }; + uint64 Raw; + }; +}; + class SemaphoreVulkan { private: @@ -261,16 +279,17 @@ protected: GPUDeviceVulkan* _device; VkQueryPool _handle; - const VkQueryType _type; #if VULKAN_RESET_QUERY_POOLS Array _resetRanges; #endif public: - QueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, VkQueryType type); + QueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, GPUQueryType type); ~QueryPoolVulkan(); public: + const GPUQueryType Type; + inline VkQueryPool GetHandle() const { return _handle; @@ -294,11 +313,11 @@ private: int32 _lastBeginIndex; public: - BufferedQueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, VkQueryType type); + BufferedQueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, GPUQueryType type); bool AcquireQuery(CmdBufferVulkan* cmdBuffer, uint32& resultIndex); void ReleaseQuery(uint32 queryIndex); void MarkQueryAsStarted(uint32 queryIndex); - bool GetResults(GPUContextVulkan* context, uint32 index, uint64& result); + bool GetResults(uint32 index, uint64& result); bool HasRoom() const; }; @@ -498,14 +517,13 @@ public: VkPhysicalDeviceFeatures PhysicalDeviceFeatures; VkPhysicalDeviceVulkan12Features PhysicalDeviceFeatures12; - Array TimestampQueryPools; - Array OcclusionQueryPools; - + Array QueryPools; + Array QueriesToRelease; #if VULKAN_RESET_QUERY_POOLS Array QueriesToReset; #endif - BufferedQueryPoolVulkan* FindAvailableQueryPool(VkQueryType queryType); + int32 GetOrCreateQueryPool(GPUQueryType type); RenderPassVulkan* GetOrCreateRenderPass(RenderTargetLayoutVulkan& layout); FramebufferVulkan* GetOrCreateFramebuffer(FramebufferVulkan::Key& key, VkExtent2D& extent, uint32 layers); PipelineLayoutVulkan* 
GetOrCreateLayout(DescriptorSetLayoutInfoVulkan& key); @@ -553,6 +571,7 @@ public: void DrawBegin() override; void Dispose() override; void WaitForGPU() override; + bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) override; GPUTexture* CreateTexture(const StringView& name) override; GPUShader* CreateShader(const StringView& name) override; GPUPipelineState* CreatePipelineState() override; diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.cpp index 2dd3b07d5..ecd0dbdfe 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.cpp @@ -11,6 +11,78 @@ GPUTimerQueryVulkan::GPUTimerQueryVulkan(GPUDeviceVulkan* device) { } +#if !VULKAN_USE_TIMER_QUERIES + +void GPUTimerQueryVulkan::OnReleaseGPU() +{ +} + +void GPUTimerQueryVulkan::Begin() +{ +} + +void GPUTimerQueryVulkan::End() +{ +} + +bool GPUTimerQueryVulkan::HasResult() +{ + return true; +} + +float GPUTimerQueryVulkan::GetResult() +{ + return 0; +} + +#elif GPU_VULKAN_QUERY_NEW + +void GPUTimerQueryVulkan::OnReleaseGPU() +{ + _hasResult = false; + _endCalled = false; + _timeDelta = 0.0f; +} + +void GPUTimerQueryVulkan::Begin() +{ + const auto context = _device->GetMainContext(); + _query = context->BeginQuery(GPUQueryType::Timer); + _hasResult = false; + _endCalled = false; +} + +void GPUTimerQueryVulkan::End() +{ + if (_endCalled) + return; + const auto context = _device->GetMainContext(); + context->EndQuery(_query); + _endCalled = true; +} + +bool GPUTimerQueryVulkan::HasResult() +{ + if (!_endCalled) + return false; + if (_hasResult) + return true; + uint64 result; + return _device->GetQueryResult(_query, result, false); +} + +float GPUTimerQueryVulkan::GetResult() +{ + if (_hasResult) + return _timeDelta; + uint64 result; + _timeDelta = _device->GetQueryResult(_query, result, true) ? 
(float)((double)result / 1000.0) : 0.0f; + _hasResult = true; + return _timeDelta; +} + +#else + void GPUTimerQueryVulkan::Interrupt(CmdBufferVulkan* cmdBuffer) { if (!_interrupted) @@ -38,8 +110,7 @@ bool GPUTimerQueryVulkan::GetResult(Query& query) { if (query.Pool) { - const auto context = (GPUContextVulkan*)_device->GetMainContext(); - if (query.Pool->GetResults(context, query.Index, query.Result)) + if (query.Pool->GetResults(query.Index, query.Result)) { // Release query query.Pool->ReleaseQuery(query.Index); @@ -58,7 +129,7 @@ bool GPUTimerQueryVulkan::GetResult(Query& query) void GPUTimerQueryVulkan::WriteTimestamp(CmdBufferVulkan* cmdBuffer, Query& query, VkPipelineStageFlagBits stage) const { - auto pool = _device->FindAvailableQueryPool(VK_QUERY_TYPE_TIMESTAMP); + auto pool = _device->QueryPools[_device->GetOrCreateQueryPool(GPUQueryType::Timer)]; uint32 index; if (pool->AcquireQuery(cmdBuffer, index)) { @@ -76,7 +147,6 @@ void GPUTimerQueryVulkan::WriteTimestamp(CmdBufferVulkan* cmdBuffer, Query& quer bool GPUTimerQueryVulkan::TryGetResult() { -#if VULKAN_USE_QUERIES // Try get queries value (if not already) for (int32 i = 0; i < _queries.Count(); i++) { @@ -115,20 +185,12 @@ bool GPUTimerQueryVulkan::TryGetResult() e.End.Pool->ReleaseQuery(e.End.Index); } _queries.Clear(); -#else - _timeDelta = 0.0f; - _hasResult = true; -#endif return true; } bool GPUTimerQueryVulkan::UseQueries() { -#if VULKAN_USE_QUERIES return _device->PhysicalDeviceLimits.timestampComputeAndGraphics == VK_TRUE; -#else - return false; -#endif } void GPUTimerQueryVulkan::OnReleaseGPU() @@ -150,7 +212,6 @@ void GPUTimerQueryVulkan::OnReleaseGPU() void GPUTimerQueryVulkan::Begin() { -#if VULKAN_USE_QUERIES if (UseQueries()) { const auto context = (GPUContextVulkan*)_device->GetMainContext(); @@ -162,12 +223,11 @@ void GPUTimerQueryVulkan::Begin() _queryIndex = 0; _interrupted = false; WriteTimestamp(cmdBuffer, e.Begin, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - 
context->GetCmdBufferManager()->OnQueryBegin(this); + context->GetCmdBufferManager()->OnTimerQueryBegin(this); ASSERT(_queries.IsEmpty()); _queries.Add(e); } -#endif _hasResult = false; _endCalled = false; @@ -178,7 +238,6 @@ void GPUTimerQueryVulkan::End() if (_endCalled) return; -#if VULKAN_USE_QUERIES if (UseQueries()) { const auto context = (GPUContextVulkan*)_device->GetMainContext(); @@ -188,9 +247,8 @@ void GPUTimerQueryVulkan::End() { WriteTimestamp(cmdBuffer, _queries[_queryIndex].End, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); } - context->GetCmdBufferManager()->OnQueryEnd(this); + context->GetCmdBufferManager()->OnTimerQueryEnd(this); } -#endif _endCalled = true; } @@ -213,3 +271,5 @@ float GPUTimerQueryVulkan::GetResult() } #endif + +#endif diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.h index 18a56cac6..07a263fe3 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.h +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.h @@ -13,6 +13,13 @@ class GPUTimerQueryVulkan : public GPUResourceVulkan { private: +#if !VULKAN_USE_TIMER_QUERIES +#elif GPU_VULKAN_QUERY_NEW + bool _hasResult = false; + bool _endCalled = false; + float _timeDelta = 0.0f; + uint64 _query = 0; +#else struct Query { BufferedQueryPoolVulkan* Pool; @@ -32,6 +39,7 @@ private: float _timeDelta = 0.0f; int32 _queryIndex; Array> _queries; +#endif public: /// @@ -40,6 +48,7 @@ public: /// The graphics device. GPUTimerQueryVulkan(GPUDeviceVulkan* device); +#if !GPU_VULKAN_QUERY_NEW public: /// /// Interrupts an in-progress query, allowing the command buffer to submitted. Interrupted queries must be resumed using Resume(). 
@@ -58,6 +67,7 @@ private: void WriteTimestamp(CmdBufferVulkan* cmdBuffer, Query& query, VkPipelineStageFlagBits stage) const; bool TryGetResult(); bool UseQueries(); +#endif public: // [GPUTimerQuery] diff --git a/Source/Engine/GraphicsDevice/Vulkan/Mac/MacVulkanPlatform.h b/Source/Engine/GraphicsDevice/Vulkan/Mac/MacVulkanPlatform.h index 880749b72..05ed07792 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/Mac/MacVulkanPlatform.h +++ b/Source/Engine/GraphicsDevice/Vulkan/Mac/MacVulkanPlatform.h @@ -9,7 +9,7 @@ #define VULKAN_BACK_BUFFERS_COUNT 3 // General/Validation Error:0 VK_ERROR_INITIALIZATION_FAILED: Could not create MTLCounterSampleBuffer for query pool of type VK_QUERY_TYPE_TIMESTAMP. Reverting to emulated behavior. (Error code 0): Cannot allocate sample buffer -#define VULKAN_USE_QUERIES 0 +#define VULKAN_USE_TIMER_QUERIES 0 /// /// The implementation for the Vulkan API support for Mac platform. diff --git a/Source/Engine/GraphicsDevice/Vulkan/iOS/iOSVulkanPlatform.h b/Source/Engine/GraphicsDevice/Vulkan/iOS/iOSVulkanPlatform.h index db27c76cd..8db71ec6e 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/iOS/iOSVulkanPlatform.h +++ b/Source/Engine/GraphicsDevice/Vulkan/iOS/iOSVulkanPlatform.h @@ -9,7 +9,7 @@ #define VULKAN_BACK_BUFFERS_COUNT 3 // General/Validation Error:0 VK_ERROR_INITIALIZATION_FAILED: Could not create MTLCounterSampleBuffer for query pool of type VK_QUERY_TYPE_TIMESTAMP. Reverting to emulated behavior. (Error code 0): Cannot allocate sample buffer -#define VULKAN_USE_QUERIES 0 +#define VULKAN_USE_TIMER_QUERIES 0 /// /// The implementation for the Vulkan API support for iOS platform.