From 847f6411e7bc848cb048a0cda4e4e54f2c6db603 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 16 Jan 2026 13:24:56 +0100 Subject: [PATCH] Migrate `ProfilerGPU` to new lightweight queries API --- .../DirectX/DX11/GPUContextDX11.cpp | 6 +- .../DirectX/DX11/GPUDeviceDX11.cpp | 3 +- .../DirectX/DX11/GPUDeviceDX11.h | 2 +- Source/Engine/Profiler/ProfilerGPU.cpp | 72 ++++++++----------- Source/Engine/Profiler/ProfilerGPU.h | 16 ++--- 5 files changed, 44 insertions(+), 55 deletions(-) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp index 92a957ffd..5e273304d 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp @@ -583,7 +583,7 @@ uint64 GPUContextDX11::BeginQuery(GPUQueryType type) auto& query = _device->_queries.AddOne(); query.Type = type; D3D11_QUERY_DESC queryDesc; - queryDesc.Query = D3D11_QUERY_TIMESTAMP; + queryDesc.Query = type == GPUQueryType::Occlusion ? D3D11_QUERY_OCCLUSION : D3D11_QUERY_TIMESTAMP; queryDesc.MiscFlags = 0; HRESULT hr = _device->GetDevice()->CreateQuery(&queryDesc, &query.Query); LOG_DIRECTX_RESULT_WITH_RETURN(hr, 0); @@ -608,7 +608,7 @@ uint64 GPUContextDX11::BeginQuery(GPUQueryType type) auto& query = _device->_queries[queryIndex]; ASSERT_LOW_LAYER(query.State == GPUQueryDataDX11::Ready); ASSERT_LOW_LAYER(query.Type == type); - query.State = GPUQueryDataDX11::Active; + query.State = GPUQueryDataDX11::Begin; auto context = _device->GetIM(); if (type == GPUQueryType::Timer) { @@ -633,6 +633,8 @@ void GPUContextDX11::EndQuery(uint64 queryID) GPUQueryDX11 q; q.Raw = queryID; auto& query = _device->_queries[q.Index]; + ASSERT_LOW_LAYER(query.State == GPUQueryDataDX11::Begin); + query.State = GPUQueryDataDX11::End; auto context = _device->GetIM(); context->End(query.Query); if (q.Type == (uint16)GPUQueryType::Timer) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp index 2d8a0f8c7..112c79f0f 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp @@ -921,6 +921,7 @@ bool GPUDeviceDX11::GetQueryResult(uint64 queryID, uint64& result, bool wait) result = query.Result; return true; } + ASSERT_LOW_LAYER(query.State == GPUQueryDataDX11::End); auto context = GetIM(); RETRY: @@ -935,7 +936,7 @@ RETRY: context->GetData(query.TimerBeginQuery, &timeBegin, sizeof(timeBegin), 0); context->GetData(query.Query, &timeEnd, sizeof(timeEnd), 0); - if (disjointData.Disjoint == FALSE) + if (disjointData.Disjoint == FALSE && disjointData.Frequency > 0) { result = timeEnd > timeBegin ? (timeEnd - timeBegin) * 1000000ull / disjointData.Frequency : 0; } diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.h b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.h index 9657ebc59..51e83345a 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.h @@ -41,7 +41,7 @@ struct GPUQueryDataDX11 ID3D11Query* TimerBeginQuery = nullptr; ID3D11Query* DisjointQuery = nullptr; uint64 Result = 0; - enum States { Ready, Active, Finished } State = Ready; + enum States { Ready, Begin, End, Finished } State = Ready; GPUQueryType Type = GPUQueryType::MAX; void Release(); diff --git a/Source/Engine/Profiler/ProfilerGPU.cpp b/Source/Engine/Profiler/ProfilerGPU.cpp index 9330663f4..5df47680c 100644 --- a/Source/Engine/Profiler/ProfilerGPU.cpp +++ b/Source/Engine/Profiler/ProfilerGPU.cpp @@ -7,14 +7,11 @@ #include "Engine/Core/Log.h" #include "Engine/Engine/Engine.h" #include "Engine/Graphics/GPUDevice.h" -#include "Engine/Graphics/GPUTimerQuery.h" #include "Engine/Graphics/GPUContext.h" RenderStatsData RenderStatsData::Counter; int32 ProfilerGPU::_depth = 0; -Array ProfilerGPU::_timerQueriesPool; -Array ProfilerGPU::_timerQueriesFree; bool ProfilerGPU::Enabled = false; bool ProfilerGPU::EventsEnabled = false; int32 ProfilerGPU::CurrentBuffer = 0; @@ -25,11 +22,18 @@ bool ProfilerGPU::EventBuffer::HasData() const return _isResolved && _data.HasItems(); } -void ProfilerGPU::EventBuffer::EndAll() +void ProfilerGPU::EventBuffer::EndAllQueries() { + auto context = GPUDevice::Instance->GetMainContext(); + auto queries = _data.Get(); for (int32 i = 0; i < _data.Count(); i++) { - _data[i].Timer->End(); + auto& e = queries[i]; + if (e.QueryActive) + { + e.QueryActive = false; + context->EndQuery(e.Query); + } } } @@ -38,21 +42,21 @@ void ProfilerGPU::EventBuffer::TryResolve() if (_isResolved || _data.IsEmpty()) return; - // Check all the queries from the back to the front (in some cases inner queries are not finished) - for (int32 i = _data.Count() - 1; i >= 0; i--) - { - if (!_data[i].Timer->HasResult()) - return; - } - - // Collect queries results and free them + // Collect queries results PROFILE_MEM(Profiler); + auto device = GPUDevice::Instance; + auto queries = _data.Get(); for (int32 i = 0; i < _data.Count(); i++) { - auto& e = _data[i]; - e.Time = e.Timer->GetResult(); - _timerQueriesFree.Add(e.Timer); - e.Timer = nullptr; + auto& e = queries[i]; + ASSERT_LOW_LAYER(!e.QueryActive); + uint64 time; + if (device->GetQueryResult(e.Query, time, false)) + { + e.Time = (float)time * 0.001f; // Convert to milliseconds + } + else + return; // Skip if one of the queries is not yet ready (frame still in-flight) } _isResolved = true; @@ -81,28 +85,12 @@ void ProfilerGPU::EventBuffer::Clear() PresentTime = 0.0f; } -GPUTimerQuery* ProfilerGPU::GetTimerQuery() -{ - GPUTimerQuery* result; - if (_timerQueriesFree.HasItems()) - { - result = _timerQueriesFree.Last(); - _timerQueriesFree.RemoveLast(); - } - else - { - PROFILE_MEM(Profiler); - result = GPUDevice::Instance->CreateTimerQuery(); - _timerQueriesPool.Add(result); - } - return result; -} - int32 ProfilerGPU::BeginEvent(const Char* name) { + auto context = GPUDevice::Instance->GetMainContext(); #if GPU_ALLOW_PROFILE_EVENTS if (EventsEnabled) - GPUDevice::Instance->GetMainContext()->EventBegin(name); + context->EventBegin(name); #endif if (!Enabled) return -1; @@ -110,9 +98,9 @@ int32 ProfilerGPU::BeginEvent(const Char* name) Event e; e.Name = name; e.Stats = RenderStatsData::Counter; - e.Timer = GetTimerQuery(); - e.Timer->Begin(); + e.Query = context->BeginQuery(GPUQueryType::Timer); e.Depth = _depth++; + e.QueryActive = true; auto& buffer = Buffers[CurrentBuffer]; const auto index = buffer.Add(e); @@ -121,9 +109,10 @@ int32 ProfilerGPU::BeginEvent(const Char* name) void ProfilerGPU::EndEvent(int32 index) { + auto context = GPUDevice::Instance->GetMainContext(); #if GPU_ALLOW_PROFILE_EVENTS if (EventsEnabled) - GPUDevice::Instance->GetMainContext()->EventEnd(); + context->EventEnd(); #endif if (index == -1) return; @@ -131,8 +120,9 @@ void ProfilerGPU::EndEvent(int32 index) auto& buffer = Buffers[CurrentBuffer]; auto e = buffer.Get(index); + e->QueryActive = false; e->Stats.Mix(RenderStatsData::Counter); - e->Timer->End(); + context->EndQuery(e->Query); } void ProfilerGPU::BeginFrame() @@ -155,7 +145,7 @@ void ProfilerGPU::OnPresent() { // End all current frame queries to prevent invalid event duration values auto& buffer = Buffers[CurrentBuffer]; - buffer.EndAll(); + buffer.EndAllQueries(); } void ProfilerGPU::OnPresentTime(float time) @@ -211,8 +201,6 @@ bool ProfilerGPU::GetLastFrameData(float& drawTimeMs, float& presentTimeMs, Rend void ProfilerGPU::Dispose() { - _timerQueriesPool.ClearDelete(); - _timerQueriesFree.Clear(); } #endif diff --git a/Source/Engine/Profiler/ProfilerGPU.h b/Source/Engine/Profiler/ProfilerGPU.h index 82e387768..811e58d2d 100644 --- a/Source/Engine/Profiler/ProfilerGPU.h +++ b/Source/Engine/Profiler/ProfilerGPU.h @@ -7,8 +7,6 @@ #include "Engine/Scripting/ScriptingType.h" #include "RenderStats.h" -class GPUTimerQuery; - #if COMPILE_WITH_PROFILER // Profiler events buffers capacity (tweaked manually) @@ -38,7 +36,7 @@ public: /// /// The timer query used to get the exact event time on a GPU. Assigned and managed by the internal profiler layer. /// - API_FIELD() GPUTimerQuery* Timer; + API_FIELD() uint64 Query; /// /// The rendering stats for this event. When event is active it holds the stats on event begin. @@ -54,6 +52,11 @@ public: /// The event depth. Value 0 is used for the root events. /// API_FIELD() int32 Depth; + + /// + /// True if event timer query is active. + /// + API_FIELD() bool QueryActive; }; /// @@ -84,7 +87,7 @@ public: /// /// Ends all used timer queries. /// - void EndAll(); + void EndAllQueries(); /// /// Tries the resolve this frame. Skips if already resolved or has no collected events. @@ -123,11 +126,6 @@ public: private: static int32 _depth; - static Array _timerQueriesPool; - static Array _timerQueriesFree; - - static GPUTimerQuery* GetTimerQuery(); - public: /// /// True if GPU profiling is enabled, otherwise false to disable events collecting and GPU timer queries usage. Can be changed during rendering.