Migrate ProfilerGPU to new lightweight queries API

This commit is contained in:
Wojtek Figat
2026-01-16 13:24:56 +01:00
parent 0d7c04682d
commit 847f6411e7
5 changed files with 44 additions and 55 deletions

View File

@@ -583,7 +583,7 @@ uint64 GPUContextDX11::BeginQuery(GPUQueryType type)
auto& query = _device->_queries.AddOne();
query.Type = type;
D3D11_QUERY_DESC queryDesc;
queryDesc.Query = D3D11_QUERY_TIMESTAMP;
queryDesc.Query = type == GPUQueryType::Occlusion ? D3D11_QUERY_OCCLUSION : D3D11_QUERY_TIMESTAMP;
queryDesc.MiscFlags = 0;
HRESULT hr = _device->GetDevice()->CreateQuery(&queryDesc, &query.Query);
LOG_DIRECTX_RESULT_WITH_RETURN(hr, 0);
@@ -608,7 +608,7 @@ uint64 GPUContextDX11::BeginQuery(GPUQueryType type)
auto& query = _device->_queries[queryIndex];
ASSERT_LOW_LAYER(query.State == GPUQueryDataDX11::Ready);
ASSERT_LOW_LAYER(query.Type == type);
query.State = GPUQueryDataDX11::Active;
query.State = GPUQueryDataDX11::Begin;
auto context = _device->GetIM();
if (type == GPUQueryType::Timer)
{
@@ -633,6 +633,8 @@ void GPUContextDX11::EndQuery(uint64 queryID)
GPUQueryDX11 q;
q.Raw = queryID;
auto& query = _device->_queries[q.Index];
ASSERT_LOW_LAYER(query.State == GPUQueryDataDX11::Begin);
query.State = GPUQueryDataDX11::End;
auto context = _device->GetIM();
context->End(query.Query);
if (q.Type == (uint16)GPUQueryType::Timer)

View File

@@ -921,6 +921,7 @@ bool GPUDeviceDX11::GetQueryResult(uint64 queryID, uint64& result, bool wait)
result = query.Result;
return true;
}
ASSERT_LOW_LAYER(query.State == GPUQueryDataDX11::End);
auto context = GetIM();
RETRY:
@@ -935,7 +936,7 @@ RETRY:
context->GetData(query.TimerBeginQuery, &timeBegin, sizeof(timeBegin), 0);
context->GetData(query.Query, &timeEnd, sizeof(timeEnd), 0);
if (disjointData.Disjoint == FALSE)
if (disjointData.Disjoint == FALSE && disjointData.Frequency > 0)
{
result = timeEnd > timeBegin ? (timeEnd - timeBegin) * 1000000ull / disjointData.Frequency : 0;
}

View File

@@ -41,7 +41,7 @@ struct GPUQueryDataDX11
ID3D11Query* TimerBeginQuery = nullptr;
ID3D11Query* DisjointQuery = nullptr;
uint64 Result = 0;
enum States { Ready, Active, Finished } State = Ready;
enum States { Ready, Begin, End, Finished } State = Ready;
GPUQueryType Type = GPUQueryType::MAX;
void Release();

View File

@@ -7,14 +7,11 @@
#include "Engine/Core/Log.h"
#include "Engine/Engine/Engine.h"
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Graphics/GPUTimerQuery.h"
#include "Engine/Graphics/GPUContext.h"
RenderStatsData RenderStatsData::Counter;
int32 ProfilerGPU::_depth = 0;
Array<GPUTimerQuery*> ProfilerGPU::_timerQueriesPool;
Array<GPUTimerQuery*> ProfilerGPU::_timerQueriesFree;
bool ProfilerGPU::Enabled = false;
bool ProfilerGPU::EventsEnabled = false;
int32 ProfilerGPU::CurrentBuffer = 0;
@@ -25,11 +22,18 @@ bool ProfilerGPU::EventBuffer::HasData() const
return _isResolved && _data.HasItems();
}
// Ends the timer queries of the events stored in this buffer.
// NOTE(review): this span is a diff rendering, so both the previous signature (EndAll) and the renamed one (EndAllQueries) appear below.
void ProfilerGPU::EventBuffer::EndAll()
void ProfilerGPU::EventBuffer::EndAllQueries()
{
auto context = GPUDevice::Instance->GetMainContext();
auto queries = _data.Get();
for (int32 i = 0; i < _data.Count(); i++)
{
_data[i].Timer->End();
auto& e = queries[i];
// Only events still flagged as active are ended; the flag is cleared before the query is ended on the main context
if (e.QueryActive)
{
e.QueryActive = false;
context->EndQuery(e.Query);
}
}
}
@@ -38,21 +42,21 @@ void ProfilerGPU::EventBuffer::TryResolve()
if (_isResolved || _data.IsEmpty())
return;
// Check all the queries from the back to the front (in some cases inner queries are not finished)
for (int32 i = _data.Count() - 1; i >= 0; i--)
{
if (!_data[i].Timer->HasResult())
return;
}
// Collect queries results and free them
// Collect queries results
PROFILE_MEM(Profiler);
auto device = GPUDevice::Instance;
auto queries = _data.Get();
for (int32 i = 0; i < _data.Count(); i++)
{
auto& e = _data[i];
e.Time = e.Timer->GetResult();
_timerQueriesFree.Add(e.Timer);
e.Timer = nullptr;
auto& e = queries[i];
ASSERT_LOW_LAYER(!e.QueryActive);
uint64 time;
if (device->GetQueryResult(e.Query, time, false))
{
e.Time = (float)time * 0.001f; // Convert to milliseconds
}
else
return; // Skip if one of the queries is not yet ready (frame still in-flight)
}
_isResolved = true;
@@ -81,28 +85,12 @@ void ProfilerGPU::EventBuffer::Clear()
PresentTime = 0.0f;
}
// Returns a timer query for a profiler event.
// Reuses an entry from the free list when one is available; otherwise creates a new query via the GPU device and records it in the pool.
GPUTimerQuery* ProfilerGPU::GetTimerQuery()
{
GPUTimerQuery* result;
if (_timerQueriesFree.HasItems())
{
// Take the query from the end of the free list
result = _timerQueriesFree.Last();
_timerQueriesFree.RemoveLast();
}
else
{
// No free query available: create one and track it in the pool
PROFILE_MEM(Profiler);
result = GPUDevice::Instance->CreateTimerQuery();
_timerQueriesPool.Add(result);
}
return result;
}
int32 ProfilerGPU::BeginEvent(const Char* name)
{
auto context = GPUDevice::Instance->GetMainContext();
#if GPU_ALLOW_PROFILE_EVENTS
if (EventsEnabled)
GPUDevice::Instance->GetMainContext()->EventBegin(name);
context->EventBegin(name);
#endif
if (!Enabled)
return -1;
@@ -110,9 +98,9 @@ int32 ProfilerGPU::BeginEvent(const Char* name)
Event e;
e.Name = name;
e.Stats = RenderStatsData::Counter;
e.Timer = GetTimerQuery();
e.Timer->Begin();
e.Query = context->BeginQuery(GPUQueryType::Timer);
e.Depth = _depth++;
e.QueryActive = true;
auto& buffer = Buffers[CurrentBuffer];
const auto index = buffer.Add(e);
@@ -121,9 +109,10 @@ int32 ProfilerGPU::BeginEvent(const Char* name)
void ProfilerGPU::EndEvent(int32 index)
{
auto context = GPUDevice::Instance->GetMainContext();
#if GPU_ALLOW_PROFILE_EVENTS
if (EventsEnabled)
GPUDevice::Instance->GetMainContext()->EventEnd();
context->EventEnd();
#endif
if (index == -1)
return;
@@ -131,8 +120,9 @@ void ProfilerGPU::EndEvent(int32 index)
auto& buffer = Buffers[CurrentBuffer];
auto e = buffer.Get(index);
e->QueryActive = false;
e->Stats.Mix(RenderStatsData::Counter);
e->Timer->End();
context->EndQuery(e->Query);
}
void ProfilerGPU::BeginFrame()
@@ -155,7 +145,7 @@ void ProfilerGPU::OnPresent()
{
// End all current frame queries to prevent invalid event duration values
auto& buffer = Buffers[CurrentBuffer];
buffer.EndAll();
buffer.EndAllQueries();
}
void ProfilerGPU::OnPresentTime(float time)
@@ -211,8 +201,6 @@ bool ProfilerGPU::GetLastFrameData(float& drawTimeMs, float& presentTimeMs, Rend
// Disposes the profiler's timer query containers.
void ProfilerGPU::Dispose()
{
// NOTE(review): presumably ClearDelete() also deletes the pooled query objects, while Clear() only empties the free list - confirm against the Array API
_timerQueriesPool.ClearDelete();
_timerQueriesFree.Clear();
}
#endif

View File

@@ -7,8 +7,6 @@
#include "Engine/Scripting/ScriptingType.h"
#include "RenderStats.h"
class GPUTimerQuery;
#if COMPILE_WITH_PROFILER
// Profiler events buffers capacity (tweaked manually)
@@ -38,7 +36,7 @@ public:
/// <summary>
/// The timer query used to get the exact event time on a GPU. Assigned and managed by the internal profiler layer.
/// </summary>
API_FIELD() GPUTimerQuery* Timer;
API_FIELD() uint64 Query;
/// <summary>
/// The rendering stats for this event. When event is active it holds the stats on event begin.
@@ -54,6 +52,11 @@ public:
/// The event depth. Value 0 is used for the root events.
/// </summary>
API_FIELD() int32 Depth;
/// <summary>
/// True if the event's timer query is active.
/// </summary>
API_FIELD() bool QueryActive;
};
/// <summary>
@@ -84,7 +87,7 @@ public:
/// <summary>
/// Ends all used timer queries.
/// </summary>
void EndAll();
void EndAllQueries();
/// <summary>
/// Tries to resolve this frame. Skips if already resolved or has no collected events.
@@ -123,11 +126,6 @@ public:
private:
static int32 _depth;
static Array<GPUTimerQuery*> _timerQueriesPool;
static Array<GPUTimerQuery*> _timerQueriesFree;
static GPUTimerQuery* GetTimerQuery();
public:
/// <summary>
/// True if GPU profiling is enabled, otherwise false to disable events collecting and GPU timer queries usage. Can be changed during rendering.