Add new GPU Query API that is lightweight and supports occlusion queries

This commit is contained in:
Wojtek Figat
2026-01-16 10:40:30 +01:00
parent d2d7a871ce
commit 9ac231c403
31 changed files with 829 additions and 254 deletions

View File

@@ -566,6 +566,81 @@ void GPUContextDX11::DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint
RENDER_STAT_DRAW_CALL(0, 0);
}
// Begins a GPU query of the given type and returns its packed 64-bit ID (see GPUQueryDX11).
// Query objects are pooled on the device: a ready query of the same type is reused when available,
// otherwise new D3D11 query objects are created. Returns 0 when query creation fails.
uint64 GPUContextDX11::BeginQuery(GPUQueryType type)
{
    // Allocate a pooled query
    uint16 queryIndex;
    static_assert(ARRAY_COUNT(_device->_readyQueries) == (int32)GPUQueryType::MAX, "Invalid query types count");
    if (_device->_readyQueries[(int32)type].HasItems())
    {
        // Use query from cached list
        queryIndex = _device->_readyQueries[(int32)type].Pop();
    }
    else
    {
        // Add a new query
        queryIndex = _device->_queries.Count();
        auto& query = _device->_queries.AddOne();
        query.Type = type;

        // Pick the D3D11 query kind that matches the requested type.
        // Timestamp queries do not support Begin() and return a GPU clock value, so they must not be used for occlusion.
        D3D11_QUERY_DESC queryDesc;
        queryDesc.Query = type == GPUQueryType::Timer ? D3D11_QUERY_TIMESTAMP : D3D11_QUERY_OCCLUSION;
        queryDesc.MiscFlags = 0;
        HRESULT hr = _device->GetDevice()->CreateQuery(&queryDesc, &query.Query);
        LOG_DIRECTX_RESULT_WITH_RETURN(hr, 0);
        if (type == GPUQueryType::Timer)
        {
            // Timer queries need additional one for begin and end disjoint
            hr = _device->GetDevice()->CreateQuery(&queryDesc, &query.TimerBeginQuery);
            LOG_DIRECTX_RESULT_WITH_RETURN(hr, 0);
            queryDesc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
            hr = _device->GetDevice()->CreateQuery(&queryDesc, &query.DisjointQuery);
            LOG_DIRECTX_RESULT_WITH_RETURN(hr, 0);
        }
    }

    // Pack the query identity into the 64-bit handle returned to the caller
    static_assert(sizeof(GPUQueryDX11) == sizeof(uint64), "Invalid query size.");
    GPUQueryDX11 q = {};
    q.Type = (uint16)type;
    q.Index = queryIndex;
    q.Padding = 1; // Ensure Raw is never 0, even for the first query

    // Begin query
    {
        auto& query = _device->_queries[queryIndex];
        ASSERT_LOW_LAYER(query.State == GPUQueryDataDX11::Ready);
        ASSERT_LOW_LAYER(query.Type == type);
        query.State = GPUQueryDataDX11::Active;
        auto context = _device->GetIM();
        if (type == GPUQueryType::Timer)
        {
            // Open the disjoint scope and record the starting timestamp (timestamp queries only use End)
            context->Begin(query.DisjointQuery);
            context->End(query.TimerBeginQuery);
        }
        else
        {
            context->Begin(query.Query);
        }
    }
    return q.Raw;
}
// Ends the GPU query identified by the packed ID returned from BeginQuery.
// A zero ID is ignored (BeginQuery never produces 0 for a valid query because it sets a non-zero Padding).
void GPUContextDX11::EndQuery(uint64 queryID)
{
    if (queryID == 0)
        return;

    // Unpack the handle to locate the pooled query data
    GPUQueryDX11 handle;
    handle.Raw = queryID;
    auto& data = _device->_queries[handle.Index];
    auto immediateContext = _device->GetIM();

    // Main query is ended first; for timers this records the end timestamp
    immediateContext->End(data.Query);

    // Timer queries additionally close the disjoint scope opened in BeginQuery
    const bool isTimer = handle.Type == (uint16)GPUQueryType::Timer;
    if (isTimer)
        immediateContext->End(data.DisjointQuery);
}
void GPUContextDX11::SetViewport(const Viewport& viewport)
{
_context->RSSetViewports(1, (D3D11_VIEWPORT*)&viewport);

View File

@@ -154,6 +154,8 @@ public:
void DrawIndexedInstanced(uint32 indicesCount, uint32 instanceCount, int32 startInstance, int32 startVertex, int32 startIndex) override;
void DrawInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override;
void DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override;
uint64 BeginQuery(GPUQueryType type) override;
void EndQuery(uint64 queryID) override;
void SetViewport(const Viewport& viewport) override;
void SetScissor(const Rectangle& scissorRect) override;
GPUPipelineState* GetState() const override;

View File

@@ -175,6 +175,15 @@ GPUVertexLayoutDX11::GPUVertexLayoutDX11(GPUDeviceDX11* device, const Elements&
}
}
// Releases all D3D11 query objects held by this entry and resets it to the initial (Ready, no result) state.
void GPUQueryDataDX11::Release()
{
    // Reset cached bookkeeping
    State = Ready;
    Result = 0;

    // Drop the GPU objects (the auxiliary timer queries may be null for non-timer entries)
    SAFE_RELEASE(DisjointQuery);
    SAFE_RELEASE(TimerBeginQuery);
    SAFE_RELEASE(Query);
}
GPUDevice* GPUDeviceDX11::Create()
{
// Configuration
@@ -801,6 +810,11 @@ void GPUDeviceDX11::Dispose()
{
SAFE_RELEASE(RasterizerStates[i]);
}
for (auto& query : _queries)
query.Release();
_queries.Clear();
for (auto& e : _readyQueries)
e.Clear();
// Clear DirectX stuff
SAFE_DELETE(_mainContext);
@@ -877,6 +891,88 @@ void GPUDeviceDX11::DrawEnd()
infoQueue->ClearStoredMessages();
}
#endif
// Auto-return finished queries back to the pool
auto* queries = _queries.Get();
int32 queriesCount = _queries.Count();
for (int32 i = 0; i < queriesCount; i++)
{
auto& query = queries[i];
if (query.State == GPUQueryDataDX11::Finished)
{
query.State = GPUQueryDataDX11::Ready;
query.Result = 0;
_readyQueries[(int32)query.Type].Push(i);
}
}
}
// Reads the result of the query identified by the packed ID.
// Returns true and writes 'result' when data is available; returns false for a zero ID or when data is not ready and 'wait' is false.
// When 'wait' is true the call spins (yielding the thread) until the GPU data becomes available.
// Timer results are converted using the disjoint query frequency (ticks * 1000000 / Frequency).
bool GPUDeviceDX11::GetQueryResult(uint64 queryID, uint64& result, bool wait)
{
    if (queryID == 0)
        return false;

    GPUQueryDX11 handle;
    handle.Raw = queryID;
    auto& data = _queries[handle.Index];

    // Result was already fetched earlier - reuse the cached value
    if (data.State == GPUQueryDataDX11::Finished)
    {
        result = data.Result;
        return true;
    }

    auto context = GetIM();
    const bool isTimer = handle.Type == (uint16)GPUQueryType::Timer;
    bool hasData;
    for (;;)
    {
        if (!isTimer)
        {
            // Non-timer queries write their 64-bit value directly
            hasData = context->GetData(data.Query, &result, sizeof(uint64), 0) == S_OK;
        }
        else
        {
            // The disjoint query gates the timestamps and provides the clock frequency
            D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjointData;
            hasData = context->GetData(data.DisjointQuery, &disjointData, sizeof(disjointData), 0) == S_OK;
            if (hasData)
            {
                UINT64 timeBegin = 0, timeEnd = 0;
                context->GetData(data.TimerBeginQuery, &timeBegin, sizeof(timeBegin), 0);
                context->GetData(data.Query, &timeEnd, sizeof(timeEnd), 0);
                if (disjointData.Disjoint != FALSE)
                {
                    // Timestamps are not comparable for this interval
                    result = 0;
#if !BUILD_RELEASE
                    static bool LogOnce = true;
                    if (LogOnce)
                    {
                        LogOnce = false;
                        LOG(Warning, "Unreliable GPU timer query detected.");
                    }
#endif
                }
                else
                {
                    result = timeEnd > timeBegin ? (timeEnd - timeBegin) * 1000000ull / disjointData.Frequency : 0;
                }
            }
        }

        // Stop when data arrived or the caller does not want to block
        if (hasData || !wait)
            break;

        // Wait until data is ready
        Platform::Yield();
    }

    if (!hasData)
        return false;

    // Query has valid data now (until auto-recycle back to pool)
    data.State = GPUQueryDataDX11::Finished;
    data.Result = result;
    return true;
}
GPUTexture* GPUDeviceDX11::CreateTexture(const StringView& name)

View File

@@ -15,6 +15,38 @@ enum class StencilOperation : byte;
class GPUContextDX11;
class GPUSwapChainDX11;
/// <summary>
/// GPU query ID packed into 64-bits. The named fields alias the Raw value that is handed out to callers as the query ID.
/// </summary>
struct GPUQueryDX11
{
union
{
struct
{
// Query type stored as the integer value of GPUQueryType.
uint16 Type;
// Index of the pooled query entry in the device queries array.
uint16 Index;
// Set to a non-zero value when a query is begun so that Raw is never 0 (0 is treated as an invalid ID).
uint32 Padding;
};
// The whole ID as a single 64-bit value.
uint64 Raw;
};
};
/// <summary>
/// GPU query data (reusable via pooling). Holds the D3D11 query objects together with the cached result and lifecycle state.
/// </summary>
struct GPUQueryDataDX11
{
// Main query object: begun/ended for non-timer queries; for timer queries it is only ended (end timestamp).
ID3D11Query* Query = nullptr;
// Timer-only query that is ended when the timer begins (begin timestamp).
ID3D11Query* TimerBeginQuery = nullptr;
// Timer-only disjoint query that brackets the timer and supplies the clock frequency.
ID3D11Query* DisjointQuery = nullptr;
// Cached result stored once the query data has been read back.
uint64 Result = 0;
// Lifecycle: Ready (free to begin), Active (begun), Finished (result cached, waiting to be returned to the pool).
enum States { Ready, Active, Finished } State = Ready;
// Type the query objects were created for (MAX until assigned).
GPUQueryType Type = GPUQueryType::MAX;
// Releases the query objects and resets Result and State.
void Release();
};
/// <summary>
/// Implementation of Graphics Device for DirectX 11 backend.
/// </summary>
@@ -60,6 +92,8 @@ private:
GPUContextDX11* _mainContext = nullptr;
bool _allowTearing = false;
GPUBuffer* _dummyVB = nullptr;
Array<GPUQueryDataDX11> _queries;
Array<uint16> _readyQueries[2]; // Timer and Occlusion
// Static Samplers
ID3D11SamplerState* _samplerLinearClamp = nullptr;
@@ -124,6 +158,7 @@ public:
void Dispose() override;
void WaitForGPU() override;
void DrawEnd() override;
bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) override;
GPUTexture* CreateTexture(const StringView& name) override;
GPUShader* CreateShader(const StringView& name) override;
GPUPipelineState* CreatePipelineState() override;

View File

@@ -1275,6 +1275,31 @@ void GPUContextDX12::DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint
RENDER_STAT_DRAW_CALL(0, 0);
}
// Begins a GPU query of the given type and returns its packed 64-bit ID (0 when the device could not allocate one).
uint64 GPUContextDX12::BeginQuery(GPUQueryType type)
{
    auto allocated = _device->AllocQuery(type);
    if (!allocated.Raw)
        return allocated.Raw;

    auto queryHeap = _device->QueryHeaps[allocated.Heap];
    if (type != GPUQueryType::Timer)
    {
        _commandList->BeginQuery(queryHeap->QueryHeap, queryHeap->QueryType, allocated.Element);
    }
    else
    {
        // Timer queries call End twice on different queries to calculate duration between GPU time clocks
        _commandList->EndQuery(queryHeap->QueryHeap, queryHeap->QueryType, allocated.SecondaryElement);
    }
    return allocated.Raw;
}
// Ends the GPU query identified by the packed ID returned from BeginQuery. A zero ID is ignored.
void GPUContextDX12::EndQuery(uint64 queryID)
{
    if (!queryID)
        return;

    // Unpack the handle to find the owning heap and the element to write
    GPUQueryDX12 handle;
    handle.Raw = queryID;
    auto queryHeap = _device->QueryHeaps[handle.Heap];
    _commandList->EndQuery(queryHeap->QueryHeap, queryHeap->QueryType, handle.Element);
}
void GPUContextDX12::SetViewport(const Viewport& viewport)
{
_commandList->RSSetViewports(1, (D3D12_VIEWPORT*)&viewport);

View File

@@ -197,6 +197,8 @@ public:
void DrawIndexedInstanced(uint32 indicesCount, uint32 instanceCount, int32 startInstance, int32 startVertex, int32 startIndex) override;
void DrawInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override;
void DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override;
uint64 BeginQuery(GPUQueryType type) override;
void EndQuery(uint64 queryID) override;
void SetViewport(const Viewport& viewport) override;
void SetScissor(const Rectangle& scissorRect) override;
GPUPipelineState* GetState() const override;

View File

@@ -555,7 +555,6 @@ GPUDeviceDX12::GPUDeviceDX12(IDXGIFactory4* dxgiFactory, GPUAdapterDX* adapter)
, _commandQueue(nullptr)
, _mainContext(nullptr)
, UploadBuffer(this)
, TimestampQueryHeap(this, D3D12_QUERY_HEAP_TYPE_TIMESTAMP, DX12_BACK_BUFFER_COUNT * 1024)
, Heap_CBV_SRV_UAV(this, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 4 * 1024, false)
, Heap_RTV(this, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 1 * 1024, false)
, Heap_DSV(this, D3D12_DESCRIPTOR_HEAP_TYPE_DSV, 64, false)
@@ -883,9 +882,6 @@ bool GPUDeviceDX12::Init()
VALIDATE_DIRECTX_CALL(_device->CreateRootSignature(0, signatureBlob->GetBufferPointer(), signatureBlob->GetBufferSize(), IID_PPV_ARGS(&_rootSignature)));
}
if (TimestampQueryHeap.Init())
return true;
// Cached command signatures
{
DrawIndirectCommandSignature = New<CommandSignatureDX12>(this, 1);
@@ -927,8 +923,9 @@ void GPUDeviceDX12::RenderEnd()
// Base
GPUDeviceDX::RenderEnd();
// Resolve the timestamp queries
TimestampQueryHeap.EndQueryBatchAndResolveQueryData(_mainContext);
// Resolve the queries
for (auto heap : QueryHeaps)
heap->EndQueryBatchAndResolveQueryData(_mainContext);
}
GPUDeviceDX12::~GPUDeviceDX12()
@@ -957,11 +954,47 @@ ID3D12CommandQueue* GPUDeviceDX12::GetCommandQueueDX12() const
return _commandQueue->GetCommandQueue();
}
// Allocates query element(s) of the given type from a query heap and returns the packed handle.
// Reuses the first heap of a matching type that can fit the request, otherwise creates a new heap.
// Returns a zeroed handle (Raw == 0) when a new heap fails to initialize.
GPUQueryDX12 GPUDeviceDX12::AllocQuery(GPUQueryType type)
{
    static_assert(sizeof(GPUQueryDX12) == sizeof(uint64), "Invalid DX12 query size.");
    const int32 elementsNeeded = GPUQueryDX12::GetQueriesCount(type);

    // Get query heap with free space
    int32 selected = 0;
    while (selected < QueryHeaps.Count())
    {
        auto candidate = QueryHeaps[selected];
        if (candidate->Type == type && candidate->CanAlloc(elementsNeeded))
            break;
        selected++;
    }

    // No suitable heap found - allocate a new query heap (it lands at index 'selected')
    if (selected == QueryHeaps.Count())
    {
        auto created = New<QueryHeapDX12>();
        int32 capacity = type == GPUQueryType::Occlusion ? 4096 : 1024;
        if (created->Init(this, type, capacity))
        {
            // Init reports failure with true
            Delete(created);
            return {};
        }
        QueryHeaps.Add(created);
    }

    // Alloc query from the heap
    GPUQueryDX12 handle = {};
    handle.Type = (uint16)type;
    handle.Heap = selected;
    auto target = QueryHeaps[selected];
    target->Alloc(handle.Element);
    if (elementsNeeded == 2)
        target->Alloc(handle.SecondaryElement);
    return handle;
}
void GPUDeviceDX12::Dispose()
{
GPUDeviceLock lock(this);
// Check if has been disposed already
if (_state == DeviceState::Disposed)
return;
@@ -982,7 +1015,12 @@ void GPUDeviceDX12::Dispose()
for (auto& srv : _nullSrv)
srv.Release();
_nullUav.Release();
TimestampQueryHeap.Destroy();
for (auto* heap : QueryHeaps)
{
heap->Destroy();
Delete(heap);
}
QueryHeaps.Clear();
DX_SAFE_RELEASE_CHECK(_rootSignature, 0);
Heap_CBV_SRV_UAV.ReleaseGPU();
Heap_RTV.ReleaseGPU();
@@ -1012,6 +1050,28 @@ void GPUDeviceDX12::WaitForGPU()
_commandQueue->WaitForGPU();
}
// Reads the result of the query identified by the packed ID.
// Returns false for a zero (invalid) ID, or when 'wait' is false and the query data is not ready yet.
// Otherwise resolves the query data from its heap, writes 'result' and returns true.
// Timer results are the difference between the end and begin timestamps scaled by 1000000 / GPU timestamp frequency.
bool GPUDeviceDX12::GetQueryResult(uint64 queryID, uint64& result, bool wait)
{
    // Zero is the invalid handle (returned when query allocation fails), it does not reference any heap
    if (!queryID)
        return false;
    GPUQueryDX12 query;
    query.Raw = queryID;
    auto heap = QueryHeaps[query.Heap];
    int32 count = GPUQueryDX12::GetQueriesCount((GPUQueryType)query.Type);

    // Non-blocking path: every element used by the query must be ready (the secondary one exists only for 2-element queries)
    if (!wait && (!heap->IsReady(query.Element) || (count == 2 && !heap->IsReady(query.SecondaryElement))))
        return false;

    if (query.Type == (uint16)GPUQueryType::Timer)
    {
        // SecondaryElement holds the begin timestamp, Element holds the end timestamp
        uint64 timestampFrequency = 1;
        const uint64 timeBegin = *(uint64*)heap->Resolve(query.SecondaryElement);
        const uint64 timeEnd = *(uint64*)heap->Resolve(query.Element, &timestampFrequency);

        // Guard against a missing frequency to avoid division by zero
        result = timeEnd > timeBegin && timestampFrequency != 0 ? (timeEnd - timeBegin) * 1000000ull / timestampFrequency : 0;
    }
    else
    {
        result = *(uint64*)heap->Resolve(query.Element);
    }
    return true;
}
GPUTexture* GPUDeviceDX12::CreateTexture(const StringView& name)
{
PROFILE_MEM(GraphicsTextures);

View File

@@ -65,21 +65,13 @@ public:
~GPUDeviceDX12();
public:
/// <summary>
/// Data uploading utility via pages.
/// </summary>
UploadBufferDX12 UploadBuffer;
/// <summary>
/// The timestamp queries heap.
/// </summary>
QueryHeapDX12 TimestampQueryHeap;
bool AllowTearing = false;
CommandSignatureDX12* DispatchIndirectCommandSignature = nullptr;
CommandSignatureDX12* DrawIndexedIndirectCommandSignature = nullptr;
CommandSignatureDX12* DrawIndirectCommandSignature = nullptr;
GPUBuffer* DummyVB = nullptr;
Array<QueryHeapDX12*, InlinedAllocation<8>> QueryHeaps;
D3D12_CPU_DESCRIPTOR_HANDLE NullSRV(D3D12_SRV_DIMENSION dimension) const;
D3D12_CPU_DESCRIPTOR_HANDLE NullUAV() const;
@@ -136,6 +128,8 @@ public:
return _mainContext;
}
GPUQueryDX12 AllocQuery(GPUQueryType type);
public:
DescriptorHeapPoolDX12 Heap_CBV_SRV_UAV;
@@ -185,6 +179,7 @@ public:
void RenderEnd() override;
void Dispose() final override;
void WaitForGPU() override;
bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) override;
GPUTexture* CreateTexture(const StringView& name) override;
GPUShader* CreateShader(const StringView& name) override;
GPUPipelineState* CreatePipelineState() override;

View File

@@ -20,9 +20,7 @@ void GPUTimerQueryDX12::OnReleaseGPU()
void GPUTimerQueryDX12::Begin()
{
const auto context = _device->GetMainContextDX12();
auto& heap = _device->TimestampQueryHeap;
heap.EndQuery(context, _begin);
_query = context->BeginQuery(GPUQueryType::Timer);
_hasResult = false;
_endCalled = false;
}
@@ -31,14 +29,8 @@ void GPUTimerQueryDX12::End()
{
if (_endCalled)
return;
const auto context = _device->GetMainContextDX12();
auto& heap = _device->TimestampQueryHeap;
heap.EndQuery(context, _end);
const auto queue = _device->GetCommandQueue()->GetCommandQueue();
VALIDATE_DIRECTX_CALL(queue->GetTimestampFrequency(&_gpuFrequency));
context->EndQuery(_query);
_endCalled = true;
}
@@ -48,33 +40,16 @@ bool GPUTimerQueryDX12::HasResult()
return false;
if (_hasResult)
return true;
auto& heap = _device->TimestampQueryHeap;
return heap.IsReady(_end) && heap.IsReady(_begin);
uint64 result;
return _device->GetQueryResult(_query, result, false);
}
float GPUTimerQueryDX12::GetResult()
{
if (_hasResult)
{
return _timeDelta;
}
const uint64 timeBegin = *(uint64*)_device->TimestampQueryHeap.ResolveQuery(_begin);
const uint64 timeEnd = *(uint64*)_device->TimestampQueryHeap.ResolveQuery(_end);
// Calculate event duration in milliseconds
if (timeEnd > timeBegin)
{
const uint64 delta = timeEnd - timeBegin;
const double frequency = double(_gpuFrequency);
_timeDelta = static_cast<float>((delta / frequency) * 1000.0);
}
else
{
_timeDelta = 0.0f;
}
uint64 result;
_timeDelta = _device->GetQueryResult(_query, result, true) ? (float)((double)result / 1000.0) : 0.0f;
_hasResult = true;
return _timeDelta;
}

View File

@@ -17,9 +17,7 @@ private:
bool _hasResult = false;
bool _endCalled = false;
float _timeDelta = 0.0f;
uint64 _gpuFrequency = 0;
QueryHeapDX12::ElementHandle _begin;
QueryHeapDX12::ElementHandle _end;
uint64 _query = 0;
public:

View File

@@ -7,42 +7,34 @@
#include "GPUContextDX12.h"
#include "../RenderToolsDX.h"
QueryHeapDX12::QueryHeapDX12(GPUDeviceDX12* device, const D3D12_QUERY_HEAP_TYPE& queryHeapType, int32 queryHeapCount)
: _device(device)
, _queryHeap(nullptr)
, _resultBuffer(nullptr)
, _queryHeapType(queryHeapType)
, _currentIndex(0)
, _queryHeapCount(queryHeapCount)
bool QueryHeapDX12::Init(GPUDeviceDX12* device, GPUQueryType type, uint32 size)
{
if (queryHeapType == D3D12_QUERY_HEAP_TYPE_OCCLUSION)
{
_resultSize = sizeof(uint64);
_queryType = D3D12_QUERY_TYPE_OCCLUSION;
}
else if (queryHeapType == D3D12_QUERY_HEAP_TYPE_TIMESTAMP)
{
_resultSize = sizeof(uint64);
_queryType = D3D12_QUERY_TYPE_TIMESTAMP;
}
else
{
MISSING_CODE("Not support D3D12 query heap type.");
}
}
bool QueryHeapDX12::Init()
{
_resultData.Resize(_resultSize * _queryHeapCount);
// Create the query heap
D3D12_QUERY_HEAP_DESC heapDesc;
heapDesc.Type = _queryHeapType;
Type = type;
_device = device;
_queryHeapCount = size;
D3D12_QUERY_HEAP_DESC heapDesc = {};
heapDesc.Count = _queryHeapCount;
heapDesc.NodeMask = 0;
HRESULT result = _device->GetDevice()->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&_queryHeap));
switch (type)
{
case GPUQueryType::Timer:
_resultSize = sizeof(uint64);
QueryType = D3D12_QUERY_TYPE_TIMESTAMP;
heapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
break;
case GPUQueryType::Occlusion:
_resultSize = sizeof(uint64);
QueryType = D3D12_QUERY_TYPE_OCCLUSION;
heapDesc.Type = D3D12_QUERY_HEAP_TYPE_OCCLUSION;
break;
case GPUQueryType::MAX:
return true;
}
_resultData.Resize(_resultSize * _queryHeapCount);
HRESULT result = _device->GetDevice()->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&QueryHeap));
LOG_DIRECTX_RESULT_WITH_RETURN(result, true);
DX_SET_DEBUG_NAME(_queryHeap, "Query Heap");
DX_SET_DEBUG_NAME(QueryHeap, "Query Heap");
// Create the result buffer
D3D12_HEAP_PROPERTIES heapProperties;
@@ -77,8 +69,8 @@ bool QueryHeapDX12::Init()
void QueryHeapDX12::Destroy()
{
SAFE_RELEASE(_resultBuffer);
SAFE_RELEASE(_queryHeap);
_currentBatch.Clear();
SAFE_RELEASE(QueryHeap);
_currentBatch = QueryBatch();
_resultData.SetCapacity(0);
}
@@ -92,45 +84,36 @@ void QueryHeapDX12::EndQueryBatchAndResolveQueryData(GPUContextDX12* context)
_currentBatch.Open = false;
// Resolve the batch
const int32 offset = _currentBatch.Start * _resultSize;
context->GetCommandList()->ResolveQueryData(_queryHeap, _queryType, _currentBatch.Start, _currentBatch.Count, _resultBuffer, offset);
_currentBatch.Sync = _device->GetCommandQueue()->GetSyncPoint();
const uint32 offset = _currentBatch.Start * _resultSize;
context->GetCommandList()->ResolveQueryData(QueryHeap, QueryType, _currentBatch.Start, _currentBatch.Count, _resultBuffer, offset);
const auto queue = _device->GetCommandQueue();
_currentBatch.Sync = queue->GetSyncPoint();
// Get GPU clock frequency for timer queries
if (Type == GPUQueryType::Timer)
{
VALIDATE_DIRECTX_CALL(queue->GetCommandQueue()->GetTimestampFrequency(&_currentBatch.TimestampFrequency));
}
// Begin a new query batch
_batches.Add(_currentBatch);
StartQueryBatch();
}
void QueryHeapDX12::AllocQuery(GPUContextDX12* context, ElementHandle& handle)
// Checks whether 'count' more elements fit into the currently open batch without running past the heap capacity.
bool QueryHeapDX12::CanAlloc(int32 count) const
{
    // Allocation is only possible while a batch is open
    if (!_currentBatch.Open)
        return false;
    return _currentIndex + count <= GetQueryHeapCount();
}
void QueryHeapDX12::Alloc(ElementHandle& handle)
{
ASSERT(_currentBatch.Open);
// Check if need to start from the buffer head
if (_currentIndex >= GetQueryHeapCount())
{
// We're in the middle of a batch, but we're at the end of the heap so split the batch in two
EndQueryBatchAndResolveQueryData(context);
}
// Allocate element into the current batch
handle = _currentIndex++;
_currentBatch.Count++;
}
void QueryHeapDX12::BeginQuery(GPUContextDX12* context, ElementHandle& handle)
{
AllocQuery(context, handle);
context->GetCommandList()->BeginQuery(_queryHeap, _queryType, handle);
}
void QueryHeapDX12::EndQuery(GPUContextDX12* context, ElementHandle& handle)
{
AllocQuery(context, handle);
context->GetCommandList()->EndQuery(_queryHeap, _queryType, handle);
}
bool QueryHeapDX12::IsReady(ElementHandle& handle)
{
// Current batch is not ready (not ended)
@@ -150,7 +133,7 @@ bool QueryHeapDX12::IsReady(ElementHandle& handle)
return true;
}
void* QueryHeapDX12::ResolveQuery(ElementHandle& handle)
void* QueryHeapDX12::Resolve(ElementHandle& handle, uint64* timestampFrequency)
{
// Prevent queries from the current batch
ASSERT(!_currentBatch.ContainsElement(handle));
@@ -192,10 +175,15 @@ void* QueryHeapDX12::ResolveQuery(ElementHandle& handle)
// All elements got its results so we can remove this batch
_batches.RemoveAt(i);
// Cache timestamps frequency for later
_timestampFrequency = batch.TimestampFrequency;
break;
}
}
if (timestampFrequency)
*timestampFrequency = _timestampFrequency;
return _resultData.Get() + handle * _resultSize;
}
@@ -204,7 +192,7 @@ void QueryHeapDX12::StartQueryBatch()
ASSERT(!_currentBatch.Open);
// Clear the current batch
_currentBatch.Clear();
_currentBatch = QueryBatch();
// Loop active index on overflow
if (_currentIndex >= GetQueryHeapCount())

View File

@@ -10,6 +10,31 @@ class GPUContextDX12;
class GPUBuffer;
#include "CommandQueueDX12.h"
#include "Engine/Graphics/Enums.h"
/// <summary>
/// GPU query ID packed into 64-bits. The named fields alias the Raw value that is handed out to callers as the query ID.
/// </summary>
struct GPUQueryDX12
{
union
{
struct
{
// Query type stored as the integer value of GPUQueryType.
uint16 Type;
// Index of the owning query heap in the device query heaps list.
uint16 Heap;
// Primary element within the heap (for timer queries: the end timestamp).
uint16 Element;
// Second element within the heap, allocated only for queries that need two elements (for timer queries: the begin timestamp).
uint16 SecondaryElement;
};
// The whole ID as a single 64-bit value (0 means no query).
uint64 Raw;
};
// Gets the number of heap elements a query of the given type occupies.
static int32 GetQueriesCount(GPUQueryType type)
{
// Timer queries use two timestamp elements (begin and end) and report the difference between them
return type == GPUQueryType::Timer ? 2 : 1;
}
};
/// <summary>
/// GPU queries heap for DirectX 12 backend.
@@ -17,14 +42,12 @@ class GPUBuffer;
class QueryHeapDX12
{
public:
/// <summary>
/// The query element handle.
/// </summary>
typedef int32 ElementHandle;
typedef uint16 ElementHandle;
private:
struct QueryBatch
{
/// <summary>
@@ -35,71 +58,54 @@ private:
/// <summary>
/// The first element in the batch (inclusive).
/// </summary>
int32 Start = 0;
uint32 Start = 0;
/// <summary>
/// The amount of elements added to this batch.
/// </summary>
int32 Count = 0;
uint32 Count = 0;
/// <summary>
/// The GPU clock frequency for timer queries.
/// </summary>
uint64 TimestampFrequency = 0;
/// <summary>
/// Is the batch still open for more begin/end queries.
/// </summary>
bool Open = false;
/// <summary>
/// Clears this batch.
/// </summary>
inline void Clear()
{
Sync = SyncPointDX12();
Start = 0;
Count = 0;
Open = false;
}
/// <summary>
/// Checks if this query batch contains a given element.
/// </summary>
/// <param name="elementIndex">The index of the element.</param>
/// <returns>True if element is in this query, otherwise false.</returns>
bool ContainsElement(int32 elementIndex) const
bool ContainsElement(uint32 elementIndex) const
{
return elementIndex >= Start && elementIndex < Start + Count;
}
};
private:
GPUDeviceDX12* _device;
ID3D12QueryHeap* _queryHeap;
ID3D12Resource* _resultBuffer;
D3D12_QUERY_TYPE _queryType;
D3D12_QUERY_HEAP_TYPE _queryHeapType;
int32 _currentIndex;
int32 _resultSize;
int32 _queryHeapCount;
GPUDeviceDX12* _device = nullptr;
ID3D12Resource* _resultBuffer = nullptr;
uint32 _currentIndex = 0;
uint32 _resultSize = 0;
uint32 _queryHeapCount = 0;
QueryBatch _currentBatch;
Array<QueryBatch> _batches;
Array<byte> _resultData;
uint64 _timestampFrequency;
public:
/// <summary>
/// Initializes a new instance of the <see cref="QueryHeapDX12"/> class.
/// </summary>
/// <param name="device">The device.</param>
/// <param name="queryHeapType">Type of the query heap.</param>
/// <param name="queryHeapCount">The query heap count.</param>
QueryHeapDX12(GPUDeviceDX12* device, const D3D12_QUERY_HEAP_TYPE& queryHeapType, int32 queryHeapCount);
public:
/// <summary>
/// Initializes this instance.
/// </summary>
/// <param name="device">The device.</param>
/// <param name="type">Type of the query heap.</param>
/// <param name="size">The size of the heap.</param>
/// <returns>True if failed, otherwise false.</returns>
bool Init();
bool Init(GPUDeviceDX12* device, GPUQueryType type, uint32 size);
/// <summary>
/// Destroys this instance.
@@ -107,12 +113,14 @@ public:
void Destroy();
public:
GPUQueryType Type;
ID3D12QueryHeap* QueryHeap = nullptr;
D3D12_QUERY_TYPE QueryType = D3D12_QUERY_TYPE_OCCLUSION;
/// <summary>
/// Gets the query heap capacity.
/// </summary>
/// <returns>The queries count.</returns>
FORCE_INLINE int32 GetQueryHeapCount() const
FORCE_INLINE uint32 GetQueryHeapCount() const
{
return _queryHeapCount;
}
@@ -120,8 +128,7 @@ public:
/// <summary>
/// Gets the size of the result value (in bytes).
/// </summary>
/// <returns>The size of the query result value (in bytes).</returns>
FORCE_INLINE int32 GetResultSize() const
FORCE_INLINE uint32 GetResultSize() const
{
return _resultSize;
}
@@ -129,40 +136,30 @@ public:
/// <summary>
/// Gets the result buffer (CPU readable via Map/Unmap).
/// </summary>
/// <returns>The query results buffer.</returns>
FORCE_INLINE ID3D12Resource* GetResultBuffer() const
{
return _resultBuffer;
}
public:
/// <summary>
/// Stops tracking the current batch of begin/end query calls that will be resolved together. This implicitly starts a new batch.
/// </summary>
/// <param name="context">The context.</param>
void EndQueryBatchAndResolveQueryData(GPUContextDX12* context);
/// <summary>
/// Checks if can alloc a new query (without rolling the existing batch).
/// </summary>
/// <param name="count">How many elements to allocate?</param>
/// <returns>True if can alloc new query within the same batch.</returns>
bool CanAlloc(int32 count = 1) const;
/// <summary>
/// Allocates the query heap element.
/// </summary>
/// <param name="context">The context.</param>
/// <param name="handle">The result handle.</param>
void AllocQuery(GPUContextDX12* context, ElementHandle& handle);
/// <summary>
/// Calls BeginQuery on command list for the given query heap slot.
/// </summary>
/// <param name="context">The context.</param>
/// <param name="handle">The query handle.</param>
void BeginQuery(GPUContextDX12* context, ElementHandle& handle);
/// <summary>
/// Calls EndQuery on command list for the given query heap slot.
/// </summary>
/// <param name="context">The context.</param>
/// <param name="handle">The query handle.</param>
void EndQuery(GPUContextDX12* context, ElementHandle& handle);
void Alloc(ElementHandle& handle);
/// <summary>
/// Determines whether the specified query handle is ready to read data (command list has been executed by the GPU).
@@ -175,11 +172,11 @@ public:
/// Resolves the query (or skips if already resolved).
/// </summary>
/// <param name="handle">The result handle.</param>
/// <param name="timestampFrequency">The optional pointer to GPU timestamps frequency value to store.</param>
/// <returns>The pointer to the resolved query data.</returns>
void* ResolveQuery(ElementHandle& handle);
void* Resolve(ElementHandle& handle, uint64* timestampFrequency = nullptr);
private:
/// <summary>
/// Starts tracking a new batch of begin/end query calls that will be resolved together
/// </summary>