Add new GPU Query API that is lightweight and supports occlusion queries

This commit is contained in:
Wojtek Figat
2026-01-16 10:40:30 +01:00
parent d2d7a871ce
commit 9ac231c403
31 changed files with 829 additions and 254 deletions

View File

@@ -7,42 +7,34 @@
#include "GPUContextDX12.h"
#include "../RenderToolsDX.h"
QueryHeapDX12::QueryHeapDX12(GPUDeviceDX12* device, const D3D12_QUERY_HEAP_TYPE& queryHeapType, int32 queryHeapCount)
: _device(device)
, _queryHeap(nullptr)
, _resultBuffer(nullptr)
, _queryHeapType(queryHeapType)
, _currentIndex(0)
, _queryHeapCount(queryHeapCount)
bool QueryHeapDX12::Init(GPUDeviceDX12* device, GPUQueryType type, uint32 size)
{
if (queryHeapType == D3D12_QUERY_HEAP_TYPE_OCCLUSION)
{
_resultSize = sizeof(uint64);
_queryType = D3D12_QUERY_TYPE_OCCLUSION;
}
else if (queryHeapType == D3D12_QUERY_HEAP_TYPE_TIMESTAMP)
{
_resultSize = sizeof(uint64);
_queryType = D3D12_QUERY_TYPE_TIMESTAMP;
}
else
{
MISSING_CODE("Not support D3D12 query heap type.");
}
}
bool QueryHeapDX12::Init()
{
_resultData.Resize(_resultSize * _queryHeapCount);
// Create the query heap
D3D12_QUERY_HEAP_DESC heapDesc;
heapDesc.Type = _queryHeapType;
Type = type;
_device = device;
_queryHeapCount = size;
D3D12_QUERY_HEAP_DESC heapDesc = {};
heapDesc.Count = _queryHeapCount;
heapDesc.NodeMask = 0;
HRESULT result = _device->GetDevice()->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&_queryHeap));
switch (type)
{
case GPUQueryType::Timer:
_resultSize = sizeof(uint64);
QueryType = D3D12_QUERY_TYPE_TIMESTAMP;
heapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
break;
case GPUQueryType::Occlusion:
_resultSize = sizeof(uint64);
QueryType = D3D12_QUERY_TYPE_OCCLUSION;
heapDesc.Type = D3D12_QUERY_HEAP_TYPE_OCCLUSION;
break;
case GPUQueryType::MAX:
return true;
}
_resultData.Resize(_resultSize * _queryHeapCount);
HRESULT result = _device->GetDevice()->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&QueryHeap));
LOG_DIRECTX_RESULT_WITH_RETURN(result, true);
DX_SET_DEBUG_NAME(_queryHeap, "Query Heap");
DX_SET_DEBUG_NAME(QueryHeap, "Query Heap");
// Create the result buffer
D3D12_HEAP_PROPERTIES heapProperties;
@@ -77,8 +69,8 @@ bool QueryHeapDX12::Init()
void QueryHeapDX12::Destroy()
{
SAFE_RELEASE(_resultBuffer);
SAFE_RELEASE(_queryHeap);
_currentBatch.Clear();
SAFE_RELEASE(QueryHeap);
_currentBatch = QueryBatch();
_resultData.SetCapacity(0);
}
@@ -92,45 +84,36 @@ void QueryHeapDX12::EndQueryBatchAndResolveQueryData(GPUContextDX12* context)
_currentBatch.Open = false;
// Resolve the batch
const int32 offset = _currentBatch.Start * _resultSize;
context->GetCommandList()->ResolveQueryData(_queryHeap, _queryType, _currentBatch.Start, _currentBatch.Count, _resultBuffer, offset);
_currentBatch.Sync = _device->GetCommandQueue()->GetSyncPoint();
const uint32 offset = _currentBatch.Start * _resultSize;
context->GetCommandList()->ResolveQueryData(QueryHeap, QueryType, _currentBatch.Start, _currentBatch.Count, _resultBuffer, offset);
const auto queue = _device->GetCommandQueue();
_currentBatch.Sync = queue->GetSyncPoint();
// Get GPU clock frequency for timer queries
if (Type == GPUQueryType::Timer)
{
VALIDATE_DIRECTX_CALL(queue->GetCommandQueue()->GetTimestampFrequency(&_currentBatch.TimestampFrequency));
}
// Begin a new query batch
_batches.Add(_currentBatch);
StartQueryBatch();
}
void QueryHeapDX12::AllocQuery(GPUContextDX12* context, ElementHandle& handle)
bool QueryHeapDX12::CanAlloc(int32 count) const
{
return _currentBatch.Open && _currentIndex + count <= GetQueryHeapCount();
}
void QueryHeapDX12::Alloc(ElementHandle& handle)
{
ASSERT(_currentBatch.Open);
// Check if need to start from the buffer head
if (_currentIndex >= GetQueryHeapCount())
{
// We're in the middle of a batch, but we're at the end of the heap so split the batch in two
EndQueryBatchAndResolveQueryData(context);
}
// Allocate element into the current batch
handle = _currentIndex++;
_currentBatch.Count++;
}
void QueryHeapDX12::BeginQuery(GPUContextDX12* context, ElementHandle& handle)
{
AllocQuery(context, handle);
context->GetCommandList()->BeginQuery(_queryHeap, _queryType, handle);
}
void QueryHeapDX12::EndQuery(GPUContextDX12* context, ElementHandle& handle)
{
AllocQuery(context, handle);
context->GetCommandList()->EndQuery(_queryHeap, _queryType, handle);
}
bool QueryHeapDX12::IsReady(ElementHandle& handle)
{
// Current batch is not ready (not ended)
@@ -150,7 +133,7 @@ bool QueryHeapDX12::IsReady(ElementHandle& handle)
return true;
}
void* QueryHeapDX12::ResolveQuery(ElementHandle& handle)
void* QueryHeapDX12::Resolve(ElementHandle& handle, uint64* timestampFrequency)
{
// Prevent queries from the current batch
ASSERT(!_currentBatch.ContainsElement(handle));
@@ -192,10 +175,15 @@ void* QueryHeapDX12::ResolveQuery(ElementHandle& handle)
// All elements got its results so we can remove this batch
_batches.RemoveAt(i);
// Cache timestamps frequency for later
_timestampFrequency = batch.TimestampFrequency;
break;
}
}
if (timestampFrequency)
*timestampFrequency = _timestampFrequency;
return _resultData.Get() + handle * _resultSize;
}
@@ -204,7 +192,7 @@ void QueryHeapDX12::StartQueryBatch()
ASSERT(!_currentBatch.Open);
// Clear the current batch
_currentBatch.Clear();
_currentBatch = QueryBatch();
// Loop active index on overflow
if (_currentIndex >= GetQueryHeapCount())