Files
FlaxEngine/Source/Engine/GraphicsDevice/DirectX/DX12/QueryHeapDX12.cpp

209 lines
6.4 KiB
C++

// Copyright (c) Wojciech Figat. All rights reserved.
#if GRAPHICS_API_DIRECTX12
#include "QueryHeapDX12.h"
#include "GPUDeviceDX12.h"
#include "GPUContextDX12.h"
#include "../RenderToolsDX.h"
// Prepares the heap for queries of the given type: creates a D3D12 query heap with `size` entries,
// a readback buffer big enough to hold one result per entry, and opens the first query batch.
// Returns false once everything has been created. Returns true for GPUQueryType::MAX; true is also
// the value passed to LOG_DIRECTX_RESULT_WITH_RETURN for both creation calls (presumably returned
// when the HRESULT indicates a failure - confirm against the macro definition).
bool QueryHeapDX12::Init(GPUDeviceDX12* device, GPUQueryType type, uint32 size)
{
    _device = device;
    Type = type;
    _queryHeapCount = size;

    // Describe the query heap; the result size and the D3D12 query kind follow the query type
    D3D12_QUERY_HEAP_DESC queryHeapDesc = {};
    queryHeapDesc.NodeMask = 0;
    queryHeapDesc.Count = _queryHeapCount;
    if (type == GPUQueryType::Timer)
    {
        heapTypeIsSet:
        queryHeapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
        QueryType = D3D12_QUERY_TYPE_TIMESTAMP;
        _resultSize = sizeof(uint64);
    }
    else if (type == GPUQueryType::Occlusion)
    {
        queryHeapDesc.Type = D3D12_QUERY_HEAP_TYPE_OCCLUSION;
        QueryType = D3D12_QUERY_TYPE_OCCLUSION;
        _resultSize = sizeof(uint64);
    }
    else if (type == GPUQueryType::MAX)
    {
        // Not a real query type
        return true;
    }

    // CPU-side storage for the results of every query in the heap
    _resultData.Resize(_resultSize * _queryHeapCount);

    // Create the query heap
    const auto d3dDevice = _device->GetDevice();
    HRESULT result = d3dDevice->CreateQueryHeap(&queryHeapDesc, IID_PPV_ARGS(&QueryHeap));
    LOG_DIRECTX_RESULT_WITH_RETURN(result, true);
    DX_SET_DEBUG_NAME(QueryHeap, "Query Heap");

    // Create the readback buffer that query results get resolved into (same size as the CPU-side storage)
    D3D12_HEAP_PROPERTIES readbackHeap;
    readbackHeap.Type = D3D12_HEAP_TYPE_READBACK;
    readbackHeap.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
    readbackHeap.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
    readbackHeap.VisibleNodeMask = 1;
    readbackHeap.CreationNodeMask = 1;
    D3D12_RESOURCE_DESC bufferDesc;
    bufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
    bufferDesc.Format = DXGI_FORMAT_UNKNOWN;
    bufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
    bufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
    bufferDesc.Alignment = 0;
    bufferDesc.Width = _resultData.Count();
    bufferDesc.Height = 1;
    bufferDesc.DepthOrArraySize = 1;
    bufferDesc.MipLevels = 1;
    bufferDesc.SampleDesc.Count = 1;
    bufferDesc.SampleDesc.Quality = 0;
    result = d3dDevice->CreateCommittedResource(&readbackHeap, D3D12_HEAP_FLAG_NONE, &bufferDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&_resultBuffer));
    LOG_DIRECTX_RESULT_WITH_RETURN(result, true);
    DX_SET_DEBUG_NAME(_resultBuffer, "Query Heap Result Buffer");

    // StartQueryBatch asserts that the current batch is closed, so mark it closed before opening the first one
    _currentBatch.Open = false;
    StartQueryBatch();

    return false;
}
// Releases the readback buffer and the query heap, shrinks the CPU-side results storage to zero
// capacity and resets the current batch to a default-constructed one.
void QueryHeapDX12::Destroy()
{
    // GPU objects
    SAFE_RELEASE(_resultBuffer);
    SAFE_RELEASE(QueryHeap);

    // CPU-side state
    _resultData.SetCapacity(0);
    _currentBatch = QueryBatch();
}
// Closes the current batch, records a ResolveQueryData command for its queries on the command list
// of the given context, remembers the command queue sync point (and, for timer queries, the timestamp
// frequency) and queues the batch for later readback. A new batch is opened afterwards.
// Does nothing when the current batch has no queries (the batch stays open).
void QueryHeapDX12::EndQueryBatchAndResolveQueryData(GPUContextDX12* context)
{
    ASSERT(_currentBatch.Open);
    auto& ending = _currentBatch;
    if (ending.Count == 0)
        return;

    // The batch no longer accepts queries
    ending.Open = false;

    // Resolve the queries of this batch into the matching region of the readback buffer
    const uint32 dstOffset = ending.Start * _resultSize;
    auto commandList = context->GetCommandList();
    commandList->ResolveQueryData(QueryHeap, QueryType, ending.Start, ending.Count, _resultBuffer, dstOffset);

    // Remember the sync point used later to check whether the results are available
    const auto commandQueue = _device->GetCommandQueue();
    ending.Sync = commandQueue->GetSyncPoint();

    // Timer queries additionally need the GPU clock frequency
    if (Type == GPUQueryType::Timer)
    {
        VALIDATE_DIRECTX_CALL(commandQueue->GetCommandQueue()->GetTimestampFrequency(&ending.TimestampFrequency));
    }

    // Queue the finished batch and begin the next one
    _batches.Add(ending);
    StartQueryBatch();
}
// Returns true when the current batch is open and `count` more queries fit before the end of the heap.
bool QueryHeapDX12::CanAlloc(int32 count) const
{
    // A closed batch cannot take any queries
    if (!_currentBatch.Open)
        return false;

    return _currentIndex + count <= GetQueryHeapCount();
}
// Hands out the next query slot of the heap via `handle` and adds it to the current (open) batch.
// No capacity check is performed here.
void QueryHeapDX12::Alloc(ElementHandle& handle)
{
    ASSERT(_currentBatch.Open);

    // The handle is the index of the slot inside the heap
    handle = _currentIndex;
    ++_currentIndex;
    ++_currentBatch.Count;
}
// Checks whether the result of the given query can be read.
// Returns false for queries of the current (not yet ended) batch, the completion state of the sync
// point for queries of a pending batch, and true when no batch contains the query.
bool QueryHeapDX12::IsReady(ElementHandle& handle)
{
    // Queries of the batch that is still being recorded are never ready
    if (_currentBatch.ContainsElement(handle))
        return false;

    // Look for the pending batch that owns the query
    const int32 pendingCount = _batches.Count();
    for (int32 batchIndex = 0; batchIndex < pendingCount; batchIndex++)
    {
        auto& pending = _batches[batchIndex];
        if (!pending.ContainsElement(handle))
            continue;

        ASSERT(pending.Sync.IsValid());
        return pending.Sync.IsComplete();
    }

    // Not part of any pending batch
    return true;
}
// Returns a pointer to the CPU-side copy of the result of the given query.
// If the query belongs to a pending batch, this waits for the batch sync point (flushing the GPU
// first when the sync point is still open), copies the results of the whole batch from the readback
// buffer into the CPU-side storage, caches the batch timestamp frequency and removes the batch.
// When `timestampFrequency` is not null it receives the most recently cached timestamp frequency.
// Must not be called for queries of the current (not yet ended) batch.
void* QueryHeapDX12::Resolve(ElementHandle& handle, uint64* timestampFrequency)
{
    // Prevent queries from the current batch
    ASSERT(!_currentBatch.ContainsElement(handle));

    // Find the batch that contains this element to resolve it
    for (int32 i = 0; i < _batches.Count(); i++)
    {
        auto& batch = _batches[i];
        if (!batch.ContainsElement(handle))
            continue;
        ASSERT(batch.Sync.IsValid());

        // Ensure that end point has been already executed
        if (!batch.Sync.IsComplete())
        {
            if (batch.Sync.IsOpen())
            {
                // The query is on a command list that hasn't been submitted yet
                LOG(Warning, "Stalling the rendering and flushing GPU commands to wait for a query that hasn't been submitted to the GPU yet.");
                _device->WaitForGPU();
            }
            batch.Sync.WaitForCompletion();
        }

        // Map the part of the readback buffer that holds the results of this batch
        D3D12_RANGE readRange;
        readRange.Begin = batch.Start * _resultSize;
        readRange.End = readRange.Begin + batch.Count * _resultSize;
        void* mapped = nullptr;
        VALIDATE_DIRECTX_CALL(_resultBuffer->Map(0, &readRange, &mapped));

        // Copy the results data
        Platform::MemoryCopy(_resultData.Get() + readRange.Begin, (byte*)mapped + readRange.Begin, batch.Count * _resultSize);

        // Unmap with an empty range (End <= Begin) to indicate nothing was written by the CPU;
        // a null range would instead mean the whole subresource might have been modified
        D3D12_RANGE writtenRange;
        writtenRange.Begin = 0;
        writtenRange.End = 0;
        _resultBuffer->Unmap(0, &writtenRange);

        // Cache timestamps frequency for later. This has to happen before the batch gets removed
        // because `batch` is a reference into the array and no longer refers to this batch afterwards.
        _timestampFrequency = batch.TimestampFrequency;

        // All elements got its results so we can remove this batch
        _batches.RemoveAt(i);
        break;
    }

    if (timestampFrequency)
        *timestampFrequency = _timestampFrequency;
    return _resultData.Get() + handle * _resultSize;
}
// Opens a fresh batch that starts at the current heap index. The previous batch must be closed.
void QueryHeapDX12::StartQueryBatch()
{
    ASSERT(!_currentBatch.Open);

    // Wrap around to the beginning of the heap once its end has been reached
    if (_currentIndex >= GetQueryHeapCount())
        _currentIndex = 0;

    // Reset the batch state and open it at the current index
    _currentBatch = QueryBatch();
    _currentBatch.Open = true;
    _currentBatch.Start = _currentIndex;
}
#endif