diff --git a/Source/Engine/Graphics/Enums.h b/Source/Engine/Graphics/Enums.h
index 107fe3533..96a9fed7b 100644
--- a/Source/Engine/Graphics/Enums.h
+++ b/Source/Engine/Graphics/Enums.h
@@ -349,6 +349,24 @@ API_ENUM(Attributes="Flags") enum class GPUResourceMapMode
DECLARE_ENUM_OPERATORS(GPUResourceMapMode);
+///
+/// GPU query types (selects what a query started with GPUContext::BeginQuery measures).
+///
+enum class GPUQueryType
+{
+ ///
+ /// Measures duration of GPU commands execution. Returns time in microseconds (1/1000 ms).
+ ///
+ Timer = 0,
+
+ ///
+ /// Tests object visibility by counting number of pixel samples that are not culled (by depth or stencil tests).
+ ///
+ Occlusion = 1,
+
+ // Amount of query types (not a valid query type by itself). Backends use it to validate per-type arrays.
+ MAX
+};
+
///
/// Primitives types.
///
diff --git a/Source/Engine/Graphics/GPUContext.h b/Source/Engine/Graphics/GPUContext.h
index 1144d6f49..5d1c3a020 100644
--- a/Source/Engine/Graphics/GPUContext.h
+++ b/Source/Engine/Graphics/GPUContext.h
@@ -28,6 +28,7 @@ class GPUBufferView;
class GPUVertexLayout;
struct GPUPass;
enum class GPUResourceAccess;
+enum class GPUQueryType;
// Gets the GPU texture view. Checks if pointer is not null and texture has one or more mip levels loaded.
#define GET_TEXTURE_VIEW_SAFE(t) (t && t->ResidentMipLevels() > 0 ? t->View() : nullptr)
@@ -554,6 +555,20 @@ public:
/// The aligned byte offset for arguments.
API_FUNCTION() virtual void DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) = 0;
+public:
+ ///
+ /// Begins the GPU query that will measure commands until EndQuery.
+ ///
+ /// Query type.
+ /// Unique identifier of the query used to EndQuery and then GetQueryResult to read the query result data.
+ virtual uint64 BeginQuery(GPUQueryType type) = 0;
+
+ ///
+ /// Ends the GPU query. Use GPUDevice::GetQueryResult to read the results back.
+ ///
+ /// Query identifier returned by BeginQuery.
+ virtual void EndQuery(uint64 queryID) = 0;
+
public:
///
/// Sets the rendering viewport and scissor rectangle.
diff --git a/Source/Engine/Graphics/GPUDevice.h b/Source/Engine/Graphics/GPUDevice.h
index 8f9393a5b..d424ec67d 100644
--- a/Source/Engine/Graphics/GPUDevice.h
+++ b/Source/Engine/Graphics/GPUDevice.h
@@ -370,6 +370,16 @@ public:
///
virtual void WaitForGPU() = 0;
+ ///
+ /// Reads the query result from the GPU.
+ ///
+ /// GPU query results are short-lived, meaning that in the frame that results are ready, they won't be available in the next frame, as queries are reused.
+ /// Query identifier returned by GPUContext::BeginQuery.
+ /// The output result data of the query. Valid only when function returns true.
+ /// True if wait for the GPU to end processing commands for sync data ready. Otherwise, if query is incomplete then function will return value of false without result.
+ /// True if got valid query result, otherwise false. If called with wait enabled then device failed to readback the query data.
+ virtual bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) = 0;
+
public:
void AddResource(GPUResource* resource);
void RemoveResource(GPUResource* resource);
diff --git a/Source/Engine/Graphics/GPUResource.h b/Source/Engine/Graphics/GPUResource.h
index d612a846d..1b6178036 100644
--- a/Source/Engine/Graphics/GPUResource.h
+++ b/Source/Engine/Graphics/GPUResource.h
@@ -32,7 +32,7 @@ API_ENUM() enum class GPUResourceType
PipelineState,
// GPU binding descriptor
Descriptor,
- // GPU timer query
+ // GPU timer or occlusion query
Query,
// GPU texture sampler
Sampler,
diff --git a/Source/Engine/Graphics/Graphics.Build.cs b/Source/Engine/Graphics/Graphics.Build.cs
index 1c59349ec..51b37144b 100644
--- a/Source/Engine/Graphics/Graphics.Build.cs
+++ b/Source/Engine/Graphics/Graphics.Build.cs
@@ -40,6 +40,7 @@ public abstract class GraphicsDeviceBaseModule : EngineModule
public class Graphics : EngineModule
{
private static bool _logMissingVulkanSDK;
+ private static bool _logMissingWindowsSDK;
///
public override void Setup(BuildOptions options)
@@ -59,7 +60,7 @@ public class Graphics : EngineModule
if (windowsToolchain != null && windowsToolchain.SDK != Flax.Build.Platforms.WindowsPlatformSDK.v8_1)
options.PrivateDependencies.Add("GraphicsDeviceDX12");
else
- Log.WarningOnce(string.Format("Building for {0} without Vulkan rendering backend (Vulkan SDK is missing)", options.Platform.Target), ref _logMissingVulkanSDK);
+ Log.WarningOnce(string.Format("Building for {0} without D3D12 rendering backend (Windows SDK is missing)", options.Platform.Target), ref _logMissingWindowsSDK);
break;
case TargetPlatform.UWP:
options.PrivateDependencies.Add("GraphicsDeviceDX11");
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp
index f623f53b5..92a957ffd 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp
+++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp
@@ -566,6 +566,81 @@ void GPUContextDX11::DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint
RENDER_STAT_DRAW_CALL(0, 0);
}
+// Begins a GPU query of the given type on the immediate context.
+// Reuses a pooled query of that type when one is available, otherwise creates the D3D11 query objects.
+// Returns the packed query identifier (see GPUQueryDX11) to pass to EndQuery and GPUDeviceDX11::GetQueryResult.
+// Zero is never produced for a valid query, so it acts as the invalid identifier (presumably what the
+// LOG_DIRECTX_RESULT_WITH_RETURN calls return when query creation fails).
+uint64 GPUContextDX11::BeginQuery(GPUQueryType type)
+{
+    // Allocate a pooled query
+    uint16 queryIndex;
+    static_assert(ARRAY_COUNT(_device->_readyQueries) == (int32)GPUQueryType::MAX, "Invalid query types count");
+    if (_device->_readyQueries[(int32)type].HasItems())
+    {
+        // Use query from cached list
+        queryIndex = _device->_readyQueries[(int32)type].Pop();
+    }
+    else
+    {
+        // Add a new query
+        queryIndex = _device->_queries.Count();
+        auto& query = _device->_queries.AddOne();
+        query.Type = type;
+        D3D11_QUERY_DESC queryDesc;
+        // The main query has to match the requested type: occlusion queries count passed samples,
+        // while timer queries use it as the end timestamp
+        queryDesc.Query = type == GPUQueryType::Occlusion ? D3D11_QUERY_OCCLUSION : D3D11_QUERY_TIMESTAMP;
+        queryDesc.MiscFlags = 0;
+        HRESULT hr = _device->GetDevice()->CreateQuery(&queryDesc, &query.Query);
+        LOG_DIRECTX_RESULT_WITH_RETURN(hr, 0);
+        if (type == GPUQueryType::Timer)
+        {
+            // Timer queries need additional one for begin and end disjoint
+            hr = _device->GetDevice()->CreateQuery(&queryDesc, &query.TimerBeginQuery);
+            LOG_DIRECTX_RESULT_WITH_RETURN(hr, 0);
+            queryDesc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
+            hr = _device->GetDevice()->CreateQuery(&queryDesc, &query.DisjointQuery);
+            LOG_DIRECTX_RESULT_WITH_RETURN(hr, 0);
+        }
+    }
+    static_assert(sizeof(GPUQueryDX11) == sizeof(uint64), "Invalid query size.");
+    GPUQueryDX11 q = {};
+    q.Type = (uint16)type;
+    q.Index = queryIndex;
+    q.Padding = 1; // Ensure Raw is never 0, even for the first query
+
+    // Begin query
+    {
+        auto& query = _device->_queries[queryIndex];
+        ASSERT_LOW_LAYER(query.State == GPUQueryDataDX11::Ready);
+        ASSERT_LOW_LAYER(query.Type == type);
+        query.State = GPUQueryDataDX11::Active;
+        auto context = _device->GetIM();
+        if (type == GPUQueryType::Timer)
+        {
+            // Open the disjoint query and write the begin timestamp (timestamp queries only use End)
+            context->Begin(query.DisjointQuery);
+            context->End(query.TimerBeginQuery);
+        }
+        else
+        {
+            context->Begin(query.Query);
+        }
+    }
+
+    return q.Raw;
+}
+
+// Ends the GPU query identified by the value returned from BeginQuery.
+void GPUContextDX11::EndQuery(uint64 queryID)
+{
+    // Zero is the invalid identifier (BeginQuery never packs it for a valid query)
+    if (queryID == 0)
+        return;
+
+    // Unpack the identifier to find the pooled query entry
+    GPUQueryDX11 packed;
+    packed.Raw = queryID;
+    auto& entry = _device->_queries[packed.Index];
+    auto immediateContext = _device->GetIM();
+
+    // Close the main query; timer queries also close the disjoint query opened in BeginQuery
+    immediateContext->End(entry.Query);
+    const bool isTimer = packed.Type == (uint16)GPUQueryType::Timer;
+    if (isTimer)
+        immediateContext->End(entry.DisjointQuery);
+}
+
void GPUContextDX11::SetViewport(const Viewport& viewport)
{
_context->RSSetViewports(1, (D3D11_VIEWPORT*)&viewport);
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h
index eee2699df..5e3c14e9e 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h
+++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h
@@ -154,6 +154,8 @@ public:
void DrawIndexedInstanced(uint32 indicesCount, uint32 instanceCount, int32 startInstance, int32 startVertex, int32 startIndex) override;
void DrawInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override;
void DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override;
+ uint64 BeginQuery(GPUQueryType type) override;
+ void EndQuery(uint64 queryID) override;
void SetViewport(const Viewport& viewport) override;
void SetScissor(const Rectangle& scissorRect) override;
GPUPipelineState* GetState() const override;
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp
index b187ed3c7..2d8a0f8c7 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp
+++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp
@@ -175,6 +175,15 @@ GPUVertexLayoutDX11::GPUVertexLayoutDX11(GPUDeviceDX11* device, const Elements&
}
}
+// Releases the D3D11 query objects held by this entry and resets its bookkeeping to the initial state.
+void GPUQueryDataDX11::Release()
+{
+    // Reset the cached result and the lifecycle state
+    State = Ready;
+    Result = 0;
+
+    // Release the query objects (only the timer type uses the begin and disjoint queries)
+    SAFE_RELEASE(DisjointQuery);
+    SAFE_RELEASE(TimerBeginQuery);
+    SAFE_RELEASE(Query);
+}
+
GPUDevice* GPUDeviceDX11::Create()
{
// Configuration
@@ -801,6 +810,11 @@ void GPUDeviceDX11::Dispose()
{
SAFE_RELEASE(RasterizerStates[i]);
}
+ for (auto& query : _queries)
+ query.Release();
+ _queries.Clear();
+ for (auto& e : _readyQueries)
+ e.Clear();
// Clear DirectX stuff
SAFE_DELETE(_mainContext);
@@ -877,6 +891,88 @@ void GPUDeviceDX11::DrawEnd()
infoQueue->ClearStoredMessages();
}
#endif
+
+ // Auto-return finished queries back to the pool
+ auto* queries = _queries.Get();
+ int32 queriesCount = _queries.Count();
+ for (int32 i = 0; i < queriesCount; i++)
+ {
+ auto& query = queries[i];
+ if (query.State == GPUQueryDataDX11::Finished)
+ {
+ query.State = GPUQueryDataDX11::Ready;
+ query.Result = 0;
+ _readyQueries[(int32)query.Type].Push(i);
+ }
+ }
+}
+
+// Reads the result of a query started with GPUContextDX11::BeginQuery.
+// Timer queries yield the duration in microseconds, other queries yield the raw 64-bit query data.
+// When wait is set, the call keeps polling (yielding the thread in between) until the data is available.
+// Returns true when result holds valid data.
+bool GPUDeviceDX11::GetQueryResult(uint64 queryID, uint64& result, bool wait)
+{
+    if (queryID == 0)
+        return false;
+
+    GPUQueryDX11 packed;
+    packed.Raw = queryID;
+    auto& entry = _queries[packed.Index];
+
+    // Already read back earlier (and not yet recycled by DrawEnd) so reuse the cached value
+    if (entry.State == GPUQueryDataDX11::Finished)
+    {
+        result = entry.Result;
+        return true;
+    }
+
+    auto immediateContext = GetIM();
+    const bool isTimer = packed.Type == (uint16)GPUQueryType::Timer;
+    bool hasData;
+    for (;;)
+    {
+        if (isTimer)
+        {
+            // The disjoint query tells when the timestamps are ready and whether they can be trusted
+            D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjointData;
+            hasData = immediateContext->GetData(entry.DisjointQuery, &disjointData, sizeof(disjointData), 0) == S_OK;
+            if (hasData)
+            {
+                UINT64 timeBegin = 0, timeEnd = 0;
+                immediateContext->GetData(entry.TimerBeginQuery, &timeBegin, sizeof(timeBegin), 0);
+                immediateContext->GetData(entry.Query, &timeEnd, sizeof(timeEnd), 0);
+
+                if (disjointData.Disjoint != FALSE)
+                {
+                    // Timestamps are unreliable for this range so report no duration
+                    result = 0;
+#if !BUILD_RELEASE
+                    static bool LogOnce = true;
+                    if (LogOnce)
+                    {
+                        LogOnce = false;
+                        LOG(Warning, "Unreliable GPU timer query detected.");
+                    }
+#endif
+                }
+                else
+                {
+                    // Convert GPU ticks into microseconds
+                    result = timeEnd > timeBegin ? (timeEnd - timeBegin) * 1000000ull / disjointData.Frequency : 0;
+                }
+            }
+        }
+        else
+        {
+            hasData = immediateContext->GetData(entry.Query, &result, sizeof(uint64), 0) == S_OK;
+        }
+
+        // Stop when the data arrived or the caller does not want to block
+        if (hasData || !wait)
+            break;
+
+        // Wait until data is ready
+        Platform::Yield();
+    }
+
+    if (hasData)
+    {
+        // Query has valid data now (until auto-recycle back to pool)
+        entry.State = GPUQueryDataDX11::Finished;
+        entry.Result = result;
+    }
+
+    return hasData;
}
GPUTexture* GPUDeviceDX11::CreateTexture(const StringView& name)
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.h b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.h
index 403a10a4c..9657ebc59 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.h
+++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.h
@@ -15,6 +15,38 @@ enum class StencilOperation : byte;
class GPUContextDX11;
class GPUSwapChainDX11;
+///
+/// GPU query ID packed into 64-bits.
+///
+// The Raw value is what GPUContextDX11::BeginQuery returns to the caller; the fields overlay the same 64 bits.
+struct GPUQueryDX11
+{
+ union
+ {
+ struct
+ {
+ // Query type (GPUQueryType stored as uint16).
+ uint16 Type;
+ // Index of the query entry in GPUDeviceDX11::_queries.
+ uint16 Index;
+ // Set to 1 by BeginQuery so that Raw is never 0 (0 is treated as the invalid ID).
+ uint32 Padding;
+ };
+ // All fields viewed as a single 64-bit identifier.
+ uint64 Raw;
+ };
+};
+
+///
+/// GPU query data (reusable via pooling).
+///
+struct GPUQueryDataDX11
+{
+ // Main query object. Non-timer queries Begin/End it; timer queries only End it to write the end timestamp.
+ ID3D11Query* Query = nullptr;
+ // Timer queries only: timestamp query ended in BeginQuery to mark the start time.
+ ID3D11Query* TimerBeginQuery = nullptr;
+ // Timer queries only: timestamp-disjoint query that wraps the begin/end timestamps.
+ ID3D11Query* DisjointQuery = nullptr;
+ // Result cached by GetQueryResult once the data has been read back.
+ uint64 Result = 0;
+ // Lifecycle: Ready (pooled) -> Active (set in BeginQuery) -> Finished (result read) -> Ready again (recycled in DrawEnd).
+ enum States { Ready, Active, Finished } State = Ready;
+ // Query type this entry was created for (MAX until BeginQuery assigns it).
+ GPUQueryType Type = GPUQueryType::MAX;
+
+ // Releases the query objects and resets Result and State.
+ void Release();
+};
+
///
/// Implementation of Graphics Device for DirectX 11 backend.
///
@@ -60,6 +92,8 @@ private:
GPUContextDX11* _mainContext = nullptr;
bool _allowTearing = false;
GPUBuffer* _dummyVB = nullptr;
+ Array _queries;
+ Array _readyQueries[2]; // Timer and Occlusion
// Static Samplers
ID3D11SamplerState* _samplerLinearClamp = nullptr;
@@ -124,6 +158,7 @@ public:
void Dispose() override;
void WaitForGPU() override;
void DrawEnd() override;
+ bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) override;
GPUTexture* CreateTexture(const StringView& name) override;
GPUShader* CreateShader(const StringView& name) override;
GPUPipelineState* CreatePipelineState() override;
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp
index 98143c7c3..eb654bb07 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp
+++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp
@@ -1275,6 +1275,31 @@ void GPUContextDX12::DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint
RENDER_STAT_DRAW_CALL(0, 0);
}
+// Begins a GPU query of the given type on this context's command list.
+// Returns the packed query identifier (see GPUQueryDX12), or 0 when no query could be allocated.
+uint64 GPUContextDX12::BeginQuery(GPUQueryType type)
+{
+    auto query = _device->AllocQuery(type);
+    if (query.Raw == 0)
+        return 0;
+
+    auto heap = _device->QueryHeaps[query.Heap];
+    if (type != GPUQueryType::Timer)
+    {
+        _commandList->BeginQuery(heap->QueryHeap, heap->QueryType, query.Element);
+    }
+    else
+    {
+        // Timer queries call End twice on different queries to calculate duration between GPU time clocks
+        _commandList->EndQuery(heap->QueryHeap, heap->QueryType, query.SecondaryElement);
+    }
+    return query.Raw;
+}
+
+// Ends the GPU query identified by the value returned from BeginQuery (a zero identifier is ignored).
+void GPUContextDX12::EndQuery(uint64 queryID)
+{
+    if (queryID == 0)
+        return;
+
+    // Unpack the identifier and end the main element on the heap that owns it
+    GPUQueryDX12 packed;
+    packed.Raw = queryID;
+    auto heap = _device->QueryHeaps[packed.Heap];
+    _commandList->EndQuery(heap->QueryHeap, heap->QueryType, packed.Element);
+}
+
void GPUContextDX12::SetViewport(const Viewport& viewport)
{
_commandList->RSSetViewports(1, (D3D12_VIEWPORT*)&viewport);
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h
index 51f24f4a6..8c13b8ce4 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h
+++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h
@@ -197,6 +197,8 @@ public:
void DrawIndexedInstanced(uint32 indicesCount, uint32 instanceCount, int32 startInstance, int32 startVertex, int32 startIndex) override;
void DrawInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override;
void DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override;
+ uint64 BeginQuery(GPUQueryType type) override;
+ void EndQuery(uint64 queryID) override;
void SetViewport(const Viewport& viewport) override;
void SetScissor(const Rectangle& scissorRect) override;
GPUPipelineState* GetState() const override;
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp
index 4b9298b6c..447ff14a0 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp
+++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp
@@ -555,7 +555,6 @@ GPUDeviceDX12::GPUDeviceDX12(IDXGIFactory4* dxgiFactory, GPUAdapterDX* adapter)
, _commandQueue(nullptr)
, _mainContext(nullptr)
, UploadBuffer(this)
- , TimestampQueryHeap(this, D3D12_QUERY_HEAP_TYPE_TIMESTAMP, DX12_BACK_BUFFER_COUNT * 1024)
, Heap_CBV_SRV_UAV(this, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 4 * 1024, false)
, Heap_RTV(this, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 1 * 1024, false)
, Heap_DSV(this, D3D12_DESCRIPTOR_HEAP_TYPE_DSV, 64, false)
@@ -883,9 +882,6 @@ bool GPUDeviceDX12::Init()
VALIDATE_DIRECTX_CALL(_device->CreateRootSignature(0, signatureBlob->GetBufferPointer(), signatureBlob->GetBufferSize(), IID_PPV_ARGS(&_rootSignature)));
}
- if (TimestampQueryHeap.Init())
- return true;
-
// Cached command signatures
{
DrawIndirectCommandSignature = New(this, 1);
@@ -927,8 +923,9 @@ void GPUDeviceDX12::RenderEnd()
// Base
GPUDeviceDX::RenderEnd();
- // Resolve the timestamp queries
- TimestampQueryHeap.EndQueryBatchAndResolveQueryData(_mainContext);
+ // Resolve the queries
+ for (auto heap : QueryHeaps)
+ heap->EndQueryBatchAndResolveQueryData(_mainContext);
}
GPUDeviceDX12::~GPUDeviceDX12()
@@ -957,11 +954,47 @@ ID3D12CommandQueue* GPUDeviceDX12::GetCommandQueueDX12() const
return _commandQueue->GetCommandQueue();
}
+// Allocates query heap element(s) for a new query of the given type.
+// Picks the first existing heap of that type that can fit the request, otherwise creates a new heap.
+// Returns the packed query identifier, or a zero-initialized one (Raw == 0) when a new heap fails to initialize.
+GPUQueryDX12 GPUDeviceDX12::AllocQuery(GPUQueryType type)
+{
+ // Get query heap with free space
+ int32 heapIndex = 0;
+ // Timer queries take 2 heap elements (begin and end timestamps), other types take 1
+ int32 count = GPUQueryDX12::GetQueriesCount(type);
+ for (; heapIndex < QueryHeaps.Count(); heapIndex++)
+ {
+ auto heap = QueryHeaps[heapIndex];
+ if (heap->Type == type && heap->CanAlloc(count))
+ break;
+ }
+ // Loop ran to the end so no heap matched; the new heap is appended at exactly this index
+ if (heapIndex == QueryHeaps.Count())
+ {
+ // Allocate a new query heap
+ auto heap = New();
+ int32 size = type == GPUQueryType::Occlusion ? 4096 : 1024;
+ // Init returns true on failure
+ if (heap->Init(this, type, size))
+ {
+ Delete(heap);
+ return {};
+ }
+ QueryHeaps.Add(heap);
+ }
+
+ // Alloc query from the heap
+ GPUQueryDX12 query = {};
+ {
+ static_assert(sizeof(GPUQueryDX12) == sizeof(uint64), "Invalid DX12 query size.");
+ query.Type = (uint16)type;
+ query.Heap = heapIndex;
+ auto heap = QueryHeaps[heapIndex];
+ // NOTE(review): Alloc asserts on an open batch; assumes a freshly initialized heap has one - confirm in Init
+ heap->Alloc(query.Element);
+ if (count == 2)
+ heap->Alloc(query.SecondaryElement);
+ }
+ return query;
+}
+
void GPUDeviceDX12::Dispose()
{
GPUDeviceLock lock(this);
-
- // Check if has been disposed already
if (_state == DeviceState::Disposed)
return;
@@ -982,7 +1015,12 @@ void GPUDeviceDX12::Dispose()
for (auto& srv : _nullSrv)
srv.Release();
_nullUav.Release();
- TimestampQueryHeap.Destroy();
+ for (auto* heap : QueryHeaps)
+ {
+ heap->Destroy();
+ Delete(heap);
+ }
+ QueryHeaps.Clear();
DX_SAFE_RELEASE_CHECK(_rootSignature, 0);
Heap_CBV_SRV_UAV.ReleaseGPU();
Heap_RTV.ReleaseGPU();
@@ -1012,6 +1050,28 @@ void GPUDeviceDX12::WaitForGPU()
_commandQueue->WaitForGPU();
}
+// Reads the result of a query started with GPUContextDX12::BeginQuery.
+// Timer queries yield the duration in microseconds, other queries yield the resolved 64-bit value.
+// Without wait, the call returns false when any heap element used by the query is not ready yet.
+// Returns true when result holds valid data.
+bool GPUDeviceDX12::GetQueryResult(uint64 queryID, uint64& result, bool wait)
+{
+    // Zero is the invalid identifier (AllocQuery returns it on failure) and must not be used to index heaps
+    if (!queryID)
+        return false;
+
+    GPUQueryDX12 query;
+    query.Raw = queryID;
+    auto heap = QueryHeaps[query.Heap];
+    const int32 count = GPUQueryDX12::GetQueriesCount((GPUQueryType)query.Type);
+
+    // Non-blocking read: every element of the query has to be ready (the secondary one exists only for 2-element queries)
+    if (!wait && (!heap->IsReady(query.Element) || (count == 2 && !heap->IsReady(query.SecondaryElement))))
+        return false;
+
+    if (query.Type == (uint16)GPUQueryType::Timer)
+    {
+        // Secondary element is the begin timestamp, the main element is the end timestamp
+        uint64 timestampFrequency = 1;
+        const uint64 timeBegin = *(uint64*)heap->Resolve(query.SecondaryElement);
+        const uint64 timeEnd = *(uint64*)heap->Resolve(query.Element, &timestampFrequency);
+
+        // Convert GPU ticks into microseconds (guard against an unset frequency to avoid division by zero)
+        result = timeEnd > timeBegin && timestampFrequency != 0 ? (timeEnd - timeBegin) * 1000000ull / timestampFrequency : 0;
+    }
+    else
+    {
+        result = *(uint64*)heap->Resolve(query.Element);
+    }
+    return true;
+}
+
GPUTexture* GPUDeviceDX12::CreateTexture(const StringView& name)
{
PROFILE_MEM(GraphicsTextures);
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.h
index e9c1cacaa..582af6109 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.h
+++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.h
@@ -65,21 +65,13 @@ public:
~GPUDeviceDX12();
public:
- ///
- /// Data uploading utility via pages.
- ///
UploadBufferDX12 UploadBuffer;
-
- ///
- /// The timestamp queries heap.
- ///
- QueryHeapDX12 TimestampQueryHeap;
-
bool AllowTearing = false;
CommandSignatureDX12* DispatchIndirectCommandSignature = nullptr;
CommandSignatureDX12* DrawIndexedIndirectCommandSignature = nullptr;
CommandSignatureDX12* DrawIndirectCommandSignature = nullptr;
GPUBuffer* DummyVB = nullptr;
+ Array> QueryHeaps;
D3D12_CPU_DESCRIPTOR_HANDLE NullSRV(D3D12_SRV_DIMENSION dimension) const;
D3D12_CPU_DESCRIPTOR_HANDLE NullUAV() const;
@@ -136,6 +128,8 @@ public:
return _mainContext;
}
+ GPUQueryDX12 AllocQuery(GPUQueryType type);
+
public:
DescriptorHeapPoolDX12 Heap_CBV_SRV_UAV;
@@ -185,6 +179,7 @@ public:
void RenderEnd() override;
void Dispose() final override;
void WaitForGPU() override;
+ bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) override;
GPUTexture* CreateTexture(const StringView& name) override;
GPUShader* CreateShader(const StringView& name) override;
GPUPipelineState* CreatePipelineState() override;
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTimerQueryDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTimerQueryDX12.cpp
index ca19ebc2b..c64f0a3c9 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTimerQueryDX12.cpp
+++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTimerQueryDX12.cpp
@@ -20,9 +20,7 @@ void GPUTimerQueryDX12::OnReleaseGPU()
void GPUTimerQueryDX12::Begin()
{
const auto context = _device->GetMainContextDX12();
- auto& heap = _device->TimestampQueryHeap;
- heap.EndQuery(context, _begin);
-
+ _query = context->BeginQuery(GPUQueryType::Timer);
_hasResult = false;
_endCalled = false;
}
@@ -31,14 +29,8 @@ void GPUTimerQueryDX12::End()
{
if (_endCalled)
return;
-
const auto context = _device->GetMainContextDX12();
- auto& heap = _device->TimestampQueryHeap;
- heap.EndQuery(context, _end);
-
- const auto queue = _device->GetCommandQueue()->GetCommandQueue();
- VALIDATE_DIRECTX_CALL(queue->GetTimestampFrequency(&_gpuFrequency));
-
+ context->EndQuery(_query);
_endCalled = true;
}
@@ -48,33 +40,16 @@ bool GPUTimerQueryDX12::HasResult()
return false;
if (_hasResult)
return true;
-
- auto& heap = _device->TimestampQueryHeap;
- return heap.IsReady(_end) && heap.IsReady(_begin);
+ uint64 result;
+ return _device->GetQueryResult(_query, result, false);
}
float GPUTimerQueryDX12::GetResult()
{
if (_hasResult)
- {
return _timeDelta;
- }
-
- const uint64 timeBegin = *(uint64*)_device->TimestampQueryHeap.ResolveQuery(_begin);
- const uint64 timeEnd = *(uint64*)_device->TimestampQueryHeap.ResolveQuery(_end);
-
- // Calculate event duration in milliseconds
- if (timeEnd > timeBegin)
- {
- const uint64 delta = timeEnd - timeBegin;
- const double frequency = double(_gpuFrequency);
- _timeDelta = static_cast((delta / frequency) * 1000.0);
- }
- else
- {
- _timeDelta = 0.0f;
- }
-
+ uint64 result;
+ _timeDelta = _device->GetQueryResult(_query, result, true) ? (float)((double)result / 1000.0) : 0.0f;
_hasResult = true;
return _timeDelta;
}
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTimerQueryDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTimerQueryDX12.h
index 22e8713d5..d43a366c1 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTimerQueryDX12.h
+++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTimerQueryDX12.h
@@ -17,9 +17,7 @@ private:
bool _hasResult = false;
bool _endCalled = false;
float _timeDelta = 0.0f;
- uint64 _gpuFrequency = 0;
- QueryHeapDX12::ElementHandle _begin;
- QueryHeapDX12::ElementHandle _end;
+ uint64 _query = 0;
public:
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/QueryHeapDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/QueryHeapDX12.cpp
index 5977a9404..5a24c5b4c 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX12/QueryHeapDX12.cpp
+++ b/Source/Engine/GraphicsDevice/DirectX/DX12/QueryHeapDX12.cpp
@@ -7,42 +7,34 @@
#include "GPUContextDX12.h"
#include "../RenderToolsDX.h"
-QueryHeapDX12::QueryHeapDX12(GPUDeviceDX12* device, const D3D12_QUERY_HEAP_TYPE& queryHeapType, int32 queryHeapCount)
- : _device(device)
- , _queryHeap(nullptr)
- , _resultBuffer(nullptr)
- , _queryHeapType(queryHeapType)
- , _currentIndex(0)
- , _queryHeapCount(queryHeapCount)
+bool QueryHeapDX12::Init(GPUDeviceDX12* device, GPUQueryType type, uint32 size)
{
- if (queryHeapType == D3D12_QUERY_HEAP_TYPE_OCCLUSION)
- {
- _resultSize = sizeof(uint64);
- _queryType = D3D12_QUERY_TYPE_OCCLUSION;
- }
- else if (queryHeapType == D3D12_QUERY_HEAP_TYPE_TIMESTAMP)
- {
- _resultSize = sizeof(uint64);
- _queryType = D3D12_QUERY_TYPE_TIMESTAMP;
- }
- else
- {
- MISSING_CODE("Not support D3D12 query heap type.");
- }
-}
-
-bool QueryHeapDX12::Init()
-{
- _resultData.Resize(_resultSize * _queryHeapCount);
-
// Create the query heap
- D3D12_QUERY_HEAP_DESC heapDesc;
- heapDesc.Type = _queryHeapType;
+ Type = type;
+ _device = device;
+ _queryHeapCount = size;
+ D3D12_QUERY_HEAP_DESC heapDesc = {};
heapDesc.Count = _queryHeapCount;
heapDesc.NodeMask = 0;
- HRESULT result = _device->GetDevice()->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&_queryHeap));
+ switch (type)
+ {
+ case GPUQueryType::Timer:
+ _resultSize = sizeof(uint64);
+ QueryType = D3D12_QUERY_TYPE_TIMESTAMP;
+ heapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
+ break;
+ case GPUQueryType::Occlusion:
+ _resultSize = sizeof(uint64);
+ QueryType = D3D12_QUERY_TYPE_OCCLUSION;
+ heapDesc.Type = D3D12_QUERY_HEAP_TYPE_OCCLUSION;
+ break;
+ case GPUQueryType::MAX:
+ return true;
+ }
+ _resultData.Resize(_resultSize * _queryHeapCount);
+ HRESULT result = _device->GetDevice()->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&QueryHeap));
LOG_DIRECTX_RESULT_WITH_RETURN(result, true);
- DX_SET_DEBUG_NAME(_queryHeap, "Query Heap");
+ DX_SET_DEBUG_NAME(QueryHeap, "Query Heap");
// Create the result buffer
D3D12_HEAP_PROPERTIES heapProperties;
@@ -77,8 +69,8 @@ bool QueryHeapDX12::Init()
void QueryHeapDX12::Destroy()
{
SAFE_RELEASE(_resultBuffer);
- SAFE_RELEASE(_queryHeap);
- _currentBatch.Clear();
+ SAFE_RELEASE(QueryHeap);
+ _currentBatch = QueryBatch();
_resultData.SetCapacity(0);
}
@@ -92,45 +84,36 @@ void QueryHeapDX12::EndQueryBatchAndResolveQueryData(GPUContextDX12* context)
_currentBatch.Open = false;
// Resolve the batch
- const int32 offset = _currentBatch.Start * _resultSize;
- context->GetCommandList()->ResolveQueryData(_queryHeap, _queryType, _currentBatch.Start, _currentBatch.Count, _resultBuffer, offset);
- _currentBatch.Sync = _device->GetCommandQueue()->GetSyncPoint();
+ const uint32 offset = _currentBatch.Start * _resultSize;
+ context->GetCommandList()->ResolveQueryData(QueryHeap, QueryType, _currentBatch.Start, _currentBatch.Count, _resultBuffer, offset);
+ const auto queue = _device->GetCommandQueue();
+ _currentBatch.Sync = queue->GetSyncPoint();
+
+ // Get GPU clock frequency for timer queries
+ if (Type == GPUQueryType::Timer)
+ {
+ VALIDATE_DIRECTX_CALL(queue->GetCommandQueue()->GetTimestampFrequency(&_currentBatch.TimestampFrequency));
+ }
// Begin a new query batch
_batches.Add(_currentBatch);
StartQueryBatch();
}
-void QueryHeapDX12::AllocQuery(GPUContextDX12* context, ElementHandle& handle)
+bool QueryHeapDX12::CanAlloc(int32 count) const
+{
+    // Only an open batch can take new elements
+    if (!_currentBatch.Open)
+        return false;
+
+    // The whole request has to fit before the end of the heap
+    return _currentIndex + count <= GetQueryHeapCount();
+}
+
+void QueryHeapDX12::Alloc(ElementHandle& handle)
{
ASSERT(_currentBatch.Open);
- // Check if need to start from the buffer head
- if (_currentIndex >= GetQueryHeapCount())
- {
- // We're in the middle of a batch, but we're at the end of the heap so split the batch in two
- EndQueryBatchAndResolveQueryData(context);
- }
-
// Allocate element into the current batch
handle = _currentIndex++;
_currentBatch.Count++;
}
-void QueryHeapDX12::BeginQuery(GPUContextDX12* context, ElementHandle& handle)
-{
- AllocQuery(context, handle);
-
- context->GetCommandList()->BeginQuery(_queryHeap, _queryType, handle);
-}
-
-void QueryHeapDX12::EndQuery(GPUContextDX12* context, ElementHandle& handle)
-{
- AllocQuery(context, handle);
-
- context->GetCommandList()->EndQuery(_queryHeap, _queryType, handle);
-}
-
bool QueryHeapDX12::IsReady(ElementHandle& handle)
{
// Current batch is not ready (not ended)
@@ -150,7 +133,7 @@ bool QueryHeapDX12::IsReady(ElementHandle& handle)
return true;
}
-void* QueryHeapDX12::ResolveQuery(ElementHandle& handle)
+void* QueryHeapDX12::Resolve(ElementHandle& handle, uint64* timestampFrequency)
{
// Prevent queries from the current batch
ASSERT(!_currentBatch.ContainsElement(handle));
@@ -192,10 +175,15 @@ void* QueryHeapDX12::ResolveQuery(ElementHandle& handle)
// All elements got its results so we can remove this batch
_batches.RemoveAt(i);
+ // Cache timestamps frequency for later
+ _timestampFrequency = batch.TimestampFrequency;
+
break;
}
}
+ if (timestampFrequency)
+ *timestampFrequency = _timestampFrequency;
return _resultData.Get() + handle * _resultSize;
}
@@ -204,7 +192,7 @@ void QueryHeapDX12::StartQueryBatch()
ASSERT(!_currentBatch.Open);
// Clear the current batch
- _currentBatch.Clear();
+ _currentBatch = QueryBatch();
// Loop active index on overflow
if (_currentIndex >= GetQueryHeapCount())
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/QueryHeapDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/QueryHeapDX12.h
index 7e68502e3..e8ab92a73 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX12/QueryHeapDX12.h
+++ b/Source/Engine/GraphicsDevice/DirectX/DX12/QueryHeapDX12.h
@@ -10,6 +10,31 @@ class GPUContextDX12;
class GPUBuffer;
#include "CommandQueueDX12.h"
+#include "Engine/Graphics/Enums.h"
+
+///
+/// GPU query ID packed into 64-bits.
+///
+// The Raw value is what GPUContextDX12::BeginQuery returns to the caller; the fields overlay the same 64 bits.
+struct GPUQueryDX12
+{
+ union
+ {
+ struct
+ {
+ // Query type (GPUQueryType stored as uint16).
+ uint16 Type;
+ // Index of the owning heap in GPUDeviceDX12::QueryHeaps.
+ uint16 Heap;
+ // Main heap element; for timer queries it receives the end timestamp.
+ uint16 Element;
+ // Timer queries only: heap element that receives the begin timestamp.
+ uint16 SecondaryElement;
+ };
+ // All fields viewed as a single 64-bit identifier (0 is treated as the invalid ID).
+ uint64 Raw;
+ };
+
+ // Gets the amount of heap elements a query of the given type occupies.
+ static int32 GetQueriesCount(GPUQueryType type)
+ {
+ // Timer queries need to know duration via GPU timer queries difference
+ return type == GPUQueryType::Timer ? 2 : 1;
+ }
+};
///
/// GPU queries heap for DirectX 12 backend.
@@ -17,14 +42,12 @@ class GPUBuffer;
class QueryHeapDX12
{
public:
-
///
/// The query element handle.
///
- typedef int32 ElementHandle;
+ typedef uint16 ElementHandle;
private:
-
struct QueryBatch
{
///
@@ -35,71 +58,54 @@ private:
///
/// The first element in the batch (inclusive).
///
- int32 Start = 0;
+ uint32 Start = 0;
///
/// The amount of elements added to this batch.
///
- int32 Count = 0;
+ uint32 Count = 0;
+
+ ///
+ /// The GPU clock frequency for timer queries.
+ ///
+ uint64 TimestampFrequency = 0;
///
/// Is the batch still open for more begin/end queries.
///
bool Open = false;
- ///
- /// Clears this batch.
- ///
- inline void Clear()
- {
- Sync = SyncPointDX12();
- Start = 0;
- Count = 0;
- Open = false;
- }
-
///
/// Checks if this query batch contains a given element contains the element.
///
/// The index of the element.
/// True if element is in this query, otherwise false.
- bool ContainsElement(int32 elementIndex) const
+ bool ContainsElement(uint32 elementIndex) const
{
return elementIndex >= Start && elementIndex < Start + Count;
}
};
private:
-
- GPUDeviceDX12* _device;
- ID3D12QueryHeap* _queryHeap;
- ID3D12Resource* _resultBuffer;
- D3D12_QUERY_TYPE _queryType;
- D3D12_QUERY_HEAP_TYPE _queryHeapType;
- int32 _currentIndex;
- int32 _resultSize;
- int32 _queryHeapCount;
+ GPUDeviceDX12* _device = nullptr;
+ ID3D12Resource* _resultBuffer = nullptr;
+ uint32 _currentIndex = 0;
+ uint32 _resultSize = 0;
+ uint32 _queryHeapCount = 0;
QueryBatch _currentBatch;
Array _batches;
Array _resultData;
+ uint64 _timestampFrequency;
public:
-
- ///
- /// Initializes a new instance of the class.
- ///
- /// The device.
- /// Type of the query heap.
- /// The query heap count.
- QueryHeapDX12(GPUDeviceDX12* device, const D3D12_QUERY_HEAP_TYPE& queryHeapType, int32 queryHeapCount);
-
-public:
-
///
/// Initializes this instance.
///
+ /// The device.
+ /// Type of the query heap.
+ /// The size of the heap.
/// True if failed, otherwise false.
- bool Init();
+ bool Init(GPUDeviceDX12* device, GPUQueryType type, uint32 size);
///
/// Destroys this instance.
@@ -107,12 +113,14 @@ public:
void Destroy();
public:
+ GPUQueryType Type;
+ ID3D12QueryHeap* QueryHeap = nullptr;
+ D3D12_QUERY_TYPE QueryType = D3D12_QUERY_TYPE_OCCLUSION;
///
/// Gets the query heap capacity.
///
- /// The queries count.
- FORCE_INLINE int32 GetQueryHeapCount() const
+ FORCE_INLINE uint32 GetQueryHeapCount() const
{
return _queryHeapCount;
}
@@ -120,8 +128,7 @@ public:
///
/// Gets the size of the result value (in bytes).
///
- /// The size of the query result value (in bytes).
- FORCE_INLINE int32 GetResultSize() const
+ FORCE_INLINE uint32 GetResultSize() const
{
return _resultSize;
}
@@ -129,40 +136,30 @@ public:
///
/// Gets the result buffer (CPU readable via Map/Unmap).
///
- /// The query results buffer.
FORCE_INLINE ID3D12Resource* GetResultBuffer() const
{
return _resultBuffer;
}
public:
-
///
/// Stops tracking the current batch of begin/end query calls that will be resolved together. This implicitly starts a new batch.
///
/// The context.
void EndQueryBatchAndResolveQueryData(GPUContextDX12* context);
+ ///
+ /// Checks if can alloc a new query (without rolling the existing batch).
+ ///
+ /// How many elements to allocate?
+ /// True if can alloc new query within the same batch.
+ bool CanAlloc(int32 count = 1) const;
+
///
/// Allocates the query heap element.
///
- /// The context.
/// The result handle.
- void AllocQuery(GPUContextDX12* context, ElementHandle& handle);
-
- ///
- /// Calls BeginQuery on command list for the given query heap slot.
- ///
- /// The context.
- /// The query handle.
- void BeginQuery(GPUContextDX12* context, ElementHandle& handle);
-
- ///
- /// Calls EndQuery on command list for the given query heap slot.
- ///
- /// The context.
- /// The query handle.
- void EndQuery(GPUContextDX12* context, ElementHandle& handle);
+ void Alloc(ElementHandle& handle);
///
/// Determines whether the specified query handle is ready to read data (command list has been executed by the GPU).
@@ -175,11 +172,11 @@ public:
/// Resolves the query (or skips if already resolved).
///
/// The result handle.
+ /// The optional pointer to GPU timestamps frequency value to store.
/// The pointer to the resolved query data.
- void* ResolveQuery(ElementHandle& handle);
+ void* Resolve(ElementHandle& handle, uint64* timestampFrequency = nullptr);
private:
-
///
/// Starts tracking a new batch of begin/end query calls that will be resolved together
///
diff --git a/Source/Engine/GraphicsDevice/Null/GPUContextNull.h b/Source/Engine/GraphicsDevice/Null/GPUContextNull.h
index 22786c157..a20861174 100644
--- a/Source/Engine/GraphicsDevice/Null/GPUContextNull.h
+++ b/Source/Engine/GraphicsDevice/Null/GPUContextNull.h
@@ -160,6 +160,15 @@ public:
{
}
+ // Null backend: no query is started; always returns the query ID 0.
+ uint64 BeginQuery(GPUQueryType type) override
+ {
+ return 0;
+ }
+
+ // Null backend: ending a query is a no-op.
+ void EndQuery(uint64 queryID) override
+ {
+ }
+
void SetViewport(const Viewport& viewport) override
{
}
diff --git a/Source/Engine/GraphicsDevice/Null/GPUDeviceNull.cpp b/Source/Engine/GraphicsDevice/Null/GPUDeviceNull.cpp
index a1582102f..3f9733d52 100644
--- a/Source/Engine/GraphicsDevice/Null/GPUDeviceNull.cpp
+++ b/Source/Engine/GraphicsDevice/Null/GPUDeviceNull.cpp
@@ -144,6 +144,11 @@ void GPUDeviceNull::WaitForGPU()
{
}
+// Null backend: never has query data. Always returns false and leaves 'result' untouched (even when 'wait' is set).
+bool GPUDeviceNull::GetQueryResult(uint64 queryID, uint64& result, bool wait)
+{
+ return false;
+}
+
GPUTexture* GPUDeviceNull::CreateTexture(const StringView& name)
{
PROFILE_MEM(GraphicsTextures);
diff --git a/Source/Engine/GraphicsDevice/Null/GPUDeviceNull.h b/Source/Engine/GraphicsDevice/Null/GPUDeviceNull.h
index 4e2d2e93e..29a31a7dd 100644
--- a/Source/Engine/GraphicsDevice/Null/GPUDeviceNull.h
+++ b/Source/Engine/GraphicsDevice/Null/GPUDeviceNull.h
@@ -20,18 +20,15 @@ class GPUDeviceNull : public GPUDevice
friend GPUSwapChainNull;
private:
-
GPUContextNull* _mainContext;
GPUAdapterNull* _adapter;
public:
-
static GPUDevice* Create();
GPUDeviceNull();
~GPUDeviceNull();
public:
-
// [GPUDevice]
GPUContext* GetMainContext() override;
GPUAdapter* GetAdapter() const override;
@@ -41,6 +38,7 @@ public:
void Draw() override;
void Dispose() override;
void WaitForGPU() override;
+ bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) override;
GPUTexture* CreateTexture(const StringView& name) override;
GPUShader* CreateShader(const StringView& name) override;
GPUPipelineState* CreatePipelineState() override;
diff --git a/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.cpp
index 36eacccf9..889265187 100644
--- a/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.cpp
+++ b/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.cpp
@@ -6,7 +6,7 @@
#include "RenderToolsVulkan.h"
#include "QueueVulkan.h"
#include "GPUContextVulkan.h"
-#if VULKAN_USE_QUERIES
+#if VULKAN_USE_TIMER_QUERIES
#include "GPUTimerQueryVulkan.h"
#endif
#include "DescriptorSetVulkan.h"
@@ -243,6 +243,7 @@ void CmdBufferPoolVulkan::RefreshFenceStatus(const CmdBufferVulkan* skipCmdBuffe
CmdBufferManagerVulkan::CmdBufferManagerVulkan(GPUDeviceVulkan* device, GPUContextVulkan* context)
: _device(device)
+ , _context(context)
, _pool(device)
, _queue(context->GetQueue())
, _activeCmdBuffer(nullptr)
@@ -259,12 +260,28 @@ void CmdBufferManagerVulkan::SubmitActiveCmdBuffer(SemaphoreVulkan* signalSemaph
if (_activeCmdBuffer->IsInsideRenderPass())
_activeCmdBuffer->EndRenderPass();
-#if VULKAN_USE_QUERIES
- // Pause all active queries
- for (int32 i = 0; i < _queriesInProgress.Count(); i++)
+#if VULKAN_USE_TIMER_QUERIES && GPU_VULKAN_PAUSE_QUERIES
+ // Pause all active timer queries
+ auto queries = _activeTimerQueries.Get();
+#if GPU_VULKAN_QUERY_NEW
+ for (int32 i = 0; i < _activeTimerQueries.Count(); i++)
{
- _queriesInProgress.Get()[i]->Interrupt(_activeCmdBuffer);
+ GPUQueryVulkan query;
+ query.Raw = queries[i];
+
+ // End active query to get time from start until submission
+ auto pool = _device->QueryPools[query.PoolIndex];
+ vkCmdWriteTimestamp(_activeCmdBuffer->GetHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, pool->GetHandle(), query.SecondQueryIndex);
+ pool->MarkQueryAsStarted(query.SecondQueryIndex);
+ // TODO: somehow handle ending this query properly by stopping split query instead
+ //_context->EndQuery(query.Raw);
+
+ // TODO: reimplement timer queries pause/resume to be more exact?
}
+#else
+    // Legacy path: interrupt every active timer query (indexed by i) before the command buffer gets ended
+    for (int32 i = 0; i < _activeTimerQueries.Count(); i++)
+        queries[i]->Interrupt(_activeCmdBuffer);
+#endif
#endif
_activeCmdBuffer->End();
@@ -317,27 +334,37 @@ void CmdBufferManagerVulkan::PrepareForNewActiveCommandBuffer()
_activeCmdBuffer->Begin();
-#if VULKAN_USE_QUERIES
- // Resume any paused queries with the new command buffer
- for (int32 i = 0; i < _queriesInProgress.Count(); i++)
+#if VULKAN_USE_TIMER_QUERIES && GPU_VULKAN_PAUSE_QUERIES
+ // Resume any paused timer queries with the new command buffer
+ auto queries = _activeTimerQueries.Get();
+#if GPU_VULKAN_QUERY_NEW
+ for (int32 i = 0; i < _activeTimerQueries.Count(); i++)
{
- _queriesInProgress.Get()[i]->Resume(_activeCmdBuffer);
+ GPUQueryVulkan query;
+ query.Raw = queries[i];
+ //_activeTimerQueries.Get()[i]->Resume(_activeCmdBuffer);
+ }
+#else
+    // Legacy path: resume every paused timer query (indexed by i) on the newly begun command buffer
+    for (int32 i = 0; i < _activeTimerQueries.Count(); i++)
+    {
+        queries[i]->Resume(_activeCmdBuffer);
+    }
#endif
+#endif
}
-void CmdBufferManagerVulkan::OnQueryBegin(GPUTimerQueryVulkan* query)
+// Guard must match the one around the declarations (and _activeTimerQueries) in CmdBufferVulkan.h,
+// otherwise the legacy query path fails to link and the disabled-timers configuration fails to compile.
+#if VULKAN_USE_TIMER_QUERIES && GPU_VULKAN_PAUSE_QUERIES
+
+// Registers a timer query as active so it can be paused/resumed when the active command buffer is submitted.
+void CmdBufferManagerVulkan::OnTimerQueryBegin(QueryType query)
{
-#if VULKAN_USE_QUERIES
- _queriesInProgress.Add(query);
-#endif
+    _activeTimerQueries.Add(query);
}
-void CmdBufferManagerVulkan::OnQueryEnd(GPUTimerQueryVulkan* query)
+// Removes a timer query from the active list once it has been ended.
+void CmdBufferManagerVulkan::OnTimerQueryEnd(QueryType query)
{
-#if VULKAN_USE_QUERIES
- _queriesInProgress.Remove(query);
-#endif
+    _activeTimerQueries.Remove(query);
}
#endif
+
+#endif
diff --git a/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.h
index 7cb3ee104..7ac1ef885 100644
--- a/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.h
+++ b/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.h
@@ -168,10 +168,18 @@ class CmdBufferManagerVulkan
{
private:
GPUDeviceVulkan* _device;
+ GPUContextVulkan* _context;
CmdBufferPoolVulkan _pool;
QueueVulkan* _queue;
CmdBufferVulkan* _activeCmdBuffer;
- Array _queriesInProgress;
+#if VULKAN_USE_TIMER_QUERIES && GPU_VULKAN_PAUSE_QUERIES
+#if GPU_VULKAN_QUERY_NEW
+ typedef uint64 QueryType;
+#else
+ typedef GPUTimerQueryVulkan* QueryType;
+#endif
+ Array _activeTimerQueries;
+#endif
public:
CmdBufferManagerVulkan(GPUDeviceVulkan* device, GPUContextVulkan* context);
@@ -192,11 +200,6 @@ public:
return _activeCmdBuffer != nullptr;
}
- FORCE_INLINE bool HasQueriesInProgress() const
- {
- return _queriesInProgress.Count() != 0;
- }
-
FORCE_INLINE CmdBufferVulkan* GetCmdBuffer()
{
if (!_activeCmdBuffer)
@@ -207,14 +210,16 @@ public:
public:
void SubmitActiveCmdBuffer(SemaphoreVulkan* signalSemaphore = nullptr);
void WaitForCmdBuffer(CmdBufferVulkan* cmdBuffer, float timeInSecondsToWait = 1.0f);
- void RefreshFenceStatus(CmdBufferVulkan* skipCmdBuffer = nullptr)
+ void RefreshFenceStatus(const CmdBufferVulkan* skipCmdBuffer = nullptr)
{
_pool.RefreshFenceStatus(skipCmdBuffer);
}
void PrepareForNewActiveCommandBuffer();
- void OnQueryBegin(GPUTimerQueryVulkan* query);
- void OnQueryEnd(GPUTimerQueryVulkan* query);
+#if VULKAN_USE_TIMER_QUERIES && GPU_VULKAN_PAUSE_QUERIES
+ void OnTimerQueryBegin(QueryType query);
+ void OnTimerQueryEnd(QueryType query);
+#endif
};
#endif
diff --git a/Source/Engine/GraphicsDevice/Vulkan/Config.h b/Source/Engine/GraphicsDevice/Vulkan/Config.h
index 1f30c301a..7a4129a5c 100644
--- a/Source/Engine/GraphicsDevice/Vulkan/Config.h
+++ b/Source/Engine/GraphicsDevice/Vulkan/Config.h
@@ -45,8 +45,14 @@
#endif
#endif
-#ifndef VULKAN_USE_QUERIES
-#define VULKAN_USE_QUERIES 1
+#ifndef VULKAN_USE_TIMER_QUERIES
+#define VULKAN_USE_TIMER_QUERIES 1
#endif
+// Toggles GPUTimerQueryVulkan to use BeginQuery/EndQuery via GPUContext rather than the old custom implementation
+#define GPU_VULKAN_QUERY_NEW 1
+
+// Toggles pausing and resuming all GPU timer queries when command buffer is being flushed (for more exact timings)
+#define GPU_VULKAN_PAUSE_QUERIES 0
+
#endif
diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp
index 979ccc0f8..cd11ef534 100644
--- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp
+++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp
@@ -1300,6 +1300,72 @@ void GPUContextVulkan::DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, ui
RENDER_STAT_DRAW_CALL(0, 0);
}
+// Begins a GPU query of the given type on the active command buffer.
+// Returns the packed query ID (see GPUQueryVulkan), or 0 when the query could not be started
+// (timer queries unsupported by the device, or no query slot could be acquired).
+uint64 GPUContextVulkan::BeginQuery(GPUQueryType type)
+{
+    // Check if timer queries are supported
+    if (type == GPUQueryType::Timer && _device->PhysicalDeviceLimits.timestampComputeAndGraphics != VK_TRUE)
+        return 0;
+
+    // Allocate query
+    auto poolIndex = _device->GetOrCreateQueryPool(type);
+    auto pool = _device->QueryPools[poolIndex];
+    uint32 index = 0;
+    const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
+    if (!pool->AcquireQuery(cmdBuffer, index))
+        return 0;
+    GPUQueryVulkan query;
+    query.PoolIndex = (uint16)poolIndex;
+    query.QueryIndex = (uint16)index;
+    query.SecondQueryIndex = 0;
+    query.Dummy = 1; // Ensure Raw is never 0, even for the first query
+
+    // Begin query
+    switch (type)
+    {
+    case GPUQueryType::Timer:
+    {
+        // Timer queries need 2 slots (begin + end)
+        uint32 endIndex = 0;
+        if (!pool->AcquireQuery(cmdBuffer, endIndex))
+        {
+            // The pool had room for only one slot: give the first one back instead of reusing
+            // a stale index for the end timestamp (which would alias the begin slot)
+            pool->ReleaseQuery(query.QueryIndex);
+            return 0;
+        }
+        query.SecondQueryIndex = (uint16)endIndex;
+
+        vkCmdWriteTimestamp(cmdBuffer->GetHandle(), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, pool->GetHandle(), query.QueryIndex);
+#if GPU_VULKAN_PAUSE_QUERIES
+        _cmdBufferManager->OnTimerQueryBegin(query.Raw);
+#endif
+        break;
+    }
+    case GPUQueryType::Occlusion:
+        vkCmdBeginQuery(cmdBuffer->GetHandle(), pool->GetHandle(), query.QueryIndex, 0);
+        break;
+    default:
+        break;
+    }
+    pool->MarkQueryAsStarted(query.QueryIndex);
+
+    return query.Raw;
+}
+
+// Ends a GPU query previously started with BeginQuery. A zero ID (query never started) is ignored.
+void GPUContextVulkan::EndQuery(uint64 queryID)
+{
+    if (queryID == 0)
+        return;
+
+    // Unpack the ID to locate the pool and the slots used by this query
+    GPUQueryVulkan unpacked;
+    unpacked.Raw = queryID;
+    auto queryPool = _device->QueryPools[unpacked.PoolIndex];
+    const auto commands = _cmdBufferManager->GetCmdBuffer();
+
+    // Record the end command matching the type of the pool the query lives in
+    const GPUQueryType poolType = queryPool->Type;
+    if (poolType == GPUQueryType::Timer)
+    {
+        // The end timestamp goes into the second slot of the timer query
+        vkCmdWriteTimestamp(commands->GetHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, queryPool->GetHandle(), unpacked.SecondQueryIndex);
+        queryPool->MarkQueryAsStarted(unpacked.SecondQueryIndex);
+#if GPU_VULKAN_PAUSE_QUERIES
+        _cmdBufferManager->OnTimerQueryEnd(unpacked.Raw);
+#endif
+    }
+    else if (poolType == GPUQueryType::Occlusion)
+    {
+        vkCmdEndQuery(commands->GetHandle(), queryPool->GetHandle(), unpacked.QueryIndex);
+    }
+}
+
void GPUContextVulkan::SetViewport(const Viewport& viewport)
{
vkCmdSetViewport(_cmdBufferManager->GetCmdBuffer()->GetHandle(), 0, 1, (VkViewport*)&viewport);
diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h
index fa94aa139..f77e42eb7 100644
--- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h
+++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h
@@ -189,6 +189,8 @@ public:
void DrawIndexedInstanced(uint32 indicesCount, uint32 instanceCount, int32 startInstance, int32 startVertex, int32 startIndex) override;
void DrawInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override;
void DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override;
+ uint64 BeginQuery(GPUQueryType type) override;
+ void EndQuery(uint64 queryID) override;
void SetViewport(const Viewport& viewport) override;
void SetScissor(const Rectangle& scissorRect) override;
GPUPipelineState* GetState() const override;
diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp
index bc33d9d86..03467defd 100644
--- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp
+++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.cpp
@@ -627,14 +627,14 @@ RenderPassVulkan::~RenderPassVulkan()
Device->DeferredDeletionQueue.EnqueueResource(DeferredDeletionQueueVulkan::Type::RenderPass, Handle);
}
-QueryPoolVulkan::QueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, VkQueryType type)
+QueryPoolVulkan::QueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, GPUQueryType type)
: _device(device)
, _handle(VK_NULL_HANDLE)
- , _type(type)
+ , Type(type)
{
VkQueryPoolCreateInfo createInfo;
RenderToolsVulkan::ZeroStruct(createInfo, VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO);
- createInfo.queryType = type;
+ createInfo.queryType = type == GPUQueryType::Occlusion ? VK_QUERY_TYPE_OCCLUSION : VK_QUERY_TYPE_TIMESTAMP;
createInfo.queryCount = capacity;
VALIDATE_VULKAN_RESULT(vkCreateQueryPool(device->Device, &createInfo, nullptr, &_handle));
@@ -667,7 +667,7 @@ void QueryPoolVulkan::Reset(CmdBufferVulkan* cmdBuffer)
#endif
-BufferedQueryPoolVulkan::BufferedQueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, VkQueryType type)
+BufferedQueryPoolVulkan::BufferedQueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, GPUQueryType type)
: QueryPoolVulkan(device, capacity, type)
, _lastBeginIndex(0)
{
@@ -720,6 +720,16 @@ void BufferedQueryPoolVulkan::ReleaseQuery(uint32 queryIndex)
_lastBeginIndex = (uint32)queryIndex;
}
}
+ if (_usedQueryBits[word] == 0)
+ {
+ // Check if pool got empty and reset the pointer back to start
+ for (int32 wordIndex = 0; wordIndex < _usedQueryBits.Count(); wordIndex++)
+ {
+ if (_usedQueryBits[wordIndex])
+ return;
+ }
+ _lastBeginIndex = 0;
+ }
}
void BufferedQueryPoolVulkan::MarkQueryAsStarted(uint32 queryIndex)
@@ -729,7 +739,7 @@ void BufferedQueryPoolVulkan::MarkQueryAsStarted(uint32 queryIndex)
_startedQueryBits[word] = _startedQueryBits[word] | bit;
}
-bool BufferedQueryPoolVulkan::GetResults(GPUContextVulkan* context, uint32 index, uint64& result)
+bool BufferedQueryPoolVulkan::GetResults(uint32 index, uint64& result)
{
const uint64 bit = (uint64)(index % 64);
const uint64 bitMask = (uint64)1 << bit;
@@ -1228,22 +1238,20 @@ GPUDeviceVulkan::~GPUDeviceVulkan()
GPUDeviceVulkan::Dispose();
}
-BufferedQueryPoolVulkan* GPUDeviceVulkan::FindAvailableQueryPool(VkQueryType queryType)
+// Returns the index (into QueryPools) of a pool of the given type that still has room, creating a new pool when none does.
+int32 GPUDeviceVulkan::GetOrCreateQueryPool(GPUQueryType type)
{
- auto& pools = queryType == VK_QUERY_TYPE_OCCLUSION ? OcclusionQueryPools : TimestampQueryPools;
-
- // Try to use pool with available space inside
- for (int32 i = 0; i < pools.Count(); i++)
+    // Try to use an existing pool of the matching type with available space inside
+    auto pools = QueryPools.Get();
+    for (int32 i = 0; i < QueryPools.Count(); i++)
{
- auto pool = pools.Get()[i];
- if (pool->HasRoom())
- return pool;
+        auto pool = pools[i];
+        if (pool->Type == type && pool->HasRoom())
+            return i;
}
- // Create new pool
- const auto pool = New(this, queryType == VK_QUERY_TYPE_OCCLUSION ? 4096 : 1024, queryType);
- pools.Add(pool);
- return pool;
+    // Create a new pool (occlusion pools are sized larger than timer pools) and return its index
+    PROFILE_CPU_NAMED("Create Query Pool");
+    auto pool = New(this, type == GPUQueryType::Occlusion ? 4096 : 1024, type);
+    QueryPools.Add(pool);
+    return QueryPools.Count() - 1;
}
RenderPassVulkan* GPUDeviceVulkan::GetOrCreateRenderPass(RenderTargetLayoutVulkan& layout)
@@ -1752,6 +1760,10 @@ bool GPUDeviceVulkan::Init()
limits.MaximumTexture3DSize = PhysicalDeviceLimits.maxImageDimension3D;
limits.MaximumTextureCubeSize = PhysicalDeviceLimits.maxImageDimensionCube;
limits.MaximumSamplerAnisotropy = PhysicalDeviceLimits.maxSamplerAnisotropy;
+ if (PhysicalDeviceLimits.timestampComputeAndGraphics != VK_TRUE)
+ {
+ LOG(Warning, "Timer Queries are unsupported by this device");
+ }
for (int32 i = 0; i < static_cast(PixelFormat::MAX); i++)
{
@@ -1982,6 +1994,16 @@ void GPUDeviceVulkan::DrawBegin()
// Base
GPUDevice::DrawBegin();
+ // Put back used queries to the pool
+ for (auto& query : QueriesToRelease)
+ {
+ auto pool = QueryPools[query.PoolIndex];
+ pool->ReleaseQuery(query.QueryIndex);
+ if (pool->Type == GPUQueryType::Timer)
+ pool->ReleaseQuery(query.SecondQueryIndex);
+ }
+ QueriesToRelease.Clear();
+
// Flush resources
DeferredDeletionQueue.ReleaseResources();
DescriptorPoolsManager->GC();
@@ -2022,8 +2044,7 @@ void GPUDeviceVulkan::Dispose()
_layouts.ClearDelete();
HelperResources.Dispose();
UploadBuffer.Dispose();
- TimestampQueryPools.ClearDelete();
- OcclusionQueryPools.ClearDelete();
+ QueryPools.ClearDelete();
SAFE_DELETE_GPU_RESOURCE(UniformBufferUploader);
Delete(DescriptorPoolsManager);
SAFE_DELETE(MainContext);
@@ -2084,6 +2105,61 @@ void GPUDeviceVulkan::WaitForGPU()
}
}
+// Reads back the result of a GPU query.
+// queryID: packed query ID returned by GPUContextVulkan::BeginQuery (0 means no query).
+// result: receives the elapsed time in microseconds for timer queries, or the raw pool value for occlusion queries.
+// wait: when true, yields and retries until the data is available.
+// Returns true if the result was read; the query is then queued to be released on the next DrawBegin.
+bool GPUDeviceVulkan::GetQueryResult(uint64 queryID, uint64& result, bool wait)
+{
+    if (!queryID)
+        return false;
+    GPUQueryVulkan query;
+    query.Raw = queryID;
+    auto pool = QueryPools[query.PoolIndex];
+
+    bool hasData = false;
+    while (true)
+    {
+        switch (pool->Type)
+        {
+        case GPUQueryType::Timer:
+        {
+            uint64 resultSecondary = 0;
+            hasData = pool->GetResults(query.QueryIndex, result) && pool->GetResults(query.SecondQueryIndex, resultSecondary);
+#if VULKAN_USE_TIMER_QUERIES && GPU_VULKAN_PAUSE_QUERIES
+            if (hasData)
+            {
+                // Check if dependent queries have completed (timer queries can be split when active command buffer get submitted)
+                // TODO: impl this
+            }
+#endif
+            if (hasData)
+            {
+                if (resultSecondary >= result)
+                {
+                    // Convert GPU timestamps to nanoseconds and then to microseconds
+                    double nanoseconds = double(resultSecondary - result) * double(PhysicalDeviceLimits.timestampPeriod);
+                    result = (uint64)(nanoseconds * 0.001);
+                }
+                else
+                    result = 0;
+            }
+            break;
+        }
+        case GPUQueryType::Occlusion:
+            hasData = pool->GetResults(query.QueryIndex, result);
+            break;
+        default:
+            // Unknown pool type: there is nothing to read and retrying would never succeed
+            return false;
+        }
+
+        if (hasData || !wait)
+            break;
+
+        // Wait until data is ready
+        Platform::Yield();
+    }
+
+    if (hasData)
+    {
+        // Auto-release query on the next frame
+        QueriesToRelease.Add(query);
+    }
+
+    return hasData;
+}
+
GPUTexture* GPUDeviceVulkan::CreateTexture(const StringView& name)
{
PROFILE_MEM(GraphicsTextures);
diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h
index 09fa93f3e..7dd8ef0ab 100644
--- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h
+++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.h
@@ -28,6 +28,24 @@ class GPUDeviceVulkan;
class UniformBufferUploaderVulkan;
class DescriptorPoolsManagerVulkan;
+///
+/// GPU query ID packed into 64-bits.
+///
+// The four 16-bit fields overlay Raw, which is the opaque uint64 handed out by GPUContextVulkan::BeginQuery.
+struct GPUQueryVulkan
+{
+ union
+ {
+ struct
+ {
+ // Index of the owning pool inside GPUDeviceVulkan::QueryPools.
+ uint16 PoolIndex;
+ // Slot of the query inside the pool (the begin timestamp slot for timer queries).
+ uint16 QueryIndex;
+ // Slot of the end timestamp for timer queries; set to 0 for other query types.
+ uint16 SecondQueryIndex;
+ // Set to 1 by BeginQuery so that Raw is never 0 (0 is used as the 'no query' ID).
+ uint16 Dummy;
+ };
+ // All fields viewed as a single 64-bit query ID.
+ uint64 Raw;
+ };
+};
+
class SemaphoreVulkan
{
private:
@@ -261,16 +279,17 @@ protected:
GPUDeviceVulkan* _device;
VkQueryPool _handle;
- const VkQueryType _type;
#if VULKAN_RESET_QUERY_POOLS
Array _resetRanges;
#endif
public:
- QueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, VkQueryType type);
+ QueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, GPUQueryType type);
~QueryPoolVulkan();
public:
+ const GPUQueryType Type;
+
inline VkQueryPool GetHandle() const
{
return _handle;
@@ -294,11 +313,11 @@ private:
int32 _lastBeginIndex;
public:
- BufferedQueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, VkQueryType type);
+ BufferedQueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, GPUQueryType type);
bool AcquireQuery(CmdBufferVulkan* cmdBuffer, uint32& resultIndex);
void ReleaseQuery(uint32 queryIndex);
void MarkQueryAsStarted(uint32 queryIndex);
- bool GetResults(GPUContextVulkan* context, uint32 index, uint64& result);
+ bool GetResults(uint32 index, uint64& result);
bool HasRoom() const;
};
@@ -498,14 +517,13 @@ public:
VkPhysicalDeviceFeatures PhysicalDeviceFeatures;
VkPhysicalDeviceVulkan12Features PhysicalDeviceFeatures12;
- Array TimestampQueryPools;
- Array OcclusionQueryPools;
-
+ Array QueryPools;
+ Array QueriesToRelease;
#if VULKAN_RESET_QUERY_POOLS
Array QueriesToReset;
#endif
- BufferedQueryPoolVulkan* FindAvailableQueryPool(VkQueryType queryType);
+ int32 GetOrCreateQueryPool(GPUQueryType type);
RenderPassVulkan* GetOrCreateRenderPass(RenderTargetLayoutVulkan& layout);
FramebufferVulkan* GetOrCreateFramebuffer(FramebufferVulkan::Key& key, VkExtent2D& extent, uint32 layers);
PipelineLayoutVulkan* GetOrCreateLayout(DescriptorSetLayoutInfoVulkan& key);
@@ -553,6 +571,7 @@ public:
void DrawBegin() override;
void Dispose() override;
void WaitForGPU() override;
+ bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) override;
GPUTexture* CreateTexture(const StringView& name) override;
GPUShader* CreateShader(const StringView& name) override;
GPUPipelineState* CreatePipelineState() override;
diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.cpp
index 2dd3b07d5..ecd0dbdfe 100644
--- a/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.cpp
+++ b/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.cpp
@@ -11,6 +11,78 @@ GPUTimerQueryVulkan::GPUTimerQueryVulkan(GPUDeviceVulkan* device)
{
}
+#if !VULKAN_USE_TIMER_QUERIES
+
+// Stub implementation compiled when VULKAN_USE_TIMER_QUERIES is 0: there is no GPU state to release.
+void GPUTimerQueryVulkan::OnReleaseGPU()
+{
+}
+
+// Stub: starting a measurement does nothing.
+void GPUTimerQueryVulkan::Begin()
+{
+}
+
+// Stub: ending a measurement does nothing.
+void GPUTimerQueryVulkan::End()
+{
+}
+
+// Stub: a result is always reported as available so callers never wait.
+bool GPUTimerQueryVulkan::HasResult()
+{
+ return true;
+}
+
+// Stub: the measured time is always 0.
+float GPUTimerQueryVulkan::GetResult()
+{
+ return 0;
+}
+
+#elif GPU_VULKAN_QUERY_NEW
+
+// Resets the timer state when GPU resources are released.
+void GPUTimerQueryVulkan::OnReleaseGPU()
+{
+    _hasResult = false;
+    _endCalled = false;
+    _timeDelta = 0.0f;
+    // Drop the stale query ID: it indexes device query pools which are destroyed on device dispose,
+    // and 0 is treated as 'no query' by EndQuery/GetQueryResult
+    _query = 0;
+}
+
+// Starts a new time measurement on the main GPU context.
+void GPUTimerQueryVulkan::Begin()
+{
+    // Forget the state of any previous measurement
+    _endCalled = false;
+    _hasResult = false;
+
+    // Open the timer query and remember its ID for End/HasResult/GetResult
+    _query = _device->GetMainContext()->BeginQuery(GPUQueryType::Timer);
+}
+
+// Ends the time measurement. Repeated calls after the first one are ignored.
+void GPUTimerQueryVulkan::End()
+{
+    if (!_endCalled)
+    {
+        // Close the query on the main context and mark the measurement as finished
+        _device->GetMainContext()->EndQuery(_query);
+        _endCalled = true;
+    }
+}
+
+// Checks (without blocking) whether the measured time is available.
+// Returns false until End has been called; once the result is read it is cached for GetResult.
+bool GPUTimerQueryVulkan::HasResult()
+{
+    if (!_endCalled)
+        return false;
+    if (_hasResult)
+        return true;
+    if (!_query)
+    {
+        // No query was started (BeginQuery returned 0), so no data will ever arrive: report a zero result
+        _timeDelta = 0.0f;
+        _hasResult = true;
+        return true;
+    }
+    uint64 result;
+    if (!_device->GetQueryResult(_query, result, false))
+        return false;
+
+    // Cache the value (microseconds -> milliseconds, same conversion as GetResult) so the query is read,
+    // and therefore queued for release by the device, only once
+    _timeDelta = (float)((double)result / 1000.0);
+    _hasResult = true;
+    return true;
+}
+
+// Returns the measured time, waiting for the GPU data if it has not been read yet.
+// The query value is divided by 1000 before being returned; 0 is returned when the query cannot be read.
+float GPUTimerQueryVulkan::GetResult()
+{
+    if (!_hasResult)
+    {
+        // Blocking read of the query value, then cache it for subsequent calls
+        uint64 queryValue;
+        const bool ok = _device->GetQueryResult(_query, queryValue, true);
+        if (ok)
+            _timeDelta = (float)((double)queryValue / 1000.0);
+        else
+            _timeDelta = 0.0f;
+        _hasResult = true;
+    }
+    return _timeDelta;
+}
+
+#else
+
void GPUTimerQueryVulkan::Interrupt(CmdBufferVulkan* cmdBuffer)
{
if (!_interrupted)
@@ -38,8 +110,7 @@ bool GPUTimerQueryVulkan::GetResult(Query& query)
{
if (query.Pool)
{
- const auto context = (GPUContextVulkan*)_device->GetMainContext();
- if (query.Pool->GetResults(context, query.Index, query.Result))
+ if (query.Pool->GetResults(query.Index, query.Result))
{
// Release query
query.Pool->ReleaseQuery(query.Index);
@@ -58,7 +129,7 @@ bool GPUTimerQueryVulkan::GetResult(Query& query)
void GPUTimerQueryVulkan::WriteTimestamp(CmdBufferVulkan* cmdBuffer, Query& query, VkPipelineStageFlagBits stage) const
{
- auto pool = _device->FindAvailableQueryPool(VK_QUERY_TYPE_TIMESTAMP);
+ auto pool = _device->QueryPools[_device->GetOrCreateQueryPool(GPUQueryType::Timer)];
uint32 index;
if (pool->AcquireQuery(cmdBuffer, index))
{
@@ -76,7 +147,6 @@ void GPUTimerQueryVulkan::WriteTimestamp(CmdBufferVulkan* cmdBuffer, Query& quer
bool GPUTimerQueryVulkan::TryGetResult()
{
-#if VULKAN_USE_QUERIES
// Try get queries value (if not already)
for (int32 i = 0; i < _queries.Count(); i++)
{
@@ -115,20 +185,12 @@ bool GPUTimerQueryVulkan::TryGetResult()
e.End.Pool->ReleaseQuery(e.End.Index);
}
_queries.Clear();
-#else
- _timeDelta = 0.0f;
- _hasResult = true;
-#endif
return true;
}
bool GPUTimerQueryVulkan::UseQueries()
{
-#if VULKAN_USE_QUERIES
return _device->PhysicalDeviceLimits.timestampComputeAndGraphics == VK_TRUE;
-#else
- return false;
-#endif
}
void GPUTimerQueryVulkan::OnReleaseGPU()
@@ -150,7 +212,6 @@ void GPUTimerQueryVulkan::OnReleaseGPU()
void GPUTimerQueryVulkan::Begin()
{
-#if VULKAN_USE_QUERIES
if (UseQueries())
{
const auto context = (GPUContextVulkan*)_device->GetMainContext();
@@ -162,12 +223,11 @@ void GPUTimerQueryVulkan::Begin()
_queryIndex = 0;
_interrupted = false;
WriteTimestamp(cmdBuffer, e.Begin, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- context->GetCmdBufferManager()->OnQueryBegin(this);
+ context->GetCmdBufferManager()->OnTimerQueryBegin(this);
ASSERT(_queries.IsEmpty());
_queries.Add(e);
}
-#endif
_hasResult = false;
_endCalled = false;
@@ -178,7 +238,6 @@ void GPUTimerQueryVulkan::End()
if (_endCalled)
return;
-#if VULKAN_USE_QUERIES
if (UseQueries())
{
const auto context = (GPUContextVulkan*)_device->GetMainContext();
@@ -188,9 +247,8 @@ void GPUTimerQueryVulkan::End()
{
WriteTimestamp(cmdBuffer, _queries[_queryIndex].End, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
}
- context->GetCmdBufferManager()->OnQueryEnd(this);
+ context->GetCmdBufferManager()->OnTimerQueryEnd(this);
}
-#endif
_endCalled = true;
}
@@ -213,3 +271,5 @@ float GPUTimerQueryVulkan::GetResult()
}
#endif
+
+#endif
diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.h
index 18a56cac6..07a263fe3 100644
--- a/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.h
+++ b/Source/Engine/GraphicsDevice/Vulkan/GPUTimerQueryVulkan.h
@@ -13,6 +13,13 @@
class GPUTimerQueryVulkan : public GPUResourceVulkan
{
private:
+#if !VULKAN_USE_TIMER_QUERIES
+#elif GPU_VULKAN_QUERY_NEW
+ bool _hasResult = false;
+ bool _endCalled = false;
+ float _timeDelta = 0.0f;
+ uint64 _query = 0;
+#else
struct Query
{
BufferedQueryPoolVulkan* Pool;
@@ -32,6 +39,7 @@ private:
float _timeDelta = 0.0f;
int32 _queryIndex;
Array> _queries;
+#endif
public:
///
@@ -40,6 +48,7 @@ public:
/// The graphics device.
GPUTimerQueryVulkan(GPUDeviceVulkan* device);
+#if !GPU_VULKAN_QUERY_NEW
public:
///
/// Interrupts an in-progress query, allowing the command buffer to submitted. Interrupted queries must be resumed using Resume().
@@ -58,6 +67,7 @@ private:
void WriteTimestamp(CmdBufferVulkan* cmdBuffer, Query& query, VkPipelineStageFlagBits stage) const;
bool TryGetResult();
bool UseQueries();
+#endif
public:
// [GPUTimerQuery]
diff --git a/Source/Engine/GraphicsDevice/Vulkan/Mac/MacVulkanPlatform.h b/Source/Engine/GraphicsDevice/Vulkan/Mac/MacVulkanPlatform.h
index 880749b72..05ed07792 100644
--- a/Source/Engine/GraphicsDevice/Vulkan/Mac/MacVulkanPlatform.h
+++ b/Source/Engine/GraphicsDevice/Vulkan/Mac/MacVulkanPlatform.h
@@ -9,7 +9,7 @@
#define VULKAN_BACK_BUFFERS_COUNT 3
// General/Validation Error:0 VK_ERROR_INITIALIZATION_FAILED: Could not create MTLCounterSampleBuffer for query pool of type VK_QUERY_TYPE_TIMESTAMP. Reverting to emulated behavior. (Error code 0): Cannot allocate sample buffer
-#define VULKAN_USE_QUERIES 0
+#define VULKAN_USE_TIMER_QUERIES 0
///
/// The implementation for the Vulkan API support for Mac platform.
diff --git a/Source/Engine/GraphicsDevice/Vulkan/iOS/iOSVulkanPlatform.h b/Source/Engine/GraphicsDevice/Vulkan/iOS/iOSVulkanPlatform.h
index db27c76cd..8db71ec6e 100644
--- a/Source/Engine/GraphicsDevice/Vulkan/iOS/iOSVulkanPlatform.h
+++ b/Source/Engine/GraphicsDevice/Vulkan/iOS/iOSVulkanPlatform.h
@@ -9,7 +9,7 @@
#define VULKAN_BACK_BUFFERS_COUNT 3
// General/Validation Error:0 VK_ERROR_INITIALIZATION_FAILED: Could not create MTLCounterSampleBuffer for query pool of type VK_QUERY_TYPE_TIMESTAMP. Reverting to emulated behavior. (Error code 0): Cannot allocate sample buffer
-#define VULKAN_USE_QUERIES 0
+#define VULKAN_USE_TIMER_QUERIES 0
///
/// The implementation for the Vulkan API support for iOS platform.