Add new GPU Query API that is lightweight and supports occlusion queries
This commit is contained in:
@@ -349,6 +349,24 @@ API_ENUM(Attributes="Flags") enum class GPUResourceMapMode
|
||||
|
||||
DECLARE_ENUM_OPERATORS(GPUResourceMapMode);
|
||||
|
||||
/// <summary>
|
||||
/// GPU query types.
|
||||
/// </summary>
|
||||
enum class GPUQueryType
{
    /// <summary>
    /// Times the execution of the GPU commands enclosed by the query. The result is expressed in microseconds (1/1000 ms).
    /// </summary>
    Timer = 0,

    /// <summary>
    /// Visibility test: counts the pixel samples that were not culled (by depth or stencil tests).
    /// </summary>
    Occlusion = 1,

    /// <summary>
    /// The amount of query types (used to size per-type arrays).
    /// </summary>
    MAX
};
|
||||
|
||||
/// <summary>
|
||||
/// Primitives types.
|
||||
/// </summary>
|
||||
|
||||
@@ -28,6 +28,7 @@ class GPUBufferView;
|
||||
class GPUVertexLayout;
|
||||
struct GPUPass;
|
||||
enum class GPUResourceAccess;
|
||||
enum class GPUQueryType;
|
||||
|
||||
// Gets the GPU texture view. Checks if pointer is not null and texture has one or more mip levels loaded.
|
||||
#define GET_TEXTURE_VIEW_SAFE(t) (t && t->ResidentMipLevels() > 0 ? t->View() : nullptr)
|
||||
@@ -554,6 +555,20 @@ public:
|
||||
/// <param name="offsetForArgs">The aligned byte offset for arguments.</param>
|
||||
API_FUNCTION() virtual void DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) = 0;
|
||||
|
||||
public:
|
||||
/// <summary>
|
||||
/// Begins the GPU query that will measure commands until EndQuery.
|
||||
/// </summary>
|
||||
/// <param name="type">Query type.</param>
|
||||
/// <returns>Unique identifier of the query used to EndQuery and then GetQueryResult to read the query result data.</returns>
|
||||
virtual uint64 BeginQuery(GPUQueryType type) = 0;
|
||||
|
||||
/// <summary>
|
||||
/// Ends the GPU query. Use GPUDevice::GetQueryResult to read the results back.
|
||||
/// </summary>
|
||||
/// <param name="queryID">Query identifier returned by BeginQuery.</param>
|
||||
virtual void EndQuery(uint64 queryID) = 0;
|
||||
|
||||
public:
|
||||
/// <summary>
|
||||
/// Sets the rendering viewport and scissor rectangle.
|
||||
|
||||
@@ -370,6 +370,16 @@ public:
|
||||
/// </summary>
|
||||
virtual void WaitForGPU() = 0;
|
||||
|
||||
/// <summary>
|
||||
/// Reads the query result from the GPU.
|
||||
/// </summary>
|
||||
/// <remarks>GPU query results are short-lived: once a result becomes ready in a given frame, it won't be available in the next frame, because queries are reused.</remarks>
|
||||
/// <param name="queryID">Query identifier returned by GPUContext::BeginQuery.</param>
|
||||
/// <param name="result">The output result data of the query. Valid only when function returns true.</param>
|
||||
/// <param name="wait">True to wait for the GPU to finish processing commands until the query data is ready. Otherwise, if the query is incomplete, the function returns false without a result.</param>
|
||||
/// <returns>True if got a valid query result, otherwise false. When called with wait enabled, a false result means that the device failed to read back the query data.</returns>
|
||||
virtual bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) = 0;
|
||||
|
||||
public:
|
||||
void AddResource(GPUResource* resource);
|
||||
void RemoveResource(GPUResource* resource);
|
||||
|
||||
@@ -32,7 +32,7 @@ API_ENUM() enum class GPUResourceType
|
||||
PipelineState,
|
||||
// GPU binding descriptor
|
||||
Descriptor,
|
||||
// GPU timer query
|
||||
// GPU timer or occlusion query
|
||||
Query,
|
||||
// GPU texture sampler
|
||||
Sampler,
|
||||
|
||||
@@ -40,6 +40,7 @@ public abstract class GraphicsDeviceBaseModule : EngineModule
|
||||
public class Graphics : EngineModule
|
||||
{
|
||||
private static bool _logMissingVulkanSDK;
|
||||
private static bool _logMissingWindowsSDK;
|
||||
|
||||
/// <inheritdoc />
|
||||
public override void Setup(BuildOptions options)
|
||||
@@ -59,7 +60,7 @@ public class Graphics : EngineModule
|
||||
if (windowsToolchain != null && windowsToolchain.SDK != Flax.Build.Platforms.WindowsPlatformSDK.v8_1)
|
||||
options.PrivateDependencies.Add("GraphicsDeviceDX12");
|
||||
else
|
||||
Log.WarningOnce(string.Format("Building for {0} without Vulkan rendering backend (Vulkan SDK is missing)", options.Platform.Target), ref _logMissingVulkanSDK);
|
||||
Log.WarningOnce(string.Format("Building for {0} without D3D12 rendering backend (Windows SDK is missing)", options.Platform.Target), ref _logMissingWindowsSDK);
|
||||
break;
|
||||
case TargetPlatform.UWP:
|
||||
options.PrivateDependencies.Add("GraphicsDeviceDX11");
|
||||
|
||||
@@ -566,6 +566,81 @@ void GPUContextDX11::DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint
|
||||
RENDER_STAT_DRAW_CALL(0, 0);
|
||||
}
|
||||
|
||||
// Begins a GPU query of the given type.
// Reuses a pooled query when one of that type is ready, otherwise creates the D3D11 query object(s) on demand.
// Returns the packed 64-bit query identifier (never 0 for a valid query), or 0 if the query could not be allocated.
uint64 GPUContextDX11::BeginQuery(GPUQueryType type)
{
    // Allocate a pooled query
    uint16 queryIndex;
    static_assert(ARRAY_COUNT(_device->_readyQueries) == (int32)GPUQueryType::MAX, "Invalid query types count");
    if (_device->_readyQueries[(int32)type].HasItems())
    {
        // Use query from cached list
        queryIndex = _device->_readyQueries[(int32)type].Pop();
    }
    else
    {
        // Query index is packed into 16 bits of the identifier, so the pool must not grow past that range (it would alias other queries)
        if (_device->_queries.Count() > 0xFFFF)
            return 0;

        // Add a new query
        queryIndex = (uint16)_device->_queries.Count();
        auto& query = _device->_queries.AddOne();
        query.Type = type;
        D3D11_QUERY_DESC queryDesc;
        // Occlusion queries need a dedicated query kind (timestamp queries don't support Begin and don't return samples count)
        queryDesc.Query = type == GPUQueryType::Occlusion ? D3D11_QUERY_OCCLUSION : D3D11_QUERY_TIMESTAMP;
        queryDesc.MiscFlags = 0;
        HRESULT hr = _device->GetDevice()->CreateQuery(&queryDesc, &query.Query);
        LOG_DIRECTX_RESULT_WITH_RETURN(hr, 0);
        if (type == GPUQueryType::Timer)
        {
            // Timer queries need an additional timestamp for the begin and a disjoint query that wraps both timestamps
            hr = _device->GetDevice()->CreateQuery(&queryDesc, &query.TimerBeginQuery);
            LOG_DIRECTX_RESULT_WITH_RETURN(hr, 0);
            queryDesc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
            hr = _device->GetDevice()->CreateQuery(&queryDesc, &query.DisjointQuery);
            LOG_DIRECTX_RESULT_WITH_RETURN(hr, 0);
        }
    }
    static_assert(sizeof(GPUQueryDX11) == sizeof(uint64), "Invalid query size.");
    GPUQueryDX11 q = {};
    q.Type = (uint16)type;
    q.Index = queryIndex;
    q.Padding = 1; // Ensure Raw is never 0, even for the first query

    // Begin query
    {
        auto& query = _device->_queries[queryIndex];
        ASSERT_LOW_LAYER(query.State == GPUQueryDataDX11::Ready);
        ASSERT_LOW_LAYER(query.Type == type);
        query.State = GPUQueryDataDX11::Active;
        auto context = _device->GetIM();
        if (type == GPUQueryType::Timer)
        {
            // Open the disjoint scope and record the begin timestamp (timestamp queries are issued via End only)
            context->Begin(query.DisjointQuery);
            context->End(query.TimerBeginQuery);
        }
        else
        {
            context->Begin(query.Query);
        }
    }

    return q.Raw;
}
|
||||
|
||||
// Ends the GPU query started with BeginQuery. Does nothing for a null (0) identifier.
void GPUContextDX11::EndQuery(uint64 queryID)
{
    // Null identifier means that query allocation failed
    if (queryID == 0)
        return;

    // Unpack the identifier to find the pooled query data
    GPUQueryDX11 packed;
    packed.Raw = queryID;
    auto& data = _device->_queries[packed.Index];
    auto deviceContext = _device->GetIM();

    // Issue the main query end (for timers this is the end timestamp)
    deviceContext->End(data.Query);

    // Timer queries additionally close the disjoint query opened in BeginQuery
    const bool isTimer = packed.Type == (uint16)GPUQueryType::Timer;
    if (isTimer)
        deviceContext->End(data.DisjointQuery);
}
|
||||
|
||||
void GPUContextDX11::SetViewport(const Viewport& viewport)
|
||||
{
|
||||
_context->RSSetViewports(1, (D3D11_VIEWPORT*)&viewport);
|
||||
|
||||
@@ -154,6 +154,8 @@ public:
|
||||
void DrawIndexedInstanced(uint32 indicesCount, uint32 instanceCount, int32 startInstance, int32 startVertex, int32 startIndex) override;
|
||||
void DrawInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override;
|
||||
void DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override;
|
||||
uint64 BeginQuery(GPUQueryType type) override;
|
||||
void EndQuery(uint64 queryID) override;
|
||||
void SetViewport(const Viewport& viewport) override;
|
||||
void SetScissor(const Rectangle& scissorRect) override;
|
||||
GPUPipelineState* GetState() const override;
|
||||
|
||||
@@ -175,6 +175,15 @@ GPUVertexLayoutDX11::GPUVertexLayoutDX11(GPUDeviceDX11* device, const Elements&
|
||||
}
|
||||
}
|
||||
|
||||
void GPUQueryDataDX11::Release()
|
||||
{
|
||||
SAFE_RELEASE(Query);
|
||||
SAFE_RELEASE(TimerBeginQuery);
|
||||
SAFE_RELEASE(DisjointQuery);
|
||||
Result = 0;
|
||||
State = Ready;
|
||||
}
|
||||
|
||||
GPUDevice* GPUDeviceDX11::Create()
|
||||
{
|
||||
// Configuration
|
||||
@@ -801,6 +810,11 @@ void GPUDeviceDX11::Dispose()
|
||||
{
|
||||
SAFE_RELEASE(RasterizerStates[i]);
|
||||
}
|
||||
for (auto& query : _queries)
|
||||
query.Release();
|
||||
_queries.Clear();
|
||||
for (auto& e : _readyQueries)
|
||||
e.Clear();
|
||||
|
||||
// Clear DirectX stuff
|
||||
SAFE_DELETE(_mainContext);
|
||||
@@ -877,6 +891,88 @@ void GPUDeviceDX11::DrawEnd()
|
||||
infoQueue->ClearStoredMessages();
|
||||
}
|
||||
#endif
|
||||
|
||||
// Auto-return finished queries back to the pool
|
||||
auto* queries = _queries.Get();
|
||||
int32 queriesCount = _queries.Count();
|
||||
for (int32 i = 0; i < queriesCount; i++)
|
||||
{
|
||||
auto& query = queries[i];
|
||||
if (query.State == GPUQueryDataDX11::Finished)
|
||||
{
|
||||
query.State = GPUQueryDataDX11::Ready;
|
||||
query.Result = 0;
|
||||
_readyQueries[(int32)query.Type].Push(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reads the result of a query started via GPUContextDX11::BeginQuery.
// queryID - packed query identifier (0 is treated as invalid).
// result  - receives the query result (duration in microseconds for timers, raw query data otherwise); valid only when true is returned.
// wait    - when true, keeps polling (yielding the thread) until the query data is available.
// Returns true if a valid result was read (or was already cached for this frame), otherwise false.
bool GPUDeviceDX11::GetQueryResult(uint64 queryID, uint64& result, bool wait)
{
    if (!queryID)
        return false;

    GPUQueryDX11 q;
    q.Raw = queryID;
    if ((int32)q.Index >= _queries.Count())
        return false; // Identifier doesn't belong to this device pool
    auto& query = _queries[q.Index];
    if (query.State == GPUQueryDataDX11::Finished)
    {
        // Use resolved result
        result = query.Result;
        return true;
    }
    if (query.State != GPUQueryDataDX11::Active)
    {
        // Stale identifier: the query has been already recycled back to the pool (polling it could spin forever or add it to the pool twice)
        return false;
    }
    auto context = GetIM();

    bool hasData;
    for (;;)
    {
        if (q.Type == (uint16)GPUQueryType::Timer)
        {
            // Disjoint query is ended last so once it has data, both timestamps are available too
            D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjointData;
            hasData = context->GetData(query.DisjointQuery, &disjointData, sizeof(disjointData), 0) == S_OK;
            if (hasData)
            {
                UINT64 timeBegin = 0, timeEnd = 0;
                context->GetData(query.TimerBeginQuery, &timeBegin, sizeof(timeBegin), 0);
                context->GetData(query.Query, &timeEnd, sizeof(timeEnd), 0);

                if (disjointData.Disjoint == FALSE && disjointData.Frequency != 0)
                {
                    // Convert GPU ticks into microseconds
                    result = timeEnd > timeBegin ? (timeEnd - timeBegin) * 1000000ull / disjointData.Frequency : 0;
                }
                else
                {
                    // Timestamps cannot be trusted (or converted) so report no duration
                    result = 0;
#if !BUILD_RELEASE
                    static bool LogOnce = true;
                    if (LogOnce)
                    {
                        LogOnce = false;
                        LOG(Warning, "Unreliable GPU timer query detected.");
                    }
#endif
                }
            }
        }
        else
        {
            hasData = context->GetData(query.Query, &result, sizeof(uint64), 0) == S_OK;
        }

        if (hasData || !wait)
            break;

        // Wait until data is ready
        Platform::Yield();
    }

    if (hasData)
    {
        // Query has valid data now (until auto-recycle back to pool)
        query.State = GPUQueryDataDX11::Finished;
        query.Result = result;
    }

    return hasData;
}
|
||||
|
||||
GPUTexture* GPUDeviceDX11::CreateTexture(const StringView& name)
|
||||
|
||||
@@ -15,6 +15,38 @@ enum class StencilOperation : byte;
|
||||
class GPUContextDX11;
|
||||
class GPUSwapChainDX11;
|
||||
|
||||
/// <summary>
|
||||
/// GPU query ID packed into 64-bits.
|
||||
/// </summary>
|
||||
struct GPUQueryDX11
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint16 Type;
|
||||
uint16 Index;
|
||||
uint32 Padding;
|
||||
};
|
||||
uint64 Raw;
|
||||
};
|
||||
};
|
||||
|
||||
/// <summary>
|
||||
/// GPU query data (reusable via pooling).
|
||||
/// </summary>
|
||||
struct GPUQueryDataDX11
|
||||
{
|
||||
ID3D11Query* Query = nullptr;
|
||||
ID3D11Query* TimerBeginQuery = nullptr;
|
||||
ID3D11Query* DisjointQuery = nullptr;
|
||||
uint64 Result = 0;
|
||||
enum States { Ready, Active, Finished } State = Ready;
|
||||
GPUQueryType Type = GPUQueryType::MAX;
|
||||
|
||||
void Release();
|
||||
};
|
||||
|
||||
/// <summary>
|
||||
/// Implementation of Graphics Device for DirectX 11 backend.
|
||||
/// </summary>
|
||||
@@ -60,6 +92,8 @@ private:
|
||||
GPUContextDX11* _mainContext = nullptr;
|
||||
bool _allowTearing = false;
|
||||
GPUBuffer* _dummyVB = nullptr;
|
||||
Array<GPUQueryDataDX11> _queries;
|
||||
Array<uint16> _readyQueries[2]; // Timer and Occlusion
|
||||
|
||||
// Static Samplers
|
||||
ID3D11SamplerState* _samplerLinearClamp = nullptr;
|
||||
@@ -124,6 +158,7 @@ public:
|
||||
void Dispose() override;
|
||||
void WaitForGPU() override;
|
||||
void DrawEnd() override;
|
||||
bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) override;
|
||||
GPUTexture* CreateTexture(const StringView& name) override;
|
||||
GPUShader* CreateShader(const StringView& name) override;
|
||||
GPUPipelineState* CreatePipelineState() override;
|
||||
|
||||
@@ -1275,6 +1275,31 @@ void GPUContextDX12::DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint
|
||||
RENDER_STAT_DRAW_CALL(0, 0);
|
||||
}
|
||||
|
||||
// Begins a GPU query of the given type using an element allocated from the device query heaps.
// Returns the packed 64-bit query identifier, or 0 if the allocation failed.
uint64 GPUContextDX12::BeginQuery(GPUQueryType type)
{
    const auto allocated = _device->AllocQuery(type);
    if (!allocated.Raw)
        return allocated.Raw;

    auto queryHeap = _device->QueryHeaps[allocated.Heap];
    if (type != GPUQueryType::Timer)
    {
        _commandList->BeginQuery(queryHeap->QueryHeap, queryHeap->QueryType, allocated.Element);
    }
    else
    {
        // Timer queries call End twice on different queries to calculate duration between GPU time clocks
        _commandList->EndQuery(queryHeap->QueryHeap, queryHeap->QueryType, allocated.SecondaryElement);
    }
    return allocated.Raw;
}
|
||||
|
||||
// Ends the GPU query started with BeginQuery. Does nothing for a null (0) identifier.
void GPUContextDX12::EndQuery(uint64 queryID)
{
    if (!queryID)
        return;

    // Unpack the identifier to locate the heap and the element to end
    GPUQueryDX12 packed;
    packed.Raw = queryID;
    auto queryHeap = _device->QueryHeaps[packed.Heap];
    _commandList->EndQuery(queryHeap->QueryHeap, queryHeap->QueryType, packed.Element);
}
|
||||
|
||||
void GPUContextDX12::SetViewport(const Viewport& viewport)
|
||||
{
|
||||
_commandList->RSSetViewports(1, (D3D12_VIEWPORT*)&viewport);
|
||||
|
||||
@@ -197,6 +197,8 @@ public:
|
||||
void DrawIndexedInstanced(uint32 indicesCount, uint32 instanceCount, int32 startInstance, int32 startVertex, int32 startIndex) override;
|
||||
void DrawInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override;
|
||||
void DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override;
|
||||
uint64 BeginQuery(GPUQueryType type) override;
|
||||
void EndQuery(uint64 queryID) override;
|
||||
void SetViewport(const Viewport& viewport) override;
|
||||
void SetScissor(const Rectangle& scissorRect) override;
|
||||
GPUPipelineState* GetState() const override;
|
||||
|
||||
@@ -555,7 +555,6 @@ GPUDeviceDX12::GPUDeviceDX12(IDXGIFactory4* dxgiFactory, GPUAdapterDX* adapter)
|
||||
, _commandQueue(nullptr)
|
||||
, _mainContext(nullptr)
|
||||
, UploadBuffer(this)
|
||||
, TimestampQueryHeap(this, D3D12_QUERY_HEAP_TYPE_TIMESTAMP, DX12_BACK_BUFFER_COUNT * 1024)
|
||||
, Heap_CBV_SRV_UAV(this, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 4 * 1024, false)
|
||||
, Heap_RTV(this, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 1 * 1024, false)
|
||||
, Heap_DSV(this, D3D12_DESCRIPTOR_HEAP_TYPE_DSV, 64, false)
|
||||
@@ -883,9 +882,6 @@ bool GPUDeviceDX12::Init()
|
||||
VALIDATE_DIRECTX_CALL(_device->CreateRootSignature(0, signatureBlob->GetBufferPointer(), signatureBlob->GetBufferSize(), IID_PPV_ARGS(&_rootSignature)));
|
||||
}
|
||||
|
||||
if (TimestampQueryHeap.Init())
|
||||
return true;
|
||||
|
||||
// Cached command signatures
|
||||
{
|
||||
DrawIndirectCommandSignature = New<CommandSignatureDX12>(this, 1);
|
||||
@@ -927,8 +923,9 @@ void GPUDeviceDX12::RenderEnd()
|
||||
// Base
|
||||
GPUDeviceDX::RenderEnd();
|
||||
|
||||
// Resolve the timestamp queries
|
||||
TimestampQueryHeap.EndQueryBatchAndResolveQueryData(_mainContext);
|
||||
// Resolve the queries
|
||||
for (auto heap : QueryHeaps)
|
||||
heap->EndQueryBatchAndResolveQueryData(_mainContext);
|
||||
}
|
||||
|
||||
GPUDeviceDX12::~GPUDeviceDX12()
|
||||
@@ -957,11 +954,47 @@ ID3D12CommandQueue* GPUDeviceDX12::GetCommandQueueDX12() const
|
||||
return _commandQueue->GetCommandQueue();
|
||||
}
|
||||
|
||||
// Allocates a query (one or two heap elements, depending on the type) from a query heap of the matching type.
// Creates a new heap when none of the existing ones can fit the request.
// Returns the packed query, or an empty (zeroed) one if a new heap failed to initialize.
GPUQueryDX12 GPUDeviceDX12::AllocQuery(GPUQueryType type)
{
    const int32 elementsNeeded = GPUQueryDX12::GetQueriesCount(type);

    // Find the first heap of that type with enough free space
    int32 heapIndex = -1;
    for (int32 i = 0; i < QueryHeaps.Count(); i++)
    {
        auto candidate = QueryHeaps[i];
        if (candidate->Type == type && candidate->CanAlloc(elementsNeeded))
        {
            heapIndex = i;
            break;
        }
    }

    if (heapIndex == -1)
    {
        // None found so allocate a new query heap
        auto newHeap = New<QueryHeapDX12>();
        int32 heapSize = 1024;
        if (type == GPUQueryType::Occlusion)
            heapSize = 4096;
        if (newHeap->Init(this, type, heapSize))
        {
            Delete(newHeap);
            return {};
        }
        heapIndex = QueryHeaps.Count();
        QueryHeaps.Add(newHeap);
    }

    // Alloc query from the heap
    static_assert(sizeof(GPUQueryDX12) == sizeof(uint64), "Invalid DX12 query size.");
    GPUQueryDX12 result = {};
    result.Type = (uint16)type;
    result.Heap = heapIndex;
    auto targetHeap = QueryHeaps[heapIndex];
    targetHeap->Alloc(result.Element);
    if (elementsNeeded == 2)
        targetHeap->Alloc(result.SecondaryElement);
    return result;
}
|
||||
|
||||
void GPUDeviceDX12::Dispose()
|
||||
{
|
||||
GPUDeviceLock lock(this);
|
||||
|
||||
// Check if has been disposed already
|
||||
if (_state == DeviceState::Disposed)
|
||||
return;
|
||||
|
||||
@@ -982,7 +1015,12 @@ void GPUDeviceDX12::Dispose()
|
||||
for (auto& srv : _nullSrv)
|
||||
srv.Release();
|
||||
_nullUav.Release();
|
||||
TimestampQueryHeap.Destroy();
|
||||
for (auto* heap : QueryHeaps)
|
||||
{
|
||||
heap->Destroy();
|
||||
Delete(heap);
|
||||
}
|
||||
QueryHeaps.Clear();
|
||||
DX_SAFE_RELEASE_CHECK(_rootSignature, 0);
|
||||
Heap_CBV_SRV_UAV.ReleaseGPU();
|
||||
Heap_RTV.ReleaseGPU();
|
||||
@@ -1012,6 +1050,28 @@ void GPUDeviceDX12::WaitForGPU()
|
||||
_commandQueue->WaitForGPU();
|
||||
}
|
||||
|
||||
// Reads the result of a query started via GPUContextDX12::BeginQuery.
// queryID - packed query identifier (0 is treated as invalid).
// result  - receives the query result (duration in microseconds for timers, raw resolved value otherwise); valid only when true is returned.
// wait    - when false, the function returns false if the query elements are not ready yet instead of resolving them.
// Returns true if the result was read, otherwise false.
bool GPUDeviceDX12::GetQueryResult(uint64 queryID, uint64& result, bool wait)
{
    // Null identifier means that query allocation failed
    if (!queryID)
        return false;

    GPUQueryDX12 query;
    query.Raw = queryID;
    if ((int32)query.Heap >= QueryHeaps.Count())
        return false; // Identifier doesn't belong to any of the device heaps
    auto heap = QueryHeaps[query.Heap];
    int32 count = GPUQueryDX12::GetQueriesCount((GPUQueryType)query.Type);

    // Secondary element exists only for queries made of two elements (timers) so check it just in that case
    if (!wait && (!heap->IsReady(query.Element) || (count == 2 && !heap->IsReady(query.SecondaryElement))))
        return false;

    if (query.Type == (uint16)GPUQueryType::Timer)
    {
        // Begin timestamp is stored in the secondary element, end timestamp in the primary one
        uint64 timestampFrequency = 1;
        const uint64 timeBegin = *(uint64*)heap->Resolve(query.SecondaryElement);
        const uint64 timeEnd = *(uint64*)heap->Resolve(query.Element, &timestampFrequency);

        // Convert GPU ticks into microseconds (guard against unknown frequency)
        result = timeEnd > timeBegin && timestampFrequency != 0 ? (timeEnd - timeBegin) * 1000000ull / timestampFrequency : 0;
    }
    else
    {
        result = *(uint64*)heap->Resolve(query.Element);
    }
    return true;
}
|
||||
|
||||
GPUTexture* GPUDeviceDX12::CreateTexture(const StringView& name)
|
||||
{
|
||||
PROFILE_MEM(GraphicsTextures);
|
||||
|
||||
@@ -65,21 +65,13 @@ public:
|
||||
~GPUDeviceDX12();
|
||||
|
||||
public:
|
||||
/// <summary>
|
||||
/// Data uploading utility via pages.
|
||||
/// </summary>
|
||||
UploadBufferDX12 UploadBuffer;
|
||||
|
||||
/// <summary>
|
||||
/// The timestamp queries heap.
|
||||
/// </summary>
|
||||
QueryHeapDX12 TimestampQueryHeap;
|
||||
|
||||
bool AllowTearing = false;
|
||||
CommandSignatureDX12* DispatchIndirectCommandSignature = nullptr;
|
||||
CommandSignatureDX12* DrawIndexedIndirectCommandSignature = nullptr;
|
||||
CommandSignatureDX12* DrawIndirectCommandSignature = nullptr;
|
||||
GPUBuffer* DummyVB = nullptr;
|
||||
Array<QueryHeapDX12*, InlinedAllocation<8>> QueryHeaps;
|
||||
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE NullSRV(D3D12_SRV_DIMENSION dimension) const;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE NullUAV() const;
|
||||
@@ -136,6 +128,8 @@ public:
|
||||
return _mainContext;
|
||||
}
|
||||
|
||||
GPUQueryDX12 AllocQuery(GPUQueryType type);
|
||||
|
||||
public:
|
||||
|
||||
DescriptorHeapPoolDX12 Heap_CBV_SRV_UAV;
|
||||
@@ -185,6 +179,7 @@ public:
|
||||
void RenderEnd() override;
|
||||
void Dispose() final override;
|
||||
void WaitForGPU() override;
|
||||
bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) override;
|
||||
GPUTexture* CreateTexture(const StringView& name) override;
|
||||
GPUShader* CreateShader(const StringView& name) override;
|
||||
GPUPipelineState* CreatePipelineState() override;
|
||||
|
||||
@@ -20,9 +20,7 @@ void GPUTimerQueryDX12::OnReleaseGPU()
|
||||
void GPUTimerQueryDX12::Begin()
|
||||
{
|
||||
const auto context = _device->GetMainContextDX12();
|
||||
auto& heap = _device->TimestampQueryHeap;
|
||||
heap.EndQuery(context, _begin);
|
||||
|
||||
_query = context->BeginQuery(GPUQueryType::Timer);
|
||||
_hasResult = false;
|
||||
_endCalled = false;
|
||||
}
|
||||
@@ -31,14 +29,8 @@ void GPUTimerQueryDX12::End()
|
||||
{
|
||||
if (_endCalled)
|
||||
return;
|
||||
|
||||
const auto context = _device->GetMainContextDX12();
|
||||
auto& heap = _device->TimestampQueryHeap;
|
||||
heap.EndQuery(context, _end);
|
||||
|
||||
const auto queue = _device->GetCommandQueue()->GetCommandQueue();
|
||||
VALIDATE_DIRECTX_CALL(queue->GetTimestampFrequency(&_gpuFrequency));
|
||||
|
||||
context->EndQuery(_query);
|
||||
_endCalled = true;
|
||||
}
|
||||
|
||||
@@ -48,33 +40,16 @@ bool GPUTimerQueryDX12::HasResult()
|
||||
return false;
|
||||
if (_hasResult)
|
||||
return true;
|
||||
|
||||
auto& heap = _device->TimestampQueryHeap;
|
||||
return heap.IsReady(_end) && heap.IsReady(_begin);
|
||||
uint64 result;
|
||||
return _device->GetQueryResult(_query, result, false);
|
||||
}
|
||||
|
||||
float GPUTimerQueryDX12::GetResult()
|
||||
{
|
||||
if (_hasResult)
|
||||
{
|
||||
return _timeDelta;
|
||||
}
|
||||
|
||||
const uint64 timeBegin = *(uint64*)_device->TimestampQueryHeap.ResolveQuery(_begin);
|
||||
const uint64 timeEnd = *(uint64*)_device->TimestampQueryHeap.ResolveQuery(_end);
|
||||
|
||||
// Calculate event duration in milliseconds
|
||||
if (timeEnd > timeBegin)
|
||||
{
|
||||
const uint64 delta = timeEnd - timeBegin;
|
||||
const double frequency = double(_gpuFrequency);
|
||||
_timeDelta = static_cast<float>((delta / frequency) * 1000.0);
|
||||
}
|
||||
else
|
||||
{
|
||||
_timeDelta = 0.0f;
|
||||
}
|
||||
|
||||
uint64 result;
|
||||
_timeDelta = _device->GetQueryResult(_query, result, true) ? (float)((double)result / 1000.0) : 0.0f;
|
||||
_hasResult = true;
|
||||
return _timeDelta;
|
||||
}
|
||||
|
||||
@@ -17,9 +17,7 @@ private:
|
||||
bool _hasResult = false;
|
||||
bool _endCalled = false;
|
||||
float _timeDelta = 0.0f;
|
||||
uint64 _gpuFrequency = 0;
|
||||
QueryHeapDX12::ElementHandle _begin;
|
||||
QueryHeapDX12::ElementHandle _end;
|
||||
uint64 _query = 0;
|
||||
|
||||
public:
|
||||
|
||||
|
||||
@@ -7,42 +7,34 @@
|
||||
#include "GPUContextDX12.h"
|
||||
#include "../RenderToolsDX.h"
|
||||
|
||||
QueryHeapDX12::QueryHeapDX12(GPUDeviceDX12* device, const D3D12_QUERY_HEAP_TYPE& queryHeapType, int32 queryHeapCount)
|
||||
: _device(device)
|
||||
, _queryHeap(nullptr)
|
||||
, _resultBuffer(nullptr)
|
||||
, _queryHeapType(queryHeapType)
|
||||
, _currentIndex(0)
|
||||
, _queryHeapCount(queryHeapCount)
|
||||
bool QueryHeapDX12::Init(GPUDeviceDX12* device, GPUQueryType type, uint32 size)
|
||||
{
|
||||
if (queryHeapType == D3D12_QUERY_HEAP_TYPE_OCCLUSION)
|
||||
{
|
||||
_resultSize = sizeof(uint64);
|
||||
_queryType = D3D12_QUERY_TYPE_OCCLUSION;
|
||||
}
|
||||
else if (queryHeapType == D3D12_QUERY_HEAP_TYPE_TIMESTAMP)
|
||||
{
|
||||
_resultSize = sizeof(uint64);
|
||||
_queryType = D3D12_QUERY_TYPE_TIMESTAMP;
|
||||
}
|
||||
else
|
||||
{
|
||||
MISSING_CODE("Not support D3D12 query heap type.");
|
||||
}
|
||||
}
|
||||
|
||||
bool QueryHeapDX12::Init()
|
||||
{
|
||||
_resultData.Resize(_resultSize * _queryHeapCount);
|
||||
|
||||
// Create the query heap
|
||||
D3D12_QUERY_HEAP_DESC heapDesc;
|
||||
heapDesc.Type = _queryHeapType;
|
||||
Type = type;
|
||||
_device = device;
|
||||
_queryHeapCount = size;
|
||||
D3D12_QUERY_HEAP_DESC heapDesc = {};
|
||||
heapDesc.Count = _queryHeapCount;
|
||||
heapDesc.NodeMask = 0;
|
||||
HRESULT result = _device->GetDevice()->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&_queryHeap));
|
||||
switch (type)
|
||||
{
|
||||
case GPUQueryType::Timer:
|
||||
_resultSize = sizeof(uint64);
|
||||
QueryType = D3D12_QUERY_TYPE_TIMESTAMP;
|
||||
heapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
|
||||
break;
|
||||
case GPUQueryType::Occlusion:
|
||||
_resultSize = sizeof(uint64);
|
||||
QueryType = D3D12_QUERY_TYPE_OCCLUSION;
|
||||
heapDesc.Type = D3D12_QUERY_HEAP_TYPE_OCCLUSION;
|
||||
break;
|
||||
case GPUQueryType::MAX:
|
||||
return true;
|
||||
}
|
||||
_resultData.Resize(_resultSize * _queryHeapCount);
|
||||
HRESULT result = _device->GetDevice()->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&QueryHeap));
|
||||
LOG_DIRECTX_RESULT_WITH_RETURN(result, true);
|
||||
DX_SET_DEBUG_NAME(_queryHeap, "Query Heap");
|
||||
DX_SET_DEBUG_NAME(QueryHeap, "Query Heap");
|
||||
|
||||
// Create the result buffer
|
||||
D3D12_HEAP_PROPERTIES heapProperties;
|
||||
@@ -77,8 +69,8 @@ bool QueryHeapDX12::Init()
|
||||
void QueryHeapDX12::Destroy()
|
||||
{
|
||||
SAFE_RELEASE(_resultBuffer);
|
||||
SAFE_RELEASE(_queryHeap);
|
||||
_currentBatch.Clear();
|
||||
SAFE_RELEASE(QueryHeap);
|
||||
_currentBatch = QueryBatch();
|
||||
_resultData.SetCapacity(0);
|
||||
}
|
||||
|
||||
@@ -92,45 +84,36 @@ void QueryHeapDX12::EndQueryBatchAndResolveQueryData(GPUContextDX12* context)
|
||||
_currentBatch.Open = false;
|
||||
|
||||
// Resolve the batch
|
||||
const int32 offset = _currentBatch.Start * _resultSize;
|
||||
context->GetCommandList()->ResolveQueryData(_queryHeap, _queryType, _currentBatch.Start, _currentBatch.Count, _resultBuffer, offset);
|
||||
_currentBatch.Sync = _device->GetCommandQueue()->GetSyncPoint();
|
||||
const uint32 offset = _currentBatch.Start * _resultSize;
|
||||
context->GetCommandList()->ResolveQueryData(QueryHeap, QueryType, _currentBatch.Start, _currentBatch.Count, _resultBuffer, offset);
|
||||
const auto queue = _device->GetCommandQueue();
|
||||
_currentBatch.Sync = queue->GetSyncPoint();
|
||||
|
||||
// Get GPU clock frequency for timer queries
|
||||
if (Type == GPUQueryType::Timer)
|
||||
{
|
||||
VALIDATE_DIRECTX_CALL(queue->GetCommandQueue()->GetTimestampFrequency(&_currentBatch.TimestampFrequency));
|
||||
}
|
||||
|
||||
// Begin a new query batch
|
||||
_batches.Add(_currentBatch);
|
||||
StartQueryBatch();
|
||||
}
|
||||
|
||||
void QueryHeapDX12::AllocQuery(GPUContextDX12* context, ElementHandle& handle)
|
||||
// Checks whether the heap can provide the given amount of elements: the current batch must be open and the elements must fit before the end of the heap.
bool QueryHeapDX12::CanAlloc(int32 count) const
{
    if (!_currentBatch.Open)
        return false;
    return _currentIndex + count <= GetQueryHeapCount();
}
|
||||
|
||||
void QueryHeapDX12::Alloc(ElementHandle& handle)
|
||||
{
|
||||
ASSERT(_currentBatch.Open);
|
||||
|
||||
// Check if need to start from the buffer head
|
||||
if (_currentIndex >= GetQueryHeapCount())
|
||||
{
|
||||
// We're in the middle of a batch, but we're at the end of the heap so split the batch in two
|
||||
EndQueryBatchAndResolveQueryData(context);
|
||||
}
|
||||
|
||||
// Allocate element into the current batch
|
||||
handle = _currentIndex++;
|
||||
_currentBatch.Count++;
|
||||
}
|
||||
|
||||
void QueryHeapDX12::BeginQuery(GPUContextDX12* context, ElementHandle& handle)
|
||||
{
|
||||
AllocQuery(context, handle);
|
||||
|
||||
context->GetCommandList()->BeginQuery(_queryHeap, _queryType, handle);
|
||||
}
|
||||
|
||||
void QueryHeapDX12::EndQuery(GPUContextDX12* context, ElementHandle& handle)
|
||||
{
|
||||
AllocQuery(context, handle);
|
||||
|
||||
context->GetCommandList()->EndQuery(_queryHeap, _queryType, handle);
|
||||
}
|
||||
|
||||
bool QueryHeapDX12::IsReady(ElementHandle& handle)
|
||||
{
|
||||
// Current batch is not ready (not ended)
|
||||
@@ -150,7 +133,7 @@ bool QueryHeapDX12::IsReady(ElementHandle& handle)
|
||||
return true;
|
||||
}
|
||||
|
||||
void* QueryHeapDX12::ResolveQuery(ElementHandle& handle)
|
||||
void* QueryHeapDX12::Resolve(ElementHandle& handle, uint64* timestampFrequency)
|
||||
{
|
||||
// Prevent queries from the current batch
|
||||
ASSERT(!_currentBatch.ContainsElement(handle));
|
||||
@@ -192,10 +175,15 @@ void* QueryHeapDX12::ResolveQuery(ElementHandle& handle)
|
||||
// All elements got its results so we can remove this batch
|
||||
_batches.RemoveAt(i);
|
||||
|
||||
// Cache timestamps frequency for later
|
||||
_timestampFrequency = batch.TimestampFrequency;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (timestampFrequency)
|
||||
*timestampFrequency = _timestampFrequency;
|
||||
return _resultData.Get() + handle * _resultSize;
|
||||
}
|
||||
|
||||
@@ -204,7 +192,7 @@ void QueryHeapDX12::StartQueryBatch()
|
||||
ASSERT(!_currentBatch.Open);
|
||||
|
||||
// Clear the current batch
|
||||
_currentBatch.Clear();
|
||||
_currentBatch = QueryBatch();
|
||||
|
||||
// Loop active index on overflow
|
||||
if (_currentIndex >= GetQueryHeapCount())
|
||||
|
||||
@@ -10,6 +10,31 @@ class GPUContextDX12;
|
||||
class GPUBuffer;
|
||||
|
||||
#include "CommandQueueDX12.h"
|
||||
#include "Engine/Graphics/Enums.h"
|
||||
|
||||
/// <summary>
|
||||
/// GPU query ID packed into 64-bits.
|
||||
/// </summary>
|
||||
struct GPUQueryDX12
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint16 Type;
|
||||
uint16 Heap;
|
||||
uint16 Element;
|
||||
uint16 SecondaryElement;
|
||||
};
|
||||
uint64 Raw;
|
||||
};
|
||||
|
||||
static int32 GetQueriesCount(GPUQueryType type)
|
||||
{
|
||||
// Timer queries need to know duration via GPU timer queries difference
|
||||
return type == GPUQueryType::Timer ? 2 : 1;
|
||||
}
|
||||
};
|
||||
|
||||
/// <summary>
|
||||
/// GPU queries heap for DirectX 12 backend.
|
||||
@@ -17,14 +42,12 @@ class GPUBuffer;
|
||||
class QueryHeapDX12
|
||||
{
|
||||
public:
|
||||
|
||||
/// <summary>
|
||||
/// The query element handle.
|
||||
/// </summary>
|
||||
typedef int32 ElementHandle;
|
||||
typedef uint16 ElementHandle;
|
||||
|
||||
private:
|
||||
|
||||
struct QueryBatch
|
||||
{
|
||||
/// <summary>
|
||||
@@ -35,71 +58,54 @@ private:
|
||||
/// <summary>
|
||||
/// The first element in the batch (inclusive).
|
||||
/// </summary>
|
||||
int32 Start = 0;
|
||||
uint32 Start = 0;
|
||||
|
||||
/// <summary>
|
||||
/// The amount of elements added to this batch.
|
||||
/// </summary>
|
||||
int32 Count = 0;
|
||||
uint32 Count = 0;
|
||||
|
||||
/// <summary>
|
||||
/// The GPU clock frequency for timer queries.
|
||||
/// </summary>
|
||||
uint64 TimestampFrequency = 0;
|
||||
|
||||
/// <summary>
|
||||
/// Is the batch still open for more begin/end queries.
|
||||
/// </summary>
|
||||
bool Open = false;
|
||||
|
||||
/// <summary>
|
||||
/// Clears this batch.
|
||||
/// </summary>
|
||||
inline void Clear()
|
||||
{
|
||||
Sync = SyncPointDX12();
|
||||
Start = 0;
|
||||
Count = 0;
|
||||
Open = false;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Checks if this query batch contains a given element.
|
||||
/// </summary>
|
||||
/// <param name="elementIndex">The index of the element.</param>
|
||||
/// <returns>True if element is in this query, otherwise false.</returns>
|
||||
bool ContainsElement(int32 elementIndex) const
|
||||
bool ContainsElement(uint32 elementIndex) const
|
||||
{
|
||||
return elementIndex >= Start && elementIndex < Start + Count;
|
||||
}
|
||||
};
|
||||
|
||||
private:
|
||||
|
||||
GPUDeviceDX12* _device;
|
||||
ID3D12QueryHeap* _queryHeap;
|
||||
ID3D12Resource* _resultBuffer;
|
||||
D3D12_QUERY_TYPE _queryType;
|
||||
D3D12_QUERY_HEAP_TYPE _queryHeapType;
|
||||
int32 _currentIndex;
|
||||
int32 _resultSize;
|
||||
int32 _queryHeapCount;
|
||||
GPUDeviceDX12* _device = nullptr;
|
||||
ID3D12Resource* _resultBuffer = nullptr;
|
||||
uint32 _currentIndex = 0;
|
||||
uint32 _resultSize = 0;
|
||||
uint32 _queryHeapCount = 0;
|
||||
QueryBatch _currentBatch;
|
||||
Array<QueryBatch> _batches;
|
||||
Array<byte> _resultData;
|
||||
uint64 _timestampFrequency;
|
||||
|
||||
public:
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="QueryHeapDX12"/> class.
|
||||
/// </summary>
|
||||
/// <param name="device">The device.</param>
|
||||
/// <param name="queryHeapType">Type of the query heap.</param>
|
||||
/// <param name="queryHeapCount">The query heap count.</param>
|
||||
QueryHeapDX12(GPUDeviceDX12* device, const D3D12_QUERY_HEAP_TYPE& queryHeapType, int32 queryHeapCount);
|
||||
|
||||
public:
|
||||
|
||||
/// <summary>
|
||||
/// Initializes this instance.
|
||||
/// </summary>
|
||||
/// <param name="device">The device.</param>
|
||||
/// <param name="type">Type of the query heap.</param>
|
||||
/// <param name="size">The size of the heap.</param>
|
||||
/// <returns>True if failed, otherwise false.</returns>
|
||||
bool Init();
|
||||
bool Init(GPUDeviceDX12* device, GPUQueryType type, uint32 size);
|
||||
|
||||
/// <summary>
|
||||
/// Destroys this instance.
|
||||
@@ -107,12 +113,14 @@ public:
|
||||
void Destroy();
|
||||
|
||||
public:
|
||||
GPUQueryType Type;
|
||||
ID3D12QueryHeap* QueryHeap = nullptr;
|
||||
D3D12_QUERY_TYPE QueryType = D3D12_QUERY_TYPE_OCCLUSION;
|
||||
|
||||
/// <summary>
|
||||
/// Gets the query heap capacity.
|
||||
/// </summary>
|
||||
/// <returns>The queries count.</returns>
|
||||
FORCE_INLINE int32 GetQueryHeapCount() const
|
||||
FORCE_INLINE uint32 GetQueryHeapCount() const
|
||||
{
|
||||
return _queryHeapCount;
|
||||
}
|
||||
@@ -120,8 +128,7 @@ public:
|
||||
/// <summary>
|
||||
/// Gets the size of the result value (in bytes).
|
||||
/// </summary>
|
||||
/// <returns>The size of the query result value (in bytes).</returns>
|
||||
FORCE_INLINE int32 GetResultSize() const
|
||||
FORCE_INLINE uint32 GetResultSize() const
|
||||
{
|
||||
return _resultSize;
|
||||
}
|
||||
@@ -129,40 +136,30 @@ public:
|
||||
/// <summary>
|
||||
/// Gets the result buffer (CPU readable via Map/Unmap).
|
||||
/// </summary>
|
||||
/// <returns>The query results buffer.</returns>
|
||||
FORCE_INLINE ID3D12Resource* GetResultBuffer() const
|
||||
{
|
||||
return _resultBuffer;
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
/// <summary>
|
||||
/// Stops tracking the current batch of begin/end query calls that will be resolved together. This implicitly starts a new batch.
|
||||
/// </summary>
|
||||
/// <param name="context">The context.</param>
|
||||
void EndQueryBatchAndResolveQueryData(GPUContextDX12* context);
|
||||
|
||||
/// <summary>
|
||||
/// Checks if can alloc a new query (without rolling the existing batch).
|
||||
/// </summary>
|
||||
/// <param name="count">How many elements to allocate?</param>
|
||||
/// <returns>True if can alloc new query within the same batch.</returns>
|
||||
bool CanAlloc(int32 count = 1) const;
|
||||
|
||||
/// <summary>
|
||||
/// Allocates the query heap element.
|
||||
/// </summary>
|
||||
/// <param name="context">The context.</param>
|
||||
/// <param name="handle">The result handle.</param>
|
||||
void AllocQuery(GPUContextDX12* context, ElementHandle& handle);
|
||||
|
||||
/// <summary>
|
||||
/// Calls BeginQuery on command list for the given query heap slot.
|
||||
/// </summary>
|
||||
/// <param name="context">The context.</param>
|
||||
/// <param name="handle">The query handle.</param>
|
||||
void BeginQuery(GPUContextDX12* context, ElementHandle& handle);
|
||||
|
||||
/// <summary>
|
||||
/// Calls EndQuery on command list for the given query heap slot.
|
||||
/// </summary>
|
||||
/// <param name="context">The context.</param>
|
||||
/// <param name="handle">The query handle.</param>
|
||||
void EndQuery(GPUContextDX12* context, ElementHandle& handle);
|
||||
void Alloc(ElementHandle& handle);
|
||||
|
||||
/// <summary>
|
||||
/// Determines whether the specified query handle is ready to read data (command list has been executed by the GPU).
|
||||
@@ -175,11 +172,11 @@ public:
|
||||
/// Resolves the query (or skips if already resolved).
|
||||
/// </summary>
|
||||
/// <param name="handle">The result handle.</param>
|
||||
/// <param name="timestampFrequency">The optional pointer to GPU timestamps frequency value to store.</param>
|
||||
/// <returns>The pointer to the resolved query data.</returns>
|
||||
void* ResolveQuery(ElementHandle& handle);
|
||||
void* Resolve(ElementHandle& handle, uint64* timestampFrequency = nullptr);
|
||||
|
||||
private:
|
||||
|
||||
/// <summary>
|
||||
/// Starts tracking a new batch of begin/end query calls that will be resolved together
|
||||
/// </summary>
|
||||
|
||||
@@ -160,6 +160,15 @@ public:
|
||||
{
|
||||
}
|
||||
|
||||
// Stub implementation: no GPU query is started and the returned identifier is always 0.
uint64 BeginQuery(GPUQueryType type) override
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Stub implementation: there is no query to end, so this does nothing.
void EndQuery(uint64 queryID) override
|
||||
{
|
||||
}
|
||||
|
||||
void SetViewport(const Viewport& viewport) override
|
||||
{
|
||||
}
|
||||
|
||||
@@ -144,6 +144,11 @@ void GPUDeviceNull::WaitForGPU()
|
||||
{
|
||||
}
|
||||
|
||||
// Null backend never has query data: always returns false and leaves 'result' untouched (all arguments are ignored).
bool GPUDeviceNull::GetQueryResult(uint64 queryID, uint64& result, bool wait)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
GPUTexture* GPUDeviceNull::CreateTexture(const StringView& name)
|
||||
{
|
||||
PROFILE_MEM(GraphicsTextures);
|
||||
|
||||
@@ -20,18 +20,15 @@ class GPUDeviceNull : public GPUDevice
|
||||
friend GPUSwapChainNull;
|
||||
|
||||
private:
|
||||
|
||||
GPUContextNull* _mainContext;
|
||||
GPUAdapterNull* _adapter;
|
||||
|
||||
public:
|
||||
|
||||
static GPUDevice* Create();
|
||||
GPUDeviceNull();
|
||||
~GPUDeviceNull();
|
||||
|
||||
public:
|
||||
|
||||
// [GPUDevice]
|
||||
GPUContext* GetMainContext() override;
|
||||
GPUAdapter* GetAdapter() const override;
|
||||
@@ -41,6 +38,7 @@ public:
|
||||
void Draw() override;
|
||||
void Dispose() override;
|
||||
void WaitForGPU() override;
|
||||
bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) override;
|
||||
GPUTexture* CreateTexture(const StringView& name) override;
|
||||
GPUShader* CreateShader(const StringView& name) override;
|
||||
GPUPipelineState* CreatePipelineState() override;
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
#include "RenderToolsVulkan.h"
|
||||
#include "QueueVulkan.h"
|
||||
#include "GPUContextVulkan.h"
|
||||
#if VULKAN_USE_QUERIES
|
||||
#if VULKAN_USE_TIMER_QUERIES
|
||||
#include "GPUTimerQueryVulkan.h"
|
||||
#endif
|
||||
#include "DescriptorSetVulkan.h"
|
||||
@@ -243,6 +243,7 @@ void CmdBufferPoolVulkan::RefreshFenceStatus(const CmdBufferVulkan* skipCmdBuffe
|
||||
|
||||
CmdBufferManagerVulkan::CmdBufferManagerVulkan(GPUDeviceVulkan* device, GPUContextVulkan* context)
|
||||
: _device(device)
|
||||
, _context(context)
|
||||
, _pool(device)
|
||||
, _queue(context->GetQueue())
|
||||
, _activeCmdBuffer(nullptr)
|
||||
@@ -259,12 +260,28 @@ void CmdBufferManagerVulkan::SubmitActiveCmdBuffer(SemaphoreVulkan* signalSemaph
|
||||
if (_activeCmdBuffer->IsInsideRenderPass())
|
||||
_activeCmdBuffer->EndRenderPass();
|
||||
|
||||
#if VULKAN_USE_QUERIES
|
||||
// Pause all active queries
|
||||
for (int32 i = 0; i < _queriesInProgress.Count(); i++)
|
||||
#if VULKAN_USE_TIMER_QUERIES && GPU_VULKAN_PAUSE_QUERIES
|
||||
// Pause all active timer queries
|
||||
auto queries = _activeTimerQueries.Get();
|
||||
#if GPU_VULKAN_QUERY_NEW
|
||||
for (int32 i = 0; i < _activeTimerQueries.Count(); i++)
|
||||
{
|
||||
_queriesInProgress.Get()[i]->Interrupt(_activeCmdBuffer);
|
||||
GPUQueryVulkan query;
|
||||
query.Raw = queries[i];
|
||||
|
||||
// End active query to get time from start until submission
|
||||
auto pool = _device->QueryPools[query.PoolIndex];
|
||||
vkCmdWriteTimestamp(_activeCmdBuffer->GetHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, pool->GetHandle(), query.SecondQueryIndex);
|
||||
pool->MarkQueryAsStarted(query.SecondQueryIndex);
|
||||
// TODO: somehow handle ending this query properly by stopping split query instead
|
||||
//_context->EndQuery(query.Raw);
|
||||
|
||||
// TODO: reimplement timer queries pause/resume to be more exact?
|
||||
}
|
||||
#else
|
||||
for (int32 i = 0; i < _activeTimerQueries.Count(); i++)
|
||||
queries->Interrupt(_activeCmdBuffer);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
_activeCmdBuffer->End();
|
||||
@@ -317,27 +334,37 @@ void CmdBufferManagerVulkan::PrepareForNewActiveCommandBuffer()
|
||||
|
||||
_activeCmdBuffer->Begin();
|
||||
|
||||
#if VULKAN_USE_QUERIES
|
||||
// Resume any paused queries with the new command buffer
|
||||
for (int32 i = 0; i < _queriesInProgress.Count(); i++)
|
||||
#if VULKAN_USE_TIMER_QUERIES && GPU_VULKAN_PAUSE_QUERIES
|
||||
// Resume any paused timer queries with the new command buffer
|
||||
auto queries = _activeTimerQueries.Get();
|
||||
#if GPU_VULKAN_QUERY_NEW
|
||||
for (int32 i = 0; i < _activeTimerQueries.Count(); i++)
|
||||
{
|
||||
_queriesInProgress.Get()[i]->Resume(_activeCmdBuffer);
|
||||
GPUQueryVulkan query;
|
||||
query.Raw = queries[i];
|
||||
//_activeTimerQueries.Get()[i]->Resume(_activeCmdBuffer);
|
||||
}
|
||||
#else
|
||||
for (int32 i = 0; i < _activeTimerQueries.Count(); i++)
|
||||
{
|
||||
queries->Resume(_activeCmdBuffer);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
void CmdBufferManagerVulkan::OnQueryBegin(GPUTimerQueryVulkan* query)
|
||||
#if GPU_VULKAN_QUERY_NEW && GPU_VULKAN_PAUSE_QUERIES
|
||||
|
||||
void CmdBufferManagerVulkan::OnTimerQueryBegin(QueryType query)
|
||||
{
|
||||
#if VULKAN_USE_QUERIES
|
||||
_queriesInProgress.Add(query);
|
||||
#endif
|
||||
_activeTimerQueries.Add(query);
|
||||
}
|
||||
|
||||
void CmdBufferManagerVulkan::OnQueryEnd(GPUTimerQueryVulkan* query)
|
||||
void CmdBufferManagerVulkan::OnTimerQueryEnd(QueryType query)
|
||||
{
|
||||
#if VULKAN_USE_QUERIES
|
||||
_queriesInProgress.Remove(query);
|
||||
#endif
|
||||
_activeTimerQueries.Remove(query);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -168,10 +168,18 @@ class CmdBufferManagerVulkan
|
||||
{
|
||||
private:
|
||||
GPUDeviceVulkan* _device;
|
||||
GPUContextVulkan* _context;
|
||||
CmdBufferPoolVulkan _pool;
|
||||
QueueVulkan* _queue;
|
||||
CmdBufferVulkan* _activeCmdBuffer;
|
||||
Array<GPUTimerQueryVulkan*> _queriesInProgress;
|
||||
#if VULKAN_USE_TIMER_QUERIES && GPU_VULKAN_PAUSE_QUERIES
|
||||
#if GPU_VULKAN_QUERY_NEW
|
||||
typedef uint64 QueryType;
|
||||
#else
|
||||
typedef GPUTimerQueryVulkan* QueryType;
|
||||
#endif
|
||||
Array<QueryType> _activeTimerQueries;
|
||||
#endif
|
||||
|
||||
public:
|
||||
CmdBufferManagerVulkan(GPUDeviceVulkan* device, GPUContextVulkan* context);
|
||||
@@ -192,11 +200,6 @@ public:
|
||||
return _activeCmdBuffer != nullptr;
|
||||
}
|
||||
|
||||
FORCE_INLINE bool HasQueriesInProgress() const
|
||||
{
|
||||
return _queriesInProgress.Count() != 0;
|
||||
}
|
||||
|
||||
FORCE_INLINE CmdBufferVulkan* GetCmdBuffer()
|
||||
{
|
||||
if (!_activeCmdBuffer)
|
||||
@@ -207,14 +210,16 @@ public:
|
||||
public:
|
||||
void SubmitActiveCmdBuffer(SemaphoreVulkan* signalSemaphore = nullptr);
|
||||
void WaitForCmdBuffer(CmdBufferVulkan* cmdBuffer, float timeInSecondsToWait = 1.0f);
|
||||
void RefreshFenceStatus(CmdBufferVulkan* skipCmdBuffer = nullptr)
|
||||
void RefreshFenceStatus(const CmdBufferVulkan* skipCmdBuffer = nullptr)
|
||||
{
|
||||
_pool.RefreshFenceStatus(skipCmdBuffer);
|
||||
}
|
||||
void PrepareForNewActiveCommandBuffer();
|
||||
|
||||
void OnQueryBegin(GPUTimerQueryVulkan* query);
|
||||
void OnQueryEnd(GPUTimerQueryVulkan* query);
|
||||
#if VULKAN_USE_TIMER_QUERIES && GPU_VULKAN_PAUSE_QUERIES
|
||||
void OnTimerQueryBegin(QueryType query);
|
||||
void OnTimerQueryEnd(QueryType query);
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -45,8 +45,14 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef VULKAN_USE_QUERIES
|
||||
#define VULKAN_USE_QUERIES 1
|
||||
#ifndef VULKAN_USE_TIMER_QUERIES
|
||||
#define VULKAN_USE_TIMER_QUERIES 1
|
||||
#endif
|
||||
|
||||
// Toggles GPUTimerQueryVulkan to use BeginQuery/EndQuery via GPUContext rather than the old custom implementation
|
||||
#define GPU_VULKAN_QUERY_NEW 1
|
||||
|
||||
// Toggles pausing and resuming all GPU timer queries when command buffer is being flushed (for more exact timings)
|
||||
#define GPU_VULKAN_PAUSE_QUERIES 0
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1300,6 +1300,72 @@ void GPUContextVulkan::DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, ui
|
||||
RENDER_STAT_DRAW_CALL(0, 0);
|
||||
}
|
||||
|
||||
/// <summary>
/// Begins a GPU query of the given type on the active command buffer.
/// </summary>
/// <param name="type">Query type (timer or occlusion).</param>
/// <returns>Packed query identifier (see GPUQueryVulkan) to pass to EndQuery and GetQueryResult, or 0 if the query could not be started.</returns>
uint64 GPUContextVulkan::BeginQuery(GPUQueryType type)
{
    // Check if timer queries are supported
    if (type == GPUQueryType::Timer && _device->PhysicalDeviceLimits.timestampComputeAndGraphics != VK_TRUE)
        return 0;

    // Allocate query
    auto poolIndex = _device->GetOrCreateQueryPool(type);
    auto pool = _device->QueryPools[poolIndex];
    uint32 index = 0;
    const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
    if (!pool->AcquireQuery(cmdBuffer, index))
        return 0;
    GPUQueryVulkan query;
    query.PoolIndex = (uint16)poolIndex;
    query.QueryIndex = (uint16)index;
    query.SecondQueryIndex = 0;
    query.Dummy = 1; // Ensure Raw is never 0, even for the first query

    // Begin query
    switch (type)
    {
    case GPUQueryType::Timer:
        // Timer queries need 2 slots (begin + end)
        if (!pool->AcquireQuery(cmdBuffer, index))
        {
            // The pool had room for the begin slot only. Give that slot back and fail, rather than
            // reusing the begin slot as the end slot (which would read back a zero duration and
            // release the same slot twice).
            pool->ReleaseQuery(query.QueryIndex);
            return 0;
        }
        query.SecondQueryIndex = (uint16)index;

        vkCmdWriteTimestamp(cmdBuffer->GetHandle(), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, pool->GetHandle(), query.QueryIndex);
#if GPU_VULKAN_PAUSE_QUERIES
        _cmdBufferManager->OnTimerQueryBegin(query.Raw);
#endif
        break;
    case GPUQueryType::Occlusion:
        vkCmdBeginQuery(cmdBuffer->GetHandle(), pool->GetHandle(), query.QueryIndex, 0);
        break;
    }
    pool->MarkQueryAsStarted(query.QueryIndex);

    return query.Raw;
}
|
||||
|
||||
/// <summary>
/// Ends a GPU query previously started with BeginQuery.
/// </summary>
/// <param name="queryID">Query identifier returned by BeginQuery (0 is ignored).</param>
void GPUContextVulkan::EndQuery(uint64 queryID)
{
    // Zero means BeginQuery did not start anything
    if (queryID == 0)
        return;

    // Unpack the identifier to find the pool and slots
    GPUQueryVulkan id;
    id.Raw = queryID;
    const auto queryPool = _device->QueryPools[id.PoolIndex];

    // Record the end command matching the pool type
    const auto commands = _cmdBufferManager->GetCmdBuffer();
    if (queryPool->Type == GPUQueryType::Timer)
    {
        // The end timestamp goes into the second slot of the pair
        vkCmdWriteTimestamp(commands->GetHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, queryPool->GetHandle(), id.SecondQueryIndex);
        queryPool->MarkQueryAsStarted(id.SecondQueryIndex);
#if GPU_VULKAN_PAUSE_QUERIES
        _cmdBufferManager->OnTimerQueryEnd(id.Raw);
#endif
    }
    else if (queryPool->Type == GPUQueryType::Occlusion)
    {
        vkCmdEndQuery(commands->GetHandle(), queryPool->GetHandle(), id.QueryIndex);
    }
}
|
||||
|
||||
void GPUContextVulkan::SetViewport(const Viewport& viewport)
|
||||
{
|
||||
vkCmdSetViewport(_cmdBufferManager->GetCmdBuffer()->GetHandle(), 0, 1, (VkViewport*)&viewport);
|
||||
|
||||
@@ -189,6 +189,8 @@ public:
|
||||
void DrawIndexedInstanced(uint32 indicesCount, uint32 instanceCount, int32 startInstance, int32 startVertex, int32 startIndex) override;
|
||||
void DrawInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override;
|
||||
void DrawIndexedInstancedIndirect(GPUBuffer* bufferForArgs, uint32 offsetForArgs) override;
|
||||
uint64 BeginQuery(GPUQueryType type) override;
|
||||
void EndQuery(uint64 queryID) override;
|
||||
void SetViewport(const Viewport& viewport) override;
|
||||
void SetScissor(const Rectangle& scissorRect) override;
|
||||
GPUPipelineState* GetState() const override;
|
||||
|
||||
@@ -627,14 +627,14 @@ RenderPassVulkan::~RenderPassVulkan()
|
||||
Device->DeferredDeletionQueue.EnqueueResource(DeferredDeletionQueueVulkan::Type::RenderPass, Handle);
|
||||
}
|
||||
|
||||
QueryPoolVulkan::QueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, VkQueryType type)
|
||||
QueryPoolVulkan::QueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, GPUQueryType type)
|
||||
: _device(device)
|
||||
, _handle(VK_NULL_HANDLE)
|
||||
, _type(type)
|
||||
, Type(type)
|
||||
{
|
||||
VkQueryPoolCreateInfo createInfo;
|
||||
RenderToolsVulkan::ZeroStruct(createInfo, VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO);
|
||||
createInfo.queryType = type;
|
||||
createInfo.queryType = type == GPUQueryType::Occlusion ? VK_QUERY_TYPE_OCCLUSION : VK_QUERY_TYPE_TIMESTAMP;
|
||||
createInfo.queryCount = capacity;
|
||||
VALIDATE_VULKAN_RESULT(vkCreateQueryPool(device->Device, &createInfo, nullptr, &_handle));
|
||||
|
||||
@@ -667,7 +667,7 @@ void QueryPoolVulkan::Reset(CmdBufferVulkan* cmdBuffer)
|
||||
|
||||
#endif
|
||||
|
||||
BufferedQueryPoolVulkan::BufferedQueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, VkQueryType type)
|
||||
BufferedQueryPoolVulkan::BufferedQueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, GPUQueryType type)
|
||||
: QueryPoolVulkan(device, capacity, type)
|
||||
, _lastBeginIndex(0)
|
||||
{
|
||||
@@ -720,6 +720,16 @@ void BufferedQueryPoolVulkan::ReleaseQuery(uint32 queryIndex)
|
||||
_lastBeginIndex = (uint32)queryIndex;
|
||||
}
|
||||
}
|
||||
if (_usedQueryBits[word] == 0)
|
||||
{
|
||||
// Check if pool got empty and reset the pointer back to start
|
||||
for (int32 wordIndex = 0; wordIndex < _usedQueryBits.Count(); wordIndex++)
|
||||
{
|
||||
if (_usedQueryBits[wordIndex])
|
||||
return;
|
||||
}
|
||||
_lastBeginIndex = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void BufferedQueryPoolVulkan::MarkQueryAsStarted(uint32 queryIndex)
|
||||
@@ -729,7 +739,7 @@ void BufferedQueryPoolVulkan::MarkQueryAsStarted(uint32 queryIndex)
|
||||
_startedQueryBits[word] = _startedQueryBits[word] | bit;
|
||||
}
|
||||
|
||||
bool BufferedQueryPoolVulkan::GetResults(GPUContextVulkan* context, uint32 index, uint64& result)
|
||||
bool BufferedQueryPoolVulkan::GetResults(uint32 index, uint64& result)
|
||||
{
|
||||
const uint64 bit = (uint64)(index % 64);
|
||||
const uint64 bitMask = (uint64)1 << bit;
|
||||
@@ -1228,22 +1238,20 @@ GPUDeviceVulkan::~GPUDeviceVulkan()
|
||||
GPUDeviceVulkan::Dispose();
|
||||
}
|
||||
|
||||
BufferedQueryPoolVulkan* GPUDeviceVulkan::FindAvailableQueryPool(VkQueryType queryType)
|
||||
int32 GPUDeviceVulkan::GetOrCreateQueryPool(GPUQueryType type)
|
||||
{
|
||||
auto& pools = queryType == VK_QUERY_TYPE_OCCLUSION ? OcclusionQueryPools : TimestampQueryPools;
|
||||
|
||||
// Try to use pool with available space inside
|
||||
for (int32 i = 0; i < pools.Count(); i++)
|
||||
auto pools = QueryPools.Get();
|
||||
for (int32 i = 0; i < QueryPools.Count(); i++)
|
||||
{
|
||||
auto pool = pools.Get()[i];
|
||||
if (pool->HasRoom())
|
||||
return pool;
|
||||
auto pool = pools[i];
|
||||
if (pool->Type == type && pool->HasRoom())
|
||||
return i;
|
||||
}
|
||||
|
||||
// Create new pool
|
||||
const auto pool = New<BufferedQueryPoolVulkan>(this, queryType == VK_QUERY_TYPE_OCCLUSION ? 4096 : 1024, queryType);
|
||||
pools.Add(pool);
|
||||
return pool;
|
||||
PROFILE_CPU_NAMED("Create Create Pool");
|
||||
auto pool = New<BufferedQueryPoolVulkan>(this, type == GPUQueryType::Occlusion ? 4096 : 1024, type);
|
||||
QueryPools.Add(pool);
|
||||
return QueryPools.Count() - 1;
|
||||
}
|
||||
|
||||
RenderPassVulkan* GPUDeviceVulkan::GetOrCreateRenderPass(RenderTargetLayoutVulkan& layout)
|
||||
@@ -1752,6 +1760,10 @@ bool GPUDeviceVulkan::Init()
|
||||
limits.MaximumTexture3DSize = PhysicalDeviceLimits.maxImageDimension3D;
|
||||
limits.MaximumTextureCubeSize = PhysicalDeviceLimits.maxImageDimensionCube;
|
||||
limits.MaximumSamplerAnisotropy = PhysicalDeviceLimits.maxSamplerAnisotropy;
|
||||
if (PhysicalDeviceLimits.timestampComputeAndGraphics != VK_TRUE)
|
||||
{
|
||||
LOG(Warning, "Timer Queries are unsupported by this device");
|
||||
}
|
||||
|
||||
for (int32 i = 0; i < static_cast<int32>(PixelFormat::MAX); i++)
|
||||
{
|
||||
@@ -1982,6 +1994,16 @@ void GPUDeviceVulkan::DrawBegin()
|
||||
// Base
|
||||
GPUDevice::DrawBegin();
|
||||
|
||||
// Put back used queries to the pool
|
||||
for (auto& query : QueriesToRelease)
|
||||
{
|
||||
auto pool = QueryPools[query.PoolIndex];
|
||||
pool->ReleaseQuery(query.QueryIndex);
|
||||
if (pool->Type == GPUQueryType::Timer)
|
||||
pool->ReleaseQuery(query.SecondQueryIndex);
|
||||
}
|
||||
QueriesToRelease.Clear();
|
||||
|
||||
// Flush resources
|
||||
DeferredDeletionQueue.ReleaseResources();
|
||||
DescriptorPoolsManager->GC();
|
||||
@@ -2022,8 +2044,7 @@ void GPUDeviceVulkan::Dispose()
|
||||
_layouts.ClearDelete();
|
||||
HelperResources.Dispose();
|
||||
UploadBuffer.Dispose();
|
||||
TimestampQueryPools.ClearDelete();
|
||||
OcclusionQueryPools.ClearDelete();
|
||||
QueryPools.ClearDelete();
|
||||
SAFE_DELETE_GPU_RESOURCE(UniformBufferUploader);
|
||||
Delete(DescriptorPoolsManager);
|
||||
SAFE_DELETE(MainContext);
|
||||
@@ -2084,6 +2105,61 @@ void GPUDeviceVulkan::WaitForGPU()
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Reads back the result of a GPU query started via GPUContextVulkan::BeginQuery.
/// </summary>
/// <param name="queryID">Query identifier returned by BeginQuery (0 is rejected).</param>
/// <param name="result">Receives the value: duration in microseconds for timer queries, or the raw pool value for occlusion queries. Written only when the function returns true for timer queries.</param>
/// <param name="wait">True to keep yielding the thread and polling until the data is available.</param>
/// <returns>True if the result was read, otherwise false.</returns>
bool GPUDeviceVulkan::GetQueryResult(uint64 queryID, uint64& result, bool wait)
{
    if (!queryID)
        return false;
    GPUQueryVulkan query;
    query.Raw = queryID;

    // Reject identifiers that do not point at an existing pool (eg. stale or corrupted ID)
    if (query.PoolIndex >= QueryPools.Count())
        return false;
    auto pool = QueryPools[query.PoolIndex];

    bool hasData = false;
    while (true)
    {
        switch (pool->Type)
        {
        case GPUQueryType::Timer:
        {
            // Read both timestamps into locals so 'result' is not clobbered by a partial read
            uint64 timeBegin = 0, timeEnd = 0;
            hasData = pool->GetResults(query.QueryIndex, timeBegin) && pool->GetResults(query.SecondQueryIndex, timeEnd);
#if VULKAN_USE_TIMER_QUERIES && GPU_VULKAN_PAUSE_QUERIES
            // TODO: check if dependent queries have completed (timer queries can be split when active command buffer gets submitted)
#endif
            if (hasData)
            {
                if (timeEnd >= timeBegin)
                {
                    // Convert GPU timestamps to nanoseconds and then to microseconds
                    double nanoseconds = double(timeEnd - timeBegin) * double(PhysicalDeviceLimits.timestampPeriod);
                    result = (uint64)(nanoseconds * 0.001);
                }
                else
                    result = 0;
            }
            break;
        }
        case GPUQueryType::Occlusion:
            hasData = pool->GetResults(query.QueryIndex, result);
            break;
        default:
            // Unknown pool type: there is nothing to read (and nothing to wait for)
            return false;
        }

        if (hasData || !wait)
            break;

        // Wait until data is ready
        Platform::Yield();
    }

    if (hasData)
    {
        // Auto-release query on the next frame
        QueriesToRelease.Add(query);
    }

    return hasData;
}
|
||||
|
||||
GPUTexture* GPUDeviceVulkan::CreateTexture(const StringView& name)
|
||||
{
|
||||
PROFILE_MEM(GraphicsTextures);
|
||||
|
||||
@@ -28,6 +28,24 @@ class GPUDeviceVulkan;
|
||||
class UniformBufferUploaderVulkan;
|
||||
class DescriptorPoolsManagerVulkan;
|
||||
|
||||
/// <summary>
|
||||
/// GPU query ID packed into 64-bits.
|
||||
/// </summary>
|
||||
// The four 16-bit fields alias the single 64-bit Raw value that is handed out as the query identifier.
struct GPUQueryVulkan
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint16 PoolIndex; // Index of the pool inside GPUDeviceVulkan::QueryPools
|
||||
uint16 QueryIndex; // Slot inside the pool (begin timestamp for timer queries)
|
||||
uint16 SecondQueryIndex; // Second slot inside the pool, used by timer queries for the end timestamp
|
||||
uint16 Dummy; // Set to 1 by BeginQuery so that Raw is never 0
|
||||
};
|
||||
uint64 Raw; // Packed identifier returned by BeginQuery
|
||||
};
|
||||
};
|
||||
|
||||
class SemaphoreVulkan
|
||||
{
|
||||
private:
|
||||
@@ -261,16 +279,17 @@ protected:
|
||||
GPUDeviceVulkan* _device;
|
||||
VkQueryPool _handle;
|
||||
|
||||
const VkQueryType _type;
|
||||
#if VULKAN_RESET_QUERY_POOLS
|
||||
Array<Range> _resetRanges;
|
||||
#endif
|
||||
|
||||
public:
|
||||
QueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, VkQueryType type);
|
||||
QueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, GPUQueryType type);
|
||||
~QueryPoolVulkan();
|
||||
|
||||
public:
|
||||
const GPUQueryType Type;
|
||||
|
||||
inline VkQueryPool GetHandle() const
|
||||
{
|
||||
return _handle;
|
||||
@@ -294,11 +313,11 @@ private:
|
||||
int32 _lastBeginIndex;
|
||||
|
||||
public:
|
||||
BufferedQueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, VkQueryType type);
|
||||
BufferedQueryPoolVulkan(GPUDeviceVulkan* device, int32 capacity, GPUQueryType type);
|
||||
bool AcquireQuery(CmdBufferVulkan* cmdBuffer, uint32& resultIndex);
|
||||
void ReleaseQuery(uint32 queryIndex);
|
||||
void MarkQueryAsStarted(uint32 queryIndex);
|
||||
bool GetResults(GPUContextVulkan* context, uint32 index, uint64& result);
|
||||
bool GetResults(uint32 index, uint64& result);
|
||||
bool HasRoom() const;
|
||||
};
|
||||
|
||||
@@ -498,14 +517,13 @@ public:
|
||||
VkPhysicalDeviceFeatures PhysicalDeviceFeatures;
|
||||
VkPhysicalDeviceVulkan12Features PhysicalDeviceFeatures12;
|
||||
|
||||
Array<BufferedQueryPoolVulkan*> TimestampQueryPools;
|
||||
Array<BufferedQueryPoolVulkan*> OcclusionQueryPools;
|
||||
|
||||
Array<BufferedQueryPoolVulkan*> QueryPools;
|
||||
Array<GPUQueryVulkan> QueriesToRelease;
|
||||
#if VULKAN_RESET_QUERY_POOLS
|
||||
Array<QueryPoolVulkan*> QueriesToReset;
|
||||
#endif
|
||||
|
||||
BufferedQueryPoolVulkan* FindAvailableQueryPool(VkQueryType queryType);
|
||||
int32 GetOrCreateQueryPool(GPUQueryType type);
|
||||
RenderPassVulkan* GetOrCreateRenderPass(RenderTargetLayoutVulkan& layout);
|
||||
FramebufferVulkan* GetOrCreateFramebuffer(FramebufferVulkan::Key& key, VkExtent2D& extent, uint32 layers);
|
||||
PipelineLayoutVulkan* GetOrCreateLayout(DescriptorSetLayoutInfoVulkan& key);
|
||||
@@ -553,6 +571,7 @@ public:
|
||||
void DrawBegin() override;
|
||||
void Dispose() override;
|
||||
void WaitForGPU() override;
|
||||
bool GetQueryResult(uint64 queryID, uint64& result, bool wait = false) override;
|
||||
GPUTexture* CreateTexture(const StringView& name) override;
|
||||
GPUShader* CreateShader(const StringView& name) override;
|
||||
GPUPipelineState* CreatePipelineState() override;
|
||||
|
||||
@@ -11,6 +11,78 @@ GPUTimerQueryVulkan::GPUTimerQueryVulkan(GPUDeviceVulkan* device)
|
||||
{
|
||||
}
|
||||
|
||||
#if !VULKAN_USE_TIMER_QUERIES
|
||||
|
||||
void GPUTimerQueryVulkan::OnReleaseGPU()
|
||||
{
|
||||
}
|
||||
|
||||
void GPUTimerQueryVulkan::Begin()
|
||||
{
|
||||
}
|
||||
|
||||
void GPUTimerQueryVulkan::End()
|
||||
{
|
||||
}
|
||||
|
||||
bool GPUTimerQueryVulkan::HasResult()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
float GPUTimerQueryVulkan::GetResult()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif GPU_VULKAN_QUERY_NEW
|
||||
|
||||
void GPUTimerQueryVulkan::OnReleaseGPU()
|
||||
{
|
||||
_hasResult = false;
|
||||
_endCalled = false;
|
||||
_timeDelta = 0.0f;
|
||||
}
|
||||
|
||||
void GPUTimerQueryVulkan::Begin()
|
||||
{
|
||||
const auto context = _device->GetMainContext();
|
||||
_query = context->BeginQuery(GPUQueryType::Timer);
|
||||
_hasResult = false;
|
||||
_endCalled = false;
|
||||
}
|
||||
|
||||
void GPUTimerQueryVulkan::End()
|
||||
{
|
||||
if (_endCalled)
|
||||
return;
|
||||
const auto context = _device->GetMainContext();
|
||||
context->EndQuery(_query);
|
||||
_endCalled = true;
|
||||
}
|
||||
|
||||
bool GPUTimerQueryVulkan::HasResult()
|
||||
{
|
||||
if (!_endCalled)
|
||||
return false;
|
||||
if (_hasResult)
|
||||
return true;
|
||||
uint64 result;
|
||||
return _device->GetQueryResult(_query, result, false);
|
||||
}
|
||||
|
||||
float GPUTimerQueryVulkan::GetResult()
|
||||
{
|
||||
if (_hasResult)
|
||||
return _timeDelta;
|
||||
uint64 result;
|
||||
_timeDelta = _device->GetQueryResult(_query, result, true) ? (float)((double)result / 1000.0) : 0.0f;
|
||||
_hasResult = true;
|
||||
return _timeDelta;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void GPUTimerQueryVulkan::Interrupt(CmdBufferVulkan* cmdBuffer)
|
||||
{
|
||||
if (!_interrupted)
|
||||
@@ -38,8 +110,7 @@ bool GPUTimerQueryVulkan::GetResult(Query& query)
|
||||
{
|
||||
if (query.Pool)
|
||||
{
|
||||
const auto context = (GPUContextVulkan*)_device->GetMainContext();
|
||||
if (query.Pool->GetResults(context, query.Index, query.Result))
|
||||
if (query.Pool->GetResults(query.Index, query.Result))
|
||||
{
|
||||
// Release query
|
||||
query.Pool->ReleaseQuery(query.Index);
|
||||
@@ -58,7 +129,7 @@ bool GPUTimerQueryVulkan::GetResult(Query& query)
|
||||
|
||||
void GPUTimerQueryVulkan::WriteTimestamp(CmdBufferVulkan* cmdBuffer, Query& query, VkPipelineStageFlagBits stage) const
|
||||
{
|
||||
auto pool = _device->FindAvailableQueryPool(VK_QUERY_TYPE_TIMESTAMP);
|
||||
auto pool = _device->QueryPools[_device->GetOrCreateQueryPool(GPUQueryType::Timer)];
|
||||
uint32 index;
|
||||
if (pool->AcquireQuery(cmdBuffer, index))
|
||||
{
|
||||
@@ -76,7 +147,6 @@ void GPUTimerQueryVulkan::WriteTimestamp(CmdBufferVulkan* cmdBuffer, Query& quer
|
||||
|
||||
bool GPUTimerQueryVulkan::TryGetResult()
|
||||
{
|
||||
#if VULKAN_USE_QUERIES
|
||||
// Try get queries value (if not already)
|
||||
for (int32 i = 0; i < _queries.Count(); i++)
|
||||
{
|
||||
@@ -115,20 +185,12 @@ bool GPUTimerQueryVulkan::TryGetResult()
|
||||
e.End.Pool->ReleaseQuery(e.End.Index);
|
||||
}
|
||||
_queries.Clear();
|
||||
#else
|
||||
_timeDelta = 0.0f;
|
||||
_hasResult = true;
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GPUTimerQueryVulkan::UseQueries()
|
||||
{
|
||||
#if VULKAN_USE_QUERIES
|
||||
return _device->PhysicalDeviceLimits.timestampComputeAndGraphics == VK_TRUE;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
void GPUTimerQueryVulkan::OnReleaseGPU()
|
||||
@@ -150,7 +212,6 @@ void GPUTimerQueryVulkan::OnReleaseGPU()
|
||||
|
||||
void GPUTimerQueryVulkan::Begin()
|
||||
{
|
||||
#if VULKAN_USE_QUERIES
|
||||
if (UseQueries())
|
||||
{
|
||||
const auto context = (GPUContextVulkan*)_device->GetMainContext();
|
||||
@@ -162,12 +223,11 @@ void GPUTimerQueryVulkan::Begin()
|
||||
_queryIndex = 0;
|
||||
_interrupted = false;
|
||||
WriteTimestamp(cmdBuffer, e.Begin, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
|
||||
context->GetCmdBufferManager()->OnQueryBegin(this);
|
||||
context->GetCmdBufferManager()->OnTimerQueryBegin(this);
|
||||
|
||||
ASSERT(_queries.IsEmpty());
|
||||
_queries.Add(e);
|
||||
}
|
||||
#endif
|
||||
|
||||
_hasResult = false;
|
||||
_endCalled = false;
|
||||
@@ -178,7 +238,6 @@ void GPUTimerQueryVulkan::End()
|
||||
if (_endCalled)
|
||||
return;
|
||||
|
||||
#if VULKAN_USE_QUERIES
|
||||
if (UseQueries())
|
||||
{
|
||||
const auto context = (GPUContextVulkan*)_device->GetMainContext();
|
||||
@@ -188,9 +247,8 @@ void GPUTimerQueryVulkan::End()
|
||||
{
|
||||
WriteTimestamp(cmdBuffer, _queries[_queryIndex].End, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
|
||||
}
|
||||
context->GetCmdBufferManager()->OnQueryEnd(this);
|
||||
context->GetCmdBufferManager()->OnTimerQueryEnd(this);
|
||||
}
|
||||
#endif
|
||||
|
||||
_endCalled = true;
|
||||
}
|
||||
@@ -213,3 +271,5 @@ float GPUTimerQueryVulkan::GetResult()
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -13,6 +13,13 @@
|
||||
class GPUTimerQueryVulkan : public GPUResourceVulkan<GPUTimerQuery>
|
||||
{
|
||||
private:
|
||||
#if !VULKAN_USE_TIMER_QUERIES
|
||||
#elif GPU_VULKAN_QUERY_NEW
|
||||
bool _hasResult = false;
|
||||
bool _endCalled = false;
|
||||
float _timeDelta = 0.0f;
|
||||
uint64 _query = 0;
|
||||
#else
|
||||
struct Query
|
||||
{
|
||||
BufferedQueryPoolVulkan* Pool;
|
||||
@@ -32,6 +39,7 @@ private:
|
||||
float _timeDelta = 0.0f;
|
||||
int32 _queryIndex;
|
||||
Array<QueryPair, InlinedAllocation<8>> _queries;
|
||||
#endif
|
||||
|
||||
public:
|
||||
/// <summary>
|
||||
@@ -40,6 +48,7 @@ public:
|
||||
/// <param name="device">The graphics device.</param>
|
||||
GPUTimerQueryVulkan(GPUDeviceVulkan* device);
|
||||
|
||||
#if !GPU_VULKAN_QUERY_NEW
|
||||
public:
|
||||
/// <summary>
|
||||
/// Interrupts an in-progress query, allowing the command buffer to be submitted. Interrupted queries must be resumed using Resume().
|
||||
@@ -58,6 +67,7 @@ private:
|
||||
void WriteTimestamp(CmdBufferVulkan* cmdBuffer, Query& query, VkPipelineStageFlagBits stage) const;
|
||||
bool TryGetResult();
|
||||
bool UseQueries();
|
||||
#endif
|
||||
|
||||
public:
|
||||
// [GPUTimerQuery]
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
#define VULKAN_BACK_BUFFERS_COUNT 3
|
||||
|
||||
// General/Validation Error:0 VK_ERROR_INITIALIZATION_FAILED: Could not create MTLCounterSampleBuffer for query pool of type VK_QUERY_TYPE_TIMESTAMP. Reverting to emulated behavior. (Error code 0): Cannot allocate sample buffer
|
||||
#define VULKAN_USE_QUERIES 0
|
||||
#define VULKAN_USE_TIMER_QUERIES 0
|
||||
|
||||
/// <summary>
|
||||
/// The implementation for the Vulkan API support for Mac platform.
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
#define VULKAN_BACK_BUFFERS_COUNT 3
|
||||
|
||||
// General/Validation Error:0 VK_ERROR_INITIALIZATION_FAILED: Could not create MTLCounterSampleBuffer for query pool of type VK_QUERY_TYPE_TIMESTAMP. Reverting to emulated behavior. (Error code 0): Cannot allocate sample buffer
|
||||
#define VULKAN_USE_QUERIES 0
|
||||
#define VULKAN_USE_TIMER_QUERIES 0
|
||||
|
||||
/// <summary>
|
||||
/// The implementation for the Vulkan API support for iOS platform.
|
||||
|
||||
Reference in New Issue
Block a user