Add **GPU profiling support to Tracy integration**

This commit is contained in:
Wojtek Figat
2025-07-30 19:08:45 +02:00
parent 8fcbef863e
commit 5e4d564338
26 changed files with 2716 additions and 46 deletions

View File

@@ -247,7 +247,6 @@ int32 Engine::Main(const Char* cmdLine)
{
OnDraw();
Time::OnEndDraw();
FrameMark;
}
}
@@ -397,6 +396,11 @@ void Engine::OnLateUpdate()
void Engine::OnDraw()
{
#if COMPILE_WITH_PROFILER
// Auto-enable GPU events once Tracy is connected
if (!ProfilerGPU::EventsEnabled && TracyIsConnected)
ProfilerGPU::EventsEnabled = true;
#endif
PROFILE_CPU_NAMED("Draw");
// Begin frame rendering
@@ -411,6 +415,7 @@ void Engine::OnDraw()
device->Draw();
// End frame rendering
FrameMark;
#if COMPILE_WITH_PROFILER
ProfilerGPU::EndFrame();
#endif

View File

@@ -69,6 +69,10 @@ void GPUContext::FrameEnd()
FlushState();
}
// Base post-present hook (GPUDevice::DrawEnd calls it on the main context).
// Intentionally empty: graphics backends override it to run their own post-present work.
void GPUContext::OnPresent()
{
}
void GPUContext::BindSR(int32 slot, GPUTexture* t)
{
ASSERT_LOW_LAYER(t == nullptr || t->ResidentMipLevels() == 0 || t->IsShaderResource());

View File

@@ -148,6 +148,11 @@ public:
/// </summary>
virtual void FrameEnd();
/// <summary>
/// Called after performing final swapchain presentation and submitting all GPU commands.
/// </summary>
virtual void OnPresent();
public:
#if GPU_ALLOW_PROFILE_EVENTS
/// <summary>

View File

@@ -646,6 +646,7 @@ void GPUDevice::DrawEnd()
const double presentEnd = Platform::GetTimeSeconds();
ProfilerGPU::OnPresentTime((float)((presentEnd - presentStart) * 1000.0));
#endif
GetMainContext()->OnPresent();
_wasVSyncUsed = anyVSync;
_isRendering = false;

View File

@@ -20,6 +20,12 @@ public abstract class GraphicsDeviceBaseModule : EngineModule
// Enables GPU diagnostic tools (debug layer etc.)
options.PublicDefinitions.Add("GPU_ENABLE_DIAGNOSTICS");
}
if (Profiler.Use(options) && tracy.GPU && true)
{
// Enables GPU profiling with Tracy
options.PrivateDefinitions.Add("GPU_ENABLE_TRACY");
}
}
/// <inheritdoc />

View File

@@ -65,10 +65,17 @@ GPUContextDX11::GPUContextDX11(GPUDeviceDX11* device, ID3D11DeviceContext* conte
_maxUASlots = GPU_MAX_UA_BINDED;
if (_device->GetRendererType() != RendererType::DirectX11)
_maxUASlots = 1;
#if GPU_ENABLE_TRACY
_tracyContext = tracy::CreateD3D11Context(device->GetDevice(), context);
#endif
}
GPUContextDX11::~GPUContextDX11()
{
#if GPU_ENABLE_TRACY
tracy::DestroyD3D11Context(_tracyContext);
#endif
#if GPU_ALLOW_PROFILE_EVENTS
SAFE_RELEASE(_userDefinedAnnotations);
#endif
@@ -139,16 +146,35 @@ void GPUContextDX11::FrameBegin()
_context->CSSetSamplers(0, ARRAY_COUNT(samplers), samplers);
}
// Post-present hook for the D3D11 backend.
void GPUContextDX11::OnPresent()
{
// Run the base implementation first
GPUContext::OnPresent();
#if GPU_ENABLE_TRACY
// Let Tracy collect from its D3D11 context (only compiled in when GPU_ENABLE_TRACY is defined)
tracy::CollectD3D11Context(_tracyContext);
#endif
}
#if GPU_ALLOW_PROFILE_EVENTS
// Begins a named GPU profiling event on this context.
// name: event label as an engine Char string; it is narrowed into a char buffer for Tracy.
void GPUContextDX11::EventBegin(const Char* name)
{
// Forward the event to the D3D user-defined annotation interface when one is available
if (_userDefinedAnnotations)
_userDefinedAnnotations->BeginEvent(name);
#if GPU_ENABLE_TRACY
// The Tracy call takes a char string and its length, so convert the label into a small stack buffer
char buffer[60];
int32 bufferSize = StringUtils::Copy(buffer, name, sizeof(buffer));
// NOTE(review): this backend uses a single _tracyZone storage while the D3D12 and Vulkan backends keep a stack of zones - confirm nested events behave correctly here
tracy::BeginD3D11ZoneScope(_tracyZone, _tracyContext, buffer, bufferSize);
#endif
}
// Ends the GPU profiling event started by EventBegin.
void GPUContextDX11::EventEnd()
{
#if GPU_ENABLE_TRACY
// Close the Tracy zone first (reverse of the order used in EventBegin)
tracy::EndD3D11ZoneScope(_tracyZone);
#endif
// Then end the D3D annotation event when the interface is available
if (_userDefinedAnnotations)
_userDefinedAnnotations->EndEvent();
}

View File

@@ -6,6 +6,7 @@
#include "GPUDeviceDX11.h"
#include "GPUPipelineStateDX11.h"
#include "../IncludeDirectXHeaders.h"
#include <ThirdParty/tracy/tracy/TracyD3D11.hpp>
#if GRAPHICS_API_DIRECTX11
@@ -23,6 +24,10 @@ private:
ID3D11DeviceContext* _context;
#if GPU_ALLOW_PROFILE_EVENTS
ID3DUserDefinedAnnotation* _userDefinedAnnotations;
#endif
#if COMPILE_WITH_PROFILER
void* _tracyContext;
byte _tracyZone[TracyD3D11ZoneSize];
#endif
int32 _maxUASlots;
@@ -110,6 +115,7 @@ public:
// [GPUContext]
void FrameBegin() override;
void OnPresent() override;
#if GPU_ALLOW_PROFILE_EVENTS
void EventBegin(const Char* name) override;
void EventEnd() override;

View File

@@ -99,10 +99,16 @@ GPUContextDX12::GPUContextDX12(GPUDeviceDX12* device, D3D12_COMMAND_LIST_TYPE ty
#if GPU_ENABLE_RESOURCE_NAMING
_commandList->SetName(TEXT("GPUContextDX12::CommandList"));
#endif
#if GPU_ENABLE_TRACY
_tracyContext = tracy::CreateD3D12Context(device->GetDevice(), _device->GetCommandQueue()->GetCommandQueue());
#endif
}
// Destroys the Tracy GPU context (when compiled in) and releases the command list.
GPUContextDX12::~GPUContextDX12()
{
#if GPU_ENABLE_TRACY
// Matches the tracy::CreateD3D12Context call made in the constructor
tracy::DestroyD3D12Context(_tracyContext);
#endif
DX_SAFE_RELEASE_CHECK(_commandList, 0);
}
@@ -706,6 +712,15 @@ void GPUContextDX12::FrameEnd()
FrameFenceValues[0] = Execute(false);
}
// Post-present hook for the D3D12 backend.
void GPUContextDX12::OnPresent()
{
// Run the base implementation first
GPUContext::OnPresent();
#if GPU_ENABLE_TRACY
// Let Tracy collect from its D3D12 context (only compiled in when GPU_ENABLE_TRACY is defined)
tracy::CollectD3D12Context(_tracyContext);
#endif
}
#if GPU_ALLOW_PROFILE_EVENTS
void GPUContextDX12::EventBegin(const Char* name)
@@ -713,10 +728,22 @@ void GPUContextDX12::EventBegin(const Char* name)
#if USE_PIX
PIXBeginEvent(_commandList, 0, name);
#endif
#if GPU_ENABLE_TRACY
char buffer[60];
int32 bufferSize = StringUtils::Copy(buffer, name, sizeof(buffer));
auto& zone = _tracyZones.AddOne();
tracy::BeginD3D12ZoneScope(zone.Data, _tracyContext, _commandList, buffer, bufferSize);
#endif
}
void GPUContextDX12::EventEnd()
{
#if GPU_ENABLE_TRACY
tracy::EndD3D12ZoneScope(_tracyZones.Last().Data);
_tracyZones.RemoveLast();
#endif
#if USE_PIX
PIXEndEvent(_commandList);
#endif

View File

@@ -6,6 +6,7 @@
#include "IShaderResourceDX12.h"
#include "DescriptorHeapDX12.h"
#include "../IncludeDirectXHeaders.h"
#include <ThirdParty/tracy/tracy/TracyD3D12.hpp>
#if GRAPHICS_API_DIRECTX12
@@ -71,6 +72,12 @@ private:
GPUConstantBufferDX12* _cbHandles[GPU_MAX_CB_BINDED];
GPUSamplerDX12* _samplers[GPU_MAX_SAMPLER_BINDED - GPU_STATIC_SAMPLERS_COUNT];
#if COMPILE_WITH_PROFILER
void* _tracyContext;
struct TracyZone { byte Data[TracyD3D12ZoneSize]; };
Array<TracyZone, InlinedAllocation<32>> _tracyZones;
#endif
public:
GPUContextDX12(GPUDeviceDX12* device, D3D12_COMMAND_LIST_TYPE type);
@@ -154,6 +161,7 @@ public:
// [GPUContext]
void FrameBegin() override;
void FrameEnd() override;
void OnPresent() override;
#if GPU_ALLOW_PROFILE_EVENTS
void EventBegin(const Char* name) override;
void EventEnd() override;

View File

@@ -49,10 +49,19 @@ void CmdBufferVulkan::End()
PROFILE_CPU();
ASSERT(IsOutsideRenderPass());
#if GPU_ALLOW_PROFILE_EVENTS && VK_EXT_debug_utils
#if GPU_ALLOW_PROFILE_EVENTS
// End remaining events
while (_eventsBegin--)
vkCmdEndDebugUtilsLabelEXT(GetHandle());
{
#if VK_EXT_debug_utils
if (vkCmdEndDebugUtilsLabelEXT)
vkCmdEndDebugUtilsLabelEXT(GetHandle());
#endif
#if GPU_ENABLE_TRACY
tracy::EndVkZoneScope(_tracyZones.Last().Data);
_tracyZones.RemoveLast();
#endif
}
#endif
VALIDATE_VULKAN_RESULT(vkEndCommandBuffer(GetHandle()));
@@ -85,39 +94,43 @@ void CmdBufferVulkan::EndRenderPass()
#if GPU_ALLOW_PROFILE_EVENTS
void CmdBufferVulkan::BeginEvent(const Char* name)
void CmdBufferVulkan::BeginEvent(const Char* name, void* tracyContext)
{
#if VK_EXT_debug_utils
if (!vkCmdBeginDebugUtilsLabelEXT)
return;
_eventsBegin++;
// Convert to ANSI
char buffer[101];
int32 i = 0;
while (i < 100 && name[i])
{
buffer[i] = (char)name[i];
i++;
}
buffer[i] = 0;
char buffer[60];
int32 bufferSize = StringUtils::Copy(buffer, name, sizeof(buffer));
VkDebugUtilsLabelEXT label;
RenderToolsVulkan::ZeroStruct(label, VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT);
label.pLabelName = buffer;
vkCmdBeginDebugUtilsLabelEXT(GetHandle(), &label);
#if GPU_ENABLE_TRACY
auto& zone = _tracyZones.AddOne();
tracy::BeginVkZoneScope(zone.Data, tracyContext, GetHandle(), buffer, bufferSize);
#endif
#if VK_EXT_debug_utils
if (vkCmdBeginDebugUtilsLabelEXT)
{
VkDebugUtilsLabelEXT label;
RenderToolsVulkan::ZeroStruct(label, VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT);
label.pLabelName = buffer;
vkCmdBeginDebugUtilsLabelEXT(GetHandle(), &label);
}
#endif
}
void CmdBufferVulkan::EndEvent()
{
#if VK_EXT_debug_utils
if (_eventsBegin == 0 || !vkCmdEndDebugUtilsLabelEXT)
if (_eventsBegin == 0)
return;
_eventsBegin--;
vkCmdEndDebugUtilsLabelEXT(GetHandle());
#if VK_EXT_debug_utils
if (vkCmdEndDebugUtilsLabelEXT)
vkCmdEndDebugUtilsLabelEXT(GetHandle());
#endif
#if GPU_ENABLE_TRACY
tracy::EndVkZoneScope(_tracyZones.Last().Data);
_tracyZones.RemoveLast();
#endif
}

View File

@@ -5,6 +5,7 @@
#include "GPUDeviceVulkan.h"
#include "Engine/Core/Types/BaseTypes.h"
#include "Engine/Core/Collections/Array.h"
#include <ThirdParty/tracy/tracy/TracyVulkan.hpp>
#if GRAPHICS_API_VULKAN
@@ -42,6 +43,8 @@ private:
FenceVulkan* _fence;
#if GPU_ALLOW_PROFILE_EVENTS
int32 _eventsBegin = 0;
struct TracyZone { byte Data[TracyVulkanZoneSize]; };
Array<TracyZone, InlinedAllocation<32>> _tracyZones;
#endif
// The latest value when command buffer was submitted.
@@ -129,7 +132,7 @@ public:
}
#if GPU_ALLOW_PROFILE_EVENTS
void BeginEvent(const Char* name);
void BeginEvent(const Char* name, void* tracyContext);
void EndEvent();
#endif

View File

@@ -4,6 +4,7 @@
#include "GPUContextVulkan.h"
#include "CmdBufferVulkan.h"
#include "GPUAdapterVulkan.h"
#include "RenderToolsVulkan.h"
#include "Engine/Core/Math/Color.h"
#include "Engine/Core/Math/Rectangle.h"
@@ -15,6 +16,7 @@
#include "Engine/Profiler/RenderStats.h"
#include "GPUShaderProgramVulkan.h"
#include "GPUTextureVulkan.h"
#include "QueueVulkan.h"
#include "Engine/Graphics/PixelFormatExtensions.h"
#include "Engine/Debug/Exceptions/NotImplementedException.h"
@@ -107,10 +109,37 @@ GPUContextVulkan::GPUContextVulkan(GPUDeviceVulkan* device, QueueVulkan* queue)
_handlesSizes[(int32)SpirvShaderResourceBindingType::SRV] = GPU_MAX_SR_BINDED;
_handlesSizes[(int32)SpirvShaderResourceBindingType::UAV] = GPU_MAX_UA_BINDED;
#endif
#if GPU_ENABLE_TRACY
#if VK_EXT_calibrated_timestamps && VK_EXT_host_query_reset
// Use calibrated timestamps extension
if (vkResetQueryPoolEXT && vkGetCalibratedTimestampsEXT)
{
_tracyContext = tracy::CreateVkContext(_device->Adapter->Gpu, _device->Device, vkResetQueryPoolEXT, vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, vkGetCalibratedTimestampsEXT);
}
else
#endif
{
// Use immediate command buffer for Tracy initialization
VkCommandBufferAllocateInfo cmdInfo;
RenderToolsVulkan::ZeroStruct(cmdInfo, VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO);
cmdInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
cmdInfo.commandPool = _cmdBufferManager->GetHandle();
cmdInfo.commandBufferCount = 1;
VkCommandBuffer tracyCmdBuffer;
vkAllocateCommandBuffers(_device->Device, &cmdInfo, &tracyCmdBuffer);
_tracyContext = tracy::CreateVkContext(_device->Adapter->Gpu, _device->Device, _queue->GetHandle(), tracyCmdBuffer, vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, vkGetCalibratedTimestampsEXT);
vkQueueWaitIdle(_queue->GetHandle());
vkFreeCommandBuffers(_device->Device, _cmdBufferManager->GetHandle(), 1, &tracyCmdBuffer);
}
#endif
}
GPUContextVulkan::~GPUContextVulkan()
{
#if GPU_ENABLE_TRACY
tracy::DestroyVkContext(_tracyContext);
#endif
for (int32 i = 0; i < _descriptorPools.Count(); i++)
{
_descriptorPools[i].ClearDelete();
@@ -679,15 +708,9 @@ void GPUContextVulkan::OnDrawCall()
// Bind descriptors sets to the graphics pipeline
if (pipelineState->HasDescriptorsPerStageMask)
{
vkCmdBindDescriptorSets(
cmdBuffer->GetHandle(),
VK_PIPELINE_BIND_POINT_GRAPHICS,
pipelineState->GetLayout()->Handle,
0,
pipelineState->DescriptorSetHandles.Count(),
pipelineState->DescriptorSetHandles.Get(),
pipelineState->DynamicOffsets.Count(),
pipelineState->DynamicOffsets.Get());
auto& descriptorSets = pipelineState->DescriptorSetHandles;
auto& dynamicOffsets = pipelineState->DynamicOffsets;
vkCmdBindDescriptorSets(cmdBuffer->GetHandle(), VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineState->GetLayout()->Handle, 0, descriptorSets.Count(), descriptorSets.Get(), dynamicOffsets.Count(), dynamicOffsets.Get());
}
_rtDirtyFlag = false;
@@ -748,6 +771,11 @@ void GPUContextVulkan::FrameEnd()
// Execute any queued layout transitions that weren't already handled by the render pass
FlushBarriers();
#if GPU_ENABLE_TRACY
if (cmdBuffer)
tracy::CollectVkContext(_tracyContext, cmdBuffer->GetHandle());
#endif
// Base
GPUContext::FrameEnd();
}
@@ -757,7 +785,12 @@ void GPUContextVulkan::FrameEnd()
void GPUContextVulkan::EventBegin(const Char* name)
{
const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
cmdBuffer->BeginEvent(name);
#if COMPILE_WITH_PROFILER
void* tracyContext = _tracyContext;
#else
void* tracyContext = nullptr;
#endif
cmdBuffer->BeginEvent(name, tracyContext);
}
void GPUContextVulkan::EventEnd()

View File

@@ -94,6 +94,9 @@ private:
#if ENABLE_ASSERTION
uint32 _handlesSizes[(int32)SpirvShaderResourceBindingType::MAX];
#endif
#if COMPILE_WITH_PROFILER
void* _tracyContext;
#endif
typedef Array<DescriptorPoolVulkan*> DescriptorPoolArray;
Dictionary<uint32, DescriptorPoolArray> _descriptorPools;

View File

@@ -62,6 +62,10 @@ static const char* GDeviceExtensions[] =
#endif
#if VK_KHR_sampler_mirror_clamp_to_edge
VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
#endif
#if GPU_ENABLE_TRACY && VK_EXT_calibrated_timestamps && VK_EXT_host_query_reset
VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME,
VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
#endif
nullptr
};

View File

@@ -424,6 +424,7 @@ GPUSwapChainVulkan::Status GPUSwapChainVulkan::Present(QueueVulkan* presentQueue
{
if (_currentImageIndex == -1)
return Status::Ok;
PROFILE_CPU_NAMED("vkQueuePresentKHR");
VkPresentInfoKHR presentInfo;
RenderToolsVulkan::ZeroStruct(presentInfo, VK_STRUCTURE_TYPE_PRESENT_INFO_KHR);
@@ -506,7 +507,7 @@ int32 GPUSwapChainVulkan::TryPresent(Function<int32(GPUSwapChainVulkan*, void*)>
int32 GPUSwapChainVulkan::AcquireNextImage(SemaphoreVulkan*& outSemaphore)
{
PROFILE_CPU();
PROFILE_CPU_NAMED("vkAcquireNextImageKHR");
ASSERT(_swapChain && _backBuffers.HasItems());
uint32 imageIndex = _currentImageIndex;
@@ -514,13 +515,7 @@ int32 GPUSwapChainVulkan::AcquireNextImage(SemaphoreVulkan*& outSemaphore)
_semaphoreIndex = (_semaphoreIndex + 1) % _backBuffers.Count();
const auto semaphore = _backBuffers[_semaphoreIndex].ImageAcquiredSemaphore;
const VkResult result = vkAcquireNextImageKHR(
_device->Device,
_swapChain,
UINT64_MAX,
semaphore->GetHandle(),
VK_NULL_HANDLE,
&imageIndex);
const VkResult result = vkAcquireNextImageKHR(_device->Device, _swapChain, UINT64_MAX, semaphore->GetHandle(), VK_NULL_HANDLE, &imageIndex);
if (result == VK_ERROR_OUT_OF_DATE_KHR)
{
_semaphoreIndex = prevSemaphoreIndex;

View File

@@ -16,6 +16,18 @@ constexpr char DirectorySeparatorChar = '\\';
constexpr char AltDirectorySeparatorChar = '/';
constexpr char VolumeSeparatorChar = ':';
// Copies a Char string into a char buffer, narrowing every character with a plain cast (no encoding conversion).
// dst: destination buffer.
// src: null-terminated source string.
// count: capacity of dst in characters, including the slot for the null terminator (callers pass sizeof(buffer)).
// Returns the amount of copied characters (excluding the null terminator). The result is always null-terminated when count > 0.
int32 StringUtils::Copy(char* dst, const Char* src, int32 count)
{
    // No room even for the terminator, so leave dst untouched
    if (count <= 0)
        return 0;

    // Reserve the last slot for the terminator so dst[count] is never written
    // (the previous loop bound allowed a one-element overflow when src had count or more characters)
    const int32 maxChars = count - 1;
    int32 i = 0;
    while (i < maxChars && src[i])
    {
        dst[i] = (char)src[i];
        i++;
    }
    dst[i] = 0;
    return i;
}
const Char* StringUtils::FindIgnoreCase(const Char* str, const Char* toFind)
{
if (toFind == nullptr || str == nullptr)

View File

@@ -125,6 +125,9 @@ public:
// Copies the string (count is maximum amount of characters to copy).
static Char* Copy(Char* dst, const Char* src, int32 count);
// Copies the string (count is maximum amount of characters to copy). Returns amount of copied elements (excluding null terminator character).
static int32 Copy(char* dst, const Char* src, int32 count);
// Finds specific sub-string in the input string. Returns the first found position in the input string or null if failed.
static const Char* Find(const Char* str, const Char* toFind);