Add **GPU profiling support to Tracy integration**

This commit is contained in:
Wojtek Figat
2025-07-30 19:08:45 +02:00
parent 8fcbef863e
commit 5e4d564338
26 changed files with 2716 additions and 46 deletions

View File

@@ -247,7 +247,6 @@ int32 Engine::Main(const Char* cmdLine)
{
OnDraw();
Time::OnEndDraw();
FrameMark;
}
}
@@ -397,6 +396,11 @@ void Engine::OnLateUpdate()
void Engine::OnDraw()
{
#if COMPILE_WITH_PROFILER
// Auto-enable GPU events when Tracy got connected
if (!ProfilerGPU::EventsEnabled && TracyIsConnected)
ProfilerGPU::EventsEnabled = true;
#endif
PROFILE_CPU_NAMED("Draw");
// Begin frame rendering
@@ -411,6 +415,7 @@ void Engine::OnDraw()
device->Draw();
// End frame rendering
FrameMark;
#if COMPILE_WITH_PROFILER
ProfilerGPU::EndFrame();
#endif

View File

@@ -69,6 +69,10 @@ void GPUContext::FrameEnd()
FlushState();
}
// Base implementation of the post-present hook. It does nothing by itself;
// backend contexts override it and call back into this base version.
void GPUContext::OnPresent()
{
}
void GPUContext::BindSR(int32 slot, GPUTexture* t)
{
ASSERT_LOW_LAYER(t == nullptr || t->ResidentMipLevels() == 0 || t->IsShaderResource());

View File

@@ -148,6 +148,11 @@ public:
/// </summary>
virtual void FrameEnd();
/// <summary>
/// Called after performing final swapchain presentation and submitting all GPU commands.
/// </summary>
virtual void OnPresent();
public:
#if GPU_ALLOW_PROFILE_EVENTS
/// <summary>

View File

@@ -646,6 +646,7 @@ void GPUDevice::DrawEnd()
const double presentEnd = Platform::GetTimeSeconds();
ProfilerGPU::OnPresentTime((float)((presentEnd - presentStart) * 1000.0));
#endif
GetMainContext()->OnPresent();
_wasVSyncUsed = anyVSync;
_isRendering = false;

View File

@@ -20,6 +20,12 @@ public abstract class GraphicsDeviceBaseModule : EngineModule
// Enables GPU diagnostic tools (debug layer etc.)
options.PublicDefinitions.Add("GPU_ENABLE_DIAGNOSTICS");
}
// GPU profiling with Tracy requires both the profiler and the Tracy GPU switch to be enabled
if (Profiler.Use(options) && tracy.GPU)
{
    // Enables GPU profiling with Tracy
    options.PrivateDefinitions.Add("GPU_ENABLE_TRACY");
}
}
/// <inheritdoc />

View File

@@ -65,10 +65,17 @@ GPUContextDX11::GPUContextDX11(GPUDeviceDX11* device, ID3D11DeviceContext* conte
_maxUASlots = GPU_MAX_UA_BINDED;
if (_device->GetRendererType() != RendererType::DirectX11)
_maxUASlots = 1;
#if GPU_ENABLE_TRACY
_tracyContext = tracy::CreateD3D11Context(device->GetDevice(), context);
#endif
}
GPUContextDX11::~GPUContextDX11()
{
#if GPU_ENABLE_TRACY
tracy::DestroyD3D11Context(_tracyContext);
#endif
#if GPU_ALLOW_PROFILE_EVENTS
SAFE_RELEASE(_userDefinedAnnotations);
#endif
@@ -139,16 +146,35 @@ void GPUContextDX11::FrameBegin()
_context->CSSetSamplers(0, ARRAY_COUNT(samplers), samplers);
}
// Post-present hook for the DX11 context: runs the base hook first and then,
// when Tracy GPU profiling is compiled in, passes this context's Tracy handle to the collect call.
void GPUContextDX11::OnPresent()
{
GPUContext::OnPresent();
#if GPU_ENABLE_TRACY
// Compiled only when the build defines GPU_ENABLE_TRACY
tracy::CollectD3D11Context(_tracyContext);
#endif
}
#if GPU_ALLOW_PROFILE_EVENTS
// Begins a named GPU event: forwards it to the D3D user-defined annotations (when available)
// and, with Tracy GPU profiling compiled in, opens a Tracy zone under the same name.
void GPUContextDX11::EventBegin(const Char* name)
{
    if (_userDefinedAnnotations != nullptr)
    {
        _userDefinedAnnotations->BeginEvent(name);
    }
#if GPU_ENABLE_TRACY
    // Tracy is given a narrow-char name, so the event name is converted into a small stack buffer first.
    // NOTE(review): the full buffer size is passed as the count - confirm StringUtils::Copy leaves room for the terminator.
    char ansiName[60];
    int32 ansiLength = StringUtils::Copy(ansiName, name, sizeof(ansiName));
    // NOTE(review): a single zone slot is reused for every event - confirm nested events are handled.
    tracy::BeginD3D11ZoneScope(_tracyZone, _tracyContext, ansiName, ansiLength);
#endif
}
// Ends the most recently begun GPU event: the Tracy zone is closed first (when compiled in),
// then the D3D user-defined annotation event is ended (when the interface is available).
void GPUContextDX11::EventEnd()
{
#if GPU_ENABLE_TRACY
    tracy::EndD3D11ZoneScope(_tracyZone);
#endif
    if (_userDefinedAnnotations != nullptr)
    {
        _userDefinedAnnotations->EndEvent();
    }
}

View File

@@ -6,6 +6,7 @@
#include "GPUDeviceDX11.h"
#include "GPUPipelineStateDX11.h"
#include "../IncludeDirectXHeaders.h"
#include <ThirdParty/tracy/tracy/TracyD3D11.hpp>
#if GRAPHICS_API_DIRECTX11
@@ -23,6 +24,10 @@ private:
ID3D11DeviceContext* _context;
#if GPU_ALLOW_PROFILE_EVENTS
ID3DUserDefinedAnnotation* _userDefinedAnnotations;
#endif
#if COMPILE_WITH_PROFILER
void* _tracyContext;
byte _tracyZone[TracyD3D11ZoneSize];
#endif
int32 _maxUASlots;
@@ -110,6 +115,7 @@ public:
// [GPUContext]
void FrameBegin() override;
void OnPresent() override;
#if GPU_ALLOW_PROFILE_EVENTS
void EventBegin(const Char* name) override;
void EventEnd() override;

View File

@@ -99,10 +99,16 @@ GPUContextDX12::GPUContextDX12(GPUDeviceDX12* device, D3D12_COMMAND_LIST_TYPE ty
#if GPU_ENABLE_RESOURCE_NAMING
_commandList->SetName(TEXT("GPUContextDX12::CommandList"));
#endif
#if GPU_ENABLE_TRACY
_tracyContext = tracy::CreateD3D12Context(device->GetDevice(), _device->GetCommandQueue()->GetCommandQueue());
#endif
}
GPUContextDX12::~GPUContextDX12()
{
#if GPU_ENABLE_TRACY
tracy::DestroyD3D12Context(_tracyContext);
#endif
DX_SAFE_RELEASE_CHECK(_commandList, 0);
}
@@ -706,6 +712,15 @@ void GPUContextDX12::FrameEnd()
FrameFenceValues[0] = Execute(false);
}
// Post-present hook for the DX12 context: runs the base hook first and then,
// when Tracy GPU profiling is compiled in, passes this context's Tracy handle to the collect call.
void GPUContextDX12::OnPresent()
{
GPUContext::OnPresent();
#if GPU_ENABLE_TRACY
// Compiled only when the build defines GPU_ENABLE_TRACY
tracy::CollectD3D12Context(_tracyContext);
#endif
}
#if GPU_ALLOW_PROFILE_EVENTS
void GPUContextDX12::EventBegin(const Char* name)
@@ -713,10 +728,22 @@ void GPUContextDX12::EventBegin(const Char* name)
#if USE_PIX
PIXBeginEvent(_commandList, 0, name);
#endif
#if GPU_ENABLE_TRACY
char buffer[60];
int32 bufferSize = StringUtils::Copy(buffer, name, sizeof(buffer));
auto& zone = _tracyZones.AddOne();
tracy::BeginD3D12ZoneScope(zone.Data, _tracyContext, _commandList, buffer, bufferSize);
#endif
}
void GPUContextDX12::EventEnd()
{
#if GPU_ENABLE_TRACY
tracy::EndD3D12ZoneScope(_tracyZones.Last().Data);
_tracyZones.RemoveLast();
#endif
#if USE_PIX
PIXEndEvent(_commandList);
#endif

View File

@@ -6,6 +6,7 @@
#include "IShaderResourceDX12.h"
#include "DescriptorHeapDX12.h"
#include "../IncludeDirectXHeaders.h"
#include <ThirdParty/tracy/tracy/TracyD3D12.hpp>
#if GRAPHICS_API_DIRECTX12
@@ -71,6 +72,12 @@ private:
GPUConstantBufferDX12* _cbHandles[GPU_MAX_CB_BINDED];
GPUSamplerDX12* _samplers[GPU_MAX_SAMPLER_BINDED - GPU_STATIC_SAMPLERS_COUNT];
#if COMPILE_WITH_PROFILER
void* _tracyContext;
struct TracyZone { byte Data[TracyD3D12ZoneSize]; };
Array<TracyZone, InlinedAllocation<32>> _tracyZones;
#endif
public:
GPUContextDX12(GPUDeviceDX12* device, D3D12_COMMAND_LIST_TYPE type);
@@ -154,6 +161,7 @@ public:
// [GPUContext]
void FrameBegin() override;
void FrameEnd() override;
void OnPresent() override;
#if GPU_ALLOW_PROFILE_EVENTS
void EventBegin(const Char* name) override;
void EventEnd() override;

View File

@@ -49,10 +49,19 @@ void CmdBufferVulkan::End()
PROFILE_CPU();
ASSERT(IsOutsideRenderPass());
#if GPU_ALLOW_PROFILE_EVENTS && VK_EXT_debug_utils
#if GPU_ALLOW_PROFILE_EVENTS
// End remaining events
while (_eventsBegin--)
vkCmdEndDebugUtilsLabelEXT(GetHandle());
{
#if VK_EXT_debug_utils
if (vkCmdEndDebugUtilsLabelEXT)
vkCmdEndDebugUtilsLabelEXT(GetHandle());
#endif
#if GPU_ENABLE_TRACY
tracy::EndVkZoneScope(_tracyZones.Last().Data);
_tracyZones.RemoveLast();
#endif
}
#endif
VALIDATE_VULKAN_RESULT(vkEndCommandBuffer(GetHandle()));
@@ -85,39 +94,43 @@ void CmdBufferVulkan::EndRenderPass()
#if GPU_ALLOW_PROFILE_EVENTS
void CmdBufferVulkan::BeginEvent(const Char* name)
void CmdBufferVulkan::BeginEvent(const Char* name, void* tracyContext)
{
#if VK_EXT_debug_utils
if (!vkCmdBeginDebugUtilsLabelEXT)
return;
_eventsBegin++;
// Convert to ANSI
char buffer[101];
int32 i = 0;
while (i < 100 && name[i])
{
buffer[i] = (char)name[i];
i++;
}
buffer[i] = 0;
char buffer[60];
int32 bufferSize = StringUtils::Copy(buffer, name, sizeof(buffer));
VkDebugUtilsLabelEXT label;
RenderToolsVulkan::ZeroStruct(label, VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT);
label.pLabelName = buffer;
vkCmdBeginDebugUtilsLabelEXT(GetHandle(), &label);
#if GPU_ENABLE_TRACY
auto& zone = _tracyZones.AddOne();
tracy::BeginVkZoneScope(zone.Data, tracyContext, GetHandle(), buffer, bufferSize);
#endif
#if VK_EXT_debug_utils
if (vkCmdBeginDebugUtilsLabelEXT)
{
VkDebugUtilsLabelEXT label;
RenderToolsVulkan::ZeroStruct(label, VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT);
label.pLabelName = buffer;
vkCmdBeginDebugUtilsLabelEXT(GetHandle(), &label);
}
#endif
}
void CmdBufferVulkan::EndEvent()
{
#if VK_EXT_debug_utils
if (_eventsBegin == 0 || !vkCmdEndDebugUtilsLabelEXT)
if (_eventsBegin == 0)
return;
_eventsBegin--;
vkCmdEndDebugUtilsLabelEXT(GetHandle());
#if VK_EXT_debug_utils
if (vkCmdEndDebugUtilsLabelEXT)
vkCmdEndDebugUtilsLabelEXT(GetHandle());
#endif
#if GPU_ENABLE_TRACY
tracy::EndVkZoneScope(_tracyZones.Last().Data);
_tracyZones.RemoveLast();
#endif
}

View File

@@ -5,6 +5,7 @@
#include "GPUDeviceVulkan.h"
#include "Engine/Core/Types/BaseTypes.h"
#include "Engine/Core/Collections/Array.h"
#include <ThirdParty/tracy/tracy/TracyVulkan.hpp>
#if GRAPHICS_API_VULKAN
@@ -42,6 +43,8 @@ private:
FenceVulkan* _fence;
#if GPU_ALLOW_PROFILE_EVENTS
int32 _eventsBegin = 0;
struct TracyZone { byte Data[TracyVulkanZoneSize]; };
Array<TracyZone, InlinedAllocation<32>> _tracyZones;
#endif
// The latest value when command buffer was submitted.
@@ -129,7 +132,7 @@ public:
}
#if GPU_ALLOW_PROFILE_EVENTS
void BeginEvent(const Char* name);
void BeginEvent(const Char* name, void* tracyContext);
void EndEvent();
#endif

View File

@@ -4,6 +4,7 @@
#include "GPUContextVulkan.h"
#include "CmdBufferVulkan.h"
#include "GPUAdapterVulkan.h"
#include "RenderToolsVulkan.h"
#include "Engine/Core/Math/Color.h"
#include "Engine/Core/Math/Rectangle.h"
@@ -15,6 +16,7 @@
#include "Engine/Profiler/RenderStats.h"
#include "GPUShaderProgramVulkan.h"
#include "GPUTextureVulkan.h"
#include "QueueVulkan.h"
#include "Engine/Graphics/PixelFormatExtensions.h"
#include "Engine/Debug/Exceptions/NotImplementedException.h"
@@ -107,10 +109,37 @@ GPUContextVulkan::GPUContextVulkan(GPUDeviceVulkan* device, QueueVulkan* queue)
_handlesSizes[(int32)SpirvShaderResourceBindingType::SRV] = GPU_MAX_SR_BINDED;
_handlesSizes[(int32)SpirvShaderResourceBindingType::UAV] = GPU_MAX_UA_BINDED;
#endif
#if GPU_ENABLE_TRACY
#if VK_EXT_calibrated_timestamps && VK_EXT_host_query_reset
// Use calibrated timestamps extension
if (vkResetQueryPoolEXT && vkGetCalibratedTimestampsEXT)
{
_tracyContext = tracy::CreateVkContext(_device->Adapter->Gpu, _device->Device, vkResetQueryPoolEXT, vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, vkGetCalibratedTimestampsEXT);
}
else
#endif
{
// Use immediate command buffer for Tracy initialization
VkCommandBufferAllocateInfo cmdInfo;
RenderToolsVulkan::ZeroStruct(cmdInfo, VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO);
cmdInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
cmdInfo.commandPool = _cmdBufferManager->GetHandle();
cmdInfo.commandBufferCount = 1;
VkCommandBuffer tracyCmdBuffer;
vkAllocateCommandBuffers(_device->Device, &cmdInfo, &tracyCmdBuffer);
_tracyContext = tracy::CreateVkContext(_device->Adapter->Gpu, _device->Device, _queue->GetHandle(), tracyCmdBuffer, vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, vkGetCalibratedTimestampsEXT);
vkQueueWaitIdle(_queue->GetHandle());
vkFreeCommandBuffers(_device->Device, _cmdBufferManager->GetHandle(), 1, &tracyCmdBuffer);
}
#endif
}
GPUContextVulkan::~GPUContextVulkan()
{
#if GPU_ENABLE_TRACY
tracy::DestroyVkContext(_tracyContext);
#endif
for (int32 i = 0; i < _descriptorPools.Count(); i++)
{
_descriptorPools[i].ClearDelete();
@@ -679,15 +708,9 @@ void GPUContextVulkan::OnDrawCall()
// Bind descriptors sets to the graphics pipeline
if (pipelineState->HasDescriptorsPerStageMask)
{
vkCmdBindDescriptorSets(
cmdBuffer->GetHandle(),
VK_PIPELINE_BIND_POINT_GRAPHICS,
pipelineState->GetLayout()->Handle,
0,
pipelineState->DescriptorSetHandles.Count(),
pipelineState->DescriptorSetHandles.Get(),
pipelineState->DynamicOffsets.Count(),
pipelineState->DynamicOffsets.Get());
auto& descriptorSets = pipelineState->DescriptorSetHandles;
auto& dynamicOffsets = pipelineState->DynamicOffsets;
vkCmdBindDescriptorSets(cmdBuffer->GetHandle(), VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineState->GetLayout()->Handle, 0, descriptorSets.Count(), descriptorSets.Get(), dynamicOffsets.Count(), dynamicOffsets.Get());
}
_rtDirtyFlag = false;
@@ -748,6 +771,11 @@ void GPUContextVulkan::FrameEnd()
// Execute any queued layout transitions that weren't already handled by the render pass
FlushBarriers();
#if GPU_ENABLE_TRACY
if (cmdBuffer)
tracy::CollectVkContext(_tracyContext, cmdBuffer->GetHandle());
#endif
// Base
GPUContext::FrameEnd();
}
@@ -757,7 +785,12 @@ void GPUContextVulkan::FrameEnd()
// Begins a named GPU event on the current command buffer.
// The Tracy context is handed over so the command buffer can open a matching Tracy GPU zone.
void GPUContextVulkan::EventBegin(const Char* name)
{
    // _tracyContext is assigned only when GPU_ENABLE_TRACY is defined, so gate the read on the same
    // define (not on COMPILE_WITH_PROFILER) to avoid passing an uninitialized pointer when the
    // profiler is compiled in but Tracy GPU profiling is disabled.
#if GPU_ENABLE_TRACY
    void* tracyContext = _tracyContext;
#else
    void* tracyContext = nullptr;
#endif
    const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
    cmdBuffer->BeginEvent(name, tracyContext);
}
void GPUContextVulkan::EventEnd()

View File

@@ -94,6 +94,9 @@ private:
#if ENABLE_ASSERTION
uint32 _handlesSizes[(int32)SpirvShaderResourceBindingType::MAX];
#endif
#if COMPILE_WITH_PROFILER
void* _tracyContext;
#endif
typedef Array<DescriptorPoolVulkan*> DescriptorPoolArray;
Dictionary<uint32, DescriptorPoolArray> _descriptorPools;

View File

@@ -62,6 +62,10 @@ static const char* GDeviceExtensions[] =
#endif
#if VK_KHR_sampler_mirror_clamp_to_edge
VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
#endif
#if GPU_ENABLE_TRACY && VK_EXT_calibrated_timestamps && VK_EXT_host_query_reset
VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME,
VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
#endif
nullptr
};

View File

@@ -424,6 +424,7 @@ GPUSwapChainVulkan::Status GPUSwapChainVulkan::Present(QueueVulkan* presentQueue
{
if (_currentImageIndex == -1)
return Status::Ok;
PROFILE_CPU_NAMED("vkQueuePresentKHR");
VkPresentInfoKHR presentInfo;
RenderToolsVulkan::ZeroStruct(presentInfo, VK_STRUCTURE_TYPE_PRESENT_INFO_KHR);
@@ -506,7 +507,7 @@ int32 GPUSwapChainVulkan::TryPresent(Function<int32(GPUSwapChainVulkan*, void*)>
int32 GPUSwapChainVulkan::AcquireNextImage(SemaphoreVulkan*& outSemaphore)
{
PROFILE_CPU();
PROFILE_CPU_NAMED("vkAcquireNextImageKHR");
ASSERT(_swapChain && _backBuffers.HasItems());
uint32 imageIndex = _currentImageIndex;
@@ -514,13 +515,7 @@ int32 GPUSwapChainVulkan::AcquireNextImage(SemaphoreVulkan*& outSemaphore)
_semaphoreIndex = (_semaphoreIndex + 1) % _backBuffers.Count();
const auto semaphore = _backBuffers[_semaphoreIndex].ImageAcquiredSemaphore;
const VkResult result = vkAcquireNextImageKHR(
_device->Device,
_swapChain,
UINT64_MAX,
semaphore->GetHandle(),
VK_NULL_HANDLE,
&imageIndex);
const VkResult result = vkAcquireNextImageKHR(_device->Device, _swapChain, UINT64_MAX, semaphore->GetHandle(), VK_NULL_HANDLE, &imageIndex);
if (result == VK_ERROR_OUT_OF_DATE_KHR)
{
_semaphoreIndex = prevSemaphoreIndex;

View File

@@ -16,6 +16,18 @@ constexpr char DirectorySeparatorChar = '\\';
constexpr char AltDirectorySeparatorChar = '/';
constexpr char VolumeSeparatorChar = ':';
// Copies a Char string into a char buffer, narrowing every element with a plain cast.
// @param dst Destination buffer.
// @param src Null-terminated source string.
// @param count Capacity of dst in chars, including the slot for the null terminator (callers pass sizeof(buffer)).
// @returns Amount of copied characters (excluding the null terminator). The result is always null-terminated when count > 0.
int32 StringUtils::Copy(char* dst, const Char* src, int32 count)
{
    // An empty destination cannot even hold the terminator, so leave it untouched
    if (count <= 0)
        return 0;

    // Keep the last slot free for the terminator; writing it at dst[count] would run one element past the buffer
    const int32 maxChars = count - 1;
    int32 i = 0;
    while (i < maxChars && src[i])
    {
        dst[i] = (char)src[i];
        i++;
    }
    dst[i] = 0;
    return i;
}
const Char* StringUtils::FindIgnoreCase(const Char* str, const Char* toFind)
{
if (toFind == nullptr || str == nullptr)

View File

@@ -125,6 +125,9 @@ public:
// Copies the string (count is maximum amount of characters to copy).
static Char* Copy(Char* dst, const Char* src, int32 count);
// Copies the string (count is maximum amount of characters to copy). Returns amount of copied elements (excluding null terminator character).
static int32 Copy(char* dst, const Char* src, int32 count);
// Finds specific sub-string in the input string. Returns the first found position in the input string or null if failed.
static const Char* Find(const Char* str, const Char* toFind);

View File

@@ -1400,6 +1400,7 @@ TRACY_API LuaZoneState& GetLuaZoneState() { return s_luaZoneState; }
# endif
#endif
// Free-function form of the connection query; forwards to GetProfiler().IsConnected().
TRACY_API bool IsConnected() { return GetProfiler().IsConnected(); }
TRACY_API bool ProfilerAvailable() { return s_instance != nullptr; }
TRACY_API bool ProfilerAllocatorAvailable() { return !RpThreadShutdown; }

View File

@@ -0,0 +1,690 @@
#ifndef __TRACYCOLOR_HPP__
#define __TRACYCOLOR_HPP__
namespace tracy
{
struct Color
{
enum ColorType
{
Snow = 0xfffafa,
GhostWhite = 0xf8f8ff,
WhiteSmoke = 0xf5f5f5,
Gainsboro = 0xdcdcdc,
FloralWhite = 0xfffaf0,
OldLace = 0xfdf5e6,
Linen = 0xfaf0e6,
AntiqueWhite = 0xfaebd7,
PapayaWhip = 0xffefd5,
BlanchedAlmond = 0xffebcd,
Bisque = 0xffe4c4,
PeachPuff = 0xffdab9,
NavajoWhite = 0xffdead,
Moccasin = 0xffe4b5,
Cornsilk = 0xfff8dc,
Ivory = 0xfffff0,
LemonChiffon = 0xfffacd,
Seashell = 0xfff5ee,
Honeydew = 0xf0fff0,
MintCream = 0xf5fffa,
Azure = 0xf0ffff,
AliceBlue = 0xf0f8ff,
Lavender = 0xe6e6fa,
LavenderBlush = 0xfff0f5,
MistyRose = 0xffe4e1,
White = 0xffffff,
Black = 0x000000,
DarkSlateGray = 0x2f4f4f,
DarkSlateGrey = 0x2f4f4f,
DimGray = 0x696969,
DimGrey = 0x696969,
SlateGray = 0x708090,
SlateGrey = 0x708090,
LightSlateGray = 0x778899,
LightSlateGrey = 0x778899,
Gray = 0xbebebe,
Grey = 0xbebebe,
X11Gray = 0xbebebe,
X11Grey = 0xbebebe,
WebGray = 0x808080,
WebGrey = 0x808080,
LightGrey = 0xd3d3d3,
LightGray = 0xd3d3d3,
MidnightBlue = 0x191970,
Navy = 0x000080,
NavyBlue = 0x000080,
CornflowerBlue = 0x6495ed,
DarkSlateBlue = 0x483d8b,
SlateBlue = 0x6a5acd,
MediumSlateBlue = 0x7b68ee,
LightSlateBlue = 0x8470ff,
MediumBlue = 0x0000cd,
RoyalBlue = 0x4169e1,
Blue = 0x0000ff,
DodgerBlue = 0x1e90ff,
DeepSkyBlue = 0x00bfff,
SkyBlue = 0x87ceeb,
LightSkyBlue = 0x87cefa,
SteelBlue = 0x4682b4,
LightSteelBlue = 0xb0c4de,
LightBlue = 0xadd8e6,
PowderBlue = 0xb0e0e6,
PaleTurquoise = 0xafeeee,
DarkTurquoise = 0x00ced1,
MediumTurquoise = 0x48d1cc,
Turquoise = 0x40e0d0,
Cyan = 0x00ffff,
Aqua = 0x00ffff,
LightCyan = 0xe0ffff,
CadetBlue = 0x5f9ea0,
MediumAquamarine = 0x66cdaa,
Aquamarine = 0x7fffd4,
DarkGreen = 0x006400,
DarkOliveGreen = 0x556b2f,
DarkSeaGreen = 0x8fbc8f,
SeaGreen = 0x2e8b57,
MediumSeaGreen = 0x3cb371,
LightSeaGreen = 0x20b2aa,
PaleGreen = 0x98fb98,
SpringGreen = 0x00ff7f,
LawnGreen = 0x7cfc00,
Green = 0x00ff00,
Lime = 0x00ff00,
X11Green = 0x00ff00,
WebGreen = 0x008000,
Chartreuse = 0x7fff00,
MediumSpringGreen = 0x00fa9a,
GreenYellow = 0xadff2f,
LimeGreen = 0x32cd32,
YellowGreen = 0x9acd32,
ForestGreen = 0x228b22,
OliveDrab = 0x6b8e23,
DarkKhaki = 0xbdb76b,
Khaki = 0xf0e68c,
PaleGoldenrod = 0xeee8aa,
LightGoldenrodYellow = 0xfafad2,
LightYellow = 0xffffe0,
Yellow = 0xffff00,
Gold = 0xffd700,
LightGoldenrod = 0xeedd82,
Goldenrod = 0xdaa520,
DarkGoldenrod = 0xb8860b,
RosyBrown = 0xbc8f8f,
IndianRed = 0xcd5c5c,
SaddleBrown = 0x8b4513,
Sienna = 0xa0522d,
Peru = 0xcd853f,
Burlywood = 0xdeb887,
Beige = 0xf5f5dc,
Wheat = 0xf5deb3,
SandyBrown = 0xf4a460,
Tan = 0xd2b48c,
Chocolate = 0xd2691e,
Firebrick = 0xb22222,
Brown = 0xa52a2a,
DarkSalmon = 0xe9967a,
Salmon = 0xfa8072,
LightSalmon = 0xffa07a,
Orange = 0xffa500,
DarkOrange = 0xff8c00,
Coral = 0xff7f50,
LightCoral = 0xf08080,
Tomato = 0xff6347,
OrangeRed = 0xff4500,
Red = 0xff0000,
HotPink = 0xff69b4,
DeepPink = 0xff1493,
Pink = 0xffc0cb,
LightPink = 0xffb6c1,
PaleVioletRed = 0xdb7093,
Maroon = 0xb03060,
X11Maroon = 0xb03060,
WebMaroon = 0x800000,
MediumVioletRed = 0xc71585,
VioletRed = 0xd02090,
Magenta = 0xff00ff,
Fuchsia = 0xff00ff,
Violet = 0xee82ee,
Plum = 0xdda0dd,
Orchid = 0xda70d6,
MediumOrchid = 0xba55d3,
DarkOrchid = 0x9932cc,
DarkViolet = 0x9400d3,
BlueViolet = 0x8a2be2,
Purple = 0xa020f0,
X11Purple = 0xa020f0,
WebPurple = 0x800080,
MediumPurple = 0x9370db,
Thistle = 0xd8bfd8,
Snow1 = 0xfffafa,
Snow2 = 0xeee9e9,
Snow3 = 0xcdc9c9,
Snow4 = 0x8b8989,
Seashell1 = 0xfff5ee,
Seashell2 = 0xeee5de,
Seashell3 = 0xcdc5bf,
Seashell4 = 0x8b8682,
AntiqueWhite1 = 0xffefdb,
AntiqueWhite2 = 0xeedfcc,
AntiqueWhite3 = 0xcdc0b0,
AntiqueWhite4 = 0x8b8378,
Bisque1 = 0xffe4c4,
Bisque2 = 0xeed5b7,
Bisque3 = 0xcdb79e,
Bisque4 = 0x8b7d6b,
PeachPuff1 = 0xffdab9,
PeachPuff2 = 0xeecbad,
PeachPuff3 = 0xcdaf95,
PeachPuff4 = 0x8b7765,
NavajoWhite1 = 0xffdead,
NavajoWhite2 = 0xeecfa1,
NavajoWhite3 = 0xcdb38b,
NavajoWhite4 = 0x8b795e,
LemonChiffon1 = 0xfffacd,
LemonChiffon2 = 0xeee9bf,
LemonChiffon3 = 0xcdc9a5,
LemonChiffon4 = 0x8b8970,
Cornsilk1 = 0xfff8dc,
Cornsilk2 = 0xeee8cd,
Cornsilk3 = 0xcdc8b1,
Cornsilk4 = 0x8b8878,
Ivory1 = 0xfffff0,
Ivory2 = 0xeeeee0,
Ivory3 = 0xcdcdc1,
Ivory4 = 0x8b8b83,
Honeydew1 = 0xf0fff0,
Honeydew2 = 0xe0eee0,
Honeydew3 = 0xc1cdc1,
Honeydew4 = 0x838b83,
LavenderBlush1 = 0xfff0f5,
LavenderBlush2 = 0xeee0e5,
LavenderBlush3 = 0xcdc1c5,
LavenderBlush4 = 0x8b8386,
MistyRose1 = 0xffe4e1,
MistyRose2 = 0xeed5d2,
MistyRose3 = 0xcdb7b5,
MistyRose4 = 0x8b7d7b,
Azure1 = 0xf0ffff,
Azure2 = 0xe0eeee,
Azure3 = 0xc1cdcd,
Azure4 = 0x838b8b,
SlateBlue1 = 0x836fff,
SlateBlue2 = 0x7a67ee,
SlateBlue3 = 0x6959cd,
SlateBlue4 = 0x473c8b,
RoyalBlue1 = 0x4876ff,
RoyalBlue2 = 0x436eee,
RoyalBlue3 = 0x3a5fcd,
RoyalBlue4 = 0x27408b,
Blue1 = 0x0000ff,
Blue2 = 0x0000ee,
Blue3 = 0x0000cd,
Blue4 = 0x00008b,
DodgerBlue1 = 0x1e90ff,
DodgerBlue2 = 0x1c86ee,
DodgerBlue3 = 0x1874cd,
DodgerBlue4 = 0x104e8b,
SteelBlue1 = 0x63b8ff,
SteelBlue2 = 0x5cacee,
SteelBlue3 = 0x4f94cd,
SteelBlue4 = 0x36648b,
DeepSkyBlue1 = 0x00bfff,
DeepSkyBlue2 = 0x00b2ee,
DeepSkyBlue3 = 0x009acd,
DeepSkyBlue4 = 0x00688b,
SkyBlue1 = 0x87ceff,
SkyBlue2 = 0x7ec0ee,
SkyBlue3 = 0x6ca6cd,
SkyBlue4 = 0x4a708b,
LightSkyBlue1 = 0xb0e2ff,
LightSkyBlue2 = 0xa4d3ee,
LightSkyBlue3 = 0x8db6cd,
LightSkyBlue4 = 0x607b8b,
SlateGray1 = 0xc6e2ff,
SlateGray2 = 0xb9d3ee,
SlateGray3 = 0x9fb6cd,
SlateGray4 = 0x6c7b8b,
LightSteelBlue1 = 0xcae1ff,
LightSteelBlue2 = 0xbcd2ee,
LightSteelBlue3 = 0xa2b5cd,
LightSteelBlue4 = 0x6e7b8b,
LightBlue1 = 0xbfefff,
LightBlue2 = 0xb2dfee,
LightBlue3 = 0x9ac0cd,
LightBlue4 = 0x68838b,
LightCyan1 = 0xe0ffff,
LightCyan2 = 0xd1eeee,
LightCyan3 = 0xb4cdcd,
LightCyan4 = 0x7a8b8b,
PaleTurquoise1 = 0xbbffff,
PaleTurquoise2 = 0xaeeeee,
PaleTurquoise3 = 0x96cdcd,
PaleTurquoise4 = 0x668b8b,
CadetBlue1 = 0x98f5ff,
CadetBlue2 = 0x8ee5ee,
CadetBlue3 = 0x7ac5cd,
CadetBlue4 = 0x53868b,
Turquoise1 = 0x00f5ff,
Turquoise2 = 0x00e5ee,
Turquoise3 = 0x00c5cd,
Turquoise4 = 0x00868b,
Cyan1 = 0x00ffff,
Cyan2 = 0x00eeee,
Cyan3 = 0x00cdcd,
Cyan4 = 0x008b8b,
DarkSlateGray1 = 0x97ffff,
DarkSlateGray2 = 0x8deeee,
DarkSlateGray3 = 0x79cdcd,
DarkSlateGray4 = 0x528b8b,
Aquamarine1 = 0x7fffd4,
Aquamarine2 = 0x76eec6,
Aquamarine3 = 0x66cdaa,
Aquamarine4 = 0x458b74,
DarkSeaGreen1 = 0xc1ffc1,
DarkSeaGreen2 = 0xb4eeb4,
DarkSeaGreen3 = 0x9bcd9b,
DarkSeaGreen4 = 0x698b69,
SeaGreen1 = 0x54ff9f,
SeaGreen2 = 0x4eee94,
SeaGreen3 = 0x43cd80,
SeaGreen4 = 0x2e8b57,
PaleGreen1 = 0x9aff9a,
PaleGreen2 = 0x90ee90,
PaleGreen3 = 0x7ccd7c,
PaleGreen4 = 0x548b54,
SpringGreen1 = 0x00ff7f,
SpringGreen2 = 0x00ee76,
SpringGreen3 = 0x00cd66,
SpringGreen4 = 0x008b45,
Green1 = 0x00ff00,
Green2 = 0x00ee00,
Green3 = 0x00cd00,
Green4 = 0x008b00,
Chartreuse1 = 0x7fff00,
Chartreuse2 = 0x76ee00,
Chartreuse3 = 0x66cd00,
Chartreuse4 = 0x458b00,
OliveDrab1 = 0xc0ff3e,
OliveDrab2 = 0xb3ee3a,
OliveDrab3 = 0x9acd32,
OliveDrab4 = 0x698b22,
DarkOliveGreen1 = 0xcaff70,
DarkOliveGreen2 = 0xbcee68,
DarkOliveGreen3 = 0xa2cd5a,
DarkOliveGreen4 = 0x6e8b3d,
Khaki1 = 0xfff68f,
Khaki2 = 0xeee685,
Khaki3 = 0xcdc673,
Khaki4 = 0x8b864e,
LightGoldenrod1 = 0xffec8b,
LightGoldenrod2 = 0xeedc82,
LightGoldenrod3 = 0xcdbe70,
LightGoldenrod4 = 0x8b814c,
LightYellow1 = 0xffffe0,
LightYellow2 = 0xeeeed1,
LightYellow3 = 0xcdcdb4,
LightYellow4 = 0x8b8b7a,
Yellow1 = 0xffff00,
Yellow2 = 0xeeee00,
Yellow3 = 0xcdcd00,
Yellow4 = 0x8b8b00,
Gold1 = 0xffd700,
Gold2 = 0xeec900,
Gold3 = 0xcdad00,
Gold4 = 0x8b7500,
Goldenrod1 = 0xffc125,
Goldenrod2 = 0xeeb422,
Goldenrod3 = 0xcd9b1d,
Goldenrod4 = 0x8b6914,
DarkGoldenrod1 = 0xffb90f,
DarkGoldenrod2 = 0xeead0e,
DarkGoldenrod3 = 0xcd950c,
DarkGoldenrod4 = 0x8b6508,
RosyBrown1 = 0xffc1c1,
RosyBrown2 = 0xeeb4b4,
RosyBrown3 = 0xcd9b9b,
RosyBrown4 = 0x8b6969,
IndianRed1 = 0xff6a6a,
IndianRed2 = 0xee6363,
IndianRed3 = 0xcd5555,
IndianRed4 = 0x8b3a3a,
Sienna1 = 0xff8247,
Sienna2 = 0xee7942,
Sienna3 = 0xcd6839,
Sienna4 = 0x8b4726,
Burlywood1 = 0xffd39b,
Burlywood2 = 0xeec591,
Burlywood3 = 0xcdaa7d,
Burlywood4 = 0x8b7355,
Wheat1 = 0xffe7ba,
Wheat2 = 0xeed8ae,
Wheat3 = 0xcdba96,
Wheat4 = 0x8b7e66,
Tan1 = 0xffa54f,
Tan2 = 0xee9a49,
Tan3 = 0xcd853f,
Tan4 = 0x8b5a2b,
Chocolate1 = 0xff7f24,
Chocolate2 = 0xee7621,
Chocolate3 = 0xcd661d,
Chocolate4 = 0x8b4513,
Firebrick1 = 0xff3030,
Firebrick2 = 0xee2c2c,
Firebrick3 = 0xcd2626,
Firebrick4 = 0x8b1a1a,
Brown1 = 0xff4040,
Brown2 = 0xee3b3b,
Brown3 = 0xcd3333,
Brown4 = 0x8b2323,
Salmon1 = 0xff8c69,
Salmon2 = 0xee8262,
Salmon3 = 0xcd7054,
Salmon4 = 0x8b4c39,
LightSalmon1 = 0xffa07a,
LightSalmon2 = 0xee9572,
LightSalmon3 = 0xcd8162,
LightSalmon4 = 0x8b5742,
Orange1 = 0xffa500,
Orange2 = 0xee9a00,
Orange3 = 0xcd8500,
Orange4 = 0x8b5a00,
DarkOrange1 = 0xff7f00,
DarkOrange2 = 0xee7600,
DarkOrange3 = 0xcd6600,
DarkOrange4 = 0x8b4500,
Coral1 = 0xff7256,
Coral2 = 0xee6a50,
Coral3 = 0xcd5b45,
Coral4 = 0x8b3e2f,
Tomato1 = 0xff6347,
Tomato2 = 0xee5c42,
Tomato3 = 0xcd4f39,
Tomato4 = 0x8b3626,
OrangeRed1 = 0xff4500,
OrangeRed2 = 0xee4000,
OrangeRed3 = 0xcd3700,
OrangeRed4 = 0x8b2500,
Red1 = 0xff0000,
Red2 = 0xee0000,
Red3 = 0xcd0000,
Red4 = 0x8b0000,
DeepPink1 = 0xff1493,
DeepPink2 = 0xee1289,
DeepPink3 = 0xcd1076,
DeepPink4 = 0x8b0a50,
HotPink1 = 0xff6eb4,
HotPink2 = 0xee6aa7,
HotPink3 = 0xcd6090,
HotPink4 = 0x8b3a62,
Pink1 = 0xffb5c5,
Pink2 = 0xeea9b8,
Pink3 = 0xcd919e,
Pink4 = 0x8b636c,
LightPink1 = 0xffaeb9,
LightPink2 = 0xeea2ad,
LightPink3 = 0xcd8c95,
LightPink4 = 0x8b5f65,
PaleVioletRed1 = 0xff82ab,
PaleVioletRed2 = 0xee799f,
PaleVioletRed3 = 0xcd6889,
PaleVioletRed4 = 0x8b475d,
Maroon1 = 0xff34b3,
Maroon2 = 0xee30a7,
Maroon3 = 0xcd2990,
Maroon4 = 0x8b1c62,
VioletRed1 = 0xff3e96,
VioletRed2 = 0xee3a8c,
VioletRed3 = 0xcd3278,
VioletRed4 = 0x8b2252,
Magenta1 = 0xff00ff,
Magenta2 = 0xee00ee,
Magenta3 = 0xcd00cd,
Magenta4 = 0x8b008b,
Orchid1 = 0xff83fa,
Orchid2 = 0xee7ae9,
Orchid3 = 0xcd69c9,
Orchid4 = 0x8b4789,
Plum1 = 0xffbbff,
Plum2 = 0xeeaeee,
Plum3 = 0xcd96cd,
Plum4 = 0x8b668b,
MediumOrchid1 = 0xe066ff,
MediumOrchid2 = 0xd15fee,
MediumOrchid3 = 0xb452cd,
MediumOrchid4 = 0x7a378b,
DarkOrchid1 = 0xbf3eff,
DarkOrchid2 = 0xb23aee,
DarkOrchid3 = 0x9a32cd,
DarkOrchid4 = 0x68228b,
Purple1 = 0x9b30ff,
Purple2 = 0x912cee,
Purple3 = 0x7d26cd,
Purple4 = 0x551a8b,
MediumPurple1 = 0xab82ff,
MediumPurple2 = 0x9f79ee,
MediumPurple3 = 0x8968cd,
MediumPurple4 = 0x5d478b,
Thistle1 = 0xffe1ff,
Thistle2 = 0xeed2ee,
Thistle3 = 0xcdb5cd,
Thistle4 = 0x8b7b8b,
Gray0 = 0x000000,
Grey0 = 0x000000,
Gray1 = 0x030303,
Grey1 = 0x030303,
Gray2 = 0x050505,
Grey2 = 0x050505,
Gray3 = 0x080808,
Grey3 = 0x080808,
Gray4 = 0x0a0a0a,
Grey4 = 0x0a0a0a,
Gray5 = 0x0d0d0d,
Grey5 = 0x0d0d0d,
Gray6 = 0x0f0f0f,
Grey6 = 0x0f0f0f,
Gray7 = 0x121212,
Grey7 = 0x121212,
Gray8 = 0x141414,
Grey8 = 0x141414,
Gray9 = 0x171717,
Grey9 = 0x171717,
Gray10 = 0x1a1a1a,
Grey10 = 0x1a1a1a,
Gray11 = 0x1c1c1c,
Grey11 = 0x1c1c1c,
Gray12 = 0x1f1f1f,
Grey12 = 0x1f1f1f,
Gray13 = 0x212121,
Grey13 = 0x212121,
Gray14 = 0x242424,
Grey14 = 0x242424,
Gray15 = 0x262626,
Grey15 = 0x262626,
Gray16 = 0x292929,
Grey16 = 0x292929,
Gray17 = 0x2b2b2b,
Grey17 = 0x2b2b2b,
Gray18 = 0x2e2e2e,
Grey18 = 0x2e2e2e,
Gray19 = 0x303030,
Grey19 = 0x303030,
Gray20 = 0x333333,
Grey20 = 0x333333,
Gray21 = 0x363636,
Grey21 = 0x363636,
Gray22 = 0x383838,
Grey22 = 0x383838,
Gray23 = 0x3b3b3b,
Grey23 = 0x3b3b3b,
Gray24 = 0x3d3d3d,
Grey24 = 0x3d3d3d,
Gray25 = 0x404040,
Grey25 = 0x404040,
Gray26 = 0x424242,
Grey26 = 0x424242,
Gray27 = 0x454545,
Grey27 = 0x454545,
Gray28 = 0x474747,
Grey28 = 0x474747,
Gray29 = 0x4a4a4a,
Grey29 = 0x4a4a4a,
Gray30 = 0x4d4d4d,
Grey30 = 0x4d4d4d,
Gray31 = 0x4f4f4f,
Grey31 = 0x4f4f4f,
Gray32 = 0x525252,
Grey32 = 0x525252,
Gray33 = 0x545454,
Grey33 = 0x545454,
Gray34 = 0x575757,
Grey34 = 0x575757,
Gray35 = 0x595959,
Grey35 = 0x595959,
Gray36 = 0x5c5c5c,
Grey36 = 0x5c5c5c,
Gray37 = 0x5e5e5e,
Grey37 = 0x5e5e5e,
Gray38 = 0x616161,
Grey38 = 0x616161,
Gray39 = 0x636363,
Grey39 = 0x636363,
Gray40 = 0x666666,
Grey40 = 0x666666,
Gray41 = 0x696969,
Grey41 = 0x696969,
Gray42 = 0x6b6b6b,
Grey42 = 0x6b6b6b,
Gray43 = 0x6e6e6e,
Grey43 = 0x6e6e6e,
Gray44 = 0x707070,
Grey44 = 0x707070,
Gray45 = 0x737373,
Grey45 = 0x737373,
Gray46 = 0x757575,
Grey46 = 0x757575,
Gray47 = 0x787878,
Grey47 = 0x787878,
Gray48 = 0x7a7a7a,
Grey48 = 0x7a7a7a,
Gray49 = 0x7d7d7d,
Grey49 = 0x7d7d7d,
Gray50 = 0x7f7f7f,
Grey50 = 0x7f7f7f,
Gray51 = 0x828282,
Grey51 = 0x828282,
Gray52 = 0x858585,
Grey52 = 0x858585,
Gray53 = 0x878787,
Grey53 = 0x878787,
Gray54 = 0x8a8a8a,
Grey54 = 0x8a8a8a,
Gray55 = 0x8c8c8c,
Grey55 = 0x8c8c8c,
Gray56 = 0x8f8f8f,
Grey56 = 0x8f8f8f,
Gray57 = 0x919191,
Grey57 = 0x919191,
Gray58 = 0x949494,
Grey58 = 0x949494,
Gray59 = 0x969696,
Grey59 = 0x969696,
Gray60 = 0x999999,
Grey60 = 0x999999,
Gray61 = 0x9c9c9c,
Grey61 = 0x9c9c9c,
Gray62 = 0x9e9e9e,
Grey62 = 0x9e9e9e,
Gray63 = 0xa1a1a1,
Grey63 = 0xa1a1a1,
Gray64 = 0xa3a3a3,
Grey64 = 0xa3a3a3,
Gray65 = 0xa6a6a6,
Grey65 = 0xa6a6a6,
Gray66 = 0xa8a8a8,
Grey66 = 0xa8a8a8,
Gray67 = 0xababab,
Grey67 = 0xababab,
Gray68 = 0xadadad,
Grey68 = 0xadadad,
Gray69 = 0xb0b0b0,
Grey69 = 0xb0b0b0,
Gray70 = 0xb3b3b3,
Grey70 = 0xb3b3b3,
Gray71 = 0xb5b5b5,
Grey71 = 0xb5b5b5,
Gray72 = 0xb8b8b8,
Grey72 = 0xb8b8b8,
Gray73 = 0xbababa,
Grey73 = 0xbababa,
Gray74 = 0xbdbdbd,
Grey74 = 0xbdbdbd,
Gray75 = 0xbfbfbf,
Grey75 = 0xbfbfbf,
Gray76 = 0xc2c2c2,
Grey76 = 0xc2c2c2,
Gray77 = 0xc4c4c4,
Grey77 = 0xc4c4c4,
Gray78 = 0xc7c7c7,
Grey78 = 0xc7c7c7,
Gray79 = 0xc9c9c9,
Grey79 = 0xc9c9c9,
Gray80 = 0xcccccc,
Grey80 = 0xcccccc,
Gray81 = 0xcfcfcf,
Grey81 = 0xcfcfcf,
Gray82 = 0xd1d1d1,
Grey82 = 0xd1d1d1,
Gray83 = 0xd4d4d4,
Grey83 = 0xd4d4d4,
Gray84 = 0xd6d6d6,
Grey84 = 0xd6d6d6,
Gray85 = 0xd9d9d9,
Grey85 = 0xd9d9d9,
Gray86 = 0xdbdbdb,
Grey86 = 0xdbdbdb,
Gray87 = 0xdedede,
Grey87 = 0xdedede,
Gray88 = 0xe0e0e0,
Grey88 = 0xe0e0e0,
Gray89 = 0xe3e3e3,
Grey89 = 0xe3e3e3,
Gray90 = 0xe5e5e5,
Grey90 = 0xe5e5e5,
Gray91 = 0xe8e8e8,
Grey91 = 0xe8e8e8,
Gray92 = 0xebebeb,
Grey92 = 0xebebeb,
Gray93 = 0xededed,
Grey93 = 0xededed,
Gray94 = 0xf0f0f0,
Grey94 = 0xf0f0f0,
Gray95 = 0xf2f2f2,
Grey95 = 0xf2f2f2,
Gray96 = 0xf5f5f5,
Grey96 = 0xf5f5f5,
Gray97 = 0xf7f7f7,
Grey97 = 0xf7f7f7,
Gray98 = 0xfafafa,
Grey98 = 0xfafafa,
Gray99 = 0xfcfcfc,
Grey99 = 0xfcfcfc,
Gray100 = 0xffffff,
Grey100 = 0xffffff,
DarkGrey = 0xa9a9a9,
DarkGray = 0xa9a9a9,
DarkBlue = 0x00008b,
DarkCyan = 0x008b8b,
DarkMagenta = 0x8b008b,
DarkRed = 0x8b0000,
LightGreen = 0x90ee90,
Crimson = 0xdc143c,
Indigo = 0x4b0082,
Olive = 0x808000,
RebeccaPurple = 0x663399,
Silver = 0xc0c0c0,
Teal = 0x008080,
};
};
}
#endif

View File

@@ -351,3 +351,20 @@ TRACY_API void ___tracy_set_thread_name( const char* name ) { tracy::SetThreadNa
#ifdef __cplusplus
}
#endif
// Insert graphics integration (within Tracy module)
#define TRACY_GPU_IMPL 1
#if TRACY_GPU_D3D11
#include <ThirdParty/tracy/tracy/TracyD3D11.hpp>
static_assert(sizeof(tracy::D3D11ZoneScope) <= TracyD3D11ZoneSize, "Invalid zone size");
#endif
#if TRACY_GPU_D3D12
#include <ThirdParty/tracy/tracy/TracyD3D12.hpp>
static_assert(sizeof(tracy::D3D12ZoneScope) <= TracyD3D12ZoneSize, "Invalid zone size");
#endif
#if TRACY_GPU_VULKAN
#define GRAPHICS_API_VULKAN 1
#include "Engine/GraphicsDevice/Vulkan/IncludeVulkanHeaders.h"
#include <ThirdParty/tracy/tracy/TracyVulkan.hpp>
static_assert(sizeof(tracy::VkCtxScope) <= TracyVulkanZoneSize, "Invalid zone size");
#endif

View File

@@ -15,6 +15,11 @@ public class tracy : ThirdPartyModule
/// </summary>
public static bool OnDemand = true;
/// <summary>
/// Enables GPU profiling.
/// </summary>
public static bool GPU = true;
/// <inheritdoc />
public override void Init()
{
@@ -56,8 +61,25 @@ public class tracy : ThirdPartyModule
options.PrivateDefinitions.Add("TRACY_USE_MALLOC");
options.PrivateDefinitions.Add("TRACY_ONLY_IPV4");
options.PrivateDefinitions.Add("TRACY_NO_PIPE");
options.PrivateDefinitions.Add("TRACY_NO_CODE_TRANSFER");
break;
}
if (GPU)
{
// Ask Graphics module which graphics backends are active
var graphics = new Graphics();
graphics.FilePath = FilePath;
graphics.FolderPath = FolderPath;
var graphicsOptions = (BuildOptions)options.Clone();
graphics.Setup(graphicsOptions);
if (graphicsOptions.PrivateDependencies.Contains("GraphicsDeviceDX11"))
options.PrivateDefinitions.Add("TRACY_GPU_D3D11");
if (graphicsOptions.PrivateDependencies.Contains("GraphicsDeviceDX12"))
options.PrivateDefinitions.Add("TRACY_GPU_D3D12");
if (graphicsOptions.PrivateDependencies.Contains("GraphicsDeviceVulkan"))
options.PrivateDefinitions.Add("TRACY_GPU_VULKAN");
}
}
/// <inheritdoc />

View File

@@ -119,6 +119,8 @@
namespace tracy
{
// Free-function connection query; the TracyIsConnected macro expands to a call to this function.
TRACY_API bool IsConnected();
class TRACY_API Profiler
{
public:
@@ -143,7 +145,6 @@ public:
static void MemAllocCallstackNamed( const void* ptr, size_t size, int depth, bool secure, const char* name );
static void MemFreeCallstackNamed( const void* ptr, int depth, bool secure, const char* name );
static void SendCallstack( int depth );
static void ParameterRegister( ParameterCallback cb );
static void ParameterRegister( ParameterCallback cb, void* data );
static void ParameterSetup( uint32_t idx, const char* name, bool isBool, int32_t val );
};
@@ -255,7 +256,7 @@ public:
#define TracySourceCallbackRegister( cb, data ) tracy::Profiler::SourceCallbackRegister( cb, data )
#define TracyParameterRegister( cb, data ) tracy::Profiler::ParameterRegister( cb, data )
#define TracyParameterSetup( idx, name, isBool, val ) tracy::Profiler::ParameterSetup( idx, name, isBool, val )
#define TracyIsConnected tracy::GetProfiler().IsConnected()
#define TracyIsConnected tracy::IsConnected()
#define TracySetProgramName( name ) tracy::GetProfiler().SetProgramName( name );
#ifdef TRACY_FIBERS

View File

@@ -0,0 +1,456 @@
#ifndef __TRACYD3D11_HPP__
#define __TRACYD3D11_HPP__
#define TracyD3D11ZoneSize 16
#ifndef TRACY_ENABLE
// Profiling disabled (TRACY_ENABLE not defined): context creation yields nullptr and every other
// macro expands to nothing, so instrumented code compiles without any profiling work.
#define TracyD3D11Context(device,queue) nullptr
#define TracyD3D11Destroy(ctx)
#define TracyD3D11ContextName(ctx, name, size)
#define TracyD3D11NewFrame(ctx)
#define TracyD3D11Zone(ctx, name)
#define TracyD3D11ZoneC(ctx, name, color)
#define TracyD3D11NamedZone(ctx, varname, name, active)
#define TracyD3D11NamedZoneC(ctx, varname, name, color, active)
#define TracyD3D11ZoneTransient(ctx, varname, name, active)
#define TracyD3D11ZoneS(ctx, name, depth)
#define TracyD3D11ZoneCS(ctx, name, color, depth)
#define TracyD3D11NamedZoneS(ctx, varname, name, depth, active)
#define TracyD3D11NamedZoneCS(ctx, varname, name, color, depth, active)
#define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active)
#define TracyD3D11Collect(ctx)
namespace tracy
{
// Empty placeholder so code that names the zone scope type still compiles.
class D3D11ZoneScope {};
}
// Opaque context handle used while profiling is disabled.
using TracyD3D11Ctx = void*;
#elif TRACY_GPU_IMPL
#include <atomic>
#include <assert.h>
#include <stdlib.h>
#include "../client/TracyProfiler.hpp"
#include "../client/TracyCallstack.hpp"
#include "../common/TracyYield.hpp"
#include "../common/TracyColor.hpp"
#include <d3d11.h>
// Callstack depth passed along with messages emitted by this backend (0 = none).
#define TRACY_CALLSTACK 0
// Forwards a colored text message to tracy::Profiler::MessageColor.
#define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK )
// Asserts (debug builds), reports the failure to the profiler and then runs the optional trailing
// statement (typically `return`). The macro deliberately does NOT end with a semicolon: the caller's
// own `;` completes the do/while, which keeps it a single statement inside un-braced if/else.
#define TracyD3D11Panic(msg, ...) do { assert(false && "TracyD3D11: " msg); TracyMessageLC("TracyD3D11: " msg, tracy::Color::Red4); __VA_ARGS__; } while(false)
namespace tracy
{
class D3D11Ctx
{
friend class D3D11ZoneScope;
static constexpr uint32_t MaxQueries = 64 * 1024;
enum CollectMode { POLL, BLOCK };
public:
tracy_force_inline D3D11Ctx( ID3D11Device* device, ID3D11DeviceContext* devicectx )
{
// TODO: consider calling ID3D11Device::GetImmediateContext() instead of passing it as an argument
m_device = device;
device->AddRef();
m_immediateDevCtx = devicectx;
devicectx->AddRef();
{
D3D11_QUERY_DESC desc = { };
desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
if (FAILED(m_device->CreateQuery(&desc, &m_disjointQuery)))
{
TracyD3D11Panic("unable to create disjoint timestamp query.", return);
}
}
for (ID3D11Query*& query : m_queries)
{
D3D11_QUERY_DESC desc = { };
desc.Query = D3D11_QUERY_TIMESTAMP;
if (FAILED(m_device->CreateQuery(&desc, &query)))
{
TracyD3D11Panic("unable to create timestamp query.", return);
}
}
// Calibrate CPU and GPU timestamps
int64_t tcpu = 0;
int64_t tgpu = 0;
for (int attempts = 0; attempts < 50; attempts++)
{
m_immediateDevCtx->Begin(m_disjointQuery);
m_immediateDevCtx->End(m_queries[0]);
m_immediateDevCtx->End(m_disjointQuery);
int64_t tcpu0 = Profiler::GetTime();
WaitForQuery(m_disjointQuery);
// NOTE: one would expect that by waiting for the enclosing disjoint query to finish,
// all timestamp queries within would also be readily available, but that does not
// seem to be the case here... See https://github.com/wolfpld/tracy/issues/947
WaitForQuery(m_queries[0]);
int64_t tcpu1 = Profiler::GetTime();
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = { };
if (m_immediateDevCtx->GetData(m_disjointQuery, &disjoint, sizeof(disjoint), 0) != S_OK)
{
TracyMessageLC("TracyD3D11: unable to query GPU timestamp; retrying...", tracy::Color::Tomato);
continue;
}
if (disjoint.Disjoint)
continue;
UINT64 timestamp = 0;
if (m_immediateDevCtx->GetData(m_queries[0], &timestamp, sizeof(timestamp), 0) != S_OK)
continue; // this should never happen (we waited for the query to finish above)
tcpu = tcpu0 + (tcpu1 - tcpu0) * 1 / 2;
tgpu = timestamp * (1000000000 / disjoint.Frequency);
break;
}
// ready to roll
m_contextId = GetGpuCtxCounter().fetch_add(1);
m_immediateDevCtx->Begin(m_disjointQuery);
m_previousCheckpoint = m_nextCheckpoint = 0;
auto* item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
MemWrite( &item->gpuNewContext.thread, uint32_t(0) ); // #TODO: why not GetThreadHandle()?
MemWrite( &item->gpuNewContext.period, 1.0f );
MemWrite( &item->gpuNewContext.context, m_contextId);
MemWrite( &item->gpuNewContext.flags, uint8_t(0) );
MemWrite( &item->gpuNewContext.type, GpuContextType::Direct3D11 );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
}
tracy_force_inline ~D3D11Ctx()
{
// collect all pending timestamps before destroying everything
do
{
Collect(BLOCK);
} while (m_previousCheckpoint != m_queryCounter);
for (ID3D11Query* query : m_queries)
{
query->Release();
}
m_immediateDevCtx->End(m_disjointQuery);
m_disjointQuery->Release();
m_immediateDevCtx->Release();
m_device->Release();
}
tracy_force_inline void Name( const char* name, uint16_t len )
{
auto ptr = (char*)tracy_malloc( len );
memcpy( ptr, name, len );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuContextName );
MemWrite( &item->gpuContextNameFat.context, m_contextId );
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
MemWrite( &item->gpuContextNameFat.size, len );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
}
void Collect(CollectMode mode = POLL)
{
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() )
{
m_previousCheckpoint = m_nextCheckpoint = m_queryCounter;
return;
}
#endif
if (m_previousCheckpoint == m_nextCheckpoint)
{
uintptr_t nextCheckpoint = m_queryCounter;
if (nextCheckpoint == m_nextCheckpoint)
{
return;
}
m_nextCheckpoint = nextCheckpoint;
m_immediateDevCtx->End(m_disjointQuery);
}
if (mode == CollectMode::BLOCK)
{
WaitForQuery(m_disjointQuery);
}
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = { };
if (m_immediateDevCtx->GetData(m_disjointQuery, &disjoint, sizeof(disjoint), D3D11_ASYNC_GETDATA_DONOTFLUSH) != S_OK)
{
return;
}
if (disjoint.Disjoint == TRUE)
{
m_previousCheckpoint = m_nextCheckpoint;
TracyD3D11Panic("disjoint timestamps detected; dropping.");
return;
}
auto begin = m_previousCheckpoint;
auto end = m_nextCheckpoint;
for (auto i = begin; i != end; ++i)
{
uint32_t k = RingIndex(i);
UINT64 timestamp = 0;
if (m_immediateDevCtx->GetData(m_queries[k], &timestamp, sizeof(timestamp), 0) != S_OK)
{
TracyD3D11Panic("timestamp expected to be ready, but it was not!");
break;
}
timestamp *= (1000000000ull / disjoint.Frequency);
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuTime);
MemWrite(&item->gpuTime.gpuTime, static_cast<int64_t>(timestamp));
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(k));
MemWrite(&item->gpuTime.context, m_contextId);
Profiler::QueueSerialFinish();
}
// disjoint timestamp queries should only be invoked once per frame or less
// https://learn.microsoft.com/en-us/windows/win32/api/d3d11/ne-d3d11-d3d11_query
m_immediateDevCtx->Begin(m_disjointQuery);
m_previousCheckpoint = m_nextCheckpoint;
}
private:
tracy_force_inline uint32_t RingIndex(uintptr_t index)
{
index %= MaxQueries;
return static_cast<uint32_t>(index);
}
tracy_force_inline uint32_t RingCount(uintptr_t begin, uintptr_t end)
{
// wrap-around safe: all unsigned
uintptr_t count = end - begin;
return static_cast<uint32_t>(count);
}
tracy_force_inline uint32_t NextQueryId()
{
auto id = m_queryCounter++;
if (RingCount(m_previousCheckpoint, id) >= MaxQueries)
{
TracyD3D11Panic("too many pending timestamp queries.");
// #TODO: return some sentinel value; ideally a "hidden" query index
}
return RingIndex(id);
}
tracy_force_inline ID3D11Query* GetQueryObjectFromId(uint32_t id)
{
return m_queries[id];
}
tracy_force_inline void WaitForQuery(ID3D11Query* query)
{
m_immediateDevCtx->Flush();
while (m_immediateDevCtx->GetData(query, nullptr, 0, 0) != S_OK)
YieldThread(); // busy-wait :-( attempt to reduce power usage with _mm_pause() & friends...
}
tracy_force_inline uint8_t GetContextId() const
{
return m_contextId;
}
ID3D11Device* m_device = nullptr;
ID3D11DeviceContext* m_immediateDevCtx = nullptr;
ID3D11Query* m_queries[MaxQueries];
ID3D11Query* m_disjointQuery = nullptr;
uint8_t m_contextId = 255; // NOTE: apparently, 255 means invalid id; is this documented anywhere?
uintptr_t m_queryCounter = 0;
uintptr_t m_previousCheckpoint = 0;
uintptr_t m_nextCheckpoint = 0;
};
// Scoped GPU zone for Direct3D 11.
// Construction issues a timestamp query and queues a "zone begin" item; destruction issues a second
// timestamp query and queues the matching "zone end" item. An inactive scope (explicitly disabled,
// or - with TRACY_ON_DEMAND - no profiler connected) does nothing at all.
class D3D11ZoneScope
{
public:
    // Zone described by a static source location.
    tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, bool active )
        : D3D11ZoneScope(ctx, active)
    {
        if( m_active )
        {
            QueueItem* beginItem = Profiler::QueueSerial();
            WriteQueueItem(beginItem, QueueType::GpuZoneBeginSerial, reinterpret_cast<uint64_t>(srcloc));
        }
    }

    // Zone described by a static source location, optionally capturing a callstack of `depth` frames.
    tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int32_t depth, bool active )
        : D3D11ZoneScope(ctx, active)
    {
        if( !m_active ) return;

        const uint64_t location = reinterpret_cast<uint64_t>(srcloc);
        const bool captureCallstack = depth > 0 && has_callstack();
        if( !captureCallstack )
        {
            QueueItem* beginItem = Profiler::QueueSerial();
            WriteQueueItem(beginItem, QueueType::GpuZoneBeginSerial, location);
            return;
        }

        QueueItem* beginItem = Profiler::QueueSerialCallstack(Callstack(depth));
        WriteQueueItem(beginItem, QueueType::GpuZoneBeginCallstackSerial, location);
    }

    // Zone whose source location is allocated at runtime from the given strings.
    tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool active)
        : D3D11ZoneScope(ctx, active)
    {
        if( m_active )
        {
            const auto allocatedLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
            QueueItem* beginItem = Profiler::QueueSerial();
            WriteQueueItem(beginItem, QueueType::GpuZoneBeginAllocSrcLocSerial, allocatedLocation);
        }
    }

    // Zone with a runtime-allocated source location, optionally capturing a callstack of `depth` frames.
    tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool active)
        : D3D11ZoneScope(ctx, active)
    {
        if( !m_active ) return;

        const auto allocatedLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
        const bool captureCallstack = depth > 0 && has_callstack();
        if( !captureCallstack )
        {
            QueueItem* beginItem = Profiler::QueueSerial();
            WriteQueueItem(beginItem, QueueType::GpuZoneBeginAllocSrcLocSerial, allocatedLocation);
            return;
        }

        QueueItem* beginItem = Profiler::QueueSerialCallstack(Callstack(depth));
        WriteQueueItem(beginItem, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, allocatedLocation);
    }

    // Closes the zone: issues the end timestamp and queues the "zone end" item.
    tracy_force_inline ~D3D11ZoneScope()
    {
        if( !m_active ) return;

        const auto endQueryId = m_ctx->NextQueryId();
        m_ctx->m_immediateDevCtx->End(m_ctx->GetQueryObjectFromId(endQueryId));

        QueueItem* endItem = Profiler::QueueSerial();
        MemWrite( &endItem->hdr.type, QueueType::GpuZoneEndSerial );
        MemWrite( &endItem->gpuZoneEnd.cpuTime, Profiler::GetTime() );
        MemWrite( &endItem->gpuZoneEnd.thread, GetThreadHandle() );
        MemWrite( &endItem->gpuZoneEnd.queryId, uint16_t( endQueryId ) );
        MemWrite( &endItem->gpuZoneEnd.context, m_ctx->GetContextId() );
        Profiler::QueueSerialFinish();
    }

private:
    // Shared first construction step: decides whether the zone is active and, if so, binds the context.
    tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, bool active )
#ifdef TRACY_ON_DEMAND
        : m_active( active && GetProfiler().IsConnected() )
#else
        : m_active( active )
#endif
    {
        if( m_active )
        {
            m_ctx = ctx;
        }
    }

    // Issues the begin timestamp, fills the already reserved queue item and commits it.
    void WriteQueueItem(tracy::QueueItem* item, tracy::QueueType queueItemType, uint64_t sourceLocation)
    {
        const auto beginQueryId = m_ctx->NextQueryId();
        m_ctx->m_immediateDevCtx->End(m_ctx->GetQueryObjectFromId(beginQueryId));

        MemWrite( &item->hdr.type, queueItemType);
        MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
        MemWrite( &item->gpuZoneBegin.srcloc, sourceLocation );
        MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
        MemWrite( &item->gpuZoneBegin.queryId, uint16_t( beginQueryId ) );
        MemWrite( &item->gpuZoneBegin.context, m_ctx->GetContextId() );
        Profiler::QueueSerialFinish();
    }

    const bool m_active;
    D3D11Ctx* m_ctx;    // only assigned when the zone is active
};
// Allocates a D3D11Ctx with the Tracy allocator, constructs it in place, names it "D3D11" and
// returns it as an opaque handle.
void* CreateD3D11Context( ID3D11Device* device, ID3D11DeviceContext* devicectx )
{
    void* storage = tracy_malloc( sizeof( D3D11Ctx ) );
    D3D11Ctx* context = new(storage) D3D11Ctx( device, devicectx );
    context->Name("D3D11", 5);
    return context;
}
void CollectD3D11Context(void* ctx)
{
((D3D11Ctx*)ctx)->Collect();
}
void DestroyD3D11Context(void* ctx )
{
((D3D11Ctx*)ctx)->~D3D11Ctx();
tracy_free( ctx );
}
void BeginD3D11ZoneScope(void* zone, void* ctx, const char* name, size_t nameLen)
{
new(zone) tracy::D3D11ZoneScope{ (tracy::D3D11Ctx*)ctx, 0, 0, 0, 0, 0, name, nameLen, true };
}
void EndD3D11ZoneScope(void* zone)
{
((tracy::D3D11ZoneScope*)zone)->~D3D11ZoneScope();
}
}
#undef TracyD3D11Panic
#else
// Forward declarations to be used in engine
// (taken when TRACY_ENABLE is defined but TRACY_GPU_IMPL is not; the definitions live in the
// implementation branch of this header). Contexts and zones are passed around as opaque pointers.
namespace tracy
{
// Creates a profiling context for the given device and device context; returns an opaque handle.
extern void* CreateD3D11Context(ID3D11Device* device, ID3D11DeviceContext* devicectx);
// Destroys a context created by CreateD3D11Context and frees its memory.
extern void DestroyD3D11Context(void* ctx);
// Collects finished GPU timestamps of the context.
extern void CollectD3D11Context(void* ctx);
// Constructs a zone scope in the storage pointed to by `zone` (sized by TracyD3D11ZoneSize - NOTE(review): confirm against the caller).
extern void BeginD3D11ZoneScope(void* zone, void* ctx, const char* name, size_t nameLen);
// Destructs the zone scope previously constructed in `zone`.
extern void EndD3D11ZoneScope(void* zone);
}
#endif
#endif

View File

@@ -0,0 +1,529 @@
#ifndef __TRACYD3D12_HPP__
#define __TRACYD3D12_HPP__
#define TracyD3D12ZoneSize 32
#ifndef TRACY_ENABLE
// Profiling disabled (TRACY_ENABLE not defined): context creation yields nullptr and every other
// macro expands to nothing, so instrumented code compiles without any profiling work.
#define TracyD3D12Context(device, queue) nullptr
#define TracyD3D12Destroy(ctx)
#define TracyD3D12ContextName(ctx, name, size)
#define TracyD3D12NewFrame(ctx)
#define TracyD3D12Zone(ctx, cmdList, name)
#define TracyD3D12ZoneC(ctx, cmdList, name, color)
#define TracyD3D12NamedZone(ctx, varname, cmdList, name, active)
#define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active)
#define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active)
#define TracyD3D12ZoneS(ctx, cmdList, name, depth)
#define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth)
#define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active)
#define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active)
#define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active)
#define TracyD3D12Collect(ctx)
namespace tracy
{
// Empty placeholder so code that names the zone scope type still compiles.
class D3D12ZoneScope {};
}
// Opaque context handle used while profiling is disabled.
using TracyD3D12Ctx = void*;
#elif TRACY_GPU_IMPL
#include "../client/TracyProfiler.hpp"
#include "../client/TracyProfiler.hpp"
#include "../client/TracyCallstack.hpp"
#include "../common/TracyColor.hpp"
#include <cstdlib>
#include <cassert>
#include <d3d12.h>
#include <dxgi.h>
#include <queue>
// Callstack depth passed along with messages emitted by this backend (0 = none).
#define TRACY_CALLSTACK 0
// Forwards a colored text message to tracy::Profiler::MessageColor.
#define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK )
// Asserts (debug builds), reports the failure to the profiler and then runs the optional trailing
// statement (typically `return`). The macro deliberately does NOT end with a semicolon: the caller's
// own `;` completes the do/while, which keeps it a single statement inside un-braced if/else.
#define TracyD3D12Panic(msg, ...) do { assert(false && "TracyD3D12: " msg); TracyMessageLC("TracyD3D12: " msg, tracy::Color::Red4); __VA_ARGS__; } while(false)
namespace tracy
{
// One frame worth of timestamp queries: the first slot used in the query ring and how many slots
// were consumed.
struct D3D12QueryPayload
{
    uint32_t m_queryIdStart{ 0 };
    uint32_t m_queryCount{ 0 };
};
// Command queue context.
// Owns the timestamp query heap, the readback buffer the queries are resolved into and a fence used
// to find out which per-frame payloads the GPU has finished. NewFrame() closes the current payload,
// Collect() sends the timestamps of all finished payloads to the profiler.
class D3D12QueueCtx
{
    friend class D3D12ZoneScope;

    ID3D12Device* m_device = nullptr;
    ID3D12CommandQueue* m_queue = nullptr;
    uint8_t m_contextId = 255;  // TODO: apparently, 255 means "invalid id"; is this documented somewhere?
    ID3D12QueryHeap* m_queryHeap = nullptr;
    ID3D12Resource* m_readbackBuffer = nullptr;

    // In-progress payload.
    uint32_t m_queryLimit = 0;
    std::atomic<uint32_t> m_queryCounter = 0;
    uint32_t m_previousQueryCounter = 0;

    uint32_t m_activePayload = 0;
    ID3D12Fence* m_payloadFence = nullptr;
    std::queue<D3D12QueryPayload> m_payloadQueue;

    UINT64 m_prevCalibrationTicksCPU = 0;

    // Sends a fresh CPU/GPU clock pair to the profiler so it can compensate for clock drift.
    void RecalibrateClocks()
    {
        UINT64 cpuTimestamp;
        UINT64 gpuTimestamp;
        if (FAILED(m_queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp)))
        {
            TracyD3D12Panic("failed to obtain queue clock calibration counters.", return);
        }

        int64_t cpuDeltaTicks = cpuTimestamp - m_prevCalibrationTicksCPU;
        if (cpuDeltaTicks > 0)
        {
            static const int64_t nanosecondsPerTick = int64_t(1000000000) / GetFrequencyQpc();
            int64_t cpuDeltaNS = cpuDeltaTicks * nanosecondsPerTick;
            // Save the device cpu timestamp, not the Tracy profiler timestamp:
            m_prevCalibrationTicksCPU = cpuTimestamp;

            cpuTimestamp = Profiler::GetTime();

            auto* item = Profiler::QueueSerial();
            MemWrite(&item->hdr.type, QueueType::GpuCalibration);
            MemWrite(&item->gpuCalibration.gpuTime, gpuTimestamp);
            MemWrite(&item->gpuCalibration.cpuTime, cpuTimestamp);
            MemWrite(&item->gpuCalibration.cpuDelta, cpuDeltaNS);
            MemWrite(&item->gpuCalibration.context, GetId());
            SubmitQueueItem(item);
        }
    }

    // Commits a queue item; with TRACY_ON_DEMAND it is also deferred so late connections receive it.
    tracy_force_inline void SubmitQueueItem(tracy::QueueItem* item)
    {
#ifdef TRACY_ON_DEMAND
        GetProfiler().DeferItem(*item);
#endif
        Profiler::QueueSerialFinish();
    }

public:
    // Creates the query heap, readback buffer and fence for `queue` and announces the new GPU context
    // to the profiler. Every failure path reports a panic and returns early, leaving the remaining
    // resources null (the destructor copes with that).
    D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue)
        : m_device(device)
        , m_queue(queue)
    {
        // Verify we support timestamp queries on this queue.

        if (queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY)
        {
            D3D12_FEATURE_DATA_D3D12_OPTIONS3 featureData{};

            HRESULT hr = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &featureData, sizeof(featureData));
            if (FAILED(hr) || (featureData.CopyQueueTimestampQueriesSupported == FALSE))
            {
                TracyD3D12Panic("Platform does not support profiling of copy queues.", return);
            }
        }

        static constexpr uint32_t MaxQueries = 64 * 1024;  // Must be even, because queries are (begin, end) pairs
        m_queryLimit = MaxQueries;

        D3D12_QUERY_HEAP_DESC heapDesc{};
        heapDesc.Type = queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY ? D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP : D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
        heapDesc.Count = m_queryLimit;
        heapDesc.NodeMask = 0;  // #TODO: Support multiple adapters.

        // Retry with a smaller heap on failure, but give up once not even a single (begin, end) pair
        // fits - otherwise a persistent failure would halve the limit to zero and loop forever.
        while (FAILED(device->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&m_queryHeap))))
        {
            if (m_queryLimit <= 2)
            {
                TracyD3D12Panic("Failed to create query heap.", return);
            }
            m_queryLimit /= 2;
            heapDesc.Count = m_queryLimit;
        }

        // Create a readback buffer, which will be used as a destination for the query data.

        D3D12_RESOURCE_DESC readbackBufferDesc{};
        readbackBufferDesc.Alignment = 0;
        readbackBufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
        readbackBufferDesc.Width = m_queryLimit * sizeof(uint64_t);
        readbackBufferDesc.Height = 1;
        readbackBufferDesc.DepthOrArraySize = 1;
        readbackBufferDesc.Format = DXGI_FORMAT_UNKNOWN;
        readbackBufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;  // Buffers are always row major.
        readbackBufferDesc.MipLevels = 1;
        readbackBufferDesc.SampleDesc.Count = 1;
        readbackBufferDesc.SampleDesc.Quality = 0;
        readbackBufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE;

        D3D12_HEAP_PROPERTIES readbackHeapProps{};
        readbackHeapProps.Type = D3D12_HEAP_TYPE_READBACK;
        readbackHeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
        readbackHeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
        readbackHeapProps.CreationNodeMask = 0;
        readbackHeapProps.VisibleNodeMask = 0;  // #TODO: Support multiple adapters.

        if (FAILED(device->CreateCommittedResource(&readbackHeapProps, D3D12_HEAP_FLAG_NONE, &readbackBufferDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_readbackBuffer))))
        {
            TracyD3D12Panic("Failed to create query readback buffer.", return);
        }

        if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_payloadFence))))
        {
            TracyD3D12Panic("Failed to create payload fence.", return);
        }

        // Nanoseconds per GPU tick; 0 signals that the frequency could not be queried
        float period = [queue]()
        {
            uint64_t timestampFrequency;
            if (FAILED(queue->GetTimestampFrequency(&timestampFrequency)))
            {
                return 0.0f;
            }
            return static_cast<float>( 1E+09 / static_cast<double>(timestampFrequency) );
        }();

        if (period == 0.0f)
        {
            TracyD3D12Panic("Failed to get timestamp frequency.", return);
        }

        uint64_t cpuTimestamp;
        uint64_t gpuTimestamp;
        if (FAILED(queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp)))
        {
            TracyD3D12Panic("Failed to get queue clock calibration.", return);
        }

        // Save the device cpu timestamp, not the profiler's timestamp.
        m_prevCalibrationTicksCPU = cpuTimestamp;

        cpuTimestamp = Profiler::GetTime();

        // all checked: ready to roll
        m_contextId = GetGpuCtxCounter().fetch_add(1);

        auto* item = Profiler::QueueSerial();
        MemWrite(&item->hdr.type, QueueType::GpuNewContext);
        MemWrite(&item->gpuNewContext.cpuTime, cpuTimestamp);
        MemWrite(&item->gpuNewContext.gpuTime, gpuTimestamp);
        MemWrite(&item->gpuNewContext.thread, decltype(item->gpuNewContext.thread)(0)); // #TODO: why 0 instead of GetThreadHandle()?
        MemWrite(&item->gpuNewContext.period, period);
        MemWrite(&item->gpuNewContext.context, GetId());
        MemWrite(&item->gpuNewContext.flags, GpuContextCalibration);
        MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12);
        SubmitQueueItem(item);
    }

    // Waits for the GPU to finish all signaled payloads, collects them and releases the D3D objects.
    // Resources that were never created (constructor bailed out early) are skipped.
    ~D3D12QueueCtx()
    {
        if (m_payloadFence != nullptr)
        {
            // collect all pending timestamps
            // `<` instead of `!=`: GetCompletedValue() reports UINT64_MAX once the device is removed,
            // which must end the wait instead of spinning forever.
            while (m_payloadFence->GetCompletedValue() < m_activePayload)
                /* busy-wait ... */;
            Collect();
            m_payloadFence->Release();
        }
        if (m_readbackBuffer != nullptr)
            m_readbackBuffer->Release();
        if (m_queryHeap != nullptr)
            m_queryHeap->Release();
    }

    // Closes the current payload (all queries issued since the previous call) and signals the fence
    // so Collect() can tell when the GPU is done with it.
    void NewFrame()
    {
        if (m_payloadFence == nullptr)
            return;  // construction failed; nothing to track

        uint32_t queryCounter = m_queryCounter.exchange(0);
        m_payloadQueue.emplace(D3D12QueryPayload{ m_previousQueryCounter, queryCounter });
        m_previousQueryCounter += queryCounter;

        if (m_previousQueryCounter >= m_queryLimit)
        {
            m_previousQueryCounter -= m_queryLimit;
        }

        m_queue->Signal(m_payloadFence, ++m_activePayload);
    }

    // Sends a display name for this GPU context. The `len` bytes of `name` are copied into a
    // tracy_malloc'ed buffer whose address is handed to the profiler queue.
    void Name( const char* name, uint16_t len )
    {
        auto ptr = (char*)tracy_malloc( len );
        memcpy( ptr, name, len );

        auto item = Profiler::QueueSerial();
        MemWrite( &item->hdr.type, QueueType::GpuContextName );
        MemWrite( &item->gpuContextNameFat.context, GetId());
        MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
        MemWrite( &item->gpuContextNameFat.size, len );
        SubmitQueueItem(item);
    }

    // Sends the timestamps of every payload the GPU has finished and recalibrates the clocks.
    void Collect()
    {
        if (m_payloadFence == nullptr || m_readbackBuffer == nullptr)
            return;  // construction failed; nothing to read back

#ifdef TRACY_ON_DEMAND
        if (!GetProfiler().IsConnected())
        {
            m_queryCounter = 0;

            return;
        }
#endif

        // Find out what payloads are available.
        // The fence reports UINT64_MAX after device removal; clamp so the subtraction below cannot
        // underflow into a bogus (huge) payload count.
        const UINT64 newestReadyPayload = m_payloadFence->GetCompletedValue();
        const UINT64 inFlightPayloads = newestReadyPayload < m_activePayload ? m_activePayload - newestReadyPayload : 0;
        const size_t queuedPayloads = m_payloadQueue.size();
        const size_t payloadCount = queuedPayloads > inFlightPayloads ? queuedPayloads - static_cast<size_t>(inFlightPayloads) : 0;

        if (!payloadCount)
        {
            return;  // No payloads are available yet, exit out.
        }

        D3D12_RANGE mapRange{ 0, m_queryLimit * sizeof(uint64_t) };

        // Map the readback buffer so we can fetch the query data from the GPU.
        void* readbackBufferMapping = nullptr;

        if (FAILED(m_readbackBuffer->Map(0, &mapRange, &readbackBufferMapping)))
        {
            TracyD3D12Panic("Failed to map readback buffer.", return);
        }

        auto* timestampData = static_cast<uint64_t*>(readbackBufferMapping);

        for (size_t i = 0; i < payloadCount; ++i)
        {
            const auto& payload = m_payloadQueue.front();

            for (uint32_t j = 0; j < payload.m_queryCount; ++j)
            {
                const auto counter = (payload.m_queryIdStart + j) % m_queryLimit;
                const auto timestamp = timestampData[counter];
                const auto queryId = counter;

                auto* item = Profiler::QueueSerial();
                MemWrite(&item->hdr.type, QueueType::GpuTime);
                MemWrite(&item->gpuTime.gpuTime, timestamp);
                MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(queryId));
                MemWrite(&item->gpuTime.context, GetId());

                Profiler::QueueSerialFinish();
            }

            m_payloadQueue.pop();
        }

        m_readbackBuffer->Unmap(0, nullptr);

        // Recalibrate to account for drift.
        RecalibrateClocks();
    }

private:
    // Reserves a (begin, end) pair of query slots and returns the index of the begin slot.
    tracy_force_inline uint32_t NextQueryId()
    {
        uint32_t queryCounter = m_queryCounter.fetch_add(2);
        if (queryCounter >= m_queryLimit)
        {
            TracyD3D12Panic("Submitted too many GPU queries! Consider increasing MaxQueries.");
            // #TODO: consider returning an invalid id or sentinel value here
        }

        const uint32_t id = (m_previousQueryCounter + queryCounter) % m_queryLimit;

        return id;
    }

    tracy_force_inline uint8_t GetId() const
    {
        return m_contextId;
    }
};
// Scoped GPU zone for Direct3D 12.
// Construction records the begin timestamp into the command list and queues a "zone begin" item;
// destruction records the end timestamp, queues the "zone end" item and resolves both queries into
// the context's readback buffer. An inactive scope (explicitly disabled, or - with TRACY_ON_DEMAND -
// no profiler connected) does nothing at all.
class D3D12ZoneScope
{
    const bool m_active;
    D3D12QueueCtx* m_ctx = nullptr;
    ID3D12GraphicsCommandList* m_cmdList = nullptr;
    uint32_t m_queryId = 0;  // Used for tracking in nested zones.

    // Fills the already reserved queue item with the zone-begin data and commits it.
    tracy_force_inline void WriteQueueItem(QueueItem* item, QueueType type, uint64_t srcLocation)
    {
        MemWrite(&item->hdr.type, type);
        MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
        MemWrite(&item->gpuZoneBegin.srcloc, srcLocation);
        MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
        MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
        MemWrite(&item->gpuZoneBegin.context, m_ctx->GetId());
        Profiler::QueueSerialFinish();
    }

    // Shared first construction step: decides whether the zone is active and, if so, reserves a
    // query pair and records the begin timestamp.
    tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, bool active)
#ifdef TRACY_ON_DEMAND
        : m_active(active&& GetProfiler().IsConnected())
#else
        : m_active(active)
#endif
    {
        if (!m_active) return;

        m_ctx = ctx;
        m_cmdList = cmdList;

        m_queryId = m_ctx->NextQueryId();
        m_cmdList->EndQuery(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
    }

public:
    // Zone described by a static source location.
    tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, bool active)
        : D3D12ZoneScope(ctx, cmdList, active)
    {
        if (!m_active) return;

        auto* item = Profiler::QueueSerial();
        WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast<uint64_t>(srcLocation));
    }

    // Zone described by a static source location, optionally capturing a callstack of `depth` frames.
    // A callstack is only captured when depth > 0 and callstacks are supported (same guard as the
    // D3D11 zone scope); otherwise a plain zone-begin item is queued.
    tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, int32_t depth, bool active)
        : D3D12ZoneScope(ctx, cmdList, active)
    {
        if (!m_active) return;

        if (depth > 0 && has_callstack())
        {
            auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
            WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast<uint64_t>(srcLocation));
        }
        else
        {
            auto* item = Profiler::QueueSerial();
            WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast<uint64_t>(srcLocation));
        }
    }

    // Zone whose source location is allocated at runtime from the given strings.
    tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, bool active)
        : D3D12ZoneScope(ctx, cmdList, active)
    {
        if (!m_active) return;

        const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);

        auto* item = Profiler::QueueSerial();
        WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation);
    }

    // Zone with a runtime-allocated source location, optionally capturing a callstack of `depth`
    // frames (guarded the same way as the static source location variant above).
    tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, int32_t depth, bool active)
        : D3D12ZoneScope(ctx, cmdList, active)
    {
        if (!m_active) return;

        const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);

        if (depth > 0 && has_callstack())
        {
            auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
            WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation);
        }
        else
        {
            auto* item = Profiler::QueueSerial();
            WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation);
        }
    }

    // Closes the zone: records the end timestamp, queues the "zone end" item and resolves the
    // (begin, end) query pair into the readback buffer.
    tracy_force_inline ~D3D12ZoneScope()
    {
        if (!m_active) return;

        const auto queryId = m_queryId + 1;  // Our end query slot is immediately after the begin slot.
        m_cmdList->EndQuery(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, queryId);

        auto* item = Profiler::QueueSerial();
        MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial);
        MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime());
        MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle());
        MemWrite(&item->gpuZoneEnd.queryId, static_cast<uint16_t>(queryId));
        MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId());
        Profiler::QueueSerialFinish();

        m_cmdList->ResolveQueryData(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, m_queryId, 2, m_ctx->m_readbackBuffer, m_queryId * sizeof(uint64_t));
    }
};
// Allocates a D3D12QueueCtx with the Tracy allocator, constructs it in place, names it "D3D12" and
// returns it as an opaque handle.
void* CreateD3D12Context(ID3D12Device* device, ID3D12CommandQueue* queue)
{
    void* storage = tracy_malloc(sizeof(D3D12QueueCtx));
    D3D12QueueCtx* context = new (storage) D3D12QueueCtx{ device, queue };
    context->Name("D3D12", 5);
    return context;
}
void CollectD3D12Context(void* ctx)
{
((D3D12QueueCtx*)ctx)->Collect();
((D3D12QueueCtx*)ctx)->NewFrame();
}
void DestroyD3D12Context(void* ctx)
{
((D3D12QueueCtx*)ctx)->~D3D12QueueCtx();
tracy_free(ctx);
}
void BeginD3D12ZoneScope(void* zone, void* ctx, ID3D12GraphicsCommandList* cmdList, const char* name, size_t nameLen)
{
new(zone) tracy::D3D12ZoneScope{ (tracy::D3D12QueueCtx*)ctx, 0, 0, 0, 0, 0, name, nameLen, cmdList, true };
}
void EndD3D12ZoneScope(void* zone)
{
((tracy::D3D12ZoneScope*)zone)->~D3D12ZoneScope();
}
}
#undef TracyD3D12Panic
// Typed context handle and the context-level convenience macros.
using TracyD3D12Ctx = tracy::D3D12QueueCtx*;
// CreateD3D12Context() returns an opaque void*, which does not convert implicitly to TracyD3D12Ctx
// in C++; cast it so `TracyD3D12Ctx ctx = TracyD3D12Context(device, queue)` compiles.
#define TracyD3D12Context(device, queue) static_cast<TracyD3D12Ctx>(tracy::CreateD3D12Context(device, queue));
#define TracyD3D12Destroy(ctx) tracy::DestroyD3D12Context(ctx);
#define TracyD3D12ContextName(ctx, name, size) ctx->Name(name, size);
#define TracyD3D12NewFrame(ctx) ctx->NewFrame();
// Default variable name of the scope object created by the unnamed zone macros.
#define TracyD3D12UnnamedZone ___tracy_gpu_d3d12_zone
// Per-line unique symbol holding the static source location of a zone.
#define TracyD3D12SrcLocSymbol TracyConcat(__tracy_d3d12_source_location,TracyLine)
#define TracyD3D12SrcLocObject(name, color) static constexpr tracy::SourceLocationData TracyD3D12SrcLocSymbol { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color };
// Zone macros without an explicit depth argument.
// When callstacks are available and TRACY_CALLSTACK is defined, zones are created through the
// depth-taking scope constructors with TRACY_CALLSTACK as depth; otherwise the plain constructors are used.
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZoneS(ctx, TracyD3D12UnnamedZone, cmdList, name, TRACY_CALLSTACK, true)
# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneCS(ctx, TracyD3D12UnnamedZone, cmdList, name, color, TRACY_CALLSTACK, true)
# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, TRACY_CALLSTACK, active };
# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, TRACY_CALLSTACK, active };
# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, TRACY_CALLSTACK, active)
#else
# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZone(ctx, TracyD3D12UnnamedZone, cmdList, name, true)
# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneC(ctx, TracyD3D12UnnamedZone, cmdList, name, color, true)
# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, active };
# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, active };
# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) tracy::D3D12ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), cmdList, active };
#endif
// Zone macros taking an explicit callstack depth.
// Without callstack support the depth argument is dropped and each macro forwards to its
// depth-less counterpart.
#ifdef TRACY_HAS_CALLSTACK
# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12NamedZoneS(ctx, TracyD3D12UnnamedZone, cmdList, name, depth, true)
# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12NamedZoneCS(ctx, TracyD3D12UnnamedZone, cmdList, name, color, depth, true)
# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, depth, active };
# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, depth, active };
# define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) tracy::D3D12ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), cmdList, depth, active };
#else
# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12Zone(ctx, cmdList, name)
// The colored variant must forward to TracyD3D12ZoneC: TracyD3D12Zone takes no color argument.
# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12ZoneC(ctx, cmdList, name, color)
# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) TracyD3D12NamedZone(ctx, varname, cmdList, name, active)
# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active)
# define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active)
#endif
// Collects finished timestamps of the given context.
#define TracyD3D12Collect(ctx) ctx->Collect();
#else
// Forward declarations to be used in engine
// These are declared `extern` only; their definitions are not part of this branch of the header.
// Contexts are passed around as opaque void* handles.
namespace tracy
{
// Creates a profiling context for the given device/queue pair and returns it as an opaque handle.
extern void* CreateD3D12Context(ID3D12Device* device, ID3D12CommandQueue* queue);
// Destroys a context previously returned by CreateD3D12Context.
extern void DestroyD3D12Context(void* ctx);
// NOTE(review): presumably gathers finished GPU timestamps for the context (cf. TracyD3D12Collect) — confirm against the definition.
extern void CollectD3D12Context(void* ctx);
// Begins a named GPU zone on cmdList. `zone` is caller-provided storage for the zone object;
// NOTE(review): required size/alignment of `zone` is not visible here — confirm against the definition.
extern void BeginD3D12ZoneScope(void* zone, void* ctx, ID3D12GraphicsCommandList* cmdList, const char* name, size_t nameLen);
// Ends the zone that was started in `zone` by BeginD3D12ZoneScope.
extern void EndD3D12ZoneScope(void* zone);
}
#endif
#endif

View File

@@ -0,0 +1,779 @@
#ifndef __TRACYVULKAN_HPP__
#define __TRACYVULKAN_HPP__
// Byte size constant for Vulkan zone storage.
// NOTE(review): presumably the size of the caller-provided `zone` buffer handed to
// tracy::BeginVkZoneScope, i.e. it must be >= sizeof(tracy::VkCtxScope) — confirm when changing that class.
#define TracyVulkanZoneSize 24
#if !defined TRACY_ENABLE
// Profiler disabled (TRACY_ENABLE not defined): context-creation macros evaluate to nullptr,
// every other macro expands to nothing, and the types are reduced to empty placeholders so
// instrumented code still compiles.
#define TracyVkContext(x,y,z,w) nullptr
#define TracyVkContextCalibrated(x,y,z,w,a,b) nullptr
#if defined VK_EXT_host_query_reset
#define TracyVkContextHostCalibrated(x,y,z,w,a) nullptr
#endif
#define TracyVkDestroy(x)
#define TracyVkContextName(c,x,y)
#define TracyVkNamedZone(c,x,y,z,w)
#define TracyVkNamedZoneC(c,x,y,z,w,a)
#define TracyVkZone(c,x,y)
#define TracyVkZoneC(c,x,y,z)
#define TracyVkZoneTransient(c,x,y,z,w)
#define TracyVkCollect(c,x)
#define TracyVkNamedZoneS(c,x,y,z,w,a)
#define TracyVkNamedZoneCS(c,x,y,z,w,v,a)
#define TracyVkZoneS(c,x,y,z)
#define TracyVkZoneCS(c,x,y,z,w)
#define TracyVkZoneTransientS(c,x,y,z,w,a)
namespace tracy
{
// Empty stand-in so code that names tracy::VkCtxScope compiles without the profiler.
class VkCtxScope {};
}
// With the profiler disabled a context handle is just an opaque pointer.
using TracyVkCtx = void*;
#elif TRACY_GPU_VULKAN
#if !defined VK_NULL_HANDLE
# error "You must include Vulkan headers before including TracyVulkan.hpp"
#endif
#include <assert.h>
#include <stdlib.h>
#include "../client/TracyProfiler.hpp"
#include "../client/TracyCallstack.hpp"
#include <atomic>
namespace tracy
{
// When TRACY_VK_USE_SYMBOL_TABLE is defined, Vulkan entry points are not called directly but
// through function pointers stored in a per-context VkSymbolTable. The LoadVk* X-macros list the
// entry points in four groups; each takes an `Operation` macro that is applied to every name.
#if defined TRACY_VK_USE_SYMBOL_TABLE
#define LoadVkDeviceCoreSymbols(Operation) \
Operation(vkBeginCommandBuffer) \
Operation(vkCmdResetQueryPool) \
Operation(vkCmdWriteTimestamp) \
Operation(vkCreateQueryPool) \
Operation(vkDestroyQueryPool) \
Operation(vkEndCommandBuffer) \
Operation(vkGetQueryPoolResults) \
Operation(vkQueueSubmit) \
Operation(vkQueueWaitIdle) \
Operation(vkResetQueryPool)
#define LoadVkDeviceExtensionSymbols(Operation) \
Operation(vkGetCalibratedTimestampsEXT)
#define LoadVkInstanceExtensionSymbols(Operation) \
Operation(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT)
#define LoadVkInstanceCoreSymbols(Operation) \
Operation(vkGetPhysicalDeviceProperties)
// One PFN_<name> member per entry point listed above; filled in by VkCtx::PopulateSymbolTable.
struct VkSymbolTable
{
#define MAKE_PFN(name) PFN_##name name;
LoadVkDeviceCoreSymbols(MAKE_PFN)
LoadVkDeviceExtensionSymbols(MAKE_PFN)
LoadVkInstanceExtensionSymbols(MAKE_PFN)
LoadVkInstanceCoreSymbols(MAKE_PFN)
#undef MAKE_PFN
};
// Prefix a call (or a bare function name) with the symbol table of the current VkCtx (`m_symbols.`)
// or of the context held by a VkCtxScope (`m_ctx->m_symbols.`).
#define VK_FUNCTION_WRAPPER(callSignature) m_symbols.callSignature
#define CONTEXT_VK_FUNCTION_WRAPPER(callSignature) m_ctx->m_symbols.callSignature
#else
// Without the symbol table the wrappers are transparent and the globally visible Vulkan symbols are used.
#define VK_FUNCTION_WRAPPER(callSignature) callSignature
#define CONTEXT_VK_FUNCTION_WRAPPER(callSignature) callSignature
#endif
class VkCtx
{
friend class VkCtxScope;
enum { QueryCount = 64 * 1024 };
public:
// Constructs a GPU profiling context that uses a queue and a command buffer for setup.
//
// physdev  - physical device; queried for calibrateable time domains and timestamp period.
// device   - logical device that owns the timestamp query pool.
// queue    - queue used to submit the setup command buffers (waited on with vkQueueWaitIdle).
// cmdbuf   - command buffer that is recorded and submitted several times during setup.
// Symbol-table builds additionally take the instance and the proc-addr loaders plus `calibrated`,
// which selects whether calibrated timestamps may be used; other builds take the two
// VK_EXT_calibrated_timestamps entry points directly (either may be null).
//
// Steps: pick a time domain, create the query pool, reset the whole pool on the GPU, obtain an
// initial CPU/GPU timestamp pair (via a timestamp query when only the device domain is available,
// via calibrated timestamps otherwise), announce the context to the profiler and allocate the
// results buffer used by Collect().
#if defined TRACY_VK_USE_SYMBOL_TABLE
VkCtx( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr, bool calibrated )
#else
VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT)
#endif
    : m_device( device )
    , m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT )
    , m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
    , m_head( 0 )
    , m_tail( 0 )
    , m_oldCnt( 0 )
    , m_queryCount( QueryCount )
#if !defined TRACY_VK_USE_SYMBOL_TABLE
    , m_vkGetCalibratedTimestampsEXT( vkGetCalibratedTimestampsEXT )
#endif
{
    assert( m_context != 255 );

#if defined TRACY_VK_USE_SYMBOL_TABLE
    PopulateSymbolTable(instance, instanceProcAddr, deviceProcAddr);
    // Always give the pointer a defined value: it is tested right below, so leaving it
    // uninitialized in the non-calibrated case would read an indeterminate value.
    m_vkGetCalibratedTimestampsEXT = calibrated ? m_symbols.vkGetCalibratedTimestampsEXT : nullptr;
#endif

    // Only look for a host time domain when both calibrated-timestamp entry points are available.
    if( VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) && m_vkGetCalibratedTimestampsEXT )
    {
        FindAvailableTimeDomains( physdev, VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) );
    }

    CreateQueryPool();

    VkCommandBufferBeginInfo beginInfo = {};
    beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;

    VkSubmitInfo submitInfo = {};
    submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    submitInfo.commandBufferCount = 1;
    submitInfo.pCommandBuffers = &cmdbuf;

    // Reset every query in the pool before first use and wait for the GPU to finish.
    VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) );
    VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) );
    VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) );
    VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) );
    VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) );

    int64_t tcpu, tgpu;
    if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT )
    {
        // No host time domain: write one GPU timestamp into query 0, wait for it, and sample the
        // CPU clock right after the wait to get the initial CPU/GPU pair.
        VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) );
        VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 ) );
        VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) );
        VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) );
        VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) );

        tcpu = Profiler::GetTime();
        VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT ) );

        // Query 0 was consumed by the measurement above; reset it so zones can reuse it.
        VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) );
        VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 ) );
        VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) );
        VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) );
        VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) );
    }
    else
    {
        FindCalibratedTimestampDeviation();
        Calibrate( device, m_prevCalibration, tgpu );
        tcpu = Profiler::GetTime();
    }

    WriteInitialItem( physdev, tcpu, tgpu );

    // Collect() reads results with VK_QUERY_RESULT_WITH_AVAILABILITY_BIT, i.e. two int64_t values
    // (timestamp + availability) per query, so the buffer must hold 2 * m_queryCount entries.
    m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount * 2 );
}
#if defined VK_EXT_host_query_reset
/**
* This alternative constructor does not use command buffers and instead uses functionality from
* VK_EXT_host_query_reset (core with 1.2 and non-optional) and VK_EXT_calibrated_timestamps. This requires
* the physical device to have another time domain apart from DEVICE to be calibrateable.
*
* Symbol-table builds resolve the required entry points through the given proc-addr loaders;
* other builds receive them as parameters. All three entry points are asserted to be non-null.
*/
#if defined TRACY_VK_USE_SYMBOL_TABLE
VkCtx( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr )
#else
VkCtx( VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT vkResetQueryPool, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT )
#endif
: m_device( device )
, m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT )
, m_context( GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed) )
, m_head( 0 )
, m_tail( 0 )
, m_oldCnt( 0 )
, m_queryCount( QueryCount )
#if !defined TRACY_VK_USE_SYMBOL_TABLE
, m_vkGetCalibratedTimestampsEXT( vkGetCalibratedTimestampsEXT )
#endif
{
assert( m_context != 255);
#if defined TRACY_VK_USE_SYMBOL_TABLE
PopulateSymbolTable(instance, instanceProcAddr, deviceProcAddr);
m_vkGetCalibratedTimestampsEXT = m_symbols.vkGetCalibratedTimestampsEXT;
#endif
// In non-symbol-table builds these names refer to the constructor parameters.
assert( VK_FUNCTION_WRAPPER( vkResetQueryPool ) != nullptr );
assert( VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) != nullptr );
assert( VK_FUNCTION_WRAPPER( vkGetCalibratedTimestampsEXT ) != nullptr );
FindAvailableTimeDomains( physdev, VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) );
// We require a host time domain to be available to properly calibrate.
// (FindCalibratedTimestampDeviation asserts that m_timeDomain is no longer the DEVICE domain.)
FindCalibratedTimestampDeviation();
// Initial CPU/GPU pair: calibrated timestamps first, then the profiler's own CPU clock.
int64_t tgpu;
Calibrate( device, m_prevCalibration, tgpu );
int64_t tcpu = Profiler::GetTime();
CreateQueryPool();
// Host-side reset of the entire pool; no command buffer or queue submission is needed.
VK_FUNCTION_WRAPPER( vkResetQueryPool( device, m_query, 0, m_queryCount ) );
WriteInitialItem( physdev, tcpu, tgpu );
// We need the buffer to be twice as large for availability values
size_t resSize = sizeof( int64_t ) * m_queryCount * 2;
m_res = (int64_t*)tracy_malloc( resSize );
}
#endif
// Releases the two resources owned by the context: the timestamp query pool and the
// host-side results buffer. The two releases do not depend on each other.
~VkCtx()
{
    VK_FUNCTION_WRAPPER( vkDestroyQueryPool( m_device, m_query, nullptr ) );
    tracy_free( m_res );
}
// Sends a display name for this GPU context to the profiler.
// name - characters of the name (copied; no terminator is required or added).
// len  - number of bytes to copy from `name`.
void Name( const char* name, uint16_t len )
{
    // The queue item only carries a pointer, so the characters are copied into a buffer
    // obtained from tracy_malloc; this function does not free that buffer.
    char* const nameCopy = static_cast<char*>( tracy_malloc( len ) );
    memcpy( nameCopy, name, len );

    auto queued = Profiler::QueueSerial();
    MemWrite( &queued->hdr.type, QueueType::GpuContextName );
    MemWrite( &queued->gpuContextNameFat.context, m_context );
    MemWrite( &queued->gpuContextNameFat.ptr, reinterpret_cast<uint64_t>( nameCopy ) );
    MemWrite( &queued->gpuContextNameFat.size, len );
#ifdef TRACY_ON_DEMAND
    GetProfiler().DeferItem( *queued );
#endif
    Profiler::QueueSerialFinish();
}
// Reads back finished timestamp queries, forwards them to the profiler and records a reset of
// the consumed query range into `cmdbuf`.
// m_head/m_tail are monotonically increasing counters; the pool index is the counter modulo
// m_queryCount. One call handles at most the range up to the end of the pool (no wrap-around).
void Collect( VkCommandBuffer cmdbuf )
{
const uint64_t head = m_head.load(std::memory_order_relaxed);
// Nothing was issued since the last collection.
if( m_tail == head ) return;
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() )
{
// No client connected: drop everything that is pending by resetting the whole pool.
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) );
m_tail = head;
m_oldCnt = 0;
// Refresh m_prevCalibration; the GPU timestamp itself is not used here.
int64_t tgpu;
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu );
return;
}
#endif
assert( head > m_tail );
const unsigned int wrappedTail = (unsigned int)( m_tail % m_queryCount );
unsigned int cnt;
if( m_oldCnt != 0 )
{
// A previous call stopped at an unavailable query; retry exactly the remainder of that range.
cnt = m_oldCnt;
m_oldCnt = 0;
}
else
{
cnt = (unsigned int)( head - m_tail );
assert( cnt <= m_queryCount );
// Clamp to the end of the pool; the wrapped part is picked up by a later call.
if( wrappedTail + cnt > m_queryCount )
{
cnt = m_queryCount - wrappedTail;
}
}
// With the availability bit each query yields two int64_t values (timestamp, availability),
// hence the stride of 2 * sizeof(int64_t); m_res must hold at least 2 * cnt values.
VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( m_device, m_query, wrappedTail, cnt, sizeof( int64_t ) * m_queryCount * 2, m_res, sizeof( int64_t ) * 2, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT ) );
for( unsigned int idx=0; idx<cnt; idx++ )
{
int64_t avail = m_res[idx * 2 + 1];
if( avail == 0 )
{
// First query that is not ready yet: remember how many are left and only consume [0, idx).
m_oldCnt = cnt - idx;
cnt = idx;
break;
}
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuTime );
MemWrite( &item->gpuTime.gpuTime, m_res[idx * 2] );
MemWrite( &item->gpuTime.queryId, uint16_t( wrappedTail + idx ) );
MemWrite( &item->gpuTime.context, m_context );
Profiler::QueueSerialFinish();
}
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT )
{
// Calibrated contexts also report a fresh CPU/GPU pair, but only if the calibrated CPU time advanced.
int64_t tgpu, tcpu;
Calibrate( m_device, tcpu, tgpu );
const auto refCpu = Profiler::GetTime();
const auto delta = tcpu - m_prevCalibration;
if( delta > 0 )
{
m_prevCalibration = tcpu;
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuCalibration );
MemWrite( &item->gpuCalibration.gpuTime, tgpu );
MemWrite( &item->gpuCalibration.cpuTime, refCpu );
MemWrite( &item->gpuCalibration.cpuDelta, delta );
MemWrite( &item->gpuCalibration.context, m_context );
Profiler::QueueSerialFinish();
}
}
// Only the queries that were actually consumed are reset and skipped.
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, wrappedTail, cnt ) );
m_tail += cnt;
}
// Reserves the next slot of the timestamp query pool and returns its index.
// The head counter grows without bound; the returned index is the counter modulo the pool size.
tracy_force_inline unsigned int NextQueryId()
{
    const uint64_t ticket = m_head.fetch_add( 1, std::memory_order_relaxed );
    return static_cast<unsigned int>( ticket % m_queryCount );
}
// Returns the profiler-side identifier of this GPU context (assigned from GetGpuCtxCounter() in the constructors).
tracy_force_inline uint8_t GetId() const
{
return m_context;
}
// Returns the timestamp query pool created by CreateQueryPool().
tracy_force_inline VkQueryPool GetQueryPool() const
{
return m_query;
}
private:
// Samples a correlated pair of timestamps: one in the device domain (tGpu) and one in the
// selected host domain (tCpu). Sampling is repeated until the reported deviation does not
// exceed m_deviation. Must only be called when a host time domain was selected.
tracy_force_inline void Calibrate( VkDevice device, int64_t& tCpu, int64_t& tGpu )
{
    assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT );

    VkCalibratedTimestampInfoEXT domains[2] = {
        { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
        { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
    };
    uint64_t stamps[2];
    uint64_t reportedDeviation;
    for(;;)
    {
        m_vkGetCalibratedTimestampsEXT( device, 2, domains, stamps, &reportedDeviation );
        if( reportedDeviation <= m_deviation ) break;
    }

#if defined _WIN32
    // The host value is scaled by the factor computed in FindCalibratedTimestampDeviation().
    tGpu = stamps[0];
    tCpu = stamps[1] * m_qpcToNs;
#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW
    tGpu = stamps[0];
    tCpu = stamps[1];
#else
    // No supported host time domain on this platform; the outputs are left untouched.
    assert( false );
#endif
}
// Creates the timestamp query pool. If creation fails, the requested query count is halved
// and creation is retried until it succeeds; m_queryCount ends up holding the size that worked.
tracy_force_inline void CreateQueryPool()
{
    VkQueryPoolCreateInfo createInfo = {};
    createInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
    createInfo.queryType = VK_QUERY_TYPE_TIMESTAMP;
    createInfo.queryCount = m_queryCount;

    for(;;)
    {
        if( VK_FUNCTION_WRAPPER( vkCreateQueryPool( m_device, &createInfo, nullptr, &m_query ) ) == VK_SUCCESS ) break;
        m_queryCount /= 2;
        createInfo.queryCount = m_queryCount;
    }
}
// Checks whether the physical device can calibrate against the host clock this platform uses
// (QueryPerformanceCounter on Windows, CLOCK_MONOTONIC_RAW on Linux). If so, m_timeDomain is
// switched to that domain; otherwise it keeps its current value.
tracy_force_inline void FindAvailableTimeDomains( VkPhysicalDevice physicalDevice, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT )
{
    // First call yields the number of domains, second call fetches them; at most 4 are inspected.
    uint32_t domainCount;
    _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physicalDevice, &domainCount, nullptr );
    if( domainCount > 4 ) domainCount = 4;
    VkTimeDomainEXT domains[4];
    _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physicalDevice, &domainCount, domains );

#if defined _WIN32
    const VkTimeDomainEXT wanted = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT;
#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW
    const VkTimeDomainEXT wanted = VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT;
#else
    // Sentinel that is compared against every reported domain on unsupported platforms.
    const VkTimeDomainEXT wanted = (VkTimeDomainEXT)-1;
#endif

    for( uint32_t i=0; i<domainCount; i++ )
    {
        if( domains[i] != wanted ) continue;
        m_timeDomain = domains[i];
        break;
    }
}
// Probes the calibrated-timestamp call 32 times and stores 1.5x the smallest reported deviation
// in m_deviation, which Calibrate() uses as its acceptance threshold. On Windows it also
// computes the factor applied to host timestamps (m_qpcToNs).
tracy_force_inline void FindCalibratedTimestampDeviation()
{
    assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT );
    constexpr size_t NumProbes = 32;

    VkCalibratedTimestampInfoEXT domains[2] = {
        { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
        { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
    };
    uint64_t stamps[2];

    // Track the minimum while probing instead of storing every sample.
    uint64_t smallest = 0;
    for( size_t probe=0; probe<NumProbes; probe++ )
    {
        uint64_t sample;
        m_vkGetCalibratedTimestampsEXT( m_device, 2, domains, stamps, &sample );
        if( probe == 0 || sample < smallest ) smallest = sample;
    }
    m_deviation = smallest * 3 / 2;

#if defined _WIN32
    m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() );
#endif
}
// Announces this GPU context to the profiler.
// physdev - physical device whose timestampPeriod limit is reported as the tick period.
// tcpu    - CPU time of the initial CPU/GPU timestamp pair.
// tgpu    - GPU time of the initial CPU/GPU timestamp pair.
tracy_force_inline void WriteInitialItem( VkPhysicalDevice physdev, int64_t tcpu, int64_t tgpu )
{
    // The calibration flag is set whenever a host time domain was selected.
    const uint8_t flags = ( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) ? GpuContextCalibration : 0;

    VkPhysicalDeviceProperties deviceProps;
    VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceProperties( physdev, &deviceProps ) );
    const float period = deviceProps.limits.timestampPeriod;

    auto queued = Profiler::QueueSerial();
    MemWrite( &queued->hdr.type, QueueType::GpuNewContext );
    MemWrite( &queued->gpuNewContext.cpuTime, tcpu );
    MemWrite( &queued->gpuNewContext.gpuTime, tgpu );
    // The thread field is zero-filled rather than set to a thread handle.
    memset( &queued->gpuNewContext.thread, 0, sizeof( queued->gpuNewContext.thread ) );
    MemWrite( &queued->gpuNewContext.period, period );
    MemWrite( &queued->gpuNewContext.context, m_context );
    MemWrite( &queued->gpuNewContext.flags, flags );
    MemWrite( &queued->gpuNewContext.type, GpuContextType::Vulkan );
#ifdef TRACY_ON_DEMAND
    GetProfiler().DeferItem( *queued );
#endif
    Profiler::QueueSerialFinish();
}
#if defined TRACY_VK_USE_SYMBOL_TABLE
// Fills m_symbols by resolving every entry point named in the LoadVk* lists: device-level
// symbols through deviceProcAddr( m_device, ... ), instance-level symbols through
// instanceProcAddr( instance, ... ). Results are stored unchecked, so a symbol that cannot be
// resolved ends up as whatever the loader returned.
void PopulateSymbolTable( VkInstance instance, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr )
{
// Helper macros are local to this function and undefined again below.
// Both the GET and the LOAD macros end in ';', so each expansion yields an extra empty statement.
#define VK_GET_DEVICE_SYMBOL( name ) \
(PFN_##name)deviceProcAddr( m_device, #name );
#define VK_LOAD_DEVICE_SYMBOL( name ) \
m_symbols.name = VK_GET_DEVICE_SYMBOL( name );
#define VK_GET_INSTANCE_SYMBOL( name ) \
(PFN_##name)instanceProcAddr( instance, #name );
#define VK_LOAD_INSTANCE_SYMBOL( name ) \
m_symbols.name = VK_GET_INSTANCE_SYMBOL( name );
LoadVkDeviceCoreSymbols( VK_LOAD_DEVICE_SYMBOL )
LoadVkDeviceExtensionSymbols( VK_LOAD_DEVICE_SYMBOL )
LoadVkInstanceExtensionSymbols( VK_LOAD_INSTANCE_SYMBOL )
LoadVkInstanceCoreSymbols( VK_LOAD_INSTANCE_SYMBOL )
#undef VK_GET_DEVICE_SYMBOL
#undef VK_LOAD_DEVICE_SYMBOL
#undef VK_GET_INSTANCE_SYMBOL
#undef VK_LOAD_INSTANCE_SYMBOL
}
#endif
// Logical device that owns m_query.
VkDevice m_device;
// Timestamp query pool (see CreateQueryPool).
VkQueryPool m_query;
// VK_TIME_DOMAIN_DEVICE_EXT, or the host domain chosen by FindAvailableTimeDomains.
VkTimeDomainEXT m_timeDomain;
#if defined TRACY_VK_USE_SYMBOL_TABLE
// Per-context Vulkan entry points (see PopulateSymbolTable).
VkSymbolTable m_symbols;
#endif
// Maximum deviation accepted by Calibrate(); set by FindCalibratedTimestampDeviation.
uint64_t m_deviation;
#ifdef _WIN32
// Factor applied to host timestamps in Calibrate(); set by FindCalibratedTimestampDeviation.
int64_t m_qpcToNs;
#endif
// Calibrated CPU time of the most recent accepted calibration.
int64_t m_prevCalibration;
// Profiler-side context id.
uint8_t m_context;
// Number of queries issued so far (incremented by NextQueryId).
std::atomic<uint64_t> m_head;
// Number of queries consumed so far (advanced by Collect).
uint64_t m_tail;
// Size of a range Collect() could not finish and has to retry; 0 when there is none.
unsigned int m_oldCnt;
// Number of queries in m_query.
unsigned int m_queryCount;
// Host buffer receiving query results in Collect(); allocated with tracy_malloc.
int64_t* m_res;
// vkGetCalibratedTimestampsEXT entry point used by the calibration helpers.
PFN_vkGetCalibratedTimestampsEXT m_vkGetCalibratedTimestampsEXT;
};
// Scoped GPU zone: each constructor records a "begin" timestamp into the command buffer and
// queues a zone-begin item; the destructor records an "end" timestamp and queues a zone-end item.
// The four constructors differ only in how the source location is supplied (static
// SourceLocationData vs. one allocated from line/file/function/name) and in whether a callstack
// of `depth` frames is captured.
// When the scope is inactive (is_active false, or with TRACY_ON_DEMAND no profiler connected)
// nothing is recorded and m_cmdbuf/m_ctx are left unset; the destructor checks m_active first.
class VkCtxScope
{
public:
// Static source location, no callstack.
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
m_cmdbuf = cmdbuf;
m_ctx = ctx;
const auto queryId = ctx->NextQueryId();
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
Profiler::QueueSerialFinish();
}
// Static source location, optional callstack of `depth` frames.
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int32_t depth, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
m_cmdbuf = cmdbuf;
m_ctx = ctx;
const auto queryId = ctx->NextQueryId();
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
QueueItem *item;
// A callstack is only captured for a positive depth and when has_callstack() reports support.
if( depth > 0 && has_callstack() )
{
item = Profiler::QueueSerialCallstack( Callstack( depth ) );
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial );
}
else
{
item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
}
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
Profiler::QueueSerialFinish();
}
// Transient source location built from line/source/function/name, no callstack.
tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
m_cmdbuf = cmdbuf;
m_ctx = ctx;
const auto queryId = ctx->NextQueryId();
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, srcloc );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
Profiler::QueueSerialFinish();
}
// Transient source location, optional callstack of `depth` frames.
tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, int32_t depth, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
m_cmdbuf = cmdbuf;
m_ctx = ctx;
const auto queryId = ctx->NextQueryId();
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
QueueItem *item;
if( depth > 0 && has_callstack() )
{
item = Profiler::QueueSerialCallstack( Callstack( depth ) );
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial );
}
else
{
item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial );
}
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, srcloc );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
Profiler::QueueSerialFinish();
}
// Closes the zone: writes the end timestamp into the same command buffer and queues the zone-end item.
tracy_force_inline ~VkCtxScope()
{
if( !m_active ) return;
const auto queryId = m_ctx->NextQueryId();
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_ctx->m_query, queryId ) );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneEnd.context, m_ctx->GetId() );
Profiler::QueueSerialFinish();
}
private:
// Whether this scope records anything; fixed at construction.
const bool m_active;
// Command buffer and context captured by an active scope for use in the destructor.
VkCommandBuffer m_cmdbuf;
VkCtx* m_ctx;
};
// Allocates storage with tracy_malloc and constructs a VkCtx in it using the queue/command-buffer
// constructor. The context is returned as an opaque pointer; release it with DestroyVkContext.
#if defined TRACY_VK_USE_SYMBOL_TABLE
void* CreateVkContext( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr, bool calibrated = false )
#else
void* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct )
#endif
{
    void* const storage = tracy_malloc( sizeof( VkCtx ) );
#if defined TRACY_VK_USE_SYMBOL_TABLE
    return new( storage ) VkCtx( instance, physdev, device, queue, cmdbuf, instanceProcAddr, getDeviceProcAddr, calibrated );
#else
    return new( storage ) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct );
#endif
}
#if defined VK_EXT_host_query_reset
// Allocates storage with tracy_malloc and constructs a VkCtx in it using the host-calibrated
// constructor (no queue or command buffer), then names the context "Vulkan".
// The context is returned as an opaque pointer; release it with DestroyVkContext.
#if defined TRACY_VK_USE_SYMBOL_TABLE
void* CreateVkContext( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr )
#else
void* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT qpreset, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct )
#endif
{
    void* const storage = tracy_malloc( sizeof( VkCtx ) );
#if defined TRACY_VK_USE_SYMBOL_TABLE
    VkCtx* const created = new( storage ) VkCtx( instance, physdev, device, instanceProcAddr, getDeviceProcAddr );
#else
    VkCtx* const created = new( storage ) VkCtx( physdev, device, qpreset, gpdctd, gct );
#endif
    created->Name( "Vulkan", 6 );
    return created;
}
#endif
void DestroyVkContext( void* ctx )
{
((VkCtx*)ctx)->~VkCtx();
tracy_free( ctx );
}
void CollectVkContext( void* ctx, VkCommandBuffer cmdbuf )
{
((VkCtx*)ctx)->Collect(cmdbuf);
}
void BeginVkZoneScope(void* zone, void* ctx, VkCommandBuffer cmdbuf, const char* name, size_t nameLen)
{
new(zone) tracy::VkCtxScope{ (tracy::VkCtx*)ctx, 0, 0, 0, 0, 0, name, nameLen, cmdbuf, true };
}
void EndVkZoneScope(void* zone)
{
((tracy::VkCtxScope*)zone)->~VkCtxScope();
}
}
// Handle type used by the macro API below.
using TracyVkCtx = tracy::VkCtx*;
// Context creation macros. Each forwards to a tracy::CreateVkContext overload; the parameter
// list depends on TRACY_VK_USE_SYMBOL_TABLE.
// NOTE(review): CreateVkContext returns void*, so the result needs a cast before it can be
// stored in a TracyVkCtx or used with TracyVkContextName / TracyVkCollect.
#if defined TRACY_VK_USE_SYMBOL_TABLE
#define TracyVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr );
#else
// Non-calibrated context: both calibrated-timestamp entry points are passed as nullptr.
#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, nullptr, nullptr );
#endif
#if defined TRACY_VK_USE_SYMBOL_TABLE
#define TracyVkContextCalibrated( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr, true );
#else
#define TracyVkContextCalibrated( physdev, device, queue, cmdbuf, gpdctd, gct ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, gpdctd, gct );
#endif
#if defined VK_EXT_host_query_reset
// Host-calibrated context: created without a queue or command buffer.
#if defined TRACY_VK_USE_SYMBOL_TABLE
#define TracyVkContextHostCalibrated( instance, physdev, device, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, instanceProcAddr, deviceProcAddr );
#else
#define TracyVkContextHostCalibrated( physdev, device, qpreset, gpdctd, gct ) tracy::CreateVkContext( physdev, device, qpreset, gpdctd, gct );
#endif
#endif
#define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx );
#define TracyVkContextName( ctx, name, size ) ctx->Name( name, size );
// Zone macros. "Named" variants let the caller choose the scope variable name, "C" variants
// take a color, "Transient" variants build the source location at run time from `name`, and
// "S" variants take an explicit callstack depth.
// When TRACY_CALLSTACK is defined (and callstacks are supported) the plain macros capture a
// callstack of TRACY_CALLSTACK frames by default.
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, TRACY_CALLSTACK, active );
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, TRACY_CALLSTACK, active );
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, TRACY_CALLSTACK, true )
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, TRACY_CALLSTACK, true )
# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) TracyVkZoneTransientS( ctx, varname, cmdbuf, name, TRACY_CALLSTACK, active )
#else
# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, active );
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, active );
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZone( ctx, ___tracy_gpu_zone, cmdbuf, name, true )
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneC( ctx, ___tracy_gpu_zone, cmdbuf, name, color, true )
# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) tracy::VkCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), cmdbuf, active );
#endif
// Collects finished queries of a context; `ctx` must be a TracyVkCtx (tracy::VkCtx*).
#define TracyVkCollect( ctx, cmdbuf ) ctx->Collect( cmdbuf );
// Explicit-depth ("S") variants. Without callstack support they drop `depth` and forward to
// the plain macros above.
#ifdef TRACY_HAS_CALLSTACK
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, depth, active );
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, depth, active );
# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, depth, true )
# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, depth, true )
# define TracyVkZoneTransientS( ctx, varname, cmdbuf, name, depth, active ) tracy::VkCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), cmdbuf, depth, active );
#else
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) TracyVkNamedZone( ctx, varname, cmdbuf, name, active )
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active )
# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkZone( ctx, cmdbuf, name )
# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkZoneC( ctx, cmdbuf, name, color )
# define TracyVkZoneTransientS( ctx, varname, cmdbuf, name, depth, active ) TracyVkZoneTransient( ctx, varname, cmdbuf, name, active )
#endif
#else
// Forward declarations to be used in engine
// This branch is compiled when TRACY_ENABLE is defined but TRACY_GPU_VULKAN is not: only the
// declarations of the type-erased API are visible here, and they mirror the definitions in the
// TRACY_GPU_VULKAN branch of this header. Contexts and zones are handled as opaque void*.
namespace tracy
{
// Queue/command-buffer based context creation.
#if defined TRACY_VK_USE_SYMBOL_TABLE
extern void* CreateVkContext(VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr, bool calibrated = false);
#else
extern void* CreateVkContext(VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct);
#endif
// Host-calibrated context creation (no queue or command buffer).
#if defined VK_EXT_host_query_reset
#if defined TRACY_VK_USE_SYMBOL_TABLE
extern void* CreateVkContext(VkInstance instance, VkPhysicalDevice physdev, VkDevice device, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr);
#else
extern void* CreateVkContext(VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT qpreset, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct);
#endif
#endif
// Destroys a context returned by CreateVkContext.
extern void DestroyVkContext(void* ctx);
// Reads back finished timestamp queries of the context; resets of consumed queries are recorded into cmdbuf.
extern void CollectVkContext(void* ctx, VkCommandBuffer cmdbuf);
// Starts a named GPU zone; `zone` is caller-provided storage in which the zone object is constructed.
extern void BeginVkZoneScope(void* zone, void* ctx, VkCommandBuffer cmdbuf, const char* name, size_t nameLen);
// Ends the zone previously started in `zone`.
extern void EndVkZoneScope(void* zone);
}
#endif
#endif

View File

@@ -73,7 +73,7 @@ namespace Flax.Build.NativeCpp
/// <summary>
/// The native C++ module build settings container.
/// </summary>
public sealed class BuildOptions
public sealed class BuildOptions : ICloneable
{
/// <summary>
/// The target that builds this module.
@@ -442,5 +442,26 @@ namespace Flax.Build.NativeCpp
SourcePaths.Clear();
}
}
/// <inheritdoc />
public object Clone()
{
    // Only the members listed below are carried over. The compile and link environments are
    // cloned so the copy can be modified independently; the remaining values are assigned as-is.
    // Any member not listed keeps whatever a freshly constructed BuildOptions provides.
    return new BuildOptions
    {
        Target = Target,
        Platform = Platform,
        Toolchain = Toolchain,
        Architecture = Architecture,
        Configuration = Configuration,
        CompileEnv = (CompileEnvironment)CompileEnv.Clone(),
        LinkEnv = (LinkEnvironment)LinkEnv.Clone(),
        IntermediateFolder = IntermediateFolder,
        OutputFolder = OutputFolder,
        WorkingDirectory = WorkingDirectory,
        HotReloadPostfix = HotReloadPostfix,
        Flags = Flags,
    };
}
}
}