diff --git a/Source/Engine/Engine/Engine.cpp b/Source/Engine/Engine/Engine.cpp index 4ea18a85c..5e8224b4e 100644 --- a/Source/Engine/Engine/Engine.cpp +++ b/Source/Engine/Engine/Engine.cpp @@ -247,7 +247,6 @@ int32 Engine::Main(const Char* cmdLine) { OnDraw(); Time::OnEndDraw(); - FrameMark; } } @@ -397,6 +396,11 @@ void Engine::OnLateUpdate() void Engine::OnDraw() { +#if COMPILE_WITH_PROFILER + // Auto-enable GPU events when Tracy got connected + if (!ProfilerGPU::EventsEnabled && TracyIsConnected) + ProfilerGPU::EventsEnabled = true; +#endif PROFILE_CPU_NAMED("Draw"); // Begin frame rendering @@ -411,6 +415,7 @@ void Engine::OnDraw() device->Draw(); // End frame rendering + FrameMark; #if COMPILE_WITH_PROFILER ProfilerGPU::EndFrame(); #endif diff --git a/Source/Engine/Graphics/GPUContext.cpp b/Source/Engine/Graphics/GPUContext.cpp index 9c8a60b4b..fdeca122b 100644 --- a/Source/Engine/Graphics/GPUContext.cpp +++ b/Source/Engine/Graphics/GPUContext.cpp @@ -69,6 +69,10 @@ void GPUContext::FrameEnd() FlushState(); } +void GPUContext::OnPresent() +{ +} + void GPUContext::BindSR(int32 slot, GPUTexture* t) { ASSERT_LOW_LAYER(t == nullptr || t->ResidentMipLevels() == 0 || t->IsShaderResource()); diff --git a/Source/Engine/Graphics/GPUContext.h b/Source/Engine/Graphics/GPUContext.h index aa6003f87..a042e3f83 100644 --- a/Source/Engine/Graphics/GPUContext.h +++ b/Source/Engine/Graphics/GPUContext.h @@ -148,6 +148,11 @@ public: /// virtual void FrameEnd(); + /// + /// Called after performing final swapchain presentation and submitting all GPU commands. + /// + virtual void OnPresent(); + public: #if GPU_ALLOW_PROFILE_EVENTS /// diff --git a/Source/Engine/Graphics/GPUDevice.cpp b/Source/Engine/Graphics/GPUDevice.cpp index d7f64de45..eda66d95e 100644 --- a/Source/Engine/Graphics/GPUDevice.cpp +++ b/Source/Engine/Graphics/GPUDevice.cpp @@ -646,6 +646,7 @@ void GPUDevice::DrawEnd() const double presentEnd = Platform::GetTimeSeconds(); ProfilerGPU::OnPresentTime((float)((presentEnd - presentStart) * 1000.0)); #endif + GetMainContext()->OnPresent(); _wasVSyncUsed = anyVSync; _isRendering = false; diff --git a/Source/Engine/Graphics/Graphics.Build.cs b/Source/Engine/Graphics/Graphics.Build.cs index 212f975f0..e1c77d844 100644 --- a/Source/Engine/Graphics/Graphics.Build.cs +++ b/Source/Engine/Graphics/Graphics.Build.cs @@ -20,6 +20,12 @@ public abstract class GraphicsDeviceBaseModule : EngineModule // Enables GPU diagnostic tools (debug layer etc.) options.PublicDefinitions.Add("GPU_ENABLE_DIAGNOSTICS"); } + + if (Profiler.Use(options) && tracy.GPU && true) + { + // Enables GPU profiling with Tracy + options.PrivateDefinitions.Add("GPU_ENABLE_TRACY"); + } } /// diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp index 22780e3ec..b68f5e595 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp @@ -65,10 +65,17 @@ GPUContextDX11::GPUContextDX11(GPUDeviceDX11* device, ID3D11DeviceContext* conte _maxUASlots = GPU_MAX_UA_BINDED; if (_device->GetRendererType() != RendererType::DirectX11) _maxUASlots = 1; + +#if GPU_ENABLE_TRACY + _tracyContext = tracy::CreateD3D11Context(device->GetDevice(), context); +#endif } GPUContextDX11::~GPUContextDX11() { +#if GPU_ENABLE_TRACY + tracy::DestroyD3D11Context(_tracyContext); +#endif #if GPU_ALLOW_PROFILE_EVENTS SAFE_RELEASE(_userDefinedAnnotations); #endif @@ -139,16 +146,35 @@ void GPUContextDX11::FrameBegin() _context->CSSetSamplers(0, ARRAY_COUNT(samplers), samplers); } +void GPUContextDX11::OnPresent() +{ + GPUContext::OnPresent(); + +#if GPU_ENABLE_TRACY + tracy::CollectD3D11Context(_tracyContext); +#endif +} + #if GPU_ALLOW_PROFILE_EVENTS void GPUContextDX11::EventBegin(const Char* name) { if (_userDefinedAnnotations) _userDefinedAnnotations->BeginEvent(name); + +#if GPU_ENABLE_TRACY + char buffer[60]; + int32 bufferSize = StringUtils::Copy(buffer, name, sizeof(buffer)); + tracy::BeginD3D11ZoneScope(_tracyZone, _tracyContext, buffer, bufferSize); +#endif } void GPUContextDX11::EventEnd() { +#if GPU_ENABLE_TRACY + tracy::EndD3D11ZoneScope(_tracyZone); +#endif + if (_userDefinedAnnotations) _userDefinedAnnotations->EndEvent(); } diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h index 4941af978..48de69b3f 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h @@ -6,6 +6,7 @@ #include "GPUDeviceDX11.h" #include "GPUPipelineStateDX11.h" #include "../IncludeDirectXHeaders.h" +#include #if GRAPHICS_API_DIRECTX11 @@ -23,6 +24,10 @@ private: ID3D11DeviceContext* _context; #if GPU_ALLOW_PROFILE_EVENTS ID3DUserDefinedAnnotation* _userDefinedAnnotations; +#endif +#if COMPILE_WITH_PROFILER + void* _tracyContext; + byte _tracyZone[TracyD3D11ZoneSize]; #endif int32 _maxUASlots; @@ -110,6 +115,7 @@ public: // [GPUContext] void FrameBegin() override; + void OnPresent() override; #if GPU_ALLOW_PROFILE_EVENTS void EventBegin(const Char* name) override; void EventEnd() override; diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp index 9cb285ac1..6d06231ee 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp @@ -99,10 +99,16 @@ GPUContextDX12::GPUContextDX12(GPUDeviceDX12* device, D3D12_COMMAND_LIST_TYPE ty #if GPU_ENABLE_RESOURCE_NAMING _commandList->SetName(TEXT("GPUContextDX12::CommandList")); #endif +#if GPU_ENABLE_TRACY + _tracyContext = tracy::CreateD3D12Context(device->GetDevice(), _device->GetCommandQueue()->GetCommandQueue()); +#endif } GPUContextDX12::~GPUContextDX12() { +#if GPU_ENABLE_TRACY + tracy::DestroyD3D12Context(_tracyContext); +#endif DX_SAFE_RELEASE_CHECK(_commandList, 0); } @@ -706,6 +712,15 @@ void GPUContextDX12::FrameEnd() FrameFenceValues[0] = Execute(false); } +void GPUContextDX12::OnPresent() +{ + GPUContext::OnPresent(); + +#if GPU_ENABLE_TRACY + tracy::CollectD3D12Context(_tracyContext); +#endif +} + #if GPU_ALLOW_PROFILE_EVENTS void GPUContextDX12::EventBegin(const Char* name) @@ -713,10 +728,22 @@ void GPUContextDX12::EventBegin(const Char* name) #if USE_PIX PIXBeginEvent(_commandList, 0, name); #endif + +#if GPU_ENABLE_TRACY + char buffer[60]; + int32 bufferSize = StringUtils::Copy(buffer, name, sizeof(buffer)); + auto& zone = _tracyZones.AddOne(); + tracy::BeginD3D12ZoneScope(zone.Data, _tracyContext, _commandList, buffer, bufferSize); +#endif } void GPUContextDX12::EventEnd() { +#if GPU_ENABLE_TRACY + tracy::EndD3D12ZoneScope(_tracyZones.Last().Data); + _tracyZones.RemoveLast(); +#endif + #if USE_PIX PIXEndEvent(_commandList); #endif diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h index 3fa0f7930..917b68165 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h @@ -6,6 +6,7 @@ #include "IShaderResourceDX12.h" #include "DescriptorHeapDX12.h" #include "../IncludeDirectXHeaders.h" +#include #if GRAPHICS_API_DIRECTX12 @@ -71,6 +72,12 @@ private: GPUConstantBufferDX12* _cbHandles[GPU_MAX_CB_BINDED]; GPUSamplerDX12* _samplers[GPU_MAX_SAMPLER_BINDED - GPU_STATIC_SAMPLERS_COUNT]; +#if COMPILE_WITH_PROFILER + void* _tracyContext; + struct TracyZone { byte Data[TracyD3D12ZoneSize]; }; + Array> _tracyZones; +#endif + public: GPUContextDX12(GPUDeviceDX12* device, D3D12_COMMAND_LIST_TYPE type); @@ -154,6 +161,7 @@ public: // [GPUContext] void FrameBegin() override; void FrameEnd() override; + void OnPresent() override; #if GPU_ALLOW_PROFILE_EVENTS void EventBegin(const Char* name) override; void EventEnd() override; diff --git a/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.cpp index 029d8ee1d..36eacccf9 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.cpp @@ -49,10 +49,19 @@ void CmdBufferVulkan::End() PROFILE_CPU(); ASSERT(IsOutsideRenderPass()); -#if GPU_ALLOW_PROFILE_EVENTS && VK_EXT_debug_utils +#if GPU_ALLOW_PROFILE_EVENTS // End remaining events while (_eventsBegin--) - vkCmdEndDebugUtilsLabelEXT(GetHandle()); + { +#if VK_EXT_debug_utils + if (vkCmdEndDebugUtilsLabelEXT) + vkCmdEndDebugUtilsLabelEXT(GetHandle()); +#endif +#if GPU_ENABLE_TRACY + tracy::EndVkZoneScope(_tracyZones.Last().Data); + _tracyZones.RemoveLast(); +#endif + } #endif VALIDATE_VULKAN_RESULT(vkEndCommandBuffer(GetHandle())); @@ -85,39 +94,43 @@ void CmdBufferVulkan::EndRenderPass() #if GPU_ALLOW_PROFILE_EVENTS -void CmdBufferVulkan::BeginEvent(const Char* name) +void CmdBufferVulkan::BeginEvent(const Char* name, void* tracyContext) { -#if VK_EXT_debug_utils - if (!vkCmdBeginDebugUtilsLabelEXT) - return; - _eventsBegin++; - // Convert to ANSI - char buffer[101]; - int32 i = 0; - while (i < 100 && name[i]) - { - buffer[i] = (char)name[i]; - i++; - } - buffer[i] = 0; + char buffer[60]; + int32 bufferSize = StringUtils::Copy(buffer, name, sizeof(buffer)); - VkDebugUtilsLabelEXT label; - RenderToolsVulkan::ZeroStruct(label, VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT); - label.pLabelName = buffer; - vkCmdBeginDebugUtilsLabelEXT(GetHandle(), &label); +#if GPU_ENABLE_TRACY + auto& zone = _tracyZones.AddOne(); + tracy::BeginVkZoneScope(zone.Data, tracyContext, GetHandle(), buffer, bufferSize); +#endif + +#if VK_EXT_debug_utils + if (vkCmdBeginDebugUtilsLabelEXT) + { + VkDebugUtilsLabelEXT label; + RenderToolsVulkan::ZeroStruct(label, VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT); + label.pLabelName = buffer; + vkCmdBeginDebugUtilsLabelEXT(GetHandle(), &label); + } #endif } void CmdBufferVulkan::EndEvent() { -#if VK_EXT_debug_utils - if (_eventsBegin == 0 || !vkCmdEndDebugUtilsLabelEXT) + if (_eventsBegin == 0) return; _eventsBegin--; - vkCmdEndDebugUtilsLabelEXT(GetHandle()); +#if VK_EXT_debug_utils + if (vkCmdEndDebugUtilsLabelEXT) + vkCmdEndDebugUtilsLabelEXT(GetHandle()); +#endif + +#if GPU_ENABLE_TRACY + tracy::EndVkZoneScope(_tracyZones.Last().Data); + _tracyZones.RemoveLast(); #endif } diff --git a/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.h index 15cd616ff..7cb3ee104 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.h +++ b/Source/Engine/GraphicsDevice/Vulkan/CmdBufferVulkan.h @@ -5,6 +5,7 @@ #include "GPUDeviceVulkan.h" #include "Engine/Core/Types/BaseTypes.h" #include "Engine/Core/Collections/Array.h" +#include #if GRAPHICS_API_VULKAN @@ -42,6 +43,8 @@ private: FenceVulkan* _fence; #if GPU_ALLOW_PROFILE_EVENTS int32 _eventsBegin = 0; + struct TracyZone { byte Data[TracyVulkanZoneSize]; }; + Array> _tracyZones; #endif // The latest value when command buffer was submitted. @@ -129,7 +132,7 @@ public: } #if GPU_ALLOW_PROFILE_EVENTS - void BeginEvent(const Char* name); + void BeginEvent(const Char* name, void* tracyContext); void EndEvent(); #endif diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp index d0ecf9358..99b3712ae 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.cpp @@ -4,6 +4,7 @@ #include "GPUContextVulkan.h" #include "CmdBufferVulkan.h" +#include "GPUAdapterVulkan.h" #include "RenderToolsVulkan.h" #include "Engine/Core/Math/Color.h" #include "Engine/Core/Math/Rectangle.h" @@ -15,6 +16,7 @@ #include "Engine/Profiler/RenderStats.h" #include "GPUShaderProgramVulkan.h" #include "GPUTextureVulkan.h" +#include "QueueVulkan.h" #include "Engine/Graphics/PixelFormatExtensions.h" #include "Engine/Debug/Exceptions/NotImplementedException.h" @@ -107,10 +109,37 @@ GPUContextVulkan::GPUContextVulkan(GPUDeviceVulkan* device, QueueVulkan* queue) _handlesSizes[(int32)SpirvShaderResourceBindingType::SRV] = GPU_MAX_SR_BINDED; _handlesSizes[(int32)SpirvShaderResourceBindingType::UAV] = GPU_MAX_UA_BINDED; #endif + +#if GPU_ENABLE_TRACY +#if VK_EXT_calibrated_timestamps && VK_EXT_host_query_reset + // Use calibrated timestamps extension + if (vkResetQueryPoolEXT && vkGetCalibratedTimestampsEXT) + { + _tracyContext = tracy::CreateVkContext(_device->Adapter->Gpu, _device->Device, vkResetQueryPoolEXT, vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, vkGetCalibratedTimestampsEXT); + } + else +#endif + { + // Use immediate command buffer for Tracy initialization + VkCommandBufferAllocateInfo cmdInfo; + RenderToolsVulkan::ZeroStruct(cmdInfo, VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO); + cmdInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + cmdInfo.commandPool = _cmdBufferManager->GetHandle(); + cmdInfo.commandBufferCount = 1; + VkCommandBuffer tracyCmdBuffer; + vkAllocateCommandBuffers(_device->Device, &cmdInfo, &tracyCmdBuffer); + _tracyContext = tracy::CreateVkContext(_device->Adapter->Gpu, _device->Device, _queue->GetHandle(), tracyCmdBuffer, vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, vkGetCalibratedTimestampsEXT); + vkQueueWaitIdle(_queue->GetHandle()); + vkFreeCommandBuffers(_device->Device, _cmdBufferManager->GetHandle(), 1, &tracyCmdBuffer); + } +#endif } GPUContextVulkan::~GPUContextVulkan() { +#if GPU_ENABLE_TRACY + tracy::DestroyVkContext(_tracyContext); +#endif for (int32 i = 0; i < _descriptorPools.Count(); i++) { _descriptorPools[i].ClearDelete(); @@ -679,15 +708,9 @@ void GPUContextVulkan::OnDrawCall() // Bind descriptors sets to the graphics pipeline if (pipelineState->HasDescriptorsPerStageMask) { - vkCmdBindDescriptorSets( - cmdBuffer->GetHandle(), - VK_PIPELINE_BIND_POINT_GRAPHICS, - pipelineState->GetLayout()->Handle, - 0, - pipelineState->DescriptorSetHandles.Count(), - pipelineState->DescriptorSetHandles.Get(), - pipelineState->DynamicOffsets.Count(), - pipelineState->DynamicOffsets.Get()); + auto& descriptorSets = pipelineState->DescriptorSetHandles; + auto& dynamicOffsets = pipelineState->DynamicOffsets; + vkCmdBindDescriptorSets(cmdBuffer->GetHandle(), VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineState->GetLayout()->Handle, 0, descriptorSets.Count(), descriptorSets.Get(), dynamicOffsets.Count(), dynamicOffsets.Get()); } _rtDirtyFlag = false; @@ -748,6 +771,11 @@ void GPUContextVulkan::FrameEnd() // Execute any queued layout transitions that weren't already handled by the render pass FlushBarriers(); +#if GPU_ENABLE_TRACY + if (cmdBuffer) + tracy::CollectVkContext(_tracyContext, cmdBuffer->GetHandle()); +#endif + // Base GPUContext::FrameEnd(); } @@ -757,7 +785,12 @@ void GPUContextVulkan::FrameEnd() void GPUContextVulkan::EventBegin(const Char* name) { const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer(); - cmdBuffer->BeginEvent(name); +#if COMPILE_WITH_PROFILER + void* tracyContext = _tracyContext; +#else + void* tracyContext = nullptr; +#endif + cmdBuffer->BeginEvent(name, tracyContext); } void GPUContextVulkan::EventEnd() diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h index b5be52461..73aa5a52f 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUContextVulkan.h @@ -94,6 +94,9 @@ private: #if ENABLE_ASSERTION uint32 _handlesSizes[(int32)SpirvShaderResourceBindingType::MAX]; #endif +#if COMPILE_WITH_PROFILER + void* _tracyContext; +#endif typedef Array DescriptorPoolArray; Dictionary _descriptorPools; diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.Layers.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.Layers.cpp index 14cca0a6d..ad99d8850 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.Layers.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUDeviceVulkan.Layers.cpp @@ -62,6 +62,10 @@ static const char* GDeviceExtensions[] = #endif #if VK_KHR_sampler_mirror_clamp_to_edge VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, +#endif +#if GPU_ENABLE_TRACY && VK_EXT_calibrated_timestamps && VK_EXT_host_query_reset + VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME, + VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, #endif nullptr }; diff --git a/Source/Engine/GraphicsDevice/Vulkan/GPUSwapChainVulkan.cpp b/Source/Engine/GraphicsDevice/Vulkan/GPUSwapChainVulkan.cpp index 21971c5ca..70d422cd9 100644 --- a/Source/Engine/GraphicsDevice/Vulkan/GPUSwapChainVulkan.cpp +++ b/Source/Engine/GraphicsDevice/Vulkan/GPUSwapChainVulkan.cpp @@ -424,6 +424,7 @@ GPUSwapChainVulkan::Status GPUSwapChainVulkan::Present(QueueVulkan* presentQueue { if (_currentImageIndex == -1) return Status::Ok; + PROFILE_CPU_NAMED("vkQueuePresentKHR"); VkPresentInfoKHR presentInfo; RenderToolsVulkan::ZeroStruct(presentInfo, VK_STRUCTURE_TYPE_PRESENT_INFO_KHR); @@ -506,7 +507,7 @@ int32 GPUSwapChainVulkan::TryPresent(Function int32 GPUSwapChainVulkan::AcquireNextImage(SemaphoreVulkan*& outSemaphore) { - PROFILE_CPU(); + PROFILE_CPU_NAMED("vkAcquireNextImageKHR"); ASSERT(_swapChain && _backBuffers.HasItems()); uint32 imageIndex = _currentImageIndex; @@ -514,13 +515,7 @@ int32 GPUSwapChainVulkan::AcquireNextImage(SemaphoreVulkan*& outSemaphore) _semaphoreIndex = (_semaphoreIndex + 1) % _backBuffers.Count(); const auto semaphore = _backBuffers[_semaphoreIndex].ImageAcquiredSemaphore; - const VkResult result = vkAcquireNextImageKHR( - _device->Device, - _swapChain, - UINT64_MAX, - semaphore->GetHandle(), - VK_NULL_HANDLE, - &imageIndex); + const VkResult result = vkAcquireNextImageKHR(_device->Device, _swapChain, UINT64_MAX, semaphore->GetHandle(), VK_NULL_HANDLE, &imageIndex); if (result == VK_ERROR_OUT_OF_DATE_KHR) { _semaphoreIndex = prevSemaphoreIndex; diff --git a/Source/Engine/Platform/Base/StringUtilsBase.cpp b/Source/Engine/Platform/Base/StringUtilsBase.cpp index 6b9c6f861..adda1b5a7 100644 --- a/Source/Engine/Platform/Base/StringUtilsBase.cpp +++ b/Source/Engine/Platform/Base/StringUtilsBase.cpp @@ -16,6 +16,18 @@ constexpr char DirectorySeparatorChar = '\\'; constexpr char AltDirectorySeparatorChar = '/'; constexpr char VolumeSeparatorChar = ':'; +int32 StringUtils::Copy(char* dst, const Char* src, int32 count) +{ + int32 i = 0; + while (i < count && src[i]) + { + dst[i] = (char)src[i]; + i++; + } + dst[i] = 0; + return i; +} + const Char* StringUtils::FindIgnoreCase(const Char* str, const Char* toFind) { if (toFind == nullptr || str == nullptr) diff --git a/Source/Engine/Platform/StringUtils.h b/Source/Engine/Platform/StringUtils.h index 5d712070b..3e1dc5660 100644 --- a/Source/Engine/Platform/StringUtils.h +++ b/Source/Engine/Platform/StringUtils.h @@ -125,6 +125,9 @@ public: // Copies the string (count is maximum amount of characters to copy). static Char* Copy(Char* dst, const Char* src, int32 count); + // Copies the string (count is maximum amount of characters to copy). Returns amount of copied elements (excluding null terminator character). + static int32 Copy(char* dst, const Char* src, int32 count); + // Finds specific sub-string in the input string. Returns the first found position in the input string or nulll if failed. static const Char* Find(const Char* str, const Char* toFind); diff --git a/Source/ThirdParty/tracy/client/TracyProfiler.cpp b/Source/ThirdParty/tracy/client/TracyProfiler.cpp index 837b36cc3..fe24f9309 100644 --- a/Source/ThirdParty/tracy/client/TracyProfiler.cpp +++ b/Source/ThirdParty/tracy/client/TracyProfiler.cpp @@ -1400,6 +1400,7 @@ TRACY_API LuaZoneState& GetLuaZoneState() { return s_luaZoneState; } # endif #endif +TRACY_API bool IsConnected() { return GetProfiler().IsConnected(); } TRACY_API bool ProfilerAvailable() { return s_instance != nullptr; } TRACY_API bool ProfilerAllocatorAvailable() { return !RpThreadShutdown; } diff --git a/Source/ThirdParty/tracy/common/TracyColor.hpp b/Source/ThirdParty/tracy/common/TracyColor.hpp new file mode 100644 index 000000000..4825c0fba --- /dev/null +++ b/Source/ThirdParty/tracy/common/TracyColor.hpp @@ -0,0 +1,690 @@ +#ifndef __TRACYCOLOR_HPP__ +#define __TRACYCOLOR_HPP__ + +namespace tracy +{ +struct Color +{ +enum ColorType +{ + Snow = 0xfffafa, + GhostWhite = 0xf8f8ff, + WhiteSmoke = 0xf5f5f5, + Gainsboro = 0xdcdcdc, + FloralWhite = 0xfffaf0, + OldLace = 0xfdf5e6, + Linen = 0xfaf0e6, + AntiqueWhite = 0xfaebd7, + PapayaWhip = 0xffefd5, + BlanchedAlmond = 0xffebcd, + Bisque = 0xffe4c4, + PeachPuff = 0xffdab9, + NavajoWhite = 0xffdead, + Moccasin = 0xffe4b5, + Cornsilk = 0xfff8dc, + Ivory = 0xfffff0, + LemonChiffon = 0xfffacd, + Seashell = 0xfff5ee, + Honeydew = 0xf0fff0, + MintCream = 0xf5fffa, + Azure = 0xf0ffff, + AliceBlue = 0xf0f8ff, + Lavender = 0xe6e6fa, + LavenderBlush = 0xfff0f5, + MistyRose = 0xffe4e1, + White = 0xffffff, + Black = 0x000000, + DarkSlateGray = 0x2f4f4f, + DarkSlateGrey = 0x2f4f4f, + DimGray = 0x696969, + DimGrey = 0x696969, + SlateGray = 0x708090, + SlateGrey = 0x708090, + LightSlateGray = 0x778899, + LightSlateGrey = 0x778899, + Gray = 0xbebebe, + Grey = 0xbebebe, + X11Gray = 0xbebebe, + X11Grey = 0xbebebe, + WebGray = 0x808080, + WebGrey = 0x808080, + LightGrey = 0xd3d3d3, + LightGray = 0xd3d3d3, + MidnightBlue = 0x191970, + Navy = 0x000080, + NavyBlue = 0x000080, + CornflowerBlue = 0x6495ed, + DarkSlateBlue = 0x483d8b, + SlateBlue = 0x6a5acd, + MediumSlateBlue = 0x7b68ee, + LightSlateBlue = 0x8470ff, + MediumBlue = 0x0000cd, + RoyalBlue = 0x4169e1, + Blue = 0x0000ff, + DodgerBlue = 0x1e90ff, + DeepSkyBlue = 0x00bfff, + SkyBlue = 0x87ceeb, + LightSkyBlue = 0x87cefa, + SteelBlue = 0x4682b4, + LightSteelBlue = 0xb0c4de, + LightBlue = 0xadd8e6, + PowderBlue = 0xb0e0e6, + PaleTurquoise = 0xafeeee, + DarkTurquoise = 0x00ced1, + MediumTurquoise = 0x48d1cc, + Turquoise = 0x40e0d0, + Cyan = 0x00ffff, + Aqua = 0x00ffff, + LightCyan = 0xe0ffff, + CadetBlue = 0x5f9ea0, + MediumAquamarine = 0x66cdaa, + Aquamarine = 0x7fffd4, + DarkGreen = 0x006400, + DarkOliveGreen = 0x556b2f, + DarkSeaGreen = 0x8fbc8f, + SeaGreen = 0x2e8b57, + MediumSeaGreen = 0x3cb371, + LightSeaGreen = 0x20b2aa, + PaleGreen = 0x98fb98, + SpringGreen = 0x00ff7f, + LawnGreen = 0x7cfc00, + Green = 0x00ff00, + Lime = 0x00ff00, + X11Green = 0x00ff00, + WebGreen = 0x008000, + Chartreuse = 0x7fff00, + MediumSpringGreen = 0x00fa9a, + GreenYellow = 0xadff2f, + LimeGreen = 0x32cd32, + YellowGreen = 0x9acd32, + ForestGreen = 0x228b22, + OliveDrab = 0x6b8e23, + DarkKhaki = 0xbdb76b, + Khaki = 0xf0e68c, + PaleGoldenrod = 0xeee8aa, + LightGoldenrodYellow = 0xfafad2, + LightYellow = 0xffffe0, + Yellow = 0xffff00, + Gold = 0xffd700, + LightGoldenrod = 0xeedd82, + Goldenrod = 0xdaa520, + DarkGoldenrod = 0xb8860b, + RosyBrown = 0xbc8f8f, + IndianRed = 0xcd5c5c, + SaddleBrown = 0x8b4513, + Sienna = 0xa0522d, + Peru = 0xcd853f, + Burlywood = 0xdeb887, + Beige = 0xf5f5dc, + Wheat = 0xf5deb3, + SandyBrown = 0xf4a460, + Tan = 0xd2b48c, + Chocolate = 0xd2691e, + Firebrick = 0xb22222, + Brown = 0xa52a2a, + DarkSalmon = 0xe9967a, + Salmon = 0xfa8072, + LightSalmon = 0xffa07a, + Orange = 0xffa500, + DarkOrange = 0xff8c00, + Coral = 0xff7f50, + LightCoral = 0xf08080, + Tomato = 0xff6347, + OrangeRed = 0xff4500, + Red = 0xff0000, + HotPink = 0xff69b4, + DeepPink = 0xff1493, + Pink = 0xffc0cb, + LightPink = 0xffb6c1, + PaleVioletRed = 0xdb7093, + Maroon = 0xb03060, + X11Maroon = 0xb03060, + WebMaroon = 0x800000, + MediumVioletRed = 0xc71585, + VioletRed = 0xd02090, + Magenta = 0xff00ff, + Fuchsia = 0xff00ff, + Violet = 0xee82ee, + Plum = 0xdda0dd, + Orchid = 0xda70d6, + MediumOrchid = 0xba55d3, + DarkOrchid = 0x9932cc, + DarkViolet = 0x9400d3, + BlueViolet = 0x8a2be2, + Purple = 0xa020f0, + X11Purple = 0xa020f0, + WebPurple = 0x800080, + MediumPurple = 0x9370db, + Thistle = 0xd8bfd8, + Snow1 = 0xfffafa, + Snow2 = 0xeee9e9, + Snow3 = 0xcdc9c9, + Snow4 = 0x8b8989, + Seashell1 = 0xfff5ee, + Seashell2 = 0xeee5de, + Seashell3 = 0xcdc5bf, + Seashell4 = 0x8b8682, + AntiqueWhite1 = 0xffefdb, + AntiqueWhite2 = 0xeedfcc, + AntiqueWhite3 = 0xcdc0b0, + AntiqueWhite4 = 0x8b8378, + Bisque1 = 0xffe4c4, + Bisque2 = 0xeed5b7, + Bisque3 = 0xcdb79e, + Bisque4 = 0x8b7d6b, + PeachPuff1 = 0xffdab9, + PeachPuff2 = 0xeecbad, + PeachPuff3 = 0xcdaf95, + PeachPuff4 = 0x8b7765, + NavajoWhite1 = 0xffdead, + NavajoWhite2 = 0xeecfa1, + NavajoWhite3 = 0xcdb38b, + NavajoWhite4 = 0x8b795e, + LemonChiffon1 = 0xfffacd, + LemonChiffon2 = 0xeee9bf, + LemonChiffon3 = 0xcdc9a5, + LemonChiffon4 = 0x8b8970, + Cornsilk1 = 0xfff8dc, + Cornsilk2 = 0xeee8cd, + Cornsilk3 = 0xcdc8b1, + Cornsilk4 = 0x8b8878, + Ivory1 = 0xfffff0, + Ivory2 = 0xeeeee0, + Ivory3 = 0xcdcdc1, + Ivory4 = 0x8b8b83, + Honeydew1 = 0xf0fff0, + Honeydew2 = 0xe0eee0, + Honeydew3 = 0xc1cdc1, + Honeydew4 = 0x838b83, + LavenderBlush1 = 0xfff0f5, + LavenderBlush2 = 0xeee0e5, + LavenderBlush3 = 0xcdc1c5, + LavenderBlush4 = 0x8b8386, + MistyRose1 = 0xffe4e1, + MistyRose2 = 0xeed5d2, + MistyRose3 = 0xcdb7b5, + MistyRose4 = 0x8b7d7b, + Azure1 = 0xf0ffff, + Azure2 = 0xe0eeee, + Azure3 = 0xc1cdcd, + Azure4 = 0x838b8b, + SlateBlue1 = 0x836fff, + SlateBlue2 = 0x7a67ee, + SlateBlue3 = 0x6959cd, + SlateBlue4 = 0x473c8b, + RoyalBlue1 = 0x4876ff, + RoyalBlue2 = 0x436eee, + RoyalBlue3 = 0x3a5fcd, + RoyalBlue4 = 0x27408b, + Blue1 = 0x0000ff, + Blue2 = 0x0000ee, + Blue3 = 0x0000cd, + Blue4 = 0x00008b, + DodgerBlue1 = 0x1e90ff, + DodgerBlue2 = 0x1c86ee, + DodgerBlue3 = 0x1874cd, + DodgerBlue4 = 0x104e8b, + SteelBlue1 = 0x63b8ff, + SteelBlue2 = 0x5cacee, + SteelBlue3 = 0x4f94cd, + SteelBlue4 = 0x36648b, + DeepSkyBlue1 = 0x00bfff, + DeepSkyBlue2 = 0x00b2ee, + DeepSkyBlue3 = 0x009acd, + DeepSkyBlue4 = 0x00688b, + SkyBlue1 = 0x87ceff, + SkyBlue2 = 0x7ec0ee, + SkyBlue3 = 0x6ca6cd, + SkyBlue4 = 0x4a708b, + LightSkyBlue1 = 0xb0e2ff, + LightSkyBlue2 = 0xa4d3ee, + LightSkyBlue3 = 0x8db6cd, + LightSkyBlue4 = 0x607b8b, + SlateGray1 = 0xc6e2ff, + SlateGray2 = 0xb9d3ee, + SlateGray3 = 0x9fb6cd, + SlateGray4 = 0x6c7b8b, + LightSteelBlue1 = 0xcae1ff, + LightSteelBlue2 = 0xbcd2ee, + LightSteelBlue3 = 0xa2b5cd, + LightSteelBlue4 = 0x6e7b8b, + LightBlue1 = 0xbfefff, + LightBlue2 = 0xb2dfee, + LightBlue3 = 0x9ac0cd, + LightBlue4 = 0x68838b, + LightCyan1 = 0xe0ffff, + LightCyan2 = 0xd1eeee, + LightCyan3 = 0xb4cdcd, + LightCyan4 = 0x7a8b8b, + PaleTurquoise1 = 0xbbffff, + PaleTurquoise2 = 0xaeeeee, + PaleTurquoise3 = 0x96cdcd, + PaleTurquoise4 = 0x668b8b, + CadetBlue1 = 0x98f5ff, + CadetBlue2 = 0x8ee5ee, + CadetBlue3 = 0x7ac5cd, + CadetBlue4 = 0x53868b, + Turquoise1 = 0x00f5ff, + Turquoise2 = 0x00e5ee, + Turquoise3 = 0x00c5cd, + Turquoise4 = 0x00868b, + Cyan1 = 0x00ffff, + Cyan2 = 0x00eeee, + Cyan3 = 0x00cdcd, + Cyan4 = 0x008b8b, + DarkSlateGray1 = 0x97ffff, + DarkSlateGray2 = 0x8deeee, + DarkSlateGray3 = 0x79cdcd, + DarkSlateGray4 = 0x528b8b, + Aquamarine1 = 0x7fffd4, + Aquamarine2 = 0x76eec6, + Aquamarine3 = 0x66cdaa, + Aquamarine4 = 0x458b74, + DarkSeaGreen1 = 0xc1ffc1, + DarkSeaGreen2 = 0xb4eeb4, + DarkSeaGreen3 = 0x9bcd9b, + DarkSeaGreen4 = 0x698b69, + SeaGreen1 = 0x54ff9f, + SeaGreen2 = 0x4eee94, + SeaGreen3 = 0x43cd80, + SeaGreen4 = 0x2e8b57, + PaleGreen1 = 0x9aff9a, + PaleGreen2 = 0x90ee90, + PaleGreen3 = 0x7ccd7c, + PaleGreen4 = 0x548b54, + SpringGreen1 = 0x00ff7f, + SpringGreen2 = 0x00ee76, + SpringGreen3 = 0x00cd66, + SpringGreen4 = 0x008b45, + Green1 = 0x00ff00, + Green2 = 0x00ee00, + Green3 = 0x00cd00, + Green4 = 0x008b00, + Chartreuse1 = 0x7fff00, + Chartreuse2 = 0x76ee00, + Chartreuse3 = 0x66cd00, + Chartreuse4 = 0x458b00, + OliveDrab1 = 0xc0ff3e, + OliveDrab2 = 0xb3ee3a, + OliveDrab3 = 0x9acd32, + OliveDrab4 = 0x698b22, + DarkOliveGreen1 = 0xcaff70, + DarkOliveGreen2 = 0xbcee68, + DarkOliveGreen3 = 0xa2cd5a, + DarkOliveGreen4 = 0x6e8b3d, + Khaki1 = 0xfff68f, + Khaki2 = 0xeee685, + Khaki3 = 0xcdc673, + Khaki4 = 0x8b864e, + LightGoldenrod1 = 0xffec8b, + LightGoldenrod2 = 0xeedc82, + LightGoldenrod3 = 0xcdbe70, + LightGoldenrod4 = 0x8b814c, + LightYellow1 = 0xffffe0, + LightYellow2 = 0xeeeed1, + LightYellow3 = 0xcdcdb4, + LightYellow4 = 0x8b8b7a, + Yellow1 = 0xffff00, + Yellow2 = 0xeeee00, + Yellow3 = 0xcdcd00, + Yellow4 = 0x8b8b00, + Gold1 = 0xffd700, + Gold2 = 0xeec900, + Gold3 = 0xcdad00, + Gold4 = 0x8b7500, + Goldenrod1 = 0xffc125, + Goldenrod2 = 0xeeb422, + Goldenrod3 = 0xcd9b1d, + Goldenrod4 = 0x8b6914, + DarkGoldenrod1 = 0xffb90f, + DarkGoldenrod2 = 0xeead0e, + DarkGoldenrod3 = 0xcd950c, + DarkGoldenrod4 = 0x8b6508, + RosyBrown1 = 0xffc1c1, + RosyBrown2 = 0xeeb4b4, + RosyBrown3 = 0xcd9b9b, + RosyBrown4 = 0x8b6969, + IndianRed1 = 0xff6a6a, + IndianRed2 = 0xee6363, + IndianRed3 = 0xcd5555, + IndianRed4 = 0x8b3a3a, + Sienna1 = 0xff8247, + Sienna2 = 0xee7942, + Sienna3 = 0xcd6839, + Sienna4 = 0x8b4726, + Burlywood1 = 0xffd39b, + Burlywood2 = 0xeec591, + Burlywood3 = 0xcdaa7d, + Burlywood4 = 0x8b7355, + Wheat1 = 0xffe7ba, + Wheat2 = 0xeed8ae, + Wheat3 = 0xcdba96, + Wheat4 = 0x8b7e66, + Tan1 = 0xffa54f, + Tan2 = 0xee9a49, + Tan3 = 0xcd853f, + Tan4 = 0x8b5a2b, + Chocolate1 = 0xff7f24, + Chocolate2 = 0xee7621, + Chocolate3 = 0xcd661d, + Chocolate4 = 0x8b4513, + Firebrick1 = 0xff3030, + Firebrick2 = 0xee2c2c, + Firebrick3 = 0xcd2626, + Firebrick4 = 0x8b1a1a, + Brown1 = 0xff4040, + Brown2 = 0xee3b3b, + Brown3 = 0xcd3333, + Brown4 = 0x8b2323, + Salmon1 = 0xff8c69, + Salmon2 = 0xee8262, + Salmon3 = 0xcd7054, + Salmon4 = 0x8b4c39, + LightSalmon1 = 0xffa07a, + LightSalmon2 = 0xee9572, + LightSalmon3 = 0xcd8162, + LightSalmon4 = 0x8b5742, + Orange1 = 0xffa500, + Orange2 = 0xee9a00, + Orange3 = 0xcd8500, + Orange4 = 0x8b5a00, + DarkOrange1 = 0xff7f00, + DarkOrange2 = 0xee7600, + DarkOrange3 = 0xcd6600, + DarkOrange4 = 0x8b4500, + Coral1 = 0xff7256, + Coral2 = 0xee6a50, + Coral3 = 0xcd5b45, + Coral4 = 0x8b3e2f, + Tomato1 = 0xff6347, + Tomato2 = 0xee5c42, + Tomato3 = 0xcd4f39, + Tomato4 = 0x8b3626, + OrangeRed1 = 0xff4500, + OrangeRed2 = 0xee4000, + OrangeRed3 = 0xcd3700, + OrangeRed4 = 0x8b2500, + Red1 = 0xff0000, + Red2 = 0xee0000, + Red3 = 0xcd0000, + Red4 = 0x8b0000, + DeepPink1 = 0xff1493, + DeepPink2 = 0xee1289, + DeepPink3 = 0xcd1076, + DeepPink4 = 0x8b0a50, + HotPink1 = 0xff6eb4, + HotPink2 = 0xee6aa7, + HotPink3 = 0xcd6090, + HotPink4 = 0x8b3a62, + Pink1 = 0xffb5c5, + Pink2 = 0xeea9b8, + Pink3 = 0xcd919e, + Pink4 = 0x8b636c, + LightPink1 = 0xffaeb9, + LightPink2 = 0xeea2ad, + LightPink3 = 0xcd8c95, + LightPink4 = 0x8b5f65, + PaleVioletRed1 = 0xff82ab, + PaleVioletRed2 = 0xee799f, + PaleVioletRed3 = 0xcd6889, + PaleVioletRed4 = 0x8b475d, + Maroon1 = 0xff34b3, + Maroon2 = 0xee30a7, + Maroon3 = 0xcd2990, + Maroon4 = 0x8b1c62, + VioletRed1 = 0xff3e96, + VioletRed2 = 0xee3a8c, + VioletRed3 = 0xcd3278, + VioletRed4 = 0x8b2252, + Magenta1 = 0xff00ff, + Magenta2 = 0xee00ee, + Magenta3 = 0xcd00cd, + Magenta4 = 0x8b008b, + Orchid1 = 0xff83fa, + Orchid2 = 0xee7ae9, + Orchid3 = 0xcd69c9, + Orchid4 = 0x8b4789, + Plum1 = 0xffbbff, + Plum2 = 0xeeaeee, + Plum3 = 0xcd96cd, + Plum4 = 0x8b668b, + MediumOrchid1 = 0xe066ff, + MediumOrchid2 = 0xd15fee, + MediumOrchid3 = 0xb452cd, + MediumOrchid4 = 0x7a378b, + DarkOrchid1 = 0xbf3eff, + DarkOrchid2 = 0xb23aee, + DarkOrchid3 = 0x9a32cd, + DarkOrchid4 = 0x68228b, + Purple1 = 0x9b30ff, + Purple2 = 0x912cee, + Purple3 = 0x7d26cd, + Purple4 = 0x551a8b, + MediumPurple1 = 0xab82ff, + MediumPurple2 = 0x9f79ee, + MediumPurple3 = 0x8968cd, + MediumPurple4 = 0x5d478b, + Thistle1 = 0xffe1ff, + Thistle2 = 0xeed2ee, + Thistle3 = 0xcdb5cd, + Thistle4 = 0x8b7b8b, + Gray0 = 0x000000, + Grey0 = 0x000000, + Gray1 = 0x030303, + Grey1 = 0x030303, + Gray2 = 0x050505, + Grey2 = 0x050505, + Gray3 = 0x080808, + Grey3 = 0x080808, + Gray4 = 0x0a0a0a, + Grey4 = 0x0a0a0a, + Gray5 = 0x0d0d0d, + Grey5 = 0x0d0d0d, + Gray6 = 0x0f0f0f, + Grey6 = 0x0f0f0f, + Gray7 = 0x121212, + Grey7 = 0x121212, + Gray8 = 0x141414, + Grey8 = 0x141414, + Gray9 = 0x171717, + Grey9 = 0x171717, + Gray10 = 0x1a1a1a, + Grey10 = 0x1a1a1a, + Gray11 = 0x1c1c1c, + Grey11 = 0x1c1c1c, + Gray12 = 0x1f1f1f, + Grey12 = 0x1f1f1f, + Gray13 = 0x212121, + Grey13 = 0x212121, + Gray14 = 0x242424, + Grey14 = 0x242424, + Gray15 = 0x262626, + Grey15 = 0x262626, + Gray16 = 0x292929, + Grey16 = 0x292929, + Gray17 = 0x2b2b2b, + Grey17 = 0x2b2b2b, + Gray18 = 0x2e2e2e, + Grey18 = 0x2e2e2e, + Gray19 = 0x303030, + Grey19 = 0x303030, + Gray20 = 0x333333, + Grey20 = 0x333333, + Gray21 = 0x363636, + Grey21 = 0x363636, + Gray22 = 0x383838, + Grey22 = 0x383838, + Gray23 = 0x3b3b3b, + Grey23 = 0x3b3b3b, + Gray24 = 0x3d3d3d, + Grey24 = 0x3d3d3d, + Gray25 = 0x404040, + Grey25 = 0x404040, + Gray26 = 0x424242, + Grey26 = 0x424242, + Gray27 = 0x454545, + Grey27 = 0x454545, + Gray28 = 0x474747, + Grey28 = 0x474747, + Gray29 = 0x4a4a4a, + Grey29 = 0x4a4a4a, + Gray30 = 0x4d4d4d, + Grey30 = 0x4d4d4d, + Gray31 = 0x4f4f4f, + Grey31 = 0x4f4f4f, + Gray32 = 0x525252, + Grey32 = 0x525252, + Gray33 = 0x545454, + Grey33 = 0x545454, + Gray34 = 0x575757, + Grey34 = 0x575757, + Gray35 = 0x595959, + Grey35 = 0x595959, + Gray36 = 0x5c5c5c, + Grey36 = 0x5c5c5c, + Gray37 = 0x5e5e5e, + Grey37 = 0x5e5e5e, + Gray38 = 0x616161, + Grey38 = 0x616161, + Gray39 = 0x636363, + Grey39 = 0x636363, + Gray40 = 0x666666, + Grey40 = 0x666666, + Gray41 = 0x696969, + Grey41 = 0x696969, + Gray42 = 0x6b6b6b, + Grey42 = 0x6b6b6b, + Gray43 = 0x6e6e6e, + Grey43 = 0x6e6e6e, + Gray44 = 0x707070, + Grey44 = 0x707070, + Gray45 = 0x737373, + Grey45 = 0x737373, + Gray46 = 0x757575, + Grey46 = 0x757575, + Gray47 = 0x787878, + Grey47 = 0x787878, + Gray48 = 0x7a7a7a, + Grey48 = 0x7a7a7a, + Gray49 = 0x7d7d7d, + Grey49 = 0x7d7d7d, + Gray50 = 0x7f7f7f, + Grey50 = 0x7f7f7f, + Gray51 = 0x828282, + Grey51 = 0x828282, + Gray52 = 0x858585, + Grey52 = 0x858585, + Gray53 = 0x878787, + Grey53 = 0x878787, + Gray54 = 0x8a8a8a, + Grey54 = 0x8a8a8a, + Gray55 = 0x8c8c8c, + Grey55 = 0x8c8c8c, + Gray56 = 0x8f8f8f, + Grey56 = 0x8f8f8f, + Gray57 = 0x919191, + Grey57 = 0x919191, + Gray58 = 0x949494, + Grey58 = 0x949494, + Gray59 = 0x969696, + Grey59 = 0x969696, + Gray60 = 0x999999, + Grey60 = 0x999999, + Gray61 = 0x9c9c9c, + Grey61 = 0x9c9c9c, + Gray62 = 0x9e9e9e, + Grey62 = 0x9e9e9e, + Gray63 = 0xa1a1a1, + Grey63 = 0xa1a1a1, + Gray64 = 0xa3a3a3, + Grey64 = 0xa3a3a3, + Gray65 = 0xa6a6a6, + Grey65 = 0xa6a6a6, + Gray66 = 0xa8a8a8, + Grey66 = 0xa8a8a8, + Gray67 = 0xababab, + Grey67 = 0xababab, + Gray68 = 0xadadad, + Grey68 = 0xadadad, + Gray69 = 0xb0b0b0, + Grey69 = 0xb0b0b0, + Gray70 = 0xb3b3b3, + Grey70 = 0xb3b3b3, + Gray71 = 0xb5b5b5, + Grey71 = 0xb5b5b5, + Gray72 = 0xb8b8b8, + Grey72 = 0xb8b8b8, + Gray73 = 0xbababa, + Grey73 = 0xbababa, + Gray74 = 0xbdbdbd, + Grey74 = 0xbdbdbd, + Gray75 = 0xbfbfbf, + Grey75 = 0xbfbfbf, + Gray76 = 0xc2c2c2, + Grey76 = 0xc2c2c2, + Gray77 = 0xc4c4c4, + Grey77 = 0xc4c4c4, + Gray78 = 0xc7c7c7, + Grey78 = 0xc7c7c7, + Gray79 = 0xc9c9c9, + Grey79 = 0xc9c9c9, + Gray80 = 0xcccccc, + Grey80 = 0xcccccc, + Gray81 = 0xcfcfcf, + Grey81 = 0xcfcfcf, + Gray82 = 0xd1d1d1, + Grey82 = 0xd1d1d1, + Gray83 = 0xd4d4d4, + Grey83 = 0xd4d4d4, + Gray84 = 0xd6d6d6, + Grey84 = 0xd6d6d6, + Gray85 = 0xd9d9d9, + Grey85 = 0xd9d9d9, + Gray86 = 0xdbdbdb, + Grey86 = 0xdbdbdb, + Gray87 = 0xdedede, + Grey87 = 0xdedede, + Gray88 = 0xe0e0e0, + Grey88 = 0xe0e0e0, + Gray89 = 0xe3e3e3, + Grey89 = 0xe3e3e3, + Gray90 = 0xe5e5e5, + Grey90 = 0xe5e5e5, + Gray91 = 0xe8e8e8, + Grey91 = 0xe8e8e8, + Gray92 = 0xebebeb, + Grey92 = 0xebebeb, + Gray93 = 0xededed, + Grey93 = 0xededed, + Gray94 = 0xf0f0f0, + Grey94 = 0xf0f0f0, + Gray95 = 0xf2f2f2, + Grey95 = 0xf2f2f2, + Gray96 = 0xf5f5f5, + Grey96 = 0xf5f5f5, + Gray97 = 0xf7f7f7, + Grey97 = 0xf7f7f7, + Gray98 = 0xfafafa, + Grey98 = 0xfafafa, + Gray99 = 0xfcfcfc, + Grey99 = 0xfcfcfc, + Gray100 = 0xffffff, + Grey100 = 0xffffff, + DarkGrey = 0xa9a9a9, + DarkGray = 0xa9a9a9, + DarkBlue = 0x00008b, + DarkCyan = 0x008b8b, + DarkMagenta = 0x8b008b, + DarkRed = 0x8b0000, + LightGreen = 0x90ee90, + Crimson = 0xdc143c, + Indigo = 0x4b0082, + Olive = 0x808000, + RebeccaPurple = 0x663399, + Silver = 0xc0c0c0, + Teal = 0x008080, +}; +}; +} + +#endif diff --git a/Source/ThirdParty/tracy/common/TracySystem.cpp b/Source/ThirdParty/tracy/common/TracySystem.cpp index eb831fe20..78cce7bdf 100644 --- a/Source/ThirdParty/tracy/common/TracySystem.cpp +++ b/Source/ThirdParty/tracy/common/TracySystem.cpp @@ -351,3 +351,20 @@ TRACY_API void ___tracy_set_thread_name( const char* name ) { tracy::SetThreadNa #ifdef __cplusplus } #endif + +// Inset graphics integration (within Tracy module) +#define TRACY_GPU_IMPL 1 +#if TRACY_GPU_D3D11 +#include +static_assert(sizeof(tracy::D3D11ZoneScope) <= TracyD3D11ZoneSize, "Invalid zone size"); +#endif +#if TRACY_GPU_D3D12 +#include +static_assert(sizeof(tracy::D3D12ZoneScope) <= TracyD3D12ZoneSize, "Invalid zone size"); +#endif +#if TRACY_GPU_VULKAN +#define GRAPHICS_API_VULKAN 1 +#include "Engine/GraphicsDevice/Vulkan/IncludeVulkanHeaders.h" +#include +static_assert(sizeof(tracy::VkCtxScope) <= TracyVulkanZoneSize, "Invalid zone size"); +#endif diff --git a/Source/ThirdParty/tracy/tracy.Build.cs b/Source/ThirdParty/tracy/tracy.Build.cs index beb0e9f89..6a26d21ff 100644 --- a/Source/ThirdParty/tracy/tracy.Build.cs +++ b/Source/ThirdParty/tracy/tracy.Build.cs @@ -15,6 +15,11 @@ public class tracy : ThirdPartyModule /// public static bool OnDemand = true; + /// + /// Enables GPU profiling. + /// + public static bool GPU = true; + /// public override void Init() { @@ -56,8 +61,25 @@ public class tracy : ThirdPartyModule options.PrivateDefinitions.Add("TRACY_USE_MALLOC"); options.PrivateDefinitions.Add("TRACY_ONLY_IPV4"); options.PrivateDefinitions.Add("TRACY_NO_PIPE"); + options.PrivateDefinitions.Add("TRACY_NO_CODE_TRANSFER"); break; } + + if (GPU) + { + // Ask Graphics module which graphics backends are active + var graphics = new Graphics(); + graphics.FilePath = FilePath; + graphics.FolderPath = FolderPath; + var graphicsOptions = (BuildOptions)options.Clone(); + graphics.Setup(graphicsOptions); + if (graphicsOptions.PrivateDependencies.Contains("GraphicsDeviceDX11")) + options.PrivateDefinitions.Add("TRACY_GPU_D3D11"); + if (graphicsOptions.PrivateDependencies.Contains("GraphicsDeviceDX12")) + options.PrivateDefinitions.Add("TRACY_GPU_D3D12"); + if (graphicsOptions.PrivateDependencies.Contains("GraphicsDeviceVulkan")) + options.PrivateDefinitions.Add("TRACY_GPU_VULKAN"); + } } /// diff --git a/Source/ThirdParty/tracy/tracy/Tracy.hpp b/Source/ThirdParty/tracy/tracy/Tracy.hpp index fbc0746eb..5e26dec10 100644 --- a/Source/ThirdParty/tracy/tracy/Tracy.hpp +++ b/Source/ThirdParty/tracy/tracy/Tracy.hpp @@ -119,6 +119,8 @@ namespace tracy { +TRACY_API bool IsConnected(); + class TRACY_API Profiler { public: @@ -143,7 +145,6 @@ public: static void MemAllocCallstackNamed( const void* ptr, size_t size, int depth, bool secure, const char* name ); static void MemFreeCallstackNamed( const void* ptr, int depth, bool secure, const char* name ); static void SendCallstack( int depth ); - static void ParameterRegister( ParameterCallback cb ); static void ParameterRegister( ParameterCallback cb, void* data ); static void ParameterSetup( uint32_t idx, const char* name, bool isBool, int32_t val ); }; @@ -255,7 +256,7 @@ public: #define TracySourceCallbackRegister( cb, data ) tracy::Profiler::SourceCallbackRegister( cb, data ) #define TracyParameterRegister( cb, data ) tracy::Profiler::ParameterRegister( cb, data ) #define TracyParameterSetup( idx, name, isBool, val ) tracy::Profiler::ParameterSetup( idx, name, isBool, val ) -#define TracyIsConnected tracy::GetProfiler().IsConnected() +#define TracyIsConnected tracy::IsConnected() #define TracySetProgramName( name ) tracy::GetProfiler().SetProgramName( name ); #ifdef TRACY_FIBERS diff --git a/Source/ThirdParty/tracy/tracy/TracyD3D11.hpp b/Source/ThirdParty/tracy/tracy/TracyD3D11.hpp new file mode 100644 index 000000000..06fcd6225 --- /dev/null +++ b/Source/ThirdParty/tracy/tracy/TracyD3D11.hpp @@ -0,0 +1,456 @@ +#ifndef __TRACYD3D11_HPP__ +#define __TRACYD3D11_HPP__ + +#define TracyD3D11ZoneSize 16 + +#ifndef TRACY_ENABLE + +#define TracyD3D11Context(device,queue) nullptr +#define TracyD3D11Destroy(ctx) +#define TracyD3D11ContextName(ctx, name, size) + +#define TracyD3D11NewFrame(ctx) + +#define TracyD3D11Zone(ctx, name) +#define TracyD3D11ZoneC(ctx, name, color) +#define TracyD3D11NamedZone(ctx, varname, name, active) +#define TracyD3D11NamedZoneC(ctx, varname, name, color, active) +#define TracyD3D11ZoneTransient(ctx, varname, name, active) + +#define TracyD3D11ZoneS(ctx, name, depth) +#define TracyD3D11ZoneCS(ctx, name, color, depth) +#define TracyD3D11NamedZoneS(ctx, varname, name, depth, active) +#define TracyD3D11NamedZoneCS(ctx, varname, name, color, depth, active) +#define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) + +#define TracyD3D11Collect(ctx) + +namespace tracy +{ +class D3D11ZoneScope {}; +} + +using TracyD3D11Ctx = void*; + +#elif TRACY_GPU_IMPL + +#include +#include +#include + +#include "../client/TracyProfiler.hpp" +#include "../client/TracyCallstack.hpp" +#include "../common/TracyYield.hpp" +#include "../common/TracyColor.hpp" + +#include + +#define TRACY_CALLSTACK 0 +#define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK ) +#define TracyD3D11Panic(msg, ...) do { assert(false && "TracyD3D11: " msg); TracyMessageLC("TracyD3D11: " msg, tracy::Color::Red4); __VA_ARGS__; } while(false); + +namespace tracy +{ + +class D3D11Ctx +{ + friend class D3D11ZoneScope; + + static constexpr uint32_t MaxQueries = 64 * 1024; + + enum CollectMode { POLL, BLOCK }; + +public: + tracy_force_inline D3D11Ctx( ID3D11Device* device, ID3D11DeviceContext* devicectx ) + { + // TODO: consider calling ID3D11Device::GetImmediateContext() instead of passing it as an argument + m_device = device; + device->AddRef(); + m_immediateDevCtx = devicectx; + devicectx->AddRef(); + + { + D3D11_QUERY_DESC desc = { }; + desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT; + if (FAILED(m_device->CreateQuery(&desc, &m_disjointQuery))) + { + TracyD3D11Panic("unable to create disjoint timestamp query.", return); + } + } + + for (ID3D11Query*& query : m_queries) + { + D3D11_QUERY_DESC desc = { }; + desc.Query = D3D11_QUERY_TIMESTAMP; + if (FAILED(m_device->CreateQuery(&desc, &query))) + { + TracyD3D11Panic("unable to create timestamp query.", return); + } + } + + // Calibrate CPU and GPU timestamps + int64_t tcpu = 0; + int64_t tgpu = 0; + for (int attempts = 0; attempts < 50; attempts++) + { + m_immediateDevCtx->Begin(m_disjointQuery); + m_immediateDevCtx->End(m_queries[0]); + m_immediateDevCtx->End(m_disjointQuery); + + int64_t tcpu0 = Profiler::GetTime(); + WaitForQuery(m_disjointQuery); + // NOTE: one would expect that by waiting for the enclosing disjoint query to finish, + // all timestamp queries within would also be readily available, but that does not + // seem to be the case here... See https://github.com/wolfpld/tracy/issues/947 + WaitForQuery(m_queries[0]); + int64_t tcpu1 = Profiler::GetTime(); + + D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = { }; + if (m_immediateDevCtx->GetData(m_disjointQuery, &disjoint, sizeof(disjoint), 0) != S_OK) + { + TracyMessageLC("TracyD3D11: unable to query GPU timestamp; retrying...", tracy::Color::Tomato); + continue; + } + + if (disjoint.Disjoint) + continue; + + UINT64 timestamp = 0; + if (m_immediateDevCtx->GetData(m_queries[0], ×tamp, sizeof(timestamp), 0) != S_OK) + continue; // this should never happen (we waited for the query to finish above) + + tcpu = tcpu0 + (tcpu1 - tcpu0) * 1 / 2; + tgpu = timestamp * (1000000000 / disjoint.Frequency); + break; + } + + // ready to roll + m_contextId = GetGpuCtxCounter().fetch_add(1); + m_immediateDevCtx->Begin(m_disjointQuery); + m_previousCheckpoint = m_nextCheckpoint = 0; + + auto* item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuNewContext ); + MemWrite( &item->gpuNewContext.cpuTime, tcpu ); + MemWrite( &item->gpuNewContext.gpuTime, tgpu ); + MemWrite( &item->gpuNewContext.thread, uint32_t(0) ); // #TODO: why not GetThreadHandle()? + MemWrite( &item->gpuNewContext.period, 1.0f ); + MemWrite( &item->gpuNewContext.context, m_contextId); + MemWrite( &item->gpuNewContext.flags, uint8_t(0) ); + MemWrite( &item->gpuNewContext.type, GpuContextType::Direct3D11 ); + +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem( *item ); +#endif + + Profiler::QueueSerialFinish(); + } + + tracy_force_inline ~D3D11Ctx() + { + // collect all pending timestamps before destroying everything + do + { + Collect(BLOCK); + } while (m_previousCheckpoint != m_queryCounter); + + for (ID3D11Query* query : m_queries) + { + query->Release(); + } + m_immediateDevCtx->End(m_disjointQuery); + m_disjointQuery->Release(); + m_immediateDevCtx->Release(); + m_device->Release(); + } + + tracy_force_inline void Name( const char* name, uint16_t len ) + { + auto ptr = (char*)tracy_malloc( len ); + memcpy( ptr, name, len ); + + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuContextName ); + MemWrite( &item->gpuContextNameFat.context, m_contextId ); + MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); + MemWrite( &item->gpuContextNameFat.size, len ); +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem( *item ); +#endif + Profiler::QueueSerialFinish(); + } + + void Collect(CollectMode mode = POLL) + { +#ifdef TRACY_ON_DEMAND + if( !GetProfiler().IsConnected() ) + { + m_previousCheckpoint = m_nextCheckpoint = m_queryCounter; + return; + } +#endif + + if (m_previousCheckpoint == m_nextCheckpoint) + { + uintptr_t nextCheckpoint = m_queryCounter; + if (nextCheckpoint == m_nextCheckpoint) + { + return; + } + m_nextCheckpoint = nextCheckpoint; + m_immediateDevCtx->End(m_disjointQuery); + } + + if (mode == CollectMode::BLOCK) + { + WaitForQuery(m_disjointQuery); + } + + D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = { }; + if (m_immediateDevCtx->GetData(m_disjointQuery, &disjoint, sizeof(disjoint), D3D11_ASYNC_GETDATA_DONOTFLUSH) != S_OK) + { + return; + } + + if (disjoint.Disjoint == TRUE) + { + m_previousCheckpoint = m_nextCheckpoint; + TracyD3D11Panic("disjoint timestamps detected; dropping."); + return; + } + + auto begin = m_previousCheckpoint; + auto end = m_nextCheckpoint; + for (auto i = begin; i != end; ++i) + { + uint32_t k = RingIndex(i); + UINT64 timestamp = 0; + if (m_immediateDevCtx->GetData(m_queries[k], ×tamp, sizeof(timestamp), 0) != S_OK) + { + TracyD3D11Panic("timestamp expected to be ready, but it was not!"); + break; + } + timestamp *= (1000000000ull / disjoint.Frequency); + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuTime); + MemWrite(&item->gpuTime.gpuTime, static_cast(timestamp)); + MemWrite(&item->gpuTime.queryId, static_cast(k)); + MemWrite(&item->gpuTime.context, m_contextId); + Profiler::QueueSerialFinish(); + } + + // disjoint timestamp queries should only be invoked once per frame or less + // https://learn.microsoft.com/en-us/windows/win32/api/d3d11/ne-d3d11-d3d11_query + m_immediateDevCtx->Begin(m_disjointQuery); + m_previousCheckpoint = m_nextCheckpoint; + } + +private: + tracy_force_inline uint32_t RingIndex(uintptr_t index) + { + index %= MaxQueries; + return static_cast(index); + } + + tracy_force_inline uint32_t RingCount(uintptr_t begin, uintptr_t end) + { + // wrap-around safe: all unsigned + uintptr_t count = end - begin; + return static_cast(count); + } + + tracy_force_inline uint32_t NextQueryId() + { + auto id = m_queryCounter++; + if (RingCount(m_previousCheckpoint, id) >= MaxQueries) + { + TracyD3D11Panic("too many pending timestamp queries."); + // #TODO: return some sentinel value; ideally a "hidden" query index + } + return RingIndex(id); + } + + tracy_force_inline ID3D11Query* GetQueryObjectFromId(uint32_t id) + { + return m_queries[id]; + } + + tracy_force_inline void WaitForQuery(ID3D11Query* query) + { + m_immediateDevCtx->Flush(); + while (m_immediateDevCtx->GetData(query, nullptr, 0, 0) != S_OK) + YieldThread(); // busy-wait :-( attempt to reduce power usage with _mm_pause() & friends... + } + + tracy_force_inline uint8_t GetContextId() const + { + return m_contextId; + } + + ID3D11Device* m_device = nullptr; + ID3D11DeviceContext* m_immediateDevCtx = nullptr; + + ID3D11Query* m_queries[MaxQueries]; + ID3D11Query* m_disjointQuery = nullptr; + + uint8_t m_contextId = 255; // NOTE: apparently, 255 means invalid id; is this documented anywhere? + + uintptr_t m_queryCounter = 0; + + uintptr_t m_previousCheckpoint = 0; + uintptr_t m_nextCheckpoint = 0; +}; + +class D3D11ZoneScope +{ +public: + tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, bool active ) + : D3D11ZoneScope(ctx, active) + { + if( !m_active ) return; + + auto* item = Profiler::QueueSerial(); + WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast(srcloc)); + } + + tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int32_t depth, bool active ) + : D3D11ZoneScope(ctx, active) + { + if( !m_active ) return; + + if( depth > 0 && has_callstack() ) + { + auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); + WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast(srcloc)); + } + else + { + auto* item = Profiler::QueueSerial(); + WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast(srcloc)); + } + } + + tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool active) + : D3D11ZoneScope(ctx, active) + { + if( !m_active ) return; + + const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz); + + auto* item = Profiler::QueueSerial(); + WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation); + } + + tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool active) + : D3D11ZoneScope(ctx, active) + { + if( !m_active ) return; + + const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz); + + if ( depth > 0 && has_callstack() ) + { + auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); + WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation); + } + else + { + auto* item = Profiler::QueueSerial(); + WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation); + } + } + + tracy_force_inline ~D3D11ZoneScope() + { + if( !m_active ) return; + + const auto queryId = m_ctx->NextQueryId(); + m_ctx->m_immediateDevCtx->End(m_ctx->GetQueryObjectFromId(queryId)); + + auto* item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial ); + MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneEnd.context, m_ctx->GetContextId() ); + Profiler::QueueSerialFinish(); + } + +private: + tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, bool active ) +#ifdef TRACY_ON_DEMAND + : m_active( active && GetProfiler().IsConnected() ) +#else + : m_active( active ) +#endif + { + if( !m_active ) return; + m_ctx = ctx; + } + + void WriteQueueItem(tracy::QueueItem* item, tracy::QueueType queueItemType, uint64_t sourceLocation) + { + const auto queryId = m_ctx->NextQueryId(); + m_ctx->m_immediateDevCtx->End(m_ctx->GetQueryObjectFromId(queryId)); + + MemWrite( &item->hdr.type, queueItemType); + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.srcloc, sourceLocation ); + MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, m_ctx->GetContextId() ); + Profiler::QueueSerialFinish(); + } + + const bool m_active; + + D3D11Ctx* m_ctx; +}; + +void* CreateD3D11Context( ID3D11Device* device, ID3D11DeviceContext* devicectx ) +{ + auto ctx = (D3D11Ctx*)tracy_malloc( sizeof( D3D11Ctx ) ); + new(ctx) D3D11Ctx( device, devicectx ); + ctx->Name("D3D11", 5); + return ctx; +} + +void CollectD3D11Context(void* ctx) +{ + ((D3D11Ctx*)ctx)->Collect(); +} + +void DestroyD3D11Context(void* ctx ) +{ + ((D3D11Ctx*)ctx)->~D3D11Ctx(); + tracy_free( ctx ); +} + +void BeginD3D11ZoneScope(void* zone, void* ctx, const char* name, size_t nameLen) +{ + new(zone) tracy::D3D11ZoneScope{ (tracy::D3D11Ctx*)ctx, 0, 0, 0, 0, 0, name, nameLen, true }; +} + +void EndD3D11ZoneScope(void* zone) +{ + ((tracy::D3D11ZoneScope*)zone)->~D3D11ZoneScope(); +} +} + +#undef TracyD3D11Panic + +#else + +// Forward declarations to be used in engine +namespace tracy +{ + extern void* CreateD3D11Context(ID3D11Device* device, ID3D11DeviceContext* devicectx); + extern void DestroyD3D11Context(void* ctx); + extern void CollectD3D11Context(void* ctx); + extern void BeginD3D11ZoneScope(void* zone, void* ctx, const char* name, size_t nameLen); + extern void EndD3D11ZoneScope(void* zone); +} + +#endif + +#endif diff --git a/Source/ThirdParty/tracy/tracy/TracyD3D12.hpp b/Source/ThirdParty/tracy/tracy/TracyD3D12.hpp new file mode 100644 index 000000000..75aaedefc --- /dev/null +++ b/Source/ThirdParty/tracy/tracy/TracyD3D12.hpp @@ -0,0 +1,529 @@ +#ifndef __TRACYD3D12_HPP__ +#define __TRACYD3D12_HPP__ + +#define TracyD3D12ZoneSize 32 + +#ifndef TRACY_ENABLE + +#define TracyD3D12Context(device, queue) nullptr +#define TracyD3D12Destroy(ctx) +#define TracyD3D12ContextName(ctx, name, size) + +#define TracyD3D12NewFrame(ctx) + +#define TracyD3D12Zone(ctx, cmdList, name) +#define TracyD3D12ZoneC(ctx, cmdList, name, color) +#define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) +#define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) +#define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) + +#define TracyD3D12ZoneS(ctx, cmdList, name, depth) +#define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) +#define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) +#define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) +#define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) + +#define TracyD3D12Collect(ctx) + +namespace tracy +{ + class D3D12ZoneScope {}; +} + +using TracyD3D12Ctx = void*; + +#elif TRACY_GPU_IMPL + +#include "../client/TracyProfiler.hpp" +#include "../client/TracyProfiler.hpp" +#include "../client/TracyCallstack.hpp" +#include "../common/TracyColor.hpp" + +#include +#include +#include +#include +#include + +#define TRACY_CALLSTACK 0 +#define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK ) +#define TracyD3D12Panic(msg, ...) do { assert(false && "TracyD3D12: " msg); TracyMessageLC("TracyD3D12: " msg, tracy::Color::Red4); __VA_ARGS__; } while(false); + +namespace tracy +{ + + struct D3D12QueryPayload + { + uint32_t m_queryIdStart = 0; + uint32_t m_queryCount = 0; + }; + + // Command queue context. + class D3D12QueueCtx + { + friend class D3D12ZoneScope; + + ID3D12Device* m_device = nullptr; + ID3D12CommandQueue* m_queue = nullptr; + uint8_t m_contextId = 255; // TODO: apparently, 255 means "invalid id"; is this documented somewhere? + ID3D12QueryHeap* m_queryHeap = nullptr; + ID3D12Resource* m_readbackBuffer = nullptr; + + // In-progress payload. + uint32_t m_queryLimit = 0; + std::atomic m_queryCounter = 0; + uint32_t m_previousQueryCounter = 0; + + uint32_t m_activePayload = 0; + ID3D12Fence* m_payloadFence = nullptr; + std::queue m_payloadQueue; + + UINT64 m_prevCalibrationTicksCPU = 0; + + void RecalibrateClocks() + { + UINT64 cpuTimestamp; + UINT64 gpuTimestamp; + if (FAILED(m_queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp))) + { + TracyD3D12Panic("failed to obtain queue clock calibration counters.", return); + } + + int64_t cpuDeltaTicks = cpuTimestamp - m_prevCalibrationTicksCPU; + if (cpuDeltaTicks > 0) + { + static const int64_t nanosecodsPerTick = int64_t(1000000000) / GetFrequencyQpc(); + int64_t cpuDeltaNS = cpuDeltaTicks * nanosecodsPerTick; + // Save the device cpu timestamp, not the Tracy profiler timestamp: + m_prevCalibrationTicksCPU = cpuTimestamp; + + cpuTimestamp = Profiler::GetTime(); + + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuCalibration); + MemWrite(&item->gpuCalibration.gpuTime, gpuTimestamp); + MemWrite(&item->gpuCalibration.cpuTime, cpuTimestamp); + MemWrite(&item->gpuCalibration.cpuDelta, cpuDeltaNS); + MemWrite(&item->gpuCalibration.context, GetId()); + SubmitQueueItem(item); + } + } + + tracy_force_inline void SubmitQueueItem(tracy::QueueItem* item) + { +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem(*item); +#endif + Profiler::QueueSerialFinish(); + } + + public: + D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue) + : m_device(device) + , m_queue(queue) + { + // Verify we support timestamp queries on this queue. + + if (queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY) + { + D3D12_FEATURE_DATA_D3D12_OPTIONS3 featureData{}; + + HRESULT hr = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &featureData, sizeof(featureData)); + if (FAILED(hr) || (featureData.CopyQueueTimestampQueriesSupported == FALSE)) + { + TracyD3D12Panic("Platform does not support profiling of copy queues.", return); + } + } + + static constexpr uint32_t MaxQueries = 64 * 1024; // Must be even, because queries are (begin, end) pairs + m_queryLimit = MaxQueries; + + D3D12_QUERY_HEAP_DESC heapDesc{}; + heapDesc.Type = queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY ? D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP : D3D12_QUERY_HEAP_TYPE_TIMESTAMP; + heapDesc.Count = m_queryLimit; + heapDesc.NodeMask = 0; // #TODO: Support multiple adapters. + + while (FAILED(device->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&m_queryHeap)))) + { + m_queryLimit /= 2; + heapDesc.Count = m_queryLimit; + } + + // Create a readback buffer, which will be used as a destination for the query data. + + D3D12_RESOURCE_DESC readbackBufferDesc{}; + readbackBufferDesc.Alignment = 0; + readbackBufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + readbackBufferDesc.Width = m_queryLimit * sizeof(uint64_t); + readbackBufferDesc.Height = 1; + readbackBufferDesc.DepthOrArraySize = 1; + readbackBufferDesc.Format = DXGI_FORMAT_UNKNOWN; + readbackBufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; // Buffers are always row major. + readbackBufferDesc.MipLevels = 1; + readbackBufferDesc.SampleDesc.Count = 1; + readbackBufferDesc.SampleDesc.Quality = 0; + readbackBufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + D3D12_HEAP_PROPERTIES readbackHeapProps{}; + readbackHeapProps.Type = D3D12_HEAP_TYPE_READBACK; + readbackHeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + readbackHeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + readbackHeapProps.CreationNodeMask = 0; + readbackHeapProps.VisibleNodeMask = 0; // #TODO: Support multiple adapters. + + if (FAILED(device->CreateCommittedResource(&readbackHeapProps, D3D12_HEAP_FLAG_NONE, &readbackBufferDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_readbackBuffer)))) + { + TracyD3D12Panic("Failed to create query readback buffer.", return); + } + + if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_payloadFence)))) + { + TracyD3D12Panic("Failed to create payload fence.", return); + } + + float period = [queue]() + { + uint64_t timestampFrequency; + if (FAILED(queue->GetTimestampFrequency(×tampFrequency))) + { + return 0.0f; + } + return static_cast( 1E+09 / static_cast(timestampFrequency) ); + }(); + + if (period == 0.0f) + { + TracyD3D12Panic("Failed to get timestamp frequency.", return); + } + + uint64_t cpuTimestamp; + uint64_t gpuTimestamp; + if (FAILED(queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp))) + { + TracyD3D12Panic("Failed to get queue clock calibration.", return); + } + + // Save the device cpu timestamp, not the profiler's timestamp. + m_prevCalibrationTicksCPU = cpuTimestamp; + + cpuTimestamp = Profiler::GetTime(); + + // all checked: ready to roll + m_contextId = GetGpuCtxCounter().fetch_add(1); + + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuNewContext); + MemWrite(&item->gpuNewContext.cpuTime, cpuTimestamp); + MemWrite(&item->gpuNewContext.gpuTime, gpuTimestamp); + MemWrite(&item->gpuNewContext.thread, decltype(item->gpuNewContext.thread)(0)); // #TODO: why 0 instead of GetThreadHandle()? + MemWrite(&item->gpuNewContext.period, period); + MemWrite(&item->gpuNewContext.context, GetId()); + MemWrite(&item->gpuNewContext.flags, GpuContextCalibration); + MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12); + SubmitQueueItem(item); + } + + ~D3D12QueueCtx() + { + // collect all pending timestamps + while (m_payloadFence->GetCompletedValue() != m_activePayload) + /* busy-wait ... */; + Collect(); + m_payloadFence->Release(); + m_readbackBuffer->Release(); + m_queryHeap->Release(); + } + + + void NewFrame() + { + uint32_t queryCounter = m_queryCounter.exchange(0); + m_payloadQueue.emplace(D3D12QueryPayload{ m_previousQueryCounter, queryCounter }); + m_previousQueryCounter += queryCounter; + + if (m_previousQueryCounter >= m_queryLimit) + { + m_previousQueryCounter -= m_queryLimit; + } + + m_queue->Signal(m_payloadFence, ++m_activePayload); + } + + void Name( const char* name, uint16_t len ) + { + auto ptr = (char*)tracy_malloc( len ); + memcpy( ptr, name, len ); + + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuContextName ); + MemWrite( &item->gpuContextNameFat.context, GetId()); + MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); + MemWrite( &item->gpuContextNameFat.size, len ); + SubmitQueueItem(item); + } + + void Collect() + { +#ifdef TRACY_ON_DEMAND + if (!GetProfiler().IsConnected()) + { + m_queryCounter = 0; + + return; + } +#endif + + // Find out what payloads are available. + const auto newestReadyPayload = m_payloadFence->GetCompletedValue(); + const auto payloadCount = m_payloadQueue.size() - (m_activePayload - newestReadyPayload); + + if (!payloadCount) + { + return; // No payloads are available yet, exit out. + } + + D3D12_RANGE mapRange{ 0, m_queryLimit * sizeof(uint64_t) }; + + // Map the readback buffer so we can fetch the query data from the GPU. + void* readbackBufferMapping = nullptr; + + if (FAILED(m_readbackBuffer->Map(0, &mapRange, &readbackBufferMapping))) + { + TracyD3D12Panic("Failed to map readback buffer.", return); + } + + auto* timestampData = static_cast(readbackBufferMapping); + + for (uint32_t i = 0; i < payloadCount; ++i) + { + const auto& payload = m_payloadQueue.front(); + + for (uint32_t j = 0; j < payload.m_queryCount; ++j) + { + const auto counter = (payload.m_queryIdStart + j) % m_queryLimit; + const auto timestamp = timestampData[counter]; + const auto queryId = counter; + + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuTime); + MemWrite(&item->gpuTime.gpuTime, timestamp); + MemWrite(&item->gpuTime.queryId, static_cast(queryId)); + MemWrite(&item->gpuTime.context, GetId()); + + Profiler::QueueSerialFinish(); + } + + m_payloadQueue.pop(); + } + + m_readbackBuffer->Unmap(0, nullptr); + + // Recalibrate to account for drift. + RecalibrateClocks(); + } + + private: + tracy_force_inline uint32_t NextQueryId() + { + uint32_t queryCounter = m_queryCounter.fetch_add(2); + if (queryCounter >= m_queryLimit) + { + TracyD3D12Panic("Submitted too many GPU queries! Consider increasing MaxQueries."); + // #TODO: consider returning an invalid id or sentinel value here + } + + const uint32_t id = (m_previousQueryCounter + queryCounter) % m_queryLimit; + + return id; + } + + tracy_force_inline uint8_t GetId() const + { + return m_contextId; + } + }; + + class D3D12ZoneScope + { + const bool m_active; + D3D12QueueCtx* m_ctx = nullptr; + ID3D12GraphicsCommandList* m_cmdList = nullptr; + uint32_t m_queryId = 0; // Used for tracking in nested zones. + + tracy_force_inline void WriteQueueItem(QueueItem* item, QueueType type, uint64_t srcLocation) + { + MemWrite(&item->hdr.type, type); + MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime()); + MemWrite(&item->gpuZoneBegin.srcloc, srcLocation); + MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle()); + MemWrite(&item->gpuZoneBegin.queryId, static_cast(m_queryId)); + MemWrite(&item->gpuZoneBegin.context, m_ctx->GetId()); + Profiler::QueueSerialFinish(); + } + + tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, bool active) +#ifdef TRACY_ON_DEMAND + : m_active(active&& GetProfiler().IsConnected()) +#else + : m_active(active) +#endif + { + if (!m_active) return; + + m_ctx = ctx; + m_cmdList = cmdList; + + m_queryId = m_ctx->NextQueryId(); + m_cmdList->EndQuery(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, m_queryId); + } + + public: + tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, bool active) + : D3D12ZoneScope(ctx, cmdList, active) + { + if (!m_active) return; + + auto* item = Profiler::QueueSerial(); + WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast(srcLocation)); + } + + tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, int32_t depth, bool active) + : D3D12ZoneScope(ctx, cmdList, active) + { + if (!m_active) return; + + auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); + WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast(srcLocation)); + } + + tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, bool active) + : D3D12ZoneScope(ctx, cmdList, active) + { + if (!m_active) return; + + const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz); + + auto* item = Profiler::QueueSerial(); + WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation); + } + + tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, int32_t depth, bool active) + : D3D12ZoneScope(ctx, cmdList, active) + { + if (!m_active) return; + + const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz); + + auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); + WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation); + } + + tracy_force_inline ~D3D12ZoneScope() + { + if (!m_active) return; + + const auto queryId = m_queryId + 1; // Our end query slot is immediately after the begin slot. + m_cmdList->EndQuery(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, queryId); + + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial); + MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime()); + MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle()); + MemWrite(&item->gpuZoneEnd.queryId, static_cast(queryId)); + MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId()); + Profiler::QueueSerialFinish(); + + m_cmdList->ResolveQueryData(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, m_queryId, 2, m_ctx->m_readbackBuffer, m_queryId * sizeof(uint64_t)); + } + }; + + void* CreateD3D12Context(ID3D12Device* device, ID3D12CommandQueue* queue) + { + auto* ctx = static_cast(tracy_malloc(sizeof(D3D12QueueCtx))); + new (ctx) D3D12QueueCtx{ device, queue }; + ctx->Name("D3D12", 5); + return ctx; + } + + void CollectD3D12Context(void* ctx) + { + ((D3D12QueueCtx*)ctx)->Collect(); + ((D3D12QueueCtx*)ctx)->NewFrame(); + } + + void DestroyD3D12Context(void* ctx) + { + ((D3D12QueueCtx*)ctx)->~D3D12QueueCtx(); + tracy_free(ctx); + } + + void BeginD3D12ZoneScope(void* zone, void* ctx, ID3D12GraphicsCommandList* cmdList, const char* name, size_t nameLen) + { + new(zone) tracy::D3D12ZoneScope{ (tracy::D3D12QueueCtx*)ctx, 0, 0, 0, 0, 0, name, nameLen, cmdList, true }; + } + + void EndD3D12ZoneScope(void* zone) + { + ((tracy::D3D12ZoneScope*)zone)->~D3D12ZoneScope(); + } +} + +#undef TracyD3D12Panic + +using TracyD3D12Ctx = tracy::D3D12QueueCtx*; + +#define TracyD3D12Context(device, queue) tracy::CreateD3D12Context(device, queue); +#define TracyD3D12Destroy(ctx) tracy::DestroyD3D12Context(ctx); +#define TracyD3D12ContextName(ctx, name, size) ctx->Name(name, size); + +#define TracyD3D12NewFrame(ctx) ctx->NewFrame(); + +#define TracyD3D12UnnamedZone ___tracy_gpu_d3d12_zone +#define TracyD3D12SrcLocSymbol TracyConcat(__tracy_d3d12_source_location,TracyLine) +#define TracyD3D12SrcLocObject(name, color) static constexpr tracy::SourceLocationData TracyD3D12SrcLocSymbol { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; + +#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK +# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZoneS(ctx, TracyD3D12UnnamedZone, cmdList, name, TRACY_CALLSTACK, true) +# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneCS(ctx, TracyD3D12UnnamedZone, cmdList, name, color, TRACY_CALLSTACK, true) +# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, TRACY_CALLSTACK, active }; +# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, TRACY_CALLSTACK, active }; +# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, TRACY_CALLSTACK, active) +#else +# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZone(ctx, TracyD3D12UnnamedZone, cmdList, name, true) +# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneC(ctx, TracyD3D12UnnamedZone, cmdList, name, color, true) +# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, active }; +# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, active }; +# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) tracy::D3D12ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), cmdList, active }; +#endif + +#ifdef TRACY_HAS_CALLSTACK +# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12NamedZoneS(ctx, TracyD3D12UnnamedZone, cmdList, name, depth, true) +# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12NamedZoneCS(ctx, TracyD3D12UnnamedZone, cmdList, name, color, depth, true) +# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, depth, active }; +# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, depth, active }; +# define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) tracy::D3D12ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), cmdList, depth, active }; +#else +# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12Zone(ctx, cmdList, name) +# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12Zone(ctx, cmdList, name, color) +# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) TracyD3D12NamedZone(ctx, varname, cmdList, name, active) +# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) +# define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) +#endif + +#define TracyD3D12Collect(ctx) ctx->Collect(); + +#else + +// Forward declarations to be used in engine +namespace tracy +{ + extern void* CreateD3D12Context(ID3D12Device* device, ID3D12CommandQueue* queue); + extern void DestroyD3D12Context(void* ctx); + extern void CollectD3D12Context(void* ctx); + extern void BeginD3D12ZoneScope(void* zone, void* ctx, ID3D12GraphicsCommandList* cmdList, const char* name, size_t nameLen); + extern void EndD3D12ZoneScope(void* zone); +} + +#endif + +#endif diff --git a/Source/ThirdParty/tracy/tracy/TracyVulkan.hpp b/Source/ThirdParty/tracy/tracy/TracyVulkan.hpp new file mode 100644 index 000000000..334d2cd24 --- /dev/null +++ b/Source/ThirdParty/tracy/tracy/TracyVulkan.hpp @@ -0,0 +1,779 @@ +#ifndef __TRACYVULKAN_HPP__ +#define __TRACYVULKAN_HPP__ + +#define TracyVulkanZoneSize 24 + +#if !defined TRACY_ENABLE + +#define TracyVkContext(x,y,z,w) nullptr +#define TracyVkContextCalibrated(x,y,z,w,a,b) nullptr +#if defined VK_EXT_host_query_reset +#define TracyVkContextHostCalibrated(x,y,z,w,a) nullptr +#endif +#define TracyVkDestroy(x) +#define TracyVkContextName(c,x,y) +#define TracyVkNamedZone(c,x,y,z,w) +#define TracyVkNamedZoneC(c,x,y,z,w,a) +#define TracyVkZone(c,x,y) +#define TracyVkZoneC(c,x,y,z) +#define TracyVkZoneTransient(c,x,y,z,w) +#define TracyVkCollect(c,x) + +#define TracyVkNamedZoneS(c,x,y,z,w,a) +#define TracyVkNamedZoneCS(c,x,y,z,w,v,a) +#define TracyVkZoneS(c,x,y,z) +#define TracyVkZoneCS(c,x,y,z,w) +#define TracyVkZoneTransientS(c,x,y,z,w,a) + +namespace tracy +{ +class VkCtxScope {}; +} + +using TracyVkCtx = void*; + +#elif TRACY_GPU_VULKAN + +#if !defined VK_NULL_HANDLE +# error "You must include Vulkan headers before including TracyVulkan.hpp" +#endif + +#include +#include +#include "../client/TracyProfiler.hpp" +#include "../client/TracyCallstack.hpp" + +#include + +namespace tracy +{ + +#if defined TRACY_VK_USE_SYMBOL_TABLE +#define LoadVkDeviceCoreSymbols(Operation) \ + Operation(vkBeginCommandBuffer) \ + Operation(vkCmdResetQueryPool) \ + Operation(vkCmdWriteTimestamp) \ + Operation(vkCreateQueryPool) \ + Operation(vkDestroyQueryPool) \ + Operation(vkEndCommandBuffer) \ + Operation(vkGetQueryPoolResults) \ + Operation(vkQueueSubmit) \ + Operation(vkQueueWaitIdle) \ + Operation(vkResetQueryPool) + +#define LoadVkDeviceExtensionSymbols(Operation) \ + Operation(vkGetCalibratedTimestampsEXT) + +#define LoadVkInstanceExtensionSymbols(Operation) \ + Operation(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT) + +#define LoadVkInstanceCoreSymbols(Operation) \ + Operation(vkGetPhysicalDeviceProperties) + +struct VkSymbolTable +{ +#define MAKE_PFN(name) PFN_##name name; + LoadVkDeviceCoreSymbols(MAKE_PFN) + LoadVkDeviceExtensionSymbols(MAKE_PFN) + LoadVkInstanceExtensionSymbols(MAKE_PFN) + LoadVkInstanceCoreSymbols(MAKE_PFN) +#undef MAKE_PFN +}; + +#define VK_FUNCTION_WRAPPER(callSignature) m_symbols.callSignature +#define CONTEXT_VK_FUNCTION_WRAPPER(callSignature) m_ctx->m_symbols.callSignature +#else +#define VK_FUNCTION_WRAPPER(callSignature) callSignature +#define CONTEXT_VK_FUNCTION_WRAPPER(callSignature) callSignature +#endif + +class VkCtx +{ + friend class VkCtxScope; + + enum { QueryCount = 64 * 1024 }; + +public: +#if defined TRACY_VK_USE_SYMBOL_TABLE + VkCtx( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr, bool calibrated ) +#else + VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT) +#endif + : m_device( device ) + , m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT ) + , m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) ) + , m_head( 0 ) + , m_tail( 0 ) + , m_oldCnt( 0 ) + , m_queryCount( QueryCount ) +#if !defined TRACY_VK_USE_SYMBOL_TABLE + , m_vkGetCalibratedTimestampsEXT( vkGetCalibratedTimestampsEXT ) +#endif + { + assert( m_context != 255 ); + +#if defined TRACY_VK_USE_SYMBOL_TABLE + PopulateSymbolTable(instance, instanceProcAddr, deviceProcAddr); + if ( calibrated ) + { + m_vkGetCalibratedTimestampsEXT = m_symbols.vkGetCalibratedTimestampsEXT; + } + +#endif + + if( VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) && m_vkGetCalibratedTimestampsEXT ) + { + FindAvailableTimeDomains( physdev, VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) ); + } + + CreateQueryPool(); + + VkCommandBufferBeginInfo beginInfo = {}; + beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + VkSubmitInfo submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &cmdbuf; + + VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) ); + VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) ); + VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) ); + VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) ); + VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) ); + + int64_t tcpu, tgpu; + if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT ) + { + VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) ); + VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 ) ); + VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) ); + VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) ); + VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) ); + + tcpu = Profiler::GetTime(); + VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT ) ); + + VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) ); + VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 ) ); + VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) ); + VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) ); + VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) ); + } + else + { + FindCalibratedTimestampDeviation(); + Calibrate( device, m_prevCalibration, tgpu ); + tcpu = Profiler::GetTime(); + } + + WriteInitialItem( physdev, tcpu, tgpu ); + + m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount ); + } + +#if defined VK_EXT_host_query_reset + /** + * This alternative constructor does not use command buffers and instead uses functionality from + * VK_EXT_host_query_reset (core with 1.2 and non-optional) and VK_EXT_calibrated_timestamps. This requires + * the physical device to have another time domain apart from DEVICE to be calibrateable. + */ +#if defined TRACY_VK_USE_SYMBOL_TABLE + VkCtx( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr ) +#else + VkCtx( VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT vkResetQueryPool, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT ) +#endif + : m_device( device ) + , m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT ) + , m_context( GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed) ) + , m_head( 0 ) + , m_tail( 0 ) + , m_oldCnt( 0 ) + , m_queryCount( QueryCount ) +#if !defined TRACY_VK_USE_SYMBOL_TABLE + , m_vkGetCalibratedTimestampsEXT( vkGetCalibratedTimestampsEXT ) +#endif + { + assert( m_context != 255); + +#if defined TRACY_VK_USE_SYMBOL_TABLE + PopulateSymbolTable(instance, instanceProcAddr, deviceProcAddr); + m_vkGetCalibratedTimestampsEXT = m_symbols.vkGetCalibratedTimestampsEXT; +#endif + + assert( VK_FUNCTION_WRAPPER( vkResetQueryPool ) != nullptr ); + assert( VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) != nullptr ); + assert( VK_FUNCTION_WRAPPER( vkGetCalibratedTimestampsEXT ) != nullptr ); + + FindAvailableTimeDomains( physdev, VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) ); + + // We require a host time domain to be available to properly calibrate. + FindCalibratedTimestampDeviation(); + int64_t tgpu; + Calibrate( device, m_prevCalibration, tgpu ); + int64_t tcpu = Profiler::GetTime(); + + CreateQueryPool(); + VK_FUNCTION_WRAPPER( vkResetQueryPool( device, m_query, 0, m_queryCount ) ); + + WriteInitialItem( physdev, tcpu, tgpu ); + + // We need the buffer to be twice as large for availability values + size_t resSize = sizeof( int64_t ) * m_queryCount * 2; + m_res = (int64_t*)tracy_malloc( resSize ); + } +#endif + + ~VkCtx() + { + tracy_free( m_res ); + VK_FUNCTION_WRAPPER( vkDestroyQueryPool( m_device, m_query, nullptr ) ); + } + + void Name( const char* name, uint16_t len ) + { + auto ptr = (char*)tracy_malloc( len ); + memcpy( ptr, name, len ); + + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuContextName ); + MemWrite( &item->gpuContextNameFat.context, m_context ); + MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); + MemWrite( &item->gpuContextNameFat.size, len ); +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem( *item ); +#endif + Profiler::QueueSerialFinish(); + } + + void Collect( VkCommandBuffer cmdbuf ) + { + const uint64_t head = m_head.load(std::memory_order_relaxed); + if( m_tail == head ) return; + +#ifdef TRACY_ON_DEMAND + if( !GetProfiler().IsConnected() ) + { + VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) ); + m_tail = head; + m_oldCnt = 0; + int64_t tgpu; + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu ); + return; + } +#endif + assert( head > m_tail ); + + const unsigned int wrappedTail = (unsigned int)( m_tail % m_queryCount ); + + unsigned int cnt; + if( m_oldCnt != 0 ) + { + cnt = m_oldCnt; + m_oldCnt = 0; + } + else + { + cnt = (unsigned int)( head - m_tail ); + assert( cnt <= m_queryCount ); + if( wrappedTail + cnt > m_queryCount ) + { + cnt = m_queryCount - wrappedTail; + } + } + + + VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( m_device, m_query, wrappedTail, cnt, sizeof( int64_t ) * m_queryCount * 2, m_res, sizeof( int64_t ) * 2, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT ) ); + + for( unsigned int idx=0; idxhdr.type, QueueType::GpuTime ); + MemWrite( &item->gpuTime.gpuTime, m_res[idx * 2] ); + MemWrite( &item->gpuTime.queryId, uint16_t( wrappedTail + idx ) ); + MemWrite( &item->gpuTime.context, m_context ); + Profiler::QueueSerialFinish(); + } + + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) + { + int64_t tgpu, tcpu; + Calibrate( m_device, tcpu, tgpu ); + const auto refCpu = Profiler::GetTime(); + const auto delta = tcpu - m_prevCalibration; + if( delta > 0 ) + { + m_prevCalibration = tcpu; + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuCalibration ); + MemWrite( &item->gpuCalibration.gpuTime, tgpu ); + MemWrite( &item->gpuCalibration.cpuTime, refCpu ); + MemWrite( &item->gpuCalibration.cpuDelta, delta ); + MemWrite( &item->gpuCalibration.context, m_context ); + Profiler::QueueSerialFinish(); + } + } + + VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, wrappedTail, cnt ) ); + + m_tail += cnt; + } + + tracy_force_inline unsigned int NextQueryId() + { + const uint64_t id = m_head.fetch_add(1, std::memory_order_relaxed); + return id % m_queryCount; + } + + tracy_force_inline uint8_t GetId() const + { + return m_context; + } + + tracy_force_inline VkQueryPool GetQueryPool() const + { + return m_query; + } + +private: + tracy_force_inline void Calibrate( VkDevice device, int64_t& tCpu, int64_t& tGpu ) + { + assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ); + VkCalibratedTimestampInfoEXT spec[2] = { + { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT }, + { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain }, + }; + uint64_t ts[2]; + uint64_t deviation; + do + { + m_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, &deviation ); + } + while( deviation > m_deviation ); + +#if defined _WIN32 + tGpu = ts[0]; + tCpu = ts[1] * m_qpcToNs; +#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW + tGpu = ts[0]; + tCpu = ts[1]; +#else + assert( false ); +#endif + } + + tracy_force_inline void CreateQueryPool() + { + VkQueryPoolCreateInfo poolInfo = {}; + poolInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; + poolInfo.queryCount = m_queryCount; + poolInfo.queryType = VK_QUERY_TYPE_TIMESTAMP; + while ( VK_FUNCTION_WRAPPER( vkCreateQueryPool( m_device, &poolInfo, nullptr, &m_query ) != VK_SUCCESS ) ) + { + m_queryCount /= 2; + poolInfo.queryCount = m_queryCount; + } + } + + tracy_force_inline void FindAvailableTimeDomains( VkPhysicalDevice physicalDevice, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) + { + uint32_t num; + _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physicalDevice, &num, nullptr ); + if(num > 4) num = 4; + VkTimeDomainEXT data[4]; + _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physicalDevice, &num, data ); + VkTimeDomainEXT supportedDomain = (VkTimeDomainEXT)-1; +#if defined _WIN32 + supportedDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT; +#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW + supportedDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT; +#endif + for( uint32_t i=0; i deviation[i] ) { + minDeviation = deviation[i]; + } + } + m_deviation = minDeviation * 3 / 2; + +#if defined _WIN32 + m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() ); +#endif + } + + tracy_force_inline void WriteInitialItem( VkPhysicalDevice physdev, int64_t tcpu, int64_t tgpu ) + { + uint8_t flags = 0; + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration; + + VkPhysicalDeviceProperties prop; + VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceProperties( physdev, &prop ) ); + const float period = prop.limits.timestampPeriod; + + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuNewContext ); + MemWrite( &item->gpuNewContext.cpuTime, tcpu ); + MemWrite( &item->gpuNewContext.gpuTime, tgpu ); + memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) ); + MemWrite( &item->gpuNewContext.period, period ); + MemWrite( &item->gpuNewContext.context, m_context ); + MemWrite( &item->gpuNewContext.flags, flags ); + MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan ); + +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem( *item ); +#endif + Profiler::QueueSerialFinish(); + } + +#if defined TRACY_VK_USE_SYMBOL_TABLE + void PopulateSymbolTable( VkInstance instance, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr ) + { +#define VK_GET_DEVICE_SYMBOL( name ) \ + (PFN_##name)deviceProcAddr( m_device, #name ); +#define VK_LOAD_DEVICE_SYMBOL( name ) \ + m_symbols.name = VK_GET_DEVICE_SYMBOL( name ); +#define VK_GET_INSTANCE_SYMBOL( name ) \ + (PFN_##name)instanceProcAddr( instance, #name ); +#define VK_LOAD_INSTANCE_SYMBOL( name ) \ + m_symbols.name = VK_GET_INSTANCE_SYMBOL( name ); + + LoadVkDeviceCoreSymbols( VK_LOAD_DEVICE_SYMBOL ) + LoadVkDeviceExtensionSymbols( VK_LOAD_DEVICE_SYMBOL ) + LoadVkInstanceExtensionSymbols( VK_LOAD_INSTANCE_SYMBOL ) + LoadVkInstanceCoreSymbols( VK_LOAD_INSTANCE_SYMBOL ) +#undef VK_GET_DEVICE_SYMBOL +#undef VK_LOAD_DEVICE_SYMBOL +#undef VK_GET_INSTANCE_SYMBOL +#undef VK_LOAD_INSTANCE_SYMBOL + } +#endif + + VkDevice m_device; + VkQueryPool m_query; + VkTimeDomainEXT m_timeDomain; +#if defined TRACY_VK_USE_SYMBOL_TABLE + VkSymbolTable m_symbols; +#endif + uint64_t m_deviation; +#ifdef _WIN32 + int64_t m_qpcToNs; +#endif + int64_t m_prevCalibration; + uint8_t m_context; + + std::atomic m_head; + uint64_t m_tail; + unsigned int m_oldCnt; + unsigned int m_queryCount; + + int64_t* m_res; + + PFN_vkGetCalibratedTimestampsEXT m_vkGetCalibratedTimestampsEXT; +}; + +class VkCtxScope +{ +public: + tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( !m_active ) return; + m_cmdbuf = cmdbuf; + m_ctx = ctx; + + const auto queryId = ctx->NextQueryId(); + CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); + + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial ); + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); + MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, ctx->GetId() ); + Profiler::QueueSerialFinish(); + } + + tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int32_t depth, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( !m_active ) return; + m_cmdbuf = cmdbuf; + m_ctx = ctx; + + const auto queryId = ctx->NextQueryId(); + CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); + + QueueItem *item; + if( depth > 0 && has_callstack() ) + { + item = Profiler::QueueSerialCallstack( Callstack( depth ) ); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial ); + } + else + { + item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial ); + } + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); + MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, ctx->GetId() ); + Profiler::QueueSerialFinish(); + } + + tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( !m_active ) return; + m_cmdbuf = cmdbuf; + m_ctx = ctx; + + const auto queryId = ctx->NextQueryId(); + CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); + + const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial ); + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.srcloc, srcloc ); + MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, ctx->GetId() ); + Profiler::QueueSerialFinish(); + } + + tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, int32_t depth, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( !m_active ) return; + m_cmdbuf = cmdbuf; + m_ctx = ctx; + + const auto queryId = ctx->NextQueryId(); + CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); + + const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); + QueueItem *item; + if( depth > 0 && has_callstack() ) + { + item = Profiler::QueueSerialCallstack( Callstack( depth ) ); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial ); + } + else + { + item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial ); + } + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.srcloc, srcloc ); + MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, ctx->GetId() ); + Profiler::QueueSerialFinish(); + } + + tracy_force_inline ~VkCtxScope() + { + if( !m_active ) return; + + const auto queryId = m_ctx->NextQueryId(); + CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_ctx->m_query, queryId ) ); + + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial ); + MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneEnd.context, m_ctx->GetId() ); + Profiler::QueueSerialFinish(); + } + +private: + const bool m_active; + + VkCommandBuffer m_cmdbuf; + VkCtx* m_ctx; +}; + +#if defined TRACY_VK_USE_SYMBOL_TABLE +void* CreateVkContext( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr, bool calibrated = false ) +#else +void* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct ) +#endif +{ + auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) ); +#if defined TRACY_VK_USE_SYMBOL_TABLE + new(ctx) VkCtx( instance, physdev, device, queue, cmdbuf, instanceProcAddr, getDeviceProcAddr, calibrated ); +#else + new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct ); +#endif + return ctx; +} + +#if defined VK_EXT_host_query_reset +#if defined TRACY_VK_USE_SYMBOL_TABLE +void* CreateVkContext( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr ) +#else +void* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT qpreset, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct ) +#endif +{ + auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) ); +#if defined TRACY_VK_USE_SYMBOL_TABLE + new(ctx) VkCtx( instance, physdev, device, instanceProcAddr, getDeviceProcAddr ); +#else + new(ctx) VkCtx( physdev, device, qpreset, gpdctd, gct ); +#endif + ctx->Name("Vulkan", 6); + return ctx; +} +#endif + +void DestroyVkContext( void* ctx ) +{ + ((VkCtx*)ctx)->~VkCtx(); + tracy_free( ctx ); +} + +void CollectVkContext( void* ctx, VkCommandBuffer cmdbuf ) +{ + ((VkCtx*)ctx)->Collect(cmdbuf); +} + +void BeginVkZoneScope(void* zone, void* ctx, VkCommandBuffer cmdbuf, const char* name, size_t nameLen) +{ + new(zone) tracy::VkCtxScope{ (tracy::VkCtx*)ctx, 0, 0, 0, 0, 0, name, nameLen, cmdbuf, true }; +} + +void EndVkZoneScope(void* zone) +{ + ((tracy::VkCtxScope*)zone)->~VkCtxScope(); +} + +} + +using TracyVkCtx = tracy::VkCtx*; + +#if defined TRACY_VK_USE_SYMBOL_TABLE +#define TracyVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ); +#else +#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, nullptr, nullptr ); +#endif +#if defined TRACY_VK_USE_SYMBOL_TABLE +#define TracyVkContextCalibrated( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr, true ); +#else +#define TracyVkContextCalibrated( physdev, device, queue, cmdbuf, gpdctd, gct ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, gpdctd, gct ); +#endif +#if defined VK_EXT_host_query_reset +#if defined TRACY_VK_USE_SYMBOL_TABLE +#define TracyVkContextHostCalibrated( instance, physdev, device, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, instanceProcAddr, deviceProcAddr ); +#else +#define TracyVkContextHostCalibrated( physdev, device, qpreset, gpdctd, gct ) tracy::CreateVkContext( physdev, device, qpreset, gpdctd, gct ); +#endif +#endif +#define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx ); +#define TracyVkContextName( ctx, name, size ) ctx->Name( name, size ); +#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK +# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, TRACY_CALLSTACK, active ); +# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, TRACY_CALLSTACK, active ); +# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, TRACY_CALLSTACK, true ) +# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, TRACY_CALLSTACK, true ) +# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) TracyVkZoneTransientS( ctx, varname, cmdbuf, name, TRACY_CALLSTACK, active ) +#else +# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, active ); +# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, active ); +# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZone( ctx, ___tracy_gpu_zone, cmdbuf, name, true ) +# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneC( ctx, ___tracy_gpu_zone, cmdbuf, name, color, true ) +# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) tracy::VkCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), cmdbuf, active ); +#endif +#define TracyVkCollect( ctx, cmdbuf ) ctx->Collect( cmdbuf ); + +#ifdef TRACY_HAS_CALLSTACK +# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, depth, active ); +# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, depth, active ); +# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, depth, true ) +# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, depth, true ) +# define TracyVkZoneTransientS( ctx, varname, cmdbuf, name, depth, active ) tracy::VkCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), cmdbuf, depth, active ); +#else +# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) +# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) +# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkZone( ctx, cmdbuf, name ) +# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkZoneC( ctx, cmdbuf, name, color ) +# define TracyVkZoneTransientS( ctx, varname, cmdbuf, name, depth, active ) TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) +#endif + +#else + +// Forward declarations to be used in engine +namespace tracy +{ +#if defined TRACY_VK_USE_SYMBOL_TABLE + extern void* CreateVkContext(VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr, bool calibrated = false); +#else + extern void* CreateVkContext(VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct); +#endif +#if defined VK_EXT_host_query_reset +#if defined TRACY_VK_USE_SYMBOL_TABLE + extern void* CreateVkContext(VkInstance instance, VkPhysicalDevice physdev, VkDevice device, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr); +#else + extern void* CreateVkContext(VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT qpreset, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct); +#endif +#endif + extern void DestroyVkContext(void* ctx); + extern void CollectVkContext(void* ctx, VkCommandBuffer cmdbuf); + extern void BeginVkZoneScope(void* zone, void* ctx, VkCommandBuffer cmdbuf, const char* name, size_t nameLen); + extern void EndVkZoneScope(void* zone); +} + +#endif + +#endif diff --git a/Source/Tools/Flax.Build/Build/NativeCpp/BuildOptions.cs b/Source/Tools/Flax.Build/Build/NativeCpp/BuildOptions.cs index 0aa37b84e..95695e203 100644 --- a/Source/Tools/Flax.Build/Build/NativeCpp/BuildOptions.cs +++ b/Source/Tools/Flax.Build/Build/NativeCpp/BuildOptions.cs @@ -73,7 +73,7 @@ namespace Flax.Build.NativeCpp /// /// The native C++ module build settings container. /// - public sealed class BuildOptions + public sealed class BuildOptions : ICloneable { /// /// The target that builds this module. @@ -442,5 +442,26 @@ namespace Flax.Build.NativeCpp SourcePaths.Clear(); } } + + /// + public object Clone() + { + var clone = new BuildOptions + { + Target = Target, + Platform = Platform, + Toolchain = Toolchain, + Architecture = Architecture, + Configuration = Configuration, + CompileEnv = (CompileEnvironment)CompileEnv.Clone(), + LinkEnv = (LinkEnvironment)LinkEnv.Clone(), + IntermediateFolder = IntermediateFolder, + OutputFolder = OutputFolder, + WorkingDirectory = WorkingDirectory, + HotReloadPostfix = HotReloadPostfix, + Flags = Flags, + }; + return clone; + } } }