#ifndef __TRACYPROFILER_HPP__ #define __TRACYPROFILER_HPP__ #include #include #include #include #include #include "tracy_concurrentqueue.h" #include "TracyCallstack.hpp" #include "TracySysTime.hpp" #include "TracyFastVector.hpp" #include "../common/TracyQueue.hpp" #include "../common/TracyAlign.hpp" #include "../common/TracyAlloc.hpp" #include "../common/TracyMutex.hpp" #include "../common/TracyProtocol.hpp" #if defined _WIN32 || defined __CYGWIN__ # include #endif #ifdef __APPLE__ # include # include #endif #if !defined TRACY_TIMER_FALLBACK && ( defined _WIN32 || defined __CYGWIN__ || ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) || ( defined TARGET_OS_IOS && TARGET_OS_IOS == 1 ) ) # define TRACY_HW_TIMER #endif #if !defined TRACY_HW_TIMER # include #endif namespace tracy { #if defined(TRACY_DELAYED_INIT) && defined(TRACY_MANUAL_LIFETIME) TRACY_API void StartupProfiler(); TRACY_API void ShutdownProfiler(); #endif class GpuCtx; class Profiler; class Socket; class UdpBroadcast; struct GpuCtxWrapper { GpuCtx* ptr; }; TRACY_API moodycamel::ConcurrentQueue::ExplicitProducer* GetToken(); TRACY_API Profiler& GetProfiler(); TRACY_API std::atomic& GetLockCounter(); TRACY_API std::atomic& GetGpuCtxCounter(); TRACY_API GpuCtxWrapper& GetGpuCtx(); TRACY_API uint64_t GetThreadHandle(); TRACY_API void InitRPMallocThread(); TRACY_API bool ProfilerAvailable(); TRACY_API int64_t GetFrequencyQpc(); #ifdef TRACY_ON_DEMAND struct LuaZoneState { uint32_t counter; bool active; }; #endif #define TracyLfqPrepare( _type ) \ moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \ auto __token = GetToken(); \ auto& __tail = __token->get_tail_index(); \ auto item = __token->enqueue_begin( __magic ); \ MemWrite( &item->hdr.type, _type ); #define TracyLfqCommit \ __tail.store( __magic + 1, std::memory_order_release ); #define TracyLfqPrepareC( _type ) \ tracy::moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \ auto __token = tracy::GetToken(); \ auto& __tail = __token->get_tail_index(); \ auto item = __token->enqueue_begin( __magic ); \ tracy::MemWrite( &item->hdr.type, _type ); #define TracyLfqCommitC \ __tail.store( __magic + 1, std::memory_order_release ); class TRACY_API Profiler { struct FrameImageQueueItem { void* image; uint32_t frame; uint16_t w; uint16_t h; uint8_t offset; bool flip; }; public: Profiler(); ~Profiler(); void SpawnWorkerThreads(); static tracy_force_inline int64_t GetTime() { #ifdef TRACY_HW_TIMER # if defined TARGET_OS_IOS && TARGET_OS_IOS == 1 return mach_absolute_time(); # elif defined _WIN32 || defined __CYGWIN__ # ifdef TRACY_TIMER_QPC return GetTimeQpc(); # else return int64_t( __rdtsc() ); # endif # elif defined __i386 || defined _M_IX86 uint32_t eax, edx; asm volatile ( "rdtsc" : "=a" (eax), "=d" (edx) ); return ( uint64_t( edx ) << 32 ) + uint64_t( eax ); # elif defined __x86_64__ || defined _M_X64 uint64_t rax, rdx; asm volatile ( "rdtsc" : "=a" (rax), "=d" (rdx) ); return (int64_t)(( rdx << 32 ) + rax); # else # error "TRACY_HW_TIMER detection logic needs fixing" # endif #else # if defined __linux__ && defined CLOCK_MONOTONIC_RAW struct timespec ts; clock_gettime( CLOCK_MONOTONIC_RAW, &ts ); return int64_t( ts.tv_sec ) * 1000000000ll + int64_t( ts.tv_nsec ); # else return std::chrono::duration_cast( std::chrono::high_resolution_clock::now().time_since_epoch() ).count(); # endif #endif } tracy_force_inline uint32_t GetNextZoneId() { return m_zoneId.fetch_add( 1, std::memory_order_relaxed ); } static tracy_force_inline QueueItem* QueueSerial() { auto& p = GetProfiler(); p.m_serialLock.lock(); return p.m_serialQueue.prepare_next(); } static tracy_force_inline QueueItem* QueueSerialCallstack( void* ptr ) { auto& p = GetProfiler(); p.m_serialLock.lock(); p.SendCallstackSerial( ptr ); return p.m_serialQueue.prepare_next(); } static tracy_force_inline void QueueSerialFinish() { auto& p = GetProfiler(); p.m_serialQueue.commit_next(); p.m_serialLock.unlock(); } static void SendFrameMark( const char* name ); static void SendFrameMark( const char* name, QueueType type ); static void PlotData( const char* name, int64_t val ); static void PlotData( const char* name, float val ); static void PlotData( const char* name, double val ); static void ConfigurePlot( const char* name, PlotFormatType type ); static void Message( const char* txt, size_t size, int callstack ); static void Message( const char* txt, int callstack ); static void MessageColor( const char* txt, size_t size, uint32_t color, int callstack ); static void MessageColor( const char* txt, uint32_t color, int callstack ); static void MessageAppInfo( const char* txt, size_t size ); static void MemAlloc( const void* ptr, size_t size, bool secure ); static void MemFree( const void* ptr, bool secure ); static void MemAllocCallstack( const void* ptr, size_t size, int depth, bool secure ); static void MemFreeCallstack( const void* ptr, int depth, bool secure ); static void MemAllocNamed( const void* ptr, size_t size, bool secure, const char* name ); static void MemFreeNamed( const void* ptr, bool secure, const char* name ); static void MemAllocCallstackNamed( const void* ptr, size_t size, int depth, bool secure, const char* name ); static void MemFreeCallstackNamed( const void* ptr, int depth, bool secure, const char* name ); static void SendCallstack( int depth ); static void ParameterRegister( ParameterCallback cb ); void SendCallstack( int depth, const char* skipBefore ); static void CutCallstack( void* callstack, const char* skipBefore ); static bool ShouldExit(); tracy_force_inline bool IsConnected() const { return m_isConnected.load( std::memory_order_acquire ); } #ifdef TRACY_ON_DEMAND tracy_force_inline uint64_t ConnectionId() const { return m_connectionId.load( std::memory_order_acquire ); } tracy_force_inline void DeferItem( const QueueItem& item ) { m_deferredLock.lock(); auto dst = m_deferredQueue.push_next(); memcpy( dst, &item, sizeof( item ) ); m_deferredLock.unlock(); } #endif void RequestShutdown() { m_shutdown.store( true, std::memory_order_relaxed ); m_shutdownManual.store( true, std::memory_order_relaxed ); } bool HasShutdownFinished() const { return m_shutdownFinished.load( std::memory_order_relaxed ); } void SendString( uint64_t str, const char* ptr, QueueType type ) { SendString( str, ptr, strlen( ptr ), type ); } void SendString( uint64_t str, const char* ptr, size_t len, QueueType type ); void SendSingleString( const char* ptr ) { SendSingleString( ptr, strlen( ptr ) ); } void SendSingleString( const char* ptr, size_t len ); void SendSecondString( const char* ptr ) { SendSecondString( ptr, strlen( ptr ) ); } void SendSecondString( const char* ptr, size_t len ); // Allocated source location data layout: // 2b payload size // 4b color // 4b source line // fsz function name // 1b null terminator // ssz source file name // 1b null terminator // nsz zone name (optional) static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function ) { return AllocSourceLocation( line, source, function, nullptr, 0 ); } static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz ) { return AllocSourceLocation( line, source, strlen(source), function, strlen(function), name, nameSz ); } static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz ) { return AllocSourceLocation( line, source, sourceSz, function, functionSz, nullptr, 0 ); } static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz ) { const auto sz32 = uint32_t( 2 + 4 + 4 + functionSz + 1 + sourceSz + 1 + nameSz ); assert( sz32 <= std::numeric_limits::max() ); const auto sz = uint16_t( sz32 ); auto ptr = (char*)tracy_malloc( sz ); memcpy( ptr, &sz, 2 ); memset( ptr + 2, 0, 4 ); memcpy( ptr + 6, &line, 4 ); memcpy( ptr + 10, function, functionSz ); ptr[10 + functionSz] = '\0'; memcpy( ptr + 10 + functionSz + 1, source, sourceSz ); ptr[10 + functionSz + 1 + sourceSz] = '\0'; if( nameSz != 0 ) { memcpy( ptr + 10 + functionSz + 1 + sourceSz + 1, name, nameSz ); } return uint64_t( ptr ); } private: enum class DequeueStatus { DataDequeued, ConnectionLost, QueueEmpty }; static void LaunchWorker( void* ptr ) { ((Profiler*)ptr)->Worker(); } void Worker(); void ClearQueues( tracy::moodycamel::ConsumerToken& token ); void ClearSerial(); DequeueStatus Dequeue( tracy::moodycamel::ConsumerToken& token ); DequeueStatus DequeueContextSwitches( tracy::moodycamel::ConsumerToken& token, int64_t& timeStop ); DequeueStatus DequeueSerial(); bool CommitData(); tracy_force_inline bool AppendData( const void* data, size_t len ) { const auto ret = NeedDataSize( len ); AppendDataUnsafe( data, len ); return ret; } tracy_force_inline bool NeedDataSize( size_t len ) { assert( len <= TargetFrameSize ); bool ret = true; if( m_bufferOffset - m_bufferStart + (int)len > TargetFrameSize ) { ret = CommitData(); } return ret; } tracy_force_inline void AppendDataUnsafe( const void* data, size_t len ) { memcpy( m_buffer + m_bufferOffset, data, len ); m_bufferOffset += int( len ); } bool SendData( const char* data, size_t len ); void SendLongString( uint64_t ptr, const char* str, size_t len, QueueType type ); void SendSourceLocation( uint64_t ptr ); void SendSourceLocationPayload( uint64_t ptr ); void SendCallstackPayload( uint64_t ptr ); void SendCallstackPayload64( uint64_t ptr ); void SendCallstackAlloc( uint64_t ptr ); void SendCallstackFrame( uint64_t ptr ); void SendCodeLocation( uint64_t ptr ); bool HandleServerQuery(); void HandleDisconnect(); void HandleParameter( uint64_t payload ); void HandleSymbolQuery( uint64_t symbol ); void HandleSymbolCodeQuery( uint64_t symbol, uint32_t size ); void HandleSourceCodeQuery(); void AckServerQuery(); void AckSourceCodeNotAvailable(); void CalibrateTimer(); void CalibrateDelay(); void ReportTopology(); static tracy_force_inline void SendCallstackSerial( void* ptr ) { #ifdef TRACY_HAS_CALLSTACK auto item = GetProfiler().m_serialQueue.prepare_next(); MemWrite( &item->hdr.type, QueueType::CallstackSerial ); MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); GetProfiler().m_serialQueue.commit_next(); #endif } static tracy_force_inline void SendMemAlloc( QueueType type, const uint64_t thread, const void* ptr, size_t size ) { assert( type == QueueType::MemAlloc || type == QueueType::MemAllocCallstack || type == QueueType::MemAllocNamed || type == QueueType::MemAllocCallstackNamed ); auto item = GetProfiler().m_serialQueue.prepare_next(); MemWrite( &item->hdr.type, type ); MemWrite( &item->memAlloc.time, GetTime() ); MemWrite( &item->memAlloc.thread, thread ); MemWrite( &item->memAlloc.ptr, (uint64_t)ptr ); if( compile_time_condition::value ) { memcpy( &item->memAlloc.size, &size, 4 ); memset( &item->memAlloc.size + 4, 0, 2 ); } else { assert( sizeof( size ) == 8 ); memcpy( &item->memAlloc.size, &size, 4 ); memcpy( ((char*)&item->memAlloc.size)+4, ((char*)&size)+4, 2 ); } GetProfiler().m_serialQueue.commit_next(); } static tracy_force_inline void SendMemFree( QueueType type, const uint64_t thread, const void* ptr ) { assert( type == QueueType::MemFree || type == QueueType::MemFreeCallstack || type == QueueType::MemFreeNamed || type == QueueType::MemFreeCallstackNamed ); auto item = GetProfiler().m_serialQueue.prepare_next(); MemWrite( &item->hdr.type, type ); MemWrite( &item->memFree.time, GetTime() ); MemWrite( &item->memFree.thread, thread ); MemWrite( &item->memFree.ptr, (uint64_t)ptr ); GetProfiler().m_serialQueue.commit_next(); } static tracy_force_inline void SendMemName( const char* name ) { assert( name ); auto item = GetProfiler().m_serialQueue.prepare_next(); MemWrite( &item->hdr.type, QueueType::MemNamePayload ); MemWrite( &item->memName.name, (uint64_t)name ); GetProfiler().m_serialQueue.commit_next(); } #if ( defined _WIN32 || defined __CYGWIN__ ) && defined TRACY_TIMER_QPC static int64_t GetTimeQpc(); #endif double m_timerMul; uint64_t m_resolution; uint64_t m_delay; std::atomic m_timeBegin; uint64_t m_mainThread; uint64_t m_epoch, m_exectime; std::atomic m_shutdown; std::atomic m_shutdownManual; std::atomic m_shutdownFinished; Socket* m_sock; UdpBroadcast* m_broadcast; bool m_noExit; uint32_t m_userPort; std::atomic m_zoneId; int64_t m_samplingPeriod; uint64_t m_threadCtx; int64_t m_refTimeThread; int64_t m_refTimeSerial; int64_t m_refTimeCtx; int64_t m_refTimeGpu; void* m_stream; // LZ4_stream_t* char* m_buffer; int m_bufferOffset; int m_bufferStart; char* m_lz4Buf; FastVector m_serialQueue, m_serialDequeue; TracyMutex m_serialLock; std::atomic m_frameCount; std::atomic m_isConnected; #ifdef TRACY_ON_DEMAND std::atomic m_connectionId; TracyMutex m_deferredLock; FastVector m_deferredQueue; #endif #ifdef TRACY_HAS_SYSTIME void ProcessSysTime(); SysTime m_sysTime; uint64_t m_sysTimeLast = 0; #else void ProcessSysTime() {} #endif ParameterCallback m_paramCallback; char* m_queryData; char* m_queryDataPtr; }; } #endif