From 7dc645c1147e61895ef39500c6c7b451d74b3a35 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 6 Nov 2023 20:50:37 +0100 Subject: [PATCH] Update Tracy Profiler to `0.10` --- Source/ThirdParty/tracy/TracyClient.cpp | 7 +- .../tracy/client/TracyCallstack.cpp | 9 +- .../tracy/client/TracyCallstack.hpp | 9 +- Source/ThirdParty/tracy/client/TracyLock.hpp | 8 +- .../ThirdParty/tracy/client/TracyOverride.cpp | 26 + .../ThirdParty/tracy/client/TracyProfiler.cpp | 544 +--- .../ThirdParty/tracy/client/TracyProfiler.hpp | 105 +- .../ThirdParty/tracy/client/TracyScoped.hpp | 28 +- .../ThirdParty/tracy/client/TracySysPower.cpp | 164 + .../ThirdParty/tracy/client/TracySysPower.hpp | 44 + .../ThirdParty/tracy/client/TracySysTrace.cpp | 49 +- .../tracy/client/tracy_concurrentqueue.h | 13 +- .../tracy/client/tracy_rpmalloc.cpp | 2 +- .../ThirdParty/tracy/common/TracyProtocol.hpp | 4 +- Source/ThirdParty/tracy/common/TracyQueue.hpp | 32 +- .../ThirdParty/tracy/common/TracySocket.cpp | 6 +- .../ThirdParty/tracy/common/TracySocket.hpp | 2 +- .../ThirdParty/tracy/common/TracySystem.cpp | 97 +- .../ThirdParty/tracy/common/TracySystem.hpp | 9 - .../ThirdParty/tracy/common/TracyVersion.hpp | 2 +- Source/ThirdParty/tracy/libbacktrace/config.h | 4 + .../ThirdParty/tracy/libbacktrace/dwarf.cpp | 88 +- Source/ThirdParty/tracy/libbacktrace/elf.cpp | 2659 ++++++++++++++++- .../tracy/libbacktrace/internal.hpp | 9 + Source/ThirdParty/tracy/tracy/Tracy.hpp | 61 +- 25 files changed, 3169 insertions(+), 812 deletions(-) create mode 100644 Source/ThirdParty/tracy/client/TracyOverride.cpp create mode 100644 Source/ThirdParty/tracy/client/TracySysPower.cpp create mode 100644 Source/ThirdParty/tracy/client/TracySysPower.hpp diff --git a/Source/ThirdParty/tracy/TracyClient.cpp b/Source/ThirdParty/tracy/TracyClient.cpp index 3548c5752..1fbd78f96 100644 --- a/Source/ThirdParty/tracy/TracyClient.cpp +++ b/Source/ThirdParty/tracy/TracyClient.cpp @@ -11,10 +11,10 @@ // Define TRACY_ENABLE to enable profiler. -#ifdef TRACY_ENABLE - #include "common/TracySystem.cpp" +#ifdef TRACY_ENABLE + #ifdef _MSC_VER # pragma warning(push, 0) #endif @@ -22,12 +22,13 @@ #include #include "client/TracyProfiler.cpp" #include "client/TracyCallstack.cpp" +#include "client/TracySysPower.cpp" #include "client/TracySysTime.cpp" #include "client/TracySysTrace.cpp" #include "common/TracySocket.cpp" #include "client/tracy_rpmalloc.cpp" #include "client/TracyAlloc.cpp" - +#include "client/TracyOverride.cpp" #if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6 # include "libbacktrace/alloc.cpp" diff --git a/Source/ThirdParty/tracy/client/TracyCallstack.cpp b/Source/ThirdParty/tracy/client/TracyCallstack.cpp index ca19a543b..0de7c9d2e 100644 --- a/Source/ThirdParty/tracy/client/TracyCallstack.cpp +++ b/Source/ThirdParty/tracy/client/TracyCallstack.cpp @@ -154,7 +154,6 @@ void InitCallstack() DBGHELP_LOCK; #endif - //SymInitialize( GetCurrentProcess(), "C:\\Flax\\FlaxEngine\\Binaries\\Editor\\Win64\\Debug;C:\\Flax\\FlaxEngine\\Cache\\Projects", true ); SymInitialize( GetCurrentProcess(), nullptr, true ); SymSetOptions( SYMOPT_LOAD_LINES ); @@ -228,6 +227,10 @@ void InitCallstack() const auto res = GetModuleFileNameA( mod[i], name, 1021 ); if( res > 0 ) { + // This may be a new module loaded since our call to SymInitialize. + // Just in case, force DbgHelp to load its pdb ! + SymLoadModuleEx(proc, NULL, name, NULL, (DWORD64)info.lpBaseOfDll, info.SizeOfImage, NULL, 0); + auto ptr = name + res; while( ptr > name && *ptr != '\\' && *ptr != '/' ) ptr--; if( ptr > name ) ptr++; @@ -683,7 +686,9 @@ void InitCallstackCritical() void InitCallstack() { cb_bts = backtrace_create_state( nullptr, 0, nullptr, nullptr ); +#ifndef TRACY_DEMANGLE ___tracy_init_demangle_buffer(); +#endif #ifdef __linux InitKernelSymbols(); @@ -758,7 +763,9 @@ debuginfod_client* GetDebuginfodClient() void EndCallstack() { +#ifndef TRACY_DEMANGLE ___tracy_free_demangle_buffer(); +#endif #ifdef TRACY_DEBUGINFOD ClearDebugInfoVector( s_di_known ); debuginfod_end( s_debuginfod ); diff --git a/Source/ThirdParty/tracy/client/TracyCallstack.hpp b/Source/ThirdParty/tracy/client/TracyCallstack.hpp index 8cfede8fb..96bee3f51 100644 --- a/Source/ThirdParty/tracy/client/TracyCallstack.hpp +++ b/Source/ThirdParty/tracy/client/TracyCallstack.hpp @@ -10,7 +10,14 @@ #endif -#ifdef TRACY_HAS_CALLSTACK +#ifndef TRACY_HAS_CALLSTACK + +namespace tracy +{ +static tracy_force_inline void* Callstack( int depth ) { return nullptr; } +} + +#else #ifdef TRACY_DEBUGINFOD # include diff --git a/Source/ThirdParty/tracy/client/TracyLock.hpp b/Source/ThirdParty/tracy/client/TracyLock.hpp index 296a41ba1..d12a3c16d 100644 --- a/Source/ThirdParty/tracy/client/TracyLock.hpp +++ b/Source/ThirdParty/tracy/client/TracyLock.hpp @@ -21,7 +21,7 @@ public: , m_active( false ) #endif { - assert( m_id != std::numeric_limits::max() ); + assert( m_id != (std::numeric_limits::max)() ); auto item = Profiler::QueueSerial(); MemWrite( &item->hdr.type, QueueType::LockAnnounce ); @@ -154,7 +154,7 @@ public: tracy_force_inline void CustomName( const char* name, size_t size ) { - assert( size < std::numeric_limits::max() ); + assert( size < (std::numeric_limits::max)() ); auto ptr = (char*)tracy_malloc( size ); memcpy( ptr, name, size ); auto item = Profiler::QueueSerial(); @@ -235,7 +235,7 @@ public: , m_active( false ) #endif { - assert( m_id != std::numeric_limits::max() ); + assert( m_id != (std::numeric_limits::max)() ); auto item = Profiler::QueueSerial(); MemWrite( &item->hdr.type, QueueType::LockAnnounce ); @@ -450,7 +450,7 @@ public: tracy_force_inline void CustomName( const char* name, size_t size ) { - assert( size < std::numeric_limits::max() ); + assert( size < (std::numeric_limits::max)() ); auto ptr = (char*)tracy_malloc( size ); memcpy( ptr, name, size ); auto item = Profiler::QueueSerial(); diff --git a/Source/ThirdParty/tracy/client/TracyOverride.cpp b/Source/ThirdParty/tracy/client/TracyOverride.cpp new file mode 100644 index 000000000..591508a7f --- /dev/null +++ b/Source/ThirdParty/tracy/client/TracyOverride.cpp @@ -0,0 +1,26 @@ +#ifdef TRACY_ENABLE +# ifdef __linux__ +# include "TracyDebug.hpp" +# ifdef TRACY_VERBOSE +# include +# include +# endif + +extern "C" int dlclose( void* hnd ) +{ +#ifdef TRACY_VERBOSE + struct link_map* lm; + if( dlinfo( hnd, RTLD_DI_LINKMAP, &lm ) == 0 ) + { + TracyDebug( "Overriding dlclose for %s\n", lm->l_name ); + } + else + { + TracyDebug( "Overriding dlclose for unknown object (%s)\n", dlerror() ); + } +#endif + return 0; +} + +# endif +#endif diff --git a/Source/ThirdParty/tracy/client/TracyProfiler.cpp b/Source/ThirdParty/tracy/client/TracyProfiler.cpp index dfbb22a83..59c31d6f8 100644 --- a/Source/ThirdParty/tracy/client/TracyProfiler.cpp +++ b/Source/ThirdParty/tracy/client/TracyProfiler.cpp @@ -81,7 +81,9 @@ #endif #ifdef __APPLE__ -# define TRACY_DELAYED_INIT +# ifndef TRACY_DELAYED_INIT +# define TRACY_DELAYED_INIT +# endif #else # ifdef __GNUC__ # define init_order( val ) __attribute__ ((init_priority(val))) @@ -1074,7 +1076,9 @@ static void CrashHandler( int signal, siginfo_t* info, void* /*ucontext*/ ) } closedir( dp ); +#ifdef TRACY_HAS_CALLSTACK if( selfTid == s_symbolTid ) s_symbolThreadGone.store( true, std::memory_order_release ); +#endif TracyLfqPrepare( QueueType::Crash ); TracyLfqCommit; @@ -1355,6 +1359,7 @@ Profiler::Profiler() , m_queryImage( nullptr ) , m_queryData( nullptr ) , m_crashHandlerInstalled( false ) + , m_programName( nullptr ) { assert( !s_instance ); s_instance = this; @@ -1417,7 +1422,9 @@ void Profiler::SpawnWorkerThreads() #if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER s_profilerThreadId = GetThreadId( s_thread->Handle() ); +# ifdef TRACY_HAS_CALLSTACK s_symbolThreadId = GetThreadId( s_symbolThread->Handle() ); +# endif m_exceptionHandler = AddVectoredExceptionHandler( 1, CrashFilter ); #endif @@ -1456,7 +1463,7 @@ Profiler::~Profiler() if( m_crashHandlerInstalled ) RemoveVectoredExceptionHandler( m_exceptionHandler ); #endif -#ifdef __linux__ +#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER if( m_crashHandlerInstalled ) { sigaction( TRACY_CRASH_SIGNAL, &m_prevSignal.pwr, nullptr ); @@ -1522,7 +1529,7 @@ bool Profiler::ShouldExit() void Profiler::Worker() { -#ifdef __linux__ +#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER s_profilerTid = syscall( SYS_gettid ); #endif @@ -1711,6 +1718,9 @@ void Profiler::Worker() if( m_sock ) break; #ifndef TRACY_ON_DEMAND ProcessSysTime(); +# ifdef TRACY_HAS_SYSPOWER + m_sysPower.Tick(); +# endif #endif if( m_broadcast ) @@ -1718,6 +1728,14 @@ void Profiler::Worker() const auto t = std::chrono::high_resolution_clock::now().time_since_epoch().count(); if( t - lastBroadcast > 3000000000 ) // 3s { + m_programNameLock.lock(); + if( m_programName ) + { + broadcastMsg = GetBroadcastMessage( m_programName, strlen( m_programName ), broadcastLen, dataPort ); + m_programName = nullptr; + } + m_programNameLock.unlock(); + lastBroadcast = t; const auto ts = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch() ).count(); broadcastMsg.activeTime = int32_t( ts - m_epoch ); @@ -1828,6 +1846,9 @@ void Profiler::Worker() for(;;) { ProcessSysTime(); +#ifdef TRACY_HAS_SYSPOWER + m_sysPower.Tick(); +#endif const auto status = Dequeue( token ); const auto serialStatus = DequeueSerial(); if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost ) @@ -3026,9 +3047,9 @@ void Profiler::SendSourceLocation( uint64_t ptr ) MemWrite( &item.srcloc.file, (uint64_t)srcloc->file ); MemWrite( &item.srcloc.function, (uint64_t)srcloc->function ); MemWrite( &item.srcloc.line, srcloc->line ); - MemWrite( &item.srcloc.r, uint8_t( ( srcloc->color ) & 0xFF ) ); + MemWrite( &item.srcloc.b, uint8_t( ( srcloc->color ) & 0xFF ) ); MemWrite( &item.srcloc.g, uint8_t( ( srcloc->color >> 8 ) & 0xFF ) ); - MemWrite( &item.srcloc.b, uint8_t( ( srcloc->color >> 16 ) & 0xFF ) ); + MemWrite( &item.srcloc.r, uint8_t( ( srcloc->color >> 16 ) & 0xFF ) ); AppendData( &item, QueueDataSize[(int)QueueType::SourceLocation] ); } @@ -3331,10 +3352,8 @@ bool Profiler::HandleServerQuery() uint8_t type; uint64_t ptr; - uint32_t extra; memcpy( &type, &payload.type, sizeof( payload.type ) ); memcpy( &ptr, &payload.ptr, sizeof( payload.ptr ) ); - memcpy( &extra, &payload.extra, sizeof( payload.extra ) ); switch( type ) { @@ -3381,7 +3400,7 @@ bool Profiler::HandleServerQuery() break; #ifndef TRACY_NO_CODE_TRANSFER case ServerQuerySymbolCode: - HandleSymbolCodeQuery( ptr, extra ); + HandleSymbolCodeQuery( ptr, payload.extra ); break; #endif case ServerQuerySourceCode: @@ -3398,7 +3417,7 @@ bool Profiler::HandleServerQuery() break; case ServerQueryDataTransferPart: memcpy( m_queryDataPtr, &ptr, 8 ); - memcpy( m_queryDataPtr+8, &extra, 4 ); + memcpy( m_queryDataPtr+8, &payload.extra, 4 ); m_queryDataPtr += 12; AckServerQuery(); break; @@ -3753,7 +3772,7 @@ void Profiler::SendFrameImage( const void* image, uint16_t w, uint16_t h, uint8_ { #ifndef TRACY_NO_FRAME_IMAGE auto& profiler = GetProfiler(); - assert( profiler.m_frameCount.load( std::memory_order_relaxed ) < std::numeric_limits::max() ); + assert( profiler.m_frameCount.load( std::memory_order_relaxed ) < (std::numeric_limits::max)() ); # ifdef TRACY_ON_DEMAND if( !profiler.IsConnected() ) return; # endif @@ -3770,6 +3789,12 @@ void Profiler::SendFrameImage( const void* image, uint16_t w, uint16_t h, uint8_ fi->flip = flip; profiler.m_fiQueue.commit_next(); profiler.m_fiLock.unlock(); +#else + static_cast(image); // unused + static_cast(w); // unused + static_cast(h); // unused + static_cast(offset); // unused + static_cast(flip); // unused #endif } @@ -3827,7 +3852,7 @@ void Profiler::ConfigurePlot( const char* name, PlotFormatType type, bool step, void Profiler::Message( const char* txt, size_t size, int callstack ) { - assert( size < std::numeric_limits::max() ); + assert( size < (std::numeric_limits::max)() ); #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif @@ -3864,7 +3889,7 @@ void Profiler::Message( const char* txt, int callstack ) void Profiler::MessageColor( const char* txt, size_t size, uint32_t color, int callstack ) { - assert( size < std::numeric_limits::max() ); + assert( size < (std::numeric_limits::max)() ); #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif @@ -3879,9 +3904,9 @@ void Profiler::MessageColor( const char* txt, size_t size, uint32_t color, int c TracyQueuePrepare( callstack == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack ); MemWrite( &item->messageColorFat.time, GetTime() ); MemWrite( &item->messageColorFat.text, (uint64_t)ptr ); - MemWrite( &item->messageColorFat.r, uint8_t( ( color ) & 0xFF ) ); + MemWrite( &item->messageColorFat.b, uint8_t( ( color ) & 0xFF ) ); MemWrite( &item->messageColorFat.g, uint8_t( ( color >> 8 ) & 0xFF ) ); - MemWrite( &item->messageColorFat.b, uint8_t( ( color >> 16 ) & 0xFF ) ); + MemWrite( &item->messageColorFat.r, uint8_t( ( color >> 16 ) & 0xFF ) ); MemWrite( &item->messageColorFat.size, (uint16_t)size ); TracyQueueCommit( messageColorFatThread ); } @@ -3899,15 +3924,15 @@ void Profiler::MessageColor( const char* txt, uint32_t color, int callstack ) TracyQueuePrepare( callstack == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack ); MemWrite( &item->messageColorLiteral.time, GetTime() ); MemWrite( &item->messageColorLiteral.text, (uint64_t)txt ); - MemWrite( &item->messageColorLiteral.r, uint8_t( ( color ) & 0xFF ) ); + MemWrite( &item->messageColorLiteral.b, uint8_t( ( color ) & 0xFF ) ); MemWrite( &item->messageColorLiteral.g, uint8_t( ( color >> 8 ) & 0xFF ) ); - MemWrite( &item->messageColorLiteral.b, uint8_t( ( color >> 16 ) & 0xFF ) ); + MemWrite( &item->messageColorLiteral.r, uint8_t( ( color >> 16 ) & 0xFF ) ); TracyQueueCommit( messageColorLiteralThread ); } void Profiler::MessageAppInfo( const char* txt, size_t size ) { - assert( size < std::numeric_limits::max() ); + assert( size < (std::numeric_limits::max)() ); auto ptr = (char*)tracy_malloc( size ); memcpy( ptr, txt, size ); TracyLfqPrepare( QueueType::MessageAppInfo ); @@ -3922,7 +3947,7 @@ void Profiler::MessageAppInfo( const char* txt, size_t size ) TracyLfqCommit; } -void Profiler::MemAlloc(const void* ptr, size_t size, bool secure) +void Profiler::MemAlloc( const void* ptr, size_t size, bool secure ) { if( secure && !ProfilerAvailable() ) return; #ifdef TRACY_ON_DEMAND @@ -4085,7 +4110,6 @@ void Profiler::SendCallstack( int depth ) #endif } -void Profiler::ParameterRegister( ParameterCallback cb ) { GetProfiler().m_paramCallback = cb; } void Profiler::ParameterRegister( ParameterCallback cb, void* data ) { auto& profiler = GetProfiler(); @@ -4301,486 +4325,4 @@ int64_t Profiler::GetTimeQpc() } -#if 0 -#ifdef __cplusplus -extern "C" { -#endif - -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active ) -{ - ___tracy_c_zone_context ctx; -#ifdef TRACY_ON_DEMAND - ctx.active = active && tracy::GetProfiler().IsConnected(); -#else - ctx.active = active; -#endif - if( !ctx.active ) return ctx; - const auto id = tracy::GetProfiler().GetNextZoneId(); - ctx.id = id; - -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - { - TracyQueuePrepareC( tracy::QueueType::ZoneBegin ); - tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); - TracyQueueCommitC( zoneBeginThread ); - } - return ctx; -} - -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active ) -{ - ___tracy_c_zone_context ctx; -#ifdef TRACY_ON_DEMAND - ctx.active = active && tracy::GetProfiler().IsConnected(); -#else - ctx.active = active; -#endif - if( !ctx.active ) return ctx; - const auto id = tracy::GetProfiler().GetNextZoneId(); - ctx.id = id; - -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - tracy::GetProfiler().SendCallstack( depth ); - { - TracyQueuePrepareC( tracy::QueueType::ZoneBeginCallstack ); - tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); - TracyQueueCommitC( zoneBeginThread ); - } - return ctx; -} - -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int active ) -{ - ___tracy_c_zone_context ctx; -#ifdef TRACY_ON_DEMAND - ctx.active = active && tracy::GetProfiler().IsConnected(); -#else - ctx.active = active; -#endif - if( !ctx.active ) - { - tracy::tracy_free( (void*)srcloc ); - return ctx; - } - const auto id = tracy::GetProfiler().GetNextZoneId(); - ctx.id = id; - -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - { - TracyQueuePrepareC( tracy::QueueType::ZoneBeginAllocSrcLoc ); - tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->zoneBegin.srcloc, srcloc ); - TracyQueueCommitC( zoneBeginThread ); - } - return ctx; -} - -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int depth, int active ) -{ - ___tracy_c_zone_context ctx; -#ifdef TRACY_ON_DEMAND - ctx.active = active && tracy::GetProfiler().IsConnected(); -#else - ctx.active = active; -#endif - if( !ctx.active ) - { - tracy::tracy_free( (void*)srcloc ); - return ctx; - } - const auto id = tracy::GetProfiler().GetNextZoneId(); - ctx.id = id; - -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - tracy::GetProfiler().SendCallstack( depth ); - { - TracyQueuePrepareC( tracy::QueueType::ZoneBeginAllocSrcLocCallstack ); - tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->zoneBegin.srcloc, srcloc ); - TracyQueueCommitC( zoneBeginThread ); - } - return ctx; -} - -TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx ) -{ - if( !ctx.active ) return; -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, ctx.id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - { - TracyQueuePrepareC( tracy::QueueType::ZoneEnd ); - tracy::MemWrite( &item->zoneEnd.time, tracy::Profiler::GetTime() ); - TracyQueueCommitC( zoneEndThread ); - } -} - -TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size ) -{ - assert( size < std::numeric_limits::max() ); - if( !ctx.active ) return; - auto ptr = (char*)tracy::tracy_malloc( size ); - memcpy( ptr, txt, size ); -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, ctx.id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - { - TracyQueuePrepareC( tracy::QueueType::ZoneText ); - tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); - tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size ); - TracyQueueCommitC( zoneTextFatThread ); - } -} - -TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size ) -{ - assert( size < std::numeric_limits::max() ); - if( !ctx.active ) return; - auto ptr = (char*)tracy::tracy_malloc( size ); - memcpy( ptr, txt, size ); -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, ctx.id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - { - TracyQueuePrepareC( tracy::QueueType::ZoneName ); - tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); - tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size ); - TracyQueueCommitC( zoneTextFatThread ); - } -} - -TRACY_API void ___tracy_emit_zone_color( TracyCZoneCtx ctx, uint32_t color ) { - if( !ctx.active ) return; -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, ctx.id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - { - TracyQueuePrepareC( tracy::QueueType::ZoneColor ); - tracy::MemWrite( &item->zoneColor.r, uint8_t( ( color ) & 0xFF ) ); - tracy::MemWrite( &item->zoneColor.g, uint8_t( ( color >> 8 ) & 0xFF ) ); - tracy::MemWrite( &item->zoneColor.b, uint8_t( ( color >> 16 ) & 0xFF ) ); - TracyQueueCommitC( zoneColorThread ); - } -} - -TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value ) -{ - if( !ctx.active ) return; -#ifndef TRACY_NO_VERIFY - { - TracyQueuePrepareC( tracy::QueueType::ZoneValidation ); - tracy::MemWrite( &item->zoneValidation.id, ctx.id ); - TracyQueueCommitC( zoneValidationThread ); - } -#endif - { - TracyQueuePrepareC( tracy::QueueType::ZoneValue ); - tracy::MemWrite( &item->zoneValue.value, value ); - TracyQueueCommitC( zoneValueThread ); - } -} - -TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure ) { tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); } -TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure ) { tracy::Profiler::MemAllocCallstack( ptr, size, depth, secure != 0 ); } -TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure ) { tracy::Profiler::MemFree( ptr, secure != 0 ); } -TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure ) { tracy::Profiler::MemFreeCallstack( ptr, depth, secure != 0 ); } -TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int secure, const char* name ) { tracy::Profiler::MemAllocNamed( ptr, size, secure != 0, name ); } -TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int depth, int secure, const char* name ) { tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, secure != 0, name ); } -TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int secure, const char* name ) { tracy::Profiler::MemFreeNamed( ptr, secure != 0, name ); } -TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int depth, int secure, const char* name ) { tracy::Profiler::MemFreeCallstackNamed( ptr, depth, secure != 0, name ); } -TRACY_API void ___tracy_emit_frame_mark( const char* name ) { tracy::Profiler::SendFrameMark( name ); } -TRACY_API void ___tracy_emit_frame_mark_start( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart ); } -TRACY_API void ___tracy_emit_frame_mark_end( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd ); } -TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int flip ) { tracy::Profiler::SendFrameImage( image, w, h, offset, flip ); } -TRACY_API void ___tracy_emit_plot( const char* name, double val ) { tracy::Profiler::PlotData( name, val ); } -TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack ) { tracy::Profiler::Message( txt, size, callstack ); } -TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack ) { tracy::Profiler::Message( txt, callstack ); } -TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, size, color, callstack ); } -TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, color, callstack ); } -TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ) { tracy::Profiler::MessageAppInfo( txt, size ); } - -TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz ) { - return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz ); -} - -TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz ) { - return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); -} - -TRACY_API void ___tracy_emit_gpu_zone_begin( const struct ___tracy_gpu_zone_begin_data data ) -{ - TracyLfqPrepareC( tracy::QueueType::GpuZoneBegin ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_zone_begin_callstack( const struct ___tracy_gpu_zone_begin_callstack_data data ) -{ - tracy::GetProfiler().SendCallstack( data.depth ); - TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginCallstack ); - tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_zone_begin_alloc( const struct ___tracy_gpu_zone_begin_data data ) -{ - TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginAllocSrcLoc ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack( const struct ___tracy_gpu_zone_begin_callstack_data data ) -{ - tracy::GetProfiler().SendCallstack( data.depth ); - TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginAllocSrcLocCallstack ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_time( const struct ___tracy_gpu_time_data data ) -{ - TracyLfqPrepareC( tracy::QueueType::GpuTime ); - tracy::MemWrite( &item->gpuTime.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuTime.queryId, data.queryId ); - tracy::MemWrite( &item->gpuTime.context, data.context ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_zone_end( const struct ___tracy_gpu_zone_end_data data ) -{ - TracyLfqPrepareC( tracy::QueueType::GpuZoneEnd ); - tracy::MemWrite( &item->gpuZoneEnd.cpuTime, tracy::Profiler::GetTime() ); - memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) ); - tracy::MemWrite( &item->gpuZoneEnd.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneEnd.context, data.context ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_new_context( ___tracy_gpu_new_context_data data ) -{ - TracyLfqPrepareC( tracy::QueueType::GpuNewContext ); - tracy::MemWrite( &item->gpuNewContext.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuNewContext.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuNewContext.period, data.period ); - tracy::MemWrite( &item->gpuNewContext.context, data.context ); - tracy::MemWrite( &item->gpuNewContext.flags, data.flags ); - tracy::MemWrite( &item->gpuNewContext.type, data.type ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_context_name( const struct ___tracy_gpu_context_name_data data ) -{ - auto ptr = (char*)tracy::tracy_malloc( data.len ); - memcpy( ptr, data.name, data.len ); - - TracyLfqPrepareC( tracy::QueueType::GpuContextName ); - tracy::MemWrite( &item->gpuContextNameFat.context, data.context ); - tracy::MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); - tracy::MemWrite( &item->gpuContextNameFat.size, data.len ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_calibration( const struct ___tracy_gpu_calibration_data data ) -{ - TracyLfqPrepareC( tracy::QueueType::GpuCalibration ); - tracy::MemWrite( &item->gpuCalibration.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuCalibration.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuCalibration.cpuDelta, data.cpuDelta ); - tracy::MemWrite( &item->gpuCalibration.context, data.context ); - TracyLfqCommitC; -} - -TRACY_API void ___tracy_emit_gpu_zone_begin_serial( const struct ___tracy_gpu_zone_begin_data data ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginSerial ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_zone_begin_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data data ) -{ - auto item = tracy::Profiler::QueueSerialCallstack( tracy::Callstack( data.depth ) ); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginCallstackSerial ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_serial( const struct ___tracy_gpu_zone_begin_data data ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginAllocSrcLocSerial ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data data ) -{ - auto item = tracy::Profiler::QueueSerialCallstack( tracy::Callstack( data.depth ) ); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginAllocSrcLocCallstackSerial ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneBegin.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_time_serial( const struct ___tracy_gpu_time_data data ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTime ); - tracy::MemWrite( &item->gpuTime.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuTime.queryId, data.queryId ); - tracy::MemWrite( &item->gpuTime.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_zone_end_serial( const struct ___tracy_gpu_zone_end_data data ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneEndSerial ); - tracy::MemWrite( &item->gpuZoneEnd.cpuTime, tracy::Profiler::GetTime() ); - memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) ); - tracy::MemWrite( &item->gpuZoneEnd.queryId, data.queryId ); - tracy::MemWrite( &item->gpuZoneEnd.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_new_context_serial( ___tracy_gpu_new_context_data data ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuNewContext ); - tracy::MemWrite( &item->gpuNewContext.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuNewContext.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuNewContext.period, data.period ); - tracy::MemWrite( &item->gpuNewContext.context, data.context ); - tracy::MemWrite( &item->gpuNewContext.flags, data.flags ); - tracy::MemWrite( &item->gpuNewContext.type, data.type ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_context_name_data data ) -{ - auto ptr = (char*)tracy::tracy_malloc( data.len ); - memcpy( ptr, data.name, data.len ); - - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuContextName ); - tracy::MemWrite( &item->gpuContextNameFat.context, data.context ); - tracy::MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); - tracy::MemWrite( &item->gpuContextNameFat.size, data.len ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_calibration_data data ) -{ - auto item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuCalibration ); - tracy::MemWrite( &item->gpuCalibration.cpuTime, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->gpuCalibration.gpuTime, data.gpuTime ); - tracy::MemWrite( &item->gpuCalibration.cpuDelta, data.cpuDelta ); - tracy::MemWrite( &item->gpuCalibration.context, data.context ); - tracy::Profiler::QueueSerialFinish(); -} - -TRACY_API int ___tracy_connected( void ) -{ - return tracy::GetProfiler().IsConnected(); -} - -#ifdef TRACY_FIBERS -TRACY_API void ___tracy_fiber_enter( const char* fiber ){ tracy::Profiler::EnterFiber( fiber ); } -TRACY_API void ___tracy_fiber_leave( void ){ tracy::Profiler::LeaveFiber(); } -#endif - -# ifdef TRACY_MANUAL_LIFETIME -TRACY_API void ___tracy_startup_profiler( void ) -{ - tracy::StartupProfiler(); -} - -TRACY_API void ___tracy_shutdown_profiler( void ) -{ - tracy::ShutdownProfiler(); -} -# endif - -#ifdef __cplusplus -} -#endif -#endif - #endif diff --git a/Source/ThirdParty/tracy/client/TracyProfiler.hpp b/Source/ThirdParty/tracy/client/TracyProfiler.hpp index 99ae63e4f..8892fb14f 100644 --- a/Source/ThirdParty/tracy/client/TracyProfiler.hpp +++ b/Source/ThirdParty/tracy/client/TracyProfiler.hpp @@ -10,6 +10,7 @@ #include "tracy_concurrentqueue.h" #include "tracy_SPSCQueue.h" #include "TracyCallstack.hpp" +#include "TracySysPower.hpp" #include "TracySysTime.hpp" #include "TracyFastVector.hpp" #include "../common/TracyQueue.hpp" @@ -190,7 +191,22 @@ public: if( HardwareSupportsInvariantTSC() ) { uint64_t rax, rdx; +#ifdef TRACY_PATCHABLE_NOPSLEDS + // Some external tooling (such as rr) wants to patch our rdtsc and replace it by a + // branch to control the external input seen by a program. This kind of patching is + // not generally possible depending on the surrounding code and can lead to significant + // slowdowns if the compiler generated unlucky code and rr and tracy are used together. + // To avoid this, use the rr-safe `nopl 0(%rax, %rax, 1); rdtsc` instruction sequence, + // which rr promises will be patchable independent of the surrounding code. + asm volatile ( + // This is nopl 0(%rax, %rax, 1), but assemblers are inconsistent about whether + // they emit that as a 4 or 5 byte sequence and we need to be guaranteed to use + // the 5 byte one. + ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n\t" + "rdtsc" : "=a" (rax), "=d" (rdx) ); +#else asm volatile ( "rdtsc" : "=a" (rax), "=d" (rdx) ); +#endif return (int64_t)(( rdx << 32 ) + rax); } # else @@ -240,6 +256,30 @@ public: p.m_serialLock.unlock(); } + static void SendFrameMark( const char* name ); + static void SendFrameMark( const char* name, QueueType type ); + static void SendFrameImage( const void* image, uint16_t w, uint16_t h, uint8_t offset, bool flip ); + static void PlotData( const char* name, int64_t val ); + static void PlotData( const char* name, float val ); + static void PlotData( const char* name, double val ); + static void ConfigurePlot( const char* name, PlotFormatType type, bool step, bool fill, uint32_t color ); + static void Message( const char* txt, size_t size, int callstack ); + static void Message( const char* txt, int callstack ); + static void MessageColor( const char* txt, size_t size, uint32_t color, int callstack ); + static void MessageColor( const char* txt, uint32_t color, int callstack ); + static void MessageAppInfo( const char* txt, size_t size ); + static void MemAlloc( const void* ptr, size_t size, bool secure ); + static void MemFree( const void* ptr, bool secure ); + static void MemAllocCallstack( const void* ptr, size_t size, int depth, bool secure ); + static void MemFreeCallstack( const void* ptr, int depth, bool secure ); + static void MemAllocNamed( const void* ptr, size_t size, bool secure, const char* name ); + static void MemFreeNamed( const void* ptr, bool secure, const char* name ); + static void MemAllocCallstackNamed( const void* ptr, size_t size, int depth, bool secure, const char* name ); + static void MemFreeCallstackNamed( const void* ptr, int depth, bool secure, const char* name ); + static void SendCallstack( int depth ); + static void ParameterRegister( ParameterCallback cb, void* data ); + static void ParameterSetup( uint32_t idx, const char* name, bool isBool, int32_t val ); + static tracy_force_inline void SourceCallbackRegister( SourceContentsCallback cb, void* data ) { auto& profiler = GetProfiler(); @@ -264,31 +304,6 @@ public: } #endif - static void SendFrameMark( const char* name ); - static void SendFrameMark( const char* name, QueueType type ); - static void SendFrameImage( const void* image, uint16_t w, uint16_t h, uint8_t offset, bool flip ); - static void PlotData( const char* name, int64_t val ); - static void PlotData( const char* name, float val ); - static void PlotData( const char* name, double val ); - static void ConfigurePlot( const char* name, PlotFormatType type, bool step, bool fill, uint32_t color ); - static void Message( const char* txt, size_t size, int callstack ); - static void Message( const char* txt, int callstack ); - static void MessageColor( const char* txt, size_t size, uint32_t color, int callstack ); - static void MessageColor( const char* txt, uint32_t color, int callstack ); - static void MessageAppInfo( const char* txt, size_t size ); - static void MemAlloc( const void* ptr, size_t size, bool secure ); - static void MemFree( const void* ptr, bool secure ); - static void MemAllocCallstack( const void* ptr, size_t size, int depth, bool secure ); - static void MemFreeCallstack( const void* ptr, int depth, bool secure ); - static void MemAllocNamed( const void* ptr, size_t size, bool secure, const char* name ); - static void MemFreeNamed( const void* ptr, bool secure, const char* name ); - static void MemAllocCallstackNamed( const void* ptr, size_t size, int depth, bool secure, const char* name ); - static void MemFreeCallstackNamed( const void* ptr, int depth, bool secure, const char* name ); - static void SendCallstack( int depth ); - static void ParameterRegister( ParameterCallback cb ); - static void ParameterRegister( ParameterCallback cb, void* data ); - static void ParameterSetup( uint32_t idx, const char* name, bool isBool, int32_t val ); - void SendCallstack( int depth, const char* skipBefore ); static void CutCallstack( void* callstack, const char* skipBefore ); @@ -299,6 +314,13 @@ public: return m_isConnected.load( std::memory_order_acquire ); } + tracy_force_inline void SetProgramName( const char* name ) + { + m_programNameLock.lock(); + m_programName = name; + m_programNameLock.unlock(); + } + #ifdef TRACY_ON_DEMAND tracy_force_inline uint64_t ConnectionId() const { @@ -347,13 +369,13 @@ public: static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz ) { - return AllocSourceLocation( line, source, sourceSz, function, functionSz, (const char*)nullptr, 0 ); + return AllocSourceLocation( line, source, sourceSz, function, functionSz, nullptr, 0 ); } static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz ) { const auto sz32 = uint32_t( 2 + 4 + 4 + functionSz + 1 + sourceSz + 1 + nameSz ); - assert( sz32 <= std::numeric_limits::max() ); + assert( sz32 <= (std::numeric_limits::max)() ); const auto sz = uint16_t( sz32 ); auto ptr = (char*)tracy_malloc( sz ); memcpy( ptr, &sz, 2 ); @@ -370,28 +392,6 @@ public: return uint64_t( ptr ); } - static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const Char* name, size_t nameSz ) - { - const auto sz32 = uint32_t( 2 + 4 + 4 + functionSz + 1 + sourceSz + 1 + nameSz ); - assert( sz32 <= std::numeric_limits::max() ); - const auto sz = uint16_t( sz32 ); - auto ptr = (char*)tracy_malloc( sz ); - memcpy( ptr, &sz, 2 ); - memset( ptr + 2, 0, 4 ); - memcpy( ptr + 6, &line, 4 ); - memcpy( ptr + 10, function, functionSz ); - ptr[10 + functionSz] = '\0'; - memcpy( ptr + 10 + functionSz + 1, source, sourceSz ); - ptr[10 + functionSz + 1 + sourceSz] = '\0'; - if( nameSz != 0 ) - { - char* dst = ptr + 10 + functionSz + 1 + sourceSz + 1; - for ( size_t i = 0; i < nameSz; i++) - dst[i] = (char)name[i]; - } - return uint64_t( ptr ); - } - private: enum class DequeueStatus { DataDequeued, ConnectionLost, QueueEmpty }; enum class ThreadCtxStatus { Same, Changed, ConnectionLost }; @@ -586,6 +586,10 @@ private: void ProcessSysTime() {} #endif +#ifdef TRACY_HAS_SYSPOWER + SysPower m_sysPower; +#endif + ParameterCallback m_paramCallback; void* m_paramCallbackData; SourceContentsCallback m_sourceCallback; @@ -604,6 +608,9 @@ private: } m_prevSignal; #endif bool m_crashHandlerInstalled; + + const char* m_programName; + TracyMutex m_programNameLock; }; } diff --git a/Source/ThirdParty/tracy/client/TracyScoped.hpp b/Source/ThirdParty/tracy/client/TracyScoped.hpp index 1e5b6c809..bb916aa57 100644 --- a/Source/ThirdParty/tracy/client/TracyScoped.hpp +++ b/Source/ThirdParty/tracy/client/TracyScoped.hpp @@ -20,19 +20,7 @@ void ScopedZone::Begin(const SourceLocationData* srcloc) TracyLfqPrepare( QueueType::ZoneBegin ); MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); - TracyLfqCommit; -} - -void ScopedZone::Begin(uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const Char* name, size_t nameSz) -{ -#ifdef TRACY_ON_DEMAND - if (!GetProfiler().IsConnected()) return; -#endif - TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLoc ); - const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); - MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); - MemWrite( &item->zoneBegin.srcloc, srcloc ); - TracyLfqCommit; + TracyQueueCommit( zoneBeginThread ); } void ScopedZone::End() @@ -42,7 +30,7 @@ void ScopedZone::End() #endif TracyLfqPrepare( QueueType::ZoneEnd ); MemWrite( &item->zoneEnd.time, Profiler::GetTime() ); - TracyLfqCommit; + TracyQueueCommit( zoneEndThread ); } ScopedZone::ScopedZone( const SourceLocationData* srcloc, bool is_active ) @@ -132,7 +120,7 @@ ScopedZone::~ScopedZone() void ScopedZone::Text( const char* txt, size_t size ) { - assert( size < std::numeric_limits::max() ); + assert( size < (std::numeric_limits::max)() ); if( !m_active ) return; #ifdef TRACY_ON_DEMAND if( GetProfiler().ConnectionId() != m_connectionId ) return; @@ -147,7 +135,7 @@ void ScopedZone::Text( const char* txt, size_t size ) void ScopedZone::Text( const Char* txt, size_t size ) { - assert( size < std::numeric_limits::max() ); + assert( size < (std::numeric_limits::max)() ); if( !m_active ) return; #ifdef TRACY_ON_DEMAND if( GetProfiler().ConnectionId() != m_connectionId ) return; @@ -163,7 +151,7 @@ void ScopedZone::Text( const Char* txt, size_t size ) void ScopedZone::Name( const char* txt, size_t size ) { - assert( size < std::numeric_limits::max() ); + assert( size < (std::numeric_limits::max)() ); if( !m_active ) return; #ifdef TRACY_ON_DEMAND if( GetProfiler().ConnectionId() != m_connectionId ) return; @@ -178,7 +166,7 @@ void ScopedZone::Name( const char* txt, size_t size ) void ScopedZone::Name( const Char* txt, size_t size ) { - assert( size < std::numeric_limits::max() ); + assert( size < (std::numeric_limits::max)() ); if( !m_active ) return; #ifdef TRACY_ON_DEMAND if( GetProfiler().ConnectionId() != m_connectionId ) return; @@ -199,9 +187,9 @@ void ScopedZone::Color( uint32_t color ) if( GetProfiler().ConnectionId() != m_connectionId ) return; #endif TracyQueuePrepare( QueueType::ZoneColor ); - MemWrite( &item->zoneColor.r, uint8_t( ( color ) & 0xFF ) ); + MemWrite( &item->zoneColor.b, uint8_t( ( color ) & 0xFF ) ); MemWrite( &item->zoneColor.g, uint8_t( ( color >> 8 ) & 0xFF ) ); - MemWrite( &item->zoneColor.b, uint8_t( ( color >> 16 ) & 0xFF ) ); + MemWrite( &item->zoneColor.r, uint8_t( ( color >> 16 ) & 0xFF ) ); TracyQueueCommit( zoneColorThread ); } diff --git a/Source/ThirdParty/tracy/client/TracySysPower.cpp b/Source/ThirdParty/tracy/client/TracySysPower.cpp new file mode 100644 index 000000000..bd5939da2 --- /dev/null +++ b/Source/ThirdParty/tracy/client/TracySysPower.cpp @@ -0,0 +1,164 @@ +#include "TracySysPower.hpp" + +#ifdef TRACY_HAS_SYSPOWER + +#include +#include +#include +#include +#include +#include + +#include "TracyDebug.hpp" +#include "TracyProfiler.hpp" +#include "../common/TracyAlloc.hpp" + +namespace tracy +{ + +SysPower::SysPower() + : m_domains( 4 ) + , m_lastTime( 0 ) +{ + ScanDirectory( "/sys/devices/virtual/powercap/intel-rapl", -1 ); +} + +SysPower::~SysPower() +{ + for( auto& v : m_domains ) + { + fclose( v.handle ); + // Do not release v.name, as it may be still needed + } +} + +void SysPower::Tick() +{ + auto t = std::chrono::high_resolution_clock::now().time_since_epoch().count(); + if( t - m_lastTime > 10000000 ) // 10 ms + { + m_lastTime = t; + for( auto& v : m_domains ) + { + char tmp[32]; + if( fread( tmp, 1, 32, v.handle ) > 0 ) + { + rewind( v.handle ); + auto p = (uint64_t)atoll( tmp ); + uint64_t delta; + if( p >= v.value ) + { + delta = p - v.value; + } + else + { + delta = v.overflow - v.value + p; + } + v.value = p; + + TracyLfqPrepare( QueueType::SysPowerReport ); + MemWrite( &item->sysPower.time, Profiler::GetTime() ); + MemWrite( &item->sysPower.delta, delta ); + MemWrite( &item->sysPower.name, (uint64_t)v.name ); + TracyLfqCommit; + } + } + } +} + +void SysPower::ScanDirectory( const char* path, int parent ) +{ + DIR* dir = opendir( path ); + if( !dir ) return; + struct dirent* ent; + uint64_t maxRange = 0; + char* name = nullptr; + FILE* handle = nullptr; + while( ( ent = readdir( dir ) ) ) + { + if( ent->d_type == DT_REG ) + { + if( strcmp( ent->d_name, "max_energy_range_uj" ) == 0 ) + { + char tmp[PATH_MAX]; + snprintf( tmp, PATH_MAX, "%s/max_energy_range_uj", path ); + FILE* f = fopen( tmp, "r" ); + if( f ) + { + fscanf( f, "%" PRIu64, &maxRange ); + fclose( f ); + } + } + else if( strcmp( ent->d_name, "name" ) == 0 ) + { + char tmp[PATH_MAX]; + snprintf( tmp, PATH_MAX, "%s/name", path ); + FILE* f = fopen( tmp, "r" ); + if( f ) + { + char ntmp[128]; + if( fgets( ntmp, 128, f ) ) + { + // Last character is newline, skip it + const auto sz = strlen( ntmp ) - 1; + if( parent < 0 ) + { + name = (char*)tracy_malloc( sz + 1 ); + memcpy( name, ntmp, sz ); + name[sz] = '\0'; + } + else + { + const auto p = m_domains[parent]; + const auto psz = strlen( p.name ); + name = (char*)tracy_malloc( psz + sz + 2 ); + memcpy( name, p.name, psz ); + name[psz] = ':'; + memcpy( name+psz+1, ntmp, sz ); + name[psz+sz+1] = '\0'; + } + } + fclose( f ); + } + } + else if( strcmp( ent->d_name, "energy_uj" ) == 0 ) + { + char tmp[PATH_MAX]; + snprintf( tmp, PATH_MAX, "%s/energy_uj", path ); + handle = fopen( tmp, "r" ); + } + } + if( name && handle && maxRange > 0 ) break; + } + if( name && handle && maxRange > 0 ) + { + parent = (int)m_domains.size(); + Domain* domain = m_domains.push_next(); + domain->value = 0; + domain->overflow = maxRange; + domain->handle = handle; + domain->name = name; + TracyDebug( "Power domain id %i, %s found at %s\n", parent, name, path ); + } + else + { + if( name ) tracy_free( name ); + if( handle ) fclose( handle ); + } + + rewinddir( dir ); + while( ( ent = readdir( dir ) ) ) + { + if( ent->d_type == DT_DIR && strncmp( ent->d_name, "intel-rapl:", 11 ) == 0 ) + { + char tmp[PATH_MAX]; + snprintf( tmp, PATH_MAX, "%s/%s", path, ent->d_name ); + ScanDirectory( tmp, parent ); + } + } + closedir( dir ); +} + +} + +#endif diff --git a/Source/ThirdParty/tracy/client/TracySysPower.hpp b/Source/ThirdParty/tracy/client/TracySysPower.hpp new file mode 100644 index 000000000..210123bce --- /dev/null +++ b/Source/ThirdParty/tracy/client/TracySysPower.hpp @@ -0,0 +1,44 @@ +#ifndef __TRACYSYSPOWER_HPP__ +#define __TRACYSYSPOWER_HPP__ + +#if defined __linux__ +# define TRACY_HAS_SYSPOWER +#endif + +#ifdef TRACY_HAS_SYSPOWER + +#include +#include + +#include "TracyFastVector.hpp" + +namespace tracy +{ + +class SysPower +{ + struct Domain + { + uint64_t value; + uint64_t overflow; + FILE* handle; + const char* name; + }; + +public: + SysPower(); + ~SysPower(); + + void Tick(); + +private: + void ScanDirectory( const char* path, int parent ); + + FastVector m_domains; + uint64_t m_lastTime; +}; + +} +#endif + +#endif diff --git a/Source/ThirdParty/tracy/client/TracySysTrace.cpp b/Source/ThirdParty/tracy/client/TracySysTrace.cpp index 23b1020a5..af0641fef 100644 --- a/Source/ThirdParty/tracy/client/TracySysTrace.cpp +++ b/Source/ThirdParty/tracy/client/TracySysTrace.cpp @@ -409,6 +409,7 @@ bool SysTraceStart( int64_t& samplingPeriod ) return false; } +#ifndef TRACY_NO_SAMPLING if( isOs64Bit ) { CLASSIC_EVENT_ID stackId[2] = {}; @@ -423,6 +424,7 @@ bool SysTraceStart( int64_t& samplingPeriod ) return false; } } +#endif #ifdef UNICODE WCHAR KernelLoggerName[sizeof( KERNEL_LOGGER_NAME )]; @@ -768,6 +770,13 @@ bool SysTraceStart( int64_t& samplingPeriod ) TracyDebug( "sched_wakeup id: %i\n", wakeupId ); TracyDebug( "drm_vblank_event id: %i\n", vsyncId ); +#ifdef TRACY_NO_SAMPLING + const bool noSoftwareSampling = true; +#else + const char* noSoftwareSamplingEnv = GetEnvVar( "TRACY_NO_SAMPLING" ); + const bool noSoftwareSampling = noSoftwareSamplingEnv && noSoftwareSamplingEnv[0] == '1'; +#endif + #ifdef TRACY_NO_SAMPLE_RETIREMENT const bool noRetirement = true; #else @@ -837,28 +846,31 @@ bool SysTraceStart( int64_t& samplingPeriod ) pe.clockid = CLOCK_MONOTONIC_RAW; #endif - TracyDebug( "Setup software sampling\n" ); - ProbePreciseIp( pe, currentPid ); - for( int i=0; i static inline bool circular_less_than(T a, T b) { static_assert(std::is_integral::value && !std::numeric_limits::is_signed, "circular_less_than is intended to be used only with unsigned integer types"); - return static_cast(a - b) > (static_cast(static_cast(1) << static_cast(sizeof(T) * CHAR_BIT - 1))); + return static_cast(a - b) > static_cast(static_cast(1) << (static_cast(sizeof(T) * CHAR_BIT - 1))); + // Note: extra parens around rhs of operator<< is MSVC bug: https://developercommunity2.visualstudio.com/t/C4554-triggers-when-both-lhs-and-rhs-is/10034931 + // silencing the bug requires #pragma warning(disable: 4554) around the calling code and has no effect when done here. } -#ifdef _MSC_VER -#pragma warning(pop) -#endif template static inline char* align_for(char* ptr) diff --git a/Source/ThirdParty/tracy/client/tracy_rpmalloc.cpp b/Source/ThirdParty/tracy/client/tracy_rpmalloc.cpp index 8efa626a9..711505d21 100644 --- a/Source/ThirdParty/tracy/client/tracy_rpmalloc.cpp +++ b/Source/ThirdParty/tracy/client/tracy_rpmalloc.cpp @@ -147,7 +147,7 @@ # if defined(__APPLE__) # include # if !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR -# include +# include # include # endif # include diff --git a/Source/ThirdParty/tracy/common/TracyProtocol.hpp b/Source/ThirdParty/tracy/common/TracyProtocol.hpp index dd30e5391..5eb1639db 100644 --- a/Source/ThirdParty/tracy/common/TracyProtocol.hpp +++ b/Source/ThirdParty/tracy/common/TracyProtocol.hpp @@ -9,14 +9,14 @@ namespace tracy constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; } -enum : uint32_t { ProtocolVersion = 63 }; +enum : uint32_t { ProtocolVersion = 64 }; enum : uint16_t { BroadcastVersion = 3 }; using lz4sz_t = uint32_t; enum { TargetFrameSize = 256 * 1024 }; enum { LZ4Size = Lz4CompressBound( TargetFrameSize ) }; -static_assert( LZ4Size <= std::numeric_limits::max(), "LZ4Size greater than lz4sz_t" ); +static_assert( LZ4Size <= (std::numeric_limits::max)(), "LZ4Size greater than lz4sz_t" ); static_assert( TargetFrameSize * 2 >= 64 * 1024, "Not enough space for LZ4 stream buffer" ); enum { HandshakeShibbolethSize = 8 }; diff --git a/Source/ThirdParty/tracy/common/TracyQueue.hpp b/Source/ThirdParty/tracy/common/TracyQueue.hpp index 092d26969..051d412ab 100644 --- a/Source/ThirdParty/tracy/common/TracyQueue.hpp +++ b/Source/ThirdParty/tracy/common/TracyQueue.hpp @@ -1,6 +1,7 @@ #ifndef __TRACYQUEUE_HPP__ #define __TRACYQUEUE_HPP__ +#include #include namespace tracy @@ -89,6 +90,7 @@ enum class QueueType : uint8_t GpuNewContext, CallstackFrame, SysTimeReport, + SysPowerReport, TidToPid, HwSampleCpuCycle, HwSampleInstructionRetired, @@ -165,9 +167,9 @@ struct QueueZoneValidationThread : public QueueZoneValidation struct QueueZoneColor { - uint8_t r; - uint8_t g; uint8_t b; + uint8_t g; + uint8_t r; }; struct QueueZoneColorThread : public QueueZoneColor @@ -221,9 +223,9 @@ struct QueueSourceLocation uint64_t function; // ptr uint64_t file; // ptr uint32_t line; - uint8_t r; - uint8_t g; uint8_t b; + uint8_t g; + uint8_t r; }; struct QueueZoneTextFat @@ -324,7 +326,7 @@ struct QueuePlotDataInt : public QueuePlotDataBase int64_t val; }; -struct QueuePlotDataFloat : public QueuePlotDataBase +struct QueuePlotDataFloat : public QueuePlotDataBase { float val; }; @@ -341,9 +343,9 @@ struct QueueMessage struct QueueMessageColor : public QueueMessage { - uint8_t r; - uint8_t g; uint8_t b; + uint8_t g; + uint8_t r; }; struct QueueMessageLiteral : public QueueMessage @@ -562,6 +564,13 @@ struct QueueSysTime float sysTime; }; +struct QueueSysPower +{ + int64_t time; + uint64_t delta; + uint64_t name; // ptr +}; + struct QueueContextSwitch { int64_t time; @@ -590,6 +599,13 @@ struct QueueHwSample int64_t time; }; +enum class PlotFormatType : uint8_t +{ + Number, + Memory, + Percentage +}; + struct QueuePlotConfig { uint64_t name; // ptr @@ -721,6 +737,7 @@ struct QueueItem QueueCrashReport crashReport; QueueCrashReportThread crashReportThread; QueueSysTime sysTime; + QueueSysPower sysPower; QueueContextSwitch contextSwitch; QueueThreadWakeup threadWakeup; QueueTidToPid tidToPid; @@ -824,6 +841,7 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ), sizeof( QueueHeader ) + sizeof( QueueCallstackFrame ), sizeof( QueueHeader ) + sizeof( QueueSysTime ), + sizeof( QueueHeader ) + sizeof( QueueSysPower ), sizeof( QueueHeader ) + sizeof( QueueTidToPid ), sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cpu cycle sizeof( QueueHeader ) + sizeof( QueueHwSample ), // instruction retired diff --git a/Source/ThirdParty/tracy/common/TracySocket.cpp b/Source/ThirdParty/tracy/common/TracySocket.cpp index 176bbc7aa..259678989 100644 --- a/Source/ThirdParty/tracy/common/TracySocket.cpp +++ b/Source/ThirdParty/tracy/common/TracySocket.cpp @@ -353,7 +353,7 @@ int Socket::Recv( void* _buf, int len, int timeout ) } } -int Socket::ReadUpTo( void* _buf, int len, int timeout ) +int Socket::ReadUpTo( void* _buf, int len ) { const auto sock = m_sock.load( std::memory_order_relaxed ); auto buf = (char*)_buf; @@ -678,10 +678,10 @@ bool UdpListen::Listen( uint16_t port ) #endif #if defined _WIN32 unsigned long reuse = 1; - setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, (const char*)&reuse, sizeof( reuse ) ); + setsockopt( sock, SOL_SOCKET, SO_REUSEADDR, (const char*)&reuse, sizeof( reuse ) ); #else int reuse = 1; - setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof( reuse ) ); + setsockopt( sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof( reuse ) ); #endif #if defined _WIN32 unsigned long broadcast = 1; diff --git a/Source/ThirdParty/tracy/common/TracySocket.hpp b/Source/ThirdParty/tracy/common/TracySocket.hpp index 4b3075e29..f7713aac6 100644 --- a/Source/ThirdParty/tracy/common/TracySocket.hpp +++ b/Source/ThirdParty/tracy/common/TracySocket.hpp @@ -29,7 +29,7 @@ public: int Send( const void* buf, int len ); int GetSendBufSize(); - int ReadUpTo( void* buf, int len, int timeout ); + int ReadUpTo( void* buf, int len ); bool Read( void* buf, int len, int timeout ); template diff --git a/Source/ThirdParty/tracy/common/TracySystem.cpp b/Source/ThirdParty/tracy/common/TracySystem.cpp index 5ca8e1f45..9a477aa31 100644 --- a/Source/ThirdParty/tracy/common/TracySystem.cpp +++ b/Source/ThirdParty/tracy/common/TracySystem.cpp @@ -205,61 +205,60 @@ TRACY_API const char* GetThreadName( uint32_t id ) } ptr = ptr->next; } -#else -# if defined _WIN32 -# ifdef TRACY_UWP - static auto _GetThreadDescription = &::GetThreadDescription; -# else - static auto _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" ); -# endif +#endif + +#if defined _WIN32 +# ifdef TRACY_UWP + static auto _GetThreadDescription = &::GetThreadDescription; +# else + static auto _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" ); +# endif if( _GetThreadDescription ) { auto hnd = OpenThread( THREAD_QUERY_LIMITED_INFORMATION, FALSE, (DWORD)id ); if( hnd != 0 ) { PWSTR tmp; - _GetThreadDescription( hnd, &tmp ); - auto ret = wcstombs( buf, tmp, 256 ); - CloseHandle( hnd ); - if( ret != 0 ) + if( SUCCEEDED( _GetThreadDescription( hnd, &tmp ) ) ) { - return buf; + auto ret = wcstombs( buf, tmp, 256 ); + CloseHandle( hnd ); + LocalFree( tmp ); + if( ret != static_cast( -1 ) ) + { + return buf; + } } } } -# elif defined __linux__ - int cs, fd; - char path[32]; -# ifdef __ANDROID__ - int tid = gettid(); -# else - int tid = (int) syscall( SYS_gettid ); -# endif - snprintf( path, sizeof( path ), "/proc/self/task/%d/comm", tid ); - sprintf( buf, "%" PRIu32, id ); -# ifndef __ANDROID__ - pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, &cs ); -# endif - if ( ( fd = open( path, O_RDONLY ) ) > 0) { - int len = read( fd, buf, 255 ); - if( len > 0 ) - { - buf[len] = 0; - if( len > 1 && buf[len-1] == '\n' ) - { - buf[len-1] = 0; - } - } - close( fd ); - } -# ifndef __ANDROID__ - pthread_setcancelstate( cs, 0 ); -# endif - return buf; -# endif +#elif defined __linux__ + int cs, fd; + char path[32]; + snprintf( path, sizeof( path ), "/proc/self/task/%d/comm", id ); + sprintf( buf, "%" PRIu32, id ); +# ifndef __ANDROID__ + pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, &cs ); +# endif + if ( ( fd = open( path, O_RDONLY ) ) > 0) { + int len = read( fd, buf, 255 ); + if( len > 0 ) + { + buf[len] = 0; + if( len > 1 && buf[len-1] == '\n' ) + { + buf[len-1] = 0; + } + } + close( fd ); + } +# ifndef __ANDROID__ + pthread_setcancelstate( cs, 0 ); +# endif + return buf; #endif - sprintf( buf, "%" PRIu32, id ); - return buf; + + sprintf( buf, "%" PRIu32, id ); + return buf; } TRACY_API const char* GetEnvVar( const char* name ) @@ -295,3 +294,13 @@ TRACY_API const char* GetEnvVar( const char* name ) } } + +#ifdef __cplusplus +extern "C" { +#endif + +TRACY_API void ___tracy_set_thread_name( const char* name ) { tracy::SetThreadName( name ); } + +#ifdef __cplusplus +} +#endif diff --git a/Source/ThirdParty/tracy/common/TracySystem.hpp b/Source/ThirdParty/tracy/common/TracySystem.hpp index edcc5cf31..7a88a00b1 100644 --- a/Source/ThirdParty/tracy/common/TracySystem.hpp +++ b/Source/ThirdParty/tracy/common/TracySystem.hpp @@ -25,13 +25,6 @@ namespace tracy { -enum class PlotFormatType : uint8_t -{ - Number, - Memory, - Percentage -}; - typedef void(*ParameterCallback)( void* data, uint32_t idx, int32_t val ); struct TRACY_API SourceLocationData @@ -47,7 +40,6 @@ class TRACY_API ScopedZone { public: static void Begin( const SourceLocationData* srcloc ); - static void Begin( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const Char* name, size_t nameSz ); static void End(); ScopedZone( const ScopedZone& ) = delete; @@ -68,7 +60,6 @@ public: void Name( const Char* txt, size_t size ); void Color( uint32_t color ); void Value( uint64_t value ); - bool IsActive() const; private: const bool m_active; diff --git a/Source/ThirdParty/tracy/common/TracyVersion.hpp b/Source/ThirdParty/tracy/common/TracyVersion.hpp index 983d1c51f..2355279f7 100644 --- a/Source/ThirdParty/tracy/common/TracyVersion.hpp +++ b/Source/ThirdParty/tracy/common/TracyVersion.hpp @@ -6,7 +6,7 @@ namespace tracy namespace Version { enum { Major = 0 }; -enum { Minor = 9 }; +enum { Minor = 10 }; enum { Patch = 0 }; } } diff --git a/Source/ThirdParty/tracy/libbacktrace/config.h b/Source/ThirdParty/tracy/libbacktrace/config.h index aa3259d11..87e38a95b 100644 --- a/Source/ThirdParty/tracy/libbacktrace/config.h +++ b/Source/ThirdParty/tracy/libbacktrace/config.h @@ -1,4 +1,8 @@ #include +#if defined(__linux__) && !defined(__GLIBC__) && !defined(__WORDSIZE) +// include __WORDSIZE headers for musl +# include +#endif #if __WORDSIZE == 64 # define BACKTRACE_ELF_SIZE 64 #else diff --git a/Source/ThirdParty/tracy/libbacktrace/dwarf.cpp b/Source/ThirdParty/tracy/libbacktrace/dwarf.cpp index 246cb9f36..f3899cbce 100644 --- a/Source/ThirdParty/tracy/libbacktrace/dwarf.cpp +++ b/Source/ThirdParty/tracy/libbacktrace/dwarf.cpp @@ -473,7 +473,7 @@ enum attr_val_encoding /* An address. */ ATTR_VAL_ADDRESS, /* An index into the .debug_addr section, whose value is relative to - * the DW_AT_addr_base attribute of the compilation unit. */ + the DW_AT_addr_base attribute of the compilation unit. */ ATTR_VAL_ADDRESS_INDEX, /* A unsigned integer. */ ATTR_VAL_UINT, @@ -611,8 +611,8 @@ struct function struct function_addrs { /* Range is LOW <= PC < HIGH. */ - uint64_t low; - uint64_t high; + uintptr_t low; + uintptr_t high; /* Function for this address range. */ struct function *function; }; @@ -693,8 +693,8 @@ struct unit struct unit_addrs { /* Range is LOW <= PC < HIGH. */ - uint64_t low; - uint64_t high; + uintptr_t low; + uintptr_t high; /* Compilation unit for this address range. */ struct unit *u; }; @@ -1431,7 +1431,7 @@ resolve_addr_index (const struct dwarf_sections *dwarf_sections, uint64_t addr_base, int addrsize, int is_bigendian, uint64_t addr_index, backtrace_error_callback error_callback, void *data, - uint64_t *address) + uintptr_t *address) { uint64_t offset; struct dwarf_buf addr_buf; @@ -1452,7 +1452,7 @@ resolve_addr_index (const struct dwarf_sections *dwarf_sections, addr_buf.data = data; addr_buf.reported_underflow = 0; - *address = read_address (&addr_buf, addrsize); + *address = (uintptr_t) read_address (&addr_buf, addrsize); return 1; } @@ -1531,7 +1531,7 @@ function_addrs_search (const void *vkey, const void *ventry) static int add_unit_addr (struct backtrace_state *state, void *rdata, - uint64_t lowpc, uint64_t highpc, + uintptr_t lowpc, uintptr_t highpc, backtrace_error_callback error_callback, void *data, void *pvec) { @@ -1867,10 +1867,10 @@ lookup_abbrev (struct abbrevs *abbrevs, uint64_t code, lowpc/highpc is set or ranges is set. */ struct pcrange { - uint64_t lowpc; /* The low PC value. */ + uintptr_t lowpc; /* The low PC value. */ int have_lowpc; /* Whether a low PC value was found. */ int lowpc_is_addr_index; /* Whether lowpc is in .debug_addr. */ - uint64_t highpc; /* The high PC value. */ + uintptr_t highpc; /* The high PC value. */ int have_highpc; /* Whether a high PC value was found. */ int highpc_is_relative; /* Whether highpc is relative to lowpc. */ int highpc_is_addr_index; /* Whether highpc is in .debug_addr. */ @@ -1890,12 +1890,12 @@ update_pcrange (const struct attr* attr, const struct attr_val* val, case DW_AT_low_pc: if (val->encoding == ATTR_VAL_ADDRESS) { - pcrange->lowpc = val->u.uint; + pcrange->lowpc = (uintptr_t) val->u.uint; pcrange->have_lowpc = 1; } else if (val->encoding == ATTR_VAL_ADDRESS_INDEX) { - pcrange->lowpc = val->u.uint; + pcrange->lowpc = (uintptr_t) val->u.uint; pcrange->have_lowpc = 1; pcrange->lowpc_is_addr_index = 1; } @@ -1904,18 +1904,18 @@ update_pcrange (const struct attr* attr, const struct attr_val* val, case DW_AT_high_pc: if (val->encoding == ATTR_VAL_ADDRESS) { - pcrange->highpc = val->u.uint; + pcrange->highpc = (uintptr_t) val->u.uint; pcrange->have_highpc = 1; } else if (val->encoding == ATTR_VAL_UINT) { - pcrange->highpc = val->u.uint; + pcrange->highpc = (uintptr_t) val->u.uint; pcrange->have_highpc = 1; pcrange->highpc_is_relative = 1; } else if (val->encoding == ATTR_VAL_ADDRESS_INDEX) { - pcrange->highpc = val->u.uint; + pcrange->highpc = (uintptr_t) val->u.uint; pcrange->have_highpc = 1; pcrange->highpc_is_addr_index = 1; } @@ -1950,16 +1950,16 @@ add_low_high_range (struct backtrace_state *state, uintptr_t base_address, int is_bigendian, struct unit *u, const struct pcrange *pcrange, int (*add_range) (struct backtrace_state *state, - void *rdata, uint64_t lowpc, - uint64_t highpc, + void *rdata, uintptr_t lowpc, + uintptr_t highpc, backtrace_error_callback error_callback, void *data, void *vec), void *rdata, backtrace_error_callback error_callback, void *data, void *vec) { - uint64_t lowpc; - uint64_t highpc; + uintptr_t lowpc; + uintptr_t highpc; lowpc = pcrange->lowpc; if (pcrange->lowpc_is_addr_index) @@ -1997,10 +1997,10 @@ add_ranges_from_ranges ( struct backtrace_state *state, const struct dwarf_sections *dwarf_sections, uintptr_t base_address, int is_bigendian, - struct unit *u, uint64_t base, + struct unit *u, uintptr_t base, const struct pcrange *pcrange, int (*add_range) (struct backtrace_state *state, void *rdata, - uint64_t lowpc, uint64_t highpc, + uintptr_t lowpc, uintptr_t highpc, backtrace_error_callback error_callback, void *data, void *vec), void *rdata, @@ -2039,12 +2039,12 @@ add_ranges_from_ranges ( break; if (is_highest_address (low, u->addrsize)) - base = high; + base = (uintptr_t) high; else { if (!add_range (state, rdata, - low + base + base_address, - high + base + base_address, + (uintptr_t) low + base + base_address, + (uintptr_t) high + base + base_address, error_callback, data, vec)) return 0; } @@ -2064,10 +2064,10 @@ add_ranges_from_rnglists ( struct backtrace_state *state, const struct dwarf_sections *dwarf_sections, uintptr_t base_address, int is_bigendian, - struct unit *u, uint64_t base, + struct unit *u, uintptr_t base, const struct pcrange *pcrange, int (*add_range) (struct backtrace_state *state, void *rdata, - uint64_t lowpc, uint64_t highpc, + uintptr_t lowpc, uintptr_t highpc, backtrace_error_callback error_callback, void *data, void *vec), void *rdata, @@ -2133,8 +2133,8 @@ add_ranges_from_rnglists ( case DW_RLE_startx_endx: { uint64_t index; - uint64_t low; - uint64_t high; + uintptr_t low; + uintptr_t high; index = read_uleb128 (&rnglists_buf); if (!resolve_addr_index (dwarf_sections, u->addr_base, @@ -2156,8 +2156,8 @@ add_ranges_from_rnglists ( case DW_RLE_startx_length: { uint64_t index; - uint64_t low; - uint64_t length; + uintptr_t low; + uintptr_t length; index = read_uleb128 (&rnglists_buf); if (!resolve_addr_index (dwarf_sections, u->addr_base, @@ -2187,16 +2187,16 @@ add_ranges_from_rnglists ( break; case DW_RLE_base_address: - base = read_address (&rnglists_buf, u->addrsize); + base = (uintptr_t) read_address (&rnglists_buf, u->addrsize); break; case DW_RLE_start_end: { - uint64_t low; - uint64_t high; + uintptr_t low; + uintptr_t high; - low = read_address (&rnglists_buf, u->addrsize); - high = read_address (&rnglists_buf, u->addrsize); + low = (uintptr_t) read_address (&rnglists_buf, u->addrsize); + high = (uintptr_t) read_address (&rnglists_buf, u->addrsize); if (!add_range (state, rdata, low + base_address, high + base_address, error_callback, data, vec)) @@ -2206,11 +2206,11 @@ add_ranges_from_rnglists ( case DW_RLE_start_length: { - uint64_t low; - uint64_t length; + uintptr_t low; + uintptr_t length; - low = read_address (&rnglists_buf, u->addrsize); - length = read_uleb128 (&rnglists_buf); + low = (uintptr_t) read_address (&rnglists_buf, u->addrsize); + length = (uintptr_t) read_uleb128 (&rnglists_buf); low += base_address; if (!add_range (state, rdata, low, low + length, error_callback, data, vec)) @@ -2240,9 +2240,9 @@ static int add_ranges (struct backtrace_state *state, const struct dwarf_sections *dwarf_sections, uintptr_t base_address, int is_bigendian, - struct unit *u, uint64_t base, const struct pcrange *pcrange, + struct unit *u, uintptr_t base, const struct pcrange *pcrange, int (*add_range) (struct backtrace_state *state, void *rdata, - uint64_t lowpc, uint64_t highpc, + uintptr_t lowpc, uintptr_t highpc, backtrace_error_callback error_callback, void *data, void *vec), void *rdata, @@ -3520,7 +3520,7 @@ read_referenced_name (struct dwarf_data *ddata, struct unit *u, static int add_function_range (struct backtrace_state *state, void *rdata, - uint64_t lowpc, uint64_t highpc, + uintptr_t lowpc, uintptr_t highpc, backtrace_error_callback error_callback, void *data, void *pvec) { @@ -3560,7 +3560,7 @@ add_function_range (struct backtrace_state *state, void *rdata, static int read_function_entry (struct backtrace_state *state, struct dwarf_data *ddata, - struct unit *u, uint64_t base, struct dwarf_buf *unit_buf, + struct unit *u, uintptr_t base, struct dwarf_buf *unit_buf, const struct line_header *lhdr, backtrace_error_callback error_callback, void *data, struct function_vector *vec_function, @@ -3624,7 +3624,7 @@ read_function_entry (struct backtrace_state *state, struct dwarf_data *ddata, && abbrev->attrs[i].name == DW_AT_low_pc) { if (val.encoding == ATTR_VAL_ADDRESS) - base = val.u.uint; + base = (uintptr_t) val.u.uint; else if (val.encoding == ATTR_VAL_ADDRESS_INDEX) { if (!resolve_addr_index (&ddata->dwarf_sections, diff --git a/Source/ThirdParty/tracy/libbacktrace/elf.cpp b/Source/ThirdParty/tracy/libbacktrace/elf.cpp index 9e62f090d..c65bc4e76 100644 --- a/Source/ThirdParty/tracy/libbacktrace/elf.cpp +++ b/Source/ThirdParty/tracy/libbacktrace/elf.cpp @@ -193,6 +193,7 @@ dl_iterate_phdr (int (*callback) (struct dl_phdr_info *, #undef STT_FUNC #undef NT_GNU_BUILD_ID #undef ELFCOMPRESS_ZLIB +#undef ELFCOMPRESS_ZSTD /* Basic types. */ @@ -350,6 +351,7 @@ typedef struct #endif /* BACKTRACE_ELF_SIZE != 32 */ #define ELFCOMPRESS_ZLIB 1 +#define ELFCOMPRESS_ZSTD 2 /* Names of sections, indexed by enum dwarf_section in internal.h. */ @@ -1130,7 +1132,7 @@ elf_uncompress_failed(void) on error. */ static int -elf_zlib_fetch (const unsigned char **ppin, const unsigned char *pinend, +elf_fetch_bits (const unsigned char **ppin, const unsigned char *pinend, uint64_t *pval, unsigned int *pbits) { unsigned int bits; @@ -1177,6 +1179,118 @@ elf_zlib_fetch (const unsigned char **ppin, const unsigned char *pinend, return 1; } +/* This is like elf_fetch_bits, but it fetchs the bits backward, and ensures at + least 16 bits. This is for zstd. */ + +static int +elf_fetch_bits_backward (const unsigned char **ppin, + const unsigned char *pinend, + uint64_t *pval, unsigned int *pbits) +{ + unsigned int bits; + const unsigned char *pin; + uint64_t val; + uint32_t next; + + bits = *pbits; + if (bits >= 16) + return 1; + pin = *ppin; + val = *pval; + + if (unlikely (pin <= pinend)) + { + if (bits == 0) + { + elf_uncompress_failed (); + return 0; + } + return 1; + } + + pin -= 4; + +#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) \ + && defined(__ORDER_BIG_ENDIAN__) \ + && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ \ + || __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + /* We've ensured that PIN is aligned. */ + next = *(const uint32_t *)pin; + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + next = __builtin_bswap32 (next); +#endif +#else + next = pin[0] | (pin[1] << 8) | (pin[2] << 16) | (pin[3] << 24); +#endif + + val <<= 32; + val |= next; + bits += 32; + + if (unlikely (pin < pinend)) + { + val >>= (pinend - pin) * 8; + bits -= (pinend - pin) * 8; + } + + *ppin = pin; + *pval = val; + *pbits = bits; + return 1; +} + +/* Initialize backward fetching when the bitstream starts with a 1 bit in the + last byte in memory (which is the first one that we read). This is used by + zstd decompression. Returns 1 on success, 0 on error. */ + +static int +elf_fetch_backward_init (const unsigned char **ppin, + const unsigned char *pinend, + uint64_t *pval, unsigned int *pbits) +{ + const unsigned char *pin; + unsigned int stream_start; + uint64_t val; + unsigned int bits; + + pin = *ppin; + stream_start = (unsigned int)*pin; + if (unlikely (stream_start == 0)) + { + elf_uncompress_failed (); + return 0; + } + val = 0; + bits = 0; + + /* Align to a 32-bit boundary. */ + while ((((uintptr_t)pin) & 3) != 0) + { + val <<= 8; + val |= (uint64_t)*pin; + bits += 8; + --pin; + } + + val <<= 8; + val |= (uint64_t)*pin; + bits += 8; + + *ppin = pin; + *pval = val; + *pbits = bits; + if (!elf_fetch_bits_backward (ppin, pinend, pval, pbits)) + return 0; + + *pbits -= __builtin_clz (stream_start) - (sizeof (unsigned int) - 1) * 8 + 1; + + if (!elf_fetch_bits_backward (ppin, pinend, pval, pbits)) + return 0; + + return 1; +} + /* Huffman code tables, like the rest of the zlib format, are defined by RFC 1951. We store a Huffman code table as a series of tables stored sequentially in memory. Each entry in a table is 16 bits. @@ -1211,14 +1325,14 @@ elf_zlib_fetch (const unsigned char **ppin, const unsigned char *pinend, /* Number of entries we allocate to for one code table. We get a page for the two code tables we need. */ -#define HUFFMAN_TABLE_SIZE (1024) +#define ZLIB_HUFFMAN_TABLE_SIZE (1024) /* Bit masks and shifts for the values in the table. */ -#define HUFFMAN_VALUE_MASK 0x01ff -#define HUFFMAN_BITS_SHIFT 9 -#define HUFFMAN_BITS_MASK 0x7 -#define HUFFMAN_SECONDARY_SHIFT 12 +#define ZLIB_HUFFMAN_VALUE_MASK 0x01ff +#define ZLIB_HUFFMAN_BITS_SHIFT 9 +#define ZLIB_HUFFMAN_BITS_MASK 0x7 +#define ZLIB_HUFFMAN_SECONDARY_SHIFT 12 /* For working memory while inflating we need two code tables, we need an array of code lengths (max value 15, so we use unsigned char), @@ -1226,17 +1340,17 @@ elf_zlib_fetch (const unsigned char **ppin, const unsigned char *pinend, latter two arrays must be large enough to hold the maximum number of code lengths, which RFC 1951 defines as 286 + 30. */ -#define ZDEBUG_TABLE_SIZE \ - (2 * HUFFMAN_TABLE_SIZE * sizeof (uint16_t) \ +#define ZLIB_TABLE_SIZE \ + (2 * ZLIB_HUFFMAN_TABLE_SIZE * sizeof (uint16_t) \ + (286 + 30) * sizeof (uint16_t) \ + (286 + 30) * sizeof (unsigned char)) -#define ZDEBUG_TABLE_CODELEN_OFFSET \ - (2 * HUFFMAN_TABLE_SIZE * sizeof (uint16_t) \ +#define ZLIB_TABLE_CODELEN_OFFSET \ + (2 * ZLIB_HUFFMAN_TABLE_SIZE * sizeof (uint16_t) \ + (286 + 30) * sizeof (uint16_t)) -#define ZDEBUG_TABLE_WORK_OFFSET \ - (2 * HUFFMAN_TABLE_SIZE * sizeof (uint16_t)) +#define ZLIB_TABLE_WORK_OFFSET \ + (2 * ZLIB_HUFFMAN_TABLE_SIZE * sizeof (uint16_t)) #ifdef BACKTRACE_GENERATE_FIXED_HUFFMAN_TABLE @@ -1269,7 +1383,7 @@ elf_zlib_inflate_table (unsigned char *codes, size_t codes_len, next value after VAL with the same bit length. */ next = (uint16_t *) (((unsigned char *) zdebug_table) - + ZDEBUG_TABLE_WORK_OFFSET); + + ZLIB_TABLE_WORK_OFFSET); memset (&count[0], 0, 16 * sizeof (uint16_t)); for (i = 0; i < codes_len; ++i) @@ -1297,7 +1411,7 @@ elf_zlib_inflate_table (unsigned char *codes, size_t codes_len, /* For each length, fill in the table for the codes of that length. */ - memset (table, 0, HUFFMAN_TABLE_SIZE * sizeof (uint16_t)); + memset (table, 0, ZLIB_HUFFMAN_TABLE_SIZE * sizeof (uint16_t)); /* Handle the values that do not require a secondary table. */ @@ -1331,13 +1445,13 @@ elf_zlib_inflate_table (unsigned char *codes, size_t codes_len, /* In the compressed bit stream, the value VAL is encoded as J bits with the value C. */ - if (unlikely ((val & ~HUFFMAN_VALUE_MASK) != 0)) + if (unlikely ((val & ~ZLIB_HUFFMAN_VALUE_MASK) != 0)) { elf_uncompress_failed (); return 0; } - tval = val | ((j - 1) << HUFFMAN_BITS_SHIFT); + tval = val | ((j - 1) << ZLIB_HUFFMAN_BITS_SHIFT); /* The table lookup uses 8 bits. If J is less than 8, we don't know what the other bits will be. We need to fill @@ -1487,7 +1601,7 @@ elf_zlib_inflate_table (unsigned char *codes, size_t codes_len, { /* Start a new secondary table. */ - if (unlikely ((next_secondary & HUFFMAN_VALUE_MASK) + if (unlikely ((next_secondary & ZLIB_HUFFMAN_VALUE_MASK) != next_secondary)) { elf_uncompress_failed (); @@ -1498,22 +1612,23 @@ elf_zlib_inflate_table (unsigned char *codes, size_t codes_len, secondary_bits = j - 8; next_secondary += 1 << secondary_bits; table[primary] = (secondary - + ((j - 8) << HUFFMAN_BITS_SHIFT) - + (1U << HUFFMAN_SECONDARY_SHIFT)); + + ((j - 8) << ZLIB_HUFFMAN_BITS_SHIFT) + + (1U << ZLIB_HUFFMAN_SECONDARY_SHIFT)); } else { /* There is an existing entry. It had better be a secondary table with enough bits. */ - if (unlikely ((tprimary & (1U << HUFFMAN_SECONDARY_SHIFT)) + if (unlikely ((tprimary + & (1U << ZLIB_HUFFMAN_SECONDARY_SHIFT)) == 0)) { elf_uncompress_failed (); return 0; } - secondary = tprimary & HUFFMAN_VALUE_MASK; - secondary_bits = ((tprimary >> HUFFMAN_BITS_SHIFT) - & HUFFMAN_BITS_MASK); + secondary = tprimary & ZLIB_HUFFMAN_VALUE_MASK; + secondary_bits = ((tprimary >> ZLIB_HUFFMAN_BITS_SHIFT) + & ZLIB_HUFFMAN_BITS_MASK); if (unlikely (secondary_bits < j - 8)) { elf_uncompress_failed (); @@ -1524,7 +1639,7 @@ elf_zlib_inflate_table (unsigned char *codes, size_t codes_len, /* Fill in secondary table entries. */ - tval = val | ((j - 8) << HUFFMAN_BITS_SHIFT); + tval = val | ((j - 8) << ZLIB_HUFFMAN_BITS_SHIFT); for (ind = code >> 8; ind < (1U << secondary_bits); @@ -1567,7 +1682,7 @@ elf_zlib_inflate_table (unsigned char *codes, size_t codes_len, #include -static uint16_t table[ZDEBUG_TABLE_SIZE]; +static uint16_t table[ZLIB_TABLE_SIZE]; static unsigned char codes[288]; int @@ -1795,7 +1910,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, const uint16_t *tlit; const uint16_t *tdist; - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) return 0; last = val & 1; @@ -1883,7 +1998,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, /* Read a Huffman encoding table. The various magic numbers here are from RFC 1951. */ - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) return 0; nlit = (val & 0x1f) + 257; @@ -1908,7 +2023,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, /* There are always at least 4 elements in the table. */ - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) return 0; codebits[16] = val & 7; @@ -1928,7 +2043,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, if (nclen == 5) goto codebitsdone; - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) return 0; codebits[7] = val & 7; @@ -1966,7 +2081,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, if (nclen == 10) goto codebitsdone; - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) return 0; codebits[11] = val & 7; @@ -2004,7 +2119,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, if (nclen == 15) goto codebitsdone; - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) return 0; codebits[2] = val & 7; @@ -2043,7 +2158,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, at the end of zdebug_table to hold them. */ plenbase = (((unsigned char *) zdebug_table) - + ZDEBUG_TABLE_CODELEN_OFFSET); + + ZLIB_TABLE_CODELEN_OFFSET); plen = plenbase; plenend = plen + nlit + ndist; while (plen < plenend) @@ -2052,24 +2167,25 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, unsigned int b; uint16_t v; - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) return 0; t = zdebug_table[val & 0xff]; /* The compression here uses bit lengths up to 7, so a secondary table is never necessary. */ - if (unlikely ((t & (1U << HUFFMAN_SECONDARY_SHIFT)) != 0)) + if (unlikely ((t & (1U << ZLIB_HUFFMAN_SECONDARY_SHIFT)) + != 0)) { elf_uncompress_failed (); return 0; } - b = (t >> HUFFMAN_BITS_SHIFT) & HUFFMAN_BITS_MASK; + b = (t >> ZLIB_HUFFMAN_BITS_SHIFT) & ZLIB_HUFFMAN_BITS_MASK; val >>= b + 1; bits -= b + 1; - v = t & HUFFMAN_VALUE_MASK; + v = t & ZLIB_HUFFMAN_VALUE_MASK; if (v < 16) *plen++ = v; else if (v == 16) @@ -2086,7 +2202,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, } /* We used up to 7 bits since the last - elf_zlib_fetch, so we have at least 8 bits + elf_fetch_bits, so we have at least 8 bits available here. */ c = 3 + (val & 0x3); @@ -2121,7 +2237,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, /* Store zero 3 to 10 times. */ /* We used up to 7 bits since the last - elf_zlib_fetch, so we have at least 8 bits + elf_fetch_bits, so we have at least 8 bits available here. */ c = 3 + (val & 0x7); @@ -2167,7 +2283,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, /* Store zero 11 to 138 times. */ /* We used up to 7 bits since the last - elf_zlib_fetch, so we have at least 8 bits + elf_fetch_bits, so we have at least 8 bits available here. */ c = 11 + (val & 0x7f); @@ -2204,10 +2320,11 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, zdebug_table)) return 0; if (!elf_zlib_inflate_table (plen + nlit, ndist, zdebug_table, - zdebug_table + HUFFMAN_TABLE_SIZE)) + (zdebug_table + + ZLIB_HUFFMAN_TABLE_SIZE))) return 0; tlit = zdebug_table; - tdist = zdebug_table + HUFFMAN_TABLE_SIZE; + tdist = zdebug_table + ZLIB_HUFFMAN_TABLE_SIZE; } /* Inflate values until the end of the block. This is the @@ -2220,14 +2337,14 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, uint16_t v; unsigned int lit; - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) return 0; t = tlit[val & 0xff]; - b = (t >> HUFFMAN_BITS_SHIFT) & HUFFMAN_BITS_MASK; - v = t & HUFFMAN_VALUE_MASK; + b = (t >> ZLIB_HUFFMAN_BITS_SHIFT) & ZLIB_HUFFMAN_BITS_MASK; + v = t & ZLIB_HUFFMAN_VALUE_MASK; - if ((t & (1U << HUFFMAN_SECONDARY_SHIFT)) == 0) + if ((t & (1U << ZLIB_HUFFMAN_SECONDARY_SHIFT)) == 0) { lit = v; val >>= b + 1; @@ -2236,8 +2353,8 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, else { t = tlit[v + 0x100 + ((val >> 8) & ((1U << b) - 1))]; - b = (t >> HUFFMAN_BITS_SHIFT) & HUFFMAN_BITS_MASK; - lit = t & HUFFMAN_VALUE_MASK; + b = (t >> ZLIB_HUFFMAN_BITS_SHIFT) & ZLIB_HUFFMAN_BITS_MASK; + lit = t & ZLIB_HUFFMAN_VALUE_MASK; val >>= b + 8; bits -= b + 8; } @@ -2282,7 +2399,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, { unsigned int extra; - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) return 0; /* This is an expression for the table of length @@ -2297,14 +2414,14 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, bits -= extra; } - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) return 0; t = tdist[val & 0xff]; - b = (t >> HUFFMAN_BITS_SHIFT) & HUFFMAN_BITS_MASK; - v = t & HUFFMAN_VALUE_MASK; + b = (t >> ZLIB_HUFFMAN_BITS_SHIFT) & ZLIB_HUFFMAN_BITS_MASK; + v = t & ZLIB_HUFFMAN_VALUE_MASK; - if ((t & (1U << HUFFMAN_SECONDARY_SHIFT)) == 0) + if ((t & (1U << ZLIB_HUFFMAN_SECONDARY_SHIFT)) == 0) { dist = v; val >>= b + 1; @@ -2313,8 +2430,9 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, else { t = tdist[v + 0x100 + ((val >> 8) & ((1U << b) - 1))]; - b = (t >> HUFFMAN_BITS_SHIFT) & HUFFMAN_BITS_MASK; - dist = t & HUFFMAN_VALUE_MASK; + b = ((t >> ZLIB_HUFFMAN_BITS_SHIFT) + & ZLIB_HUFFMAN_BITS_MASK); + dist = t & ZLIB_HUFFMAN_VALUE_MASK; val >>= b + 8; bits -= b + 8; } @@ -2354,7 +2472,7 @@ elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, { unsigned int extra; - if (!elf_zlib_fetch (&pin, pinend, &val, &bits)) + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) return 0; /* This is an expression for the table of @@ -2559,6 +2677,2354 @@ elf_zlib_inflate_and_verify (const unsigned char *pin, size_t sin, return 1; } +/* For working memory during zstd compression, we need + - a literal length FSE table: 512 64-bit values == 4096 bytes + - a match length FSE table: 512 64-bit values == 4096 bytes + - a offset FSE table: 256 64-bit values == 2048 bytes + - a Huffman tree: 2048 uint16_t values == 4096 bytes + - scratch space, one of + - to build an FSE table: 512 uint16_t values == 1024 bytes + - to build a Huffman tree: 512 uint16_t + 256 uint32_t == 2048 bytes +*/ + +#define ZSTD_TABLE_SIZE \ + (2 * 512 * sizeof (struct elf_zstd_fse_baseline_entry) \ + + 256 * sizeof (struct elf_zstd_fse_baseline_entry) \ + + 2048 * sizeof (uint16_t) \ + + 512 * sizeof (uint16_t) + 256 * sizeof (uint32_t)) + +#define ZSTD_TABLE_LITERAL_FSE_OFFSET (0) + +#define ZSTD_TABLE_MATCH_FSE_OFFSET \ + (512 * sizeof (struct elf_zstd_fse_baseline_entry)) + +#define ZSTD_TABLE_OFFSET_FSE_OFFSET \ + (ZSTD_TABLE_MATCH_FSE_OFFSET \ + + 512 * sizeof (struct elf_zstd_fse_baseline_entry)) + +#define ZSTD_TABLE_HUFFMAN_OFFSET \ + (ZSTD_TABLE_OFFSET_FSE_OFFSET \ + + 256 * sizeof (struct elf_zstd_fse_baseline_entry)) + +#define ZSTD_TABLE_WORK_OFFSET \ + (ZSTD_TABLE_HUFFMAN_OFFSET + 2048 * sizeof (uint16_t)) + +/* An entry in a zstd FSE table. */ + +struct elf_zstd_fse_entry +{ + /* The value that this FSE entry represents. */ + unsigned char symbol; + /* The number of bits to read to determine the next state. */ + unsigned char bits; + /* Add the bits to this base to get the next state. */ + uint16_t base; +}; + +static int +elf_zstd_build_fse (const int16_t *, int, uint16_t *, int, + struct elf_zstd_fse_entry *); + +/* Read a zstd FSE table and build the decoding table in *TABLE, updating *PPIN + as it reads. ZDEBUG_TABLE is scratch space; it must be enough for 512 + uint16_t values (1024 bytes). MAXIDX is the maximum number of symbols + permitted. *TABLE_BITS is the maximum number of bits for symbols in the + table: the size of *TABLE is at least 1 << *TABLE_BITS. This updates + *TABLE_BITS to the actual number of bits. Returns 1 on success, 0 on + error. */ + +static int +elf_zstd_read_fse (const unsigned char **ppin, const unsigned char *pinend, + uint16_t *zdebug_table, int maxidx, + struct elf_zstd_fse_entry *table, int *table_bits) +{ + const unsigned char *pin; + int16_t *norm; + uint16_t *next; + uint64_t val; + unsigned int bits; + int accuracy_log; + uint32_t remaining; + uint32_t threshold; + int bits_needed; + int idx; + int prev0; + + pin = *ppin; + + norm = (int16_t *) zdebug_table; + next = zdebug_table + 256; + + if (unlikely (pin + 3 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + + /* Align PIN to a 32-bit boundary. */ + + val = 0; + bits = 0; + while ((((uintptr_t) pin) & 3) != 0) + { + val |= (uint64_t)*pin << bits; + bits += 8; + ++pin; + } + + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + + accuracy_log = (val & 0xf) + 5; + if (accuracy_log > *table_bits) + { + elf_uncompress_failed (); + return 0; + } + *table_bits = accuracy_log; + val >>= 4; + bits -= 4; + + /* This code is mostly copied from the reference implementation. */ + + /* The number of remaining probabilities, plus 1. This sets the number of + bits that need to be read for the next value. */ + remaining = (1 << accuracy_log) + 1; + + /* The current difference between small and large values, which depends on + the number of remaining values. Small values use one less bit. */ + threshold = 1 << accuracy_log; + + /* The number of bits used to compute threshold. */ + bits_needed = accuracy_log + 1; + + /* The next character value. */ + idx = 0; + + /* Whether the last count was 0. */ + prev0 = 0; + + while (remaining > 1 && idx <= maxidx) + { + uint32_t max; + int32_t count; + + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + + if (prev0) + { + int zidx; + + /* Previous count was 0, so there is a 2-bit repeat flag. If the + 2-bit flag is 0b11, it adds 3 and then there is another repeat + flag. */ + zidx = idx; + while ((val & 0xfff) == 0xfff) + { + zidx += 3 * 6; + val >>= 12; + bits -= 12; + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + } + while ((val & 3) == 3) + { + zidx += 3; + val >>= 2; + bits -= 2; + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + } + /* We have at least 13 bits here, don't need to fetch. */ + zidx += val & 3; + val >>= 2; + bits -= 2; + + if (unlikely (zidx > maxidx)) + { + elf_uncompress_failed (); + return 0; + } + + for (; idx < zidx; idx++) + norm[idx] = 0; + + prev0 = 0; + continue; + } + + max = (2 * threshold - 1) - remaining; + if ((val & (threshold - 1)) < max) + { + /* A small value. */ + count = (int32_t) ((uint32_t) val & (threshold - 1)); + val >>= bits_needed - 1; + bits -= bits_needed - 1; + } + else + { + /* A large value. */ + count = (int32_t) ((uint32_t) val & (2 * threshold - 1)); + if (count >= (int32_t) threshold) + count -= (int32_t) max; + val >>= bits_needed; + bits -= bits_needed; + } + + count--; + if (count >= 0) + remaining -= count; + else + remaining--; + if (unlikely (idx >= 256)) + { + elf_uncompress_failed (); + return 0; + } + norm[idx] = (int16_t) count; + ++idx; + + prev0 = count == 0; + + while (remaining < threshold) + { + bits_needed--; + threshold >>= 1; + } + } + + if (unlikely (remaining != 1)) + { + elf_uncompress_failed (); + return 0; + } + + /* If we've read ahead more than a byte, back up. */ + while (bits >= 8) + { + --pin; + bits -= 8; + } + + *ppin = pin; + + for (; idx <= maxidx; idx++) + norm[idx] = 0; + + return elf_zstd_build_fse (norm, idx, next, *table_bits, table); +} + +/* Build the FSE decoding table from a list of probabilities. This reads from + NORM of length IDX, uses NEXT as scratch space, and writes to *TABLE, whose + size is TABLE_BITS. */ + +static int +elf_zstd_build_fse (const int16_t *norm, int idx, uint16_t *next, + int table_bits, struct elf_zstd_fse_entry *table) +{ + int table_size; + int high_threshold; + int i; + int pos; + int step; + int mask; + + table_size = 1 << table_bits; + high_threshold = table_size - 1; + for (i = 0; i < idx; i++) + { + int16_t n; + + n = norm[i]; + if (n >= 0) + next[i] = (uint16_t) n; + else + { + table[high_threshold].symbol = (unsigned char) i; + high_threshold--; + next[i] = 1; + } + } + + pos = 0; + step = (table_size >> 1) + (table_size >> 3) + 3; + mask = table_size - 1; + for (i = 0; i < idx; i++) + { + int n; + int j; + + n = (int) norm[i]; + for (j = 0; j < n; j++) + { + table[pos].symbol = (unsigned char) i; + pos = (pos + step) & mask; + while (unlikely (pos > high_threshold)) + pos = (pos + step) & mask; + } + } + if (unlikely (pos != 0)) + { + elf_uncompress_failed (); + return 0; + } + + for (i = 0; i < table_size; i++) + { + unsigned char sym; + uint16_t next_state; + int high_bit; + int bits; + + sym = table[i].symbol; + next_state = next[sym]; + ++next[sym]; + + if (next_state == 0) + { + elf_uncompress_failed (); + return 0; + } + high_bit = 31 - __builtin_clz (next_state); + + bits = table_bits - high_bit; + table[i].bits = (unsigned char) bits; + table[i].base = (uint16_t) ((next_state << bits) - table_size); + } + + return 1; +} + +/* Encode the baseline and bits into a single 32-bit value. */ + +#define ZSTD_ENCODE_BASELINE_BITS(baseline, basebits) \ + ((uint32_t)(baseline) | ((uint32_t)(basebits) << 24)) + +#define ZSTD_DECODE_BASELINE(baseline_basebits) \ + ((uint32_t)(baseline_basebits) & 0xffffff) + +#define ZSTD_DECODE_BASEBITS(baseline_basebits) \ + ((uint32_t)(baseline_basebits) >> 24) + +/* Given a literal length code, we need to read a number of bits and add that + to a baseline. For states 0 to 15 the baseline is the state and the number + of bits is zero. */ + +#define ZSTD_LITERAL_LENGTH_BASELINE_OFFSET (16) + +static const uint32_t elf_zstd_literal_length_base[] = +{ + ZSTD_ENCODE_BASELINE_BITS(16, 1), + ZSTD_ENCODE_BASELINE_BITS(18, 1), + ZSTD_ENCODE_BASELINE_BITS(20, 1), + ZSTD_ENCODE_BASELINE_BITS(22, 1), + ZSTD_ENCODE_BASELINE_BITS(24, 2), + ZSTD_ENCODE_BASELINE_BITS(28, 2), + ZSTD_ENCODE_BASELINE_BITS(32, 3), + ZSTD_ENCODE_BASELINE_BITS(40, 3), + ZSTD_ENCODE_BASELINE_BITS(48, 4), + ZSTD_ENCODE_BASELINE_BITS(64, 6), + ZSTD_ENCODE_BASELINE_BITS(128, 7), + ZSTD_ENCODE_BASELINE_BITS(256, 8), + ZSTD_ENCODE_BASELINE_BITS(512, 9), + ZSTD_ENCODE_BASELINE_BITS(1024, 10), + ZSTD_ENCODE_BASELINE_BITS(2048, 11), + ZSTD_ENCODE_BASELINE_BITS(4096, 12), + ZSTD_ENCODE_BASELINE_BITS(8192, 13), + ZSTD_ENCODE_BASELINE_BITS(16384, 14), + ZSTD_ENCODE_BASELINE_BITS(32768, 15), + ZSTD_ENCODE_BASELINE_BITS(65536, 16) +}; + +/* The same applies to match length codes. For states 0 to 31 the baseline is + the state + 3 and the number of bits is zero. */ + +#define ZSTD_MATCH_LENGTH_BASELINE_OFFSET (32) + +static const uint32_t elf_zstd_match_length_base[] = +{ + ZSTD_ENCODE_BASELINE_BITS(35, 1), + ZSTD_ENCODE_BASELINE_BITS(37, 1), + ZSTD_ENCODE_BASELINE_BITS(39, 1), + ZSTD_ENCODE_BASELINE_BITS(41, 1), + ZSTD_ENCODE_BASELINE_BITS(43, 2), + ZSTD_ENCODE_BASELINE_BITS(47, 2), + ZSTD_ENCODE_BASELINE_BITS(51, 3), + ZSTD_ENCODE_BASELINE_BITS(59, 3), + ZSTD_ENCODE_BASELINE_BITS(67, 4), + ZSTD_ENCODE_BASELINE_BITS(83, 4), + ZSTD_ENCODE_BASELINE_BITS(99, 5), + ZSTD_ENCODE_BASELINE_BITS(131, 7), + ZSTD_ENCODE_BASELINE_BITS(259, 8), + ZSTD_ENCODE_BASELINE_BITS(515, 9), + ZSTD_ENCODE_BASELINE_BITS(1027, 10), + ZSTD_ENCODE_BASELINE_BITS(2051, 11), + ZSTD_ENCODE_BASELINE_BITS(4099, 12), + ZSTD_ENCODE_BASELINE_BITS(8195, 13), + ZSTD_ENCODE_BASELINE_BITS(16387, 14), + ZSTD_ENCODE_BASELINE_BITS(32771, 15), + ZSTD_ENCODE_BASELINE_BITS(65539, 16) +}; + +/* An entry in an FSE table used for literal/match/length values. For these we + have to map the symbol to a baseline value, and we have to read zero or more + bits and add that value to the baseline value. Rather than look the values + up in a separate table, we grow the FSE table so that we get better memory + caching. */ + +struct elf_zstd_fse_baseline_entry +{ + /* The baseline for the value that this FSE entry represents.. */ + uint32_t baseline; + /* The number of bits to read to add to the baseline. */ + unsigned char basebits; + /* The number of bits to read to determine the next state. */ + unsigned char bits; + /* Add the bits to this base to get the next state. */ + uint16_t base; +}; + +/* Convert the literal length FSE table FSE_TABLE to an FSE baseline table at + BASELINE_TABLE. Note that FSE_TABLE and BASELINE_TABLE will overlap. */ + +static int +elf_zstd_make_literal_baseline_fse ( + const struct elf_zstd_fse_entry *fse_table, + int table_bits, + struct elf_zstd_fse_baseline_entry *baseline_table) +{ + size_t count; + const struct elf_zstd_fse_entry *pfse; + struct elf_zstd_fse_baseline_entry *pbaseline; + + /* Convert backward to avoid overlap. */ + + count = 1U << table_bits; + pfse = fse_table + count; + pbaseline = baseline_table + count; + while (pfse > fse_table) + { + unsigned char symbol; + unsigned char bits; + uint16_t base; + + --pfse; + --pbaseline; + symbol = pfse->symbol; + bits = pfse->bits; + base = pfse->base; + if (symbol < ZSTD_LITERAL_LENGTH_BASELINE_OFFSET) + { + pbaseline->baseline = (uint32_t)symbol; + pbaseline->basebits = 0; + } + else + { + unsigned int idx; + uint32_t basebits; + + if (unlikely (symbol > 35)) + { + elf_uncompress_failed (); + return 0; + } + idx = symbol - ZSTD_LITERAL_LENGTH_BASELINE_OFFSET; + basebits = elf_zstd_literal_length_base[idx]; + pbaseline->baseline = ZSTD_DECODE_BASELINE(basebits); + pbaseline->basebits = ZSTD_DECODE_BASEBITS(basebits); + } + pbaseline->bits = bits; + pbaseline->base = base; + } + + return 1; +} + +/* Convert the offset length FSE table FSE_TABLE to an FSE baseline table at + BASELINE_TABLE. Note that FSE_TABLE and BASELINE_TABLE will overlap. */ + +static int +elf_zstd_make_offset_baseline_fse ( + const struct elf_zstd_fse_entry *fse_table, + int table_bits, + struct elf_zstd_fse_baseline_entry *baseline_table) +{ + size_t count; + const struct elf_zstd_fse_entry *pfse; + struct elf_zstd_fse_baseline_entry *pbaseline; + + /* Convert backward to avoid overlap. */ + + count = 1U << table_bits; + pfse = fse_table + count; + pbaseline = baseline_table + count; + while (pfse > fse_table) + { + unsigned char symbol; + unsigned char bits; + uint16_t base; + + --pfse; + --pbaseline; + symbol = pfse->symbol; + bits = pfse->bits; + base = pfse->base; + if (unlikely (symbol > 31)) + { + elf_uncompress_failed (); + return 0; + } + + /* The simple way to write this is + + pbaseline->baseline = (uint32_t)1 << symbol; + pbaseline->basebits = symbol; + + That will give us an offset value that corresponds to the one + described in the RFC. However, for offset values > 3, we have to + subtract 3. And for offset values 1, 2, 3 we use a repeated offset. + The baseline is always a power of 2, and is never 0, so for these low + values we will see one entry that is baseline 1, basebits 0, and one + entry that is baseline 2, basebits 1. All other entries will have + baseline >= 4 and basebits >= 2. + + So we can check for RFC offset <= 3 by checking for basebits <= 1. + And that means that we can subtract 3 here and not worry about doing + it in the hot loop. */ + + pbaseline->baseline = (uint32_t)1 << symbol; + if (symbol >= 2) + pbaseline->baseline -= 3; + pbaseline->basebits = symbol; + pbaseline->bits = bits; + pbaseline->base = base; + } + + return 1; +} + +/* Convert the match length FSE table FSE_TABLE to an FSE baseline table at + BASELINE_TABLE. Note that FSE_TABLE and BASELINE_TABLE will overlap. */ + +static int +elf_zstd_make_match_baseline_fse ( + const struct elf_zstd_fse_entry *fse_table, + int table_bits, + struct elf_zstd_fse_baseline_entry *baseline_table) +{ + size_t count; + const struct elf_zstd_fse_entry *pfse; + struct elf_zstd_fse_baseline_entry *pbaseline; + + /* Convert backward to avoid overlap. */ + + count = 1U << table_bits; + pfse = fse_table + count; + pbaseline = baseline_table + count; + while (pfse > fse_table) + { + unsigned char symbol; + unsigned char bits; + uint16_t base; + + --pfse; + --pbaseline; + symbol = pfse->symbol; + bits = pfse->bits; + base = pfse->base; + if (symbol < ZSTD_MATCH_LENGTH_BASELINE_OFFSET) + { + pbaseline->baseline = (uint32_t)symbol + 3; + pbaseline->basebits = 0; + } + else + { + unsigned int idx; + uint32_t basebits; + + if (unlikely (symbol > 52)) + { + elf_uncompress_failed (); + return 0; + } + idx = symbol - ZSTD_MATCH_LENGTH_BASELINE_OFFSET; + basebits = elf_zstd_match_length_base[idx]; + pbaseline->baseline = ZSTD_DECODE_BASELINE(basebits); + pbaseline->basebits = ZSTD_DECODE_BASEBITS(basebits); + } + pbaseline->bits = bits; + pbaseline->base = base; + } + + return 1; +} + +#ifdef BACKTRACE_GENERATE_ZSTD_FSE_TABLES + +/* Used to generate the predefined FSE decoding tables for zstd. */ + +#include + +/* These values are straight from RFC 8878. */ + +static int16_t lit[36] = +{ + 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, + -1,-1,-1,-1 +}; + +static int16_t match[53] = +{ + 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1, + -1,-1,-1,-1,-1 +}; + +static int16_t offset[29] = +{ + 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 +}; + +static uint16_t next[256]; + +static void +print_table (const struct elf_zstd_fse_baseline_entry *table, size_t size) +{ + size_t i; + + printf ("{\n"); + for (i = 0; i < size; i += 3) + { + int j; + + printf (" "); + for (j = 0; j < 3 && i + j < size; ++j) + printf (" { %u, %d, %d, %d },", table[i + j].baseline, + table[i + j].basebits, table[i + j].bits, + table[i + j].base); + printf ("\n"); + } + printf ("};\n"); +} + +int +main () +{ + struct elf_zstd_fse_entry lit_table[64]; + struct elf_zstd_fse_baseline_entry lit_baseline[64]; + struct elf_zstd_fse_entry match_table[64]; + struct elf_zstd_fse_baseline_entry match_baseline[64]; + struct elf_zstd_fse_entry offset_table[32]; + struct elf_zstd_fse_baseline_entry offset_baseline[32]; + + if (!elf_zstd_build_fse (lit, sizeof lit / sizeof lit[0], next, + 6, lit_table)) + { + fprintf (stderr, "elf_zstd_build_fse failed\n"); + exit (EXIT_FAILURE); + } + + if (!elf_zstd_make_literal_baseline_fse (lit_table, 6, lit_baseline)) + { + fprintf (stderr, "elf_zstd_make_literal_baseline_fse failed\n"); + exit (EXIT_FAILURE); + } + + printf ("static const struct elf_zstd_fse_baseline_entry " + "elf_zstd_lit_table[64] =\n"); + print_table (lit_baseline, + sizeof lit_baseline / sizeof lit_baseline[0]); + printf ("\n"); + + if (!elf_zstd_build_fse (match, sizeof match / sizeof match[0], next, + 6, match_table)) + { + fprintf (stderr, "elf_zstd_build_fse failed\n"); + exit (EXIT_FAILURE); + } + + if (!elf_zstd_make_match_baseline_fse (match_table, 6, match_baseline)) + { + fprintf (stderr, "elf_zstd_make_match_baseline_fse failed\n"); + exit (EXIT_FAILURE); + } + + printf ("static const struct elf_zstd_fse_baseline_entry " + "elf_zstd_match_table[64] =\n"); + print_table (match_baseline, + sizeof match_baseline / sizeof match_baseline[0]); + printf ("\n"); + + if (!elf_zstd_build_fse (offset, sizeof offset / sizeof offset[0], next, + 5, offset_table)) + { + fprintf (stderr, "elf_zstd_build_fse failed\n"); + exit (EXIT_FAILURE); + } + + if (!elf_zstd_make_offset_baseline_fse (offset_table, 5, offset_baseline)) + { + fprintf (stderr, "elf_zstd_make_offset_baseline_fse failed\n"); + exit (EXIT_FAILURE); + } + + printf ("static const struct elf_zstd_fse_baseline_entry " + "elf_zstd_offset_table[32] =\n"); + print_table (offset_baseline, + sizeof offset_baseline / sizeof offset_baseline[0]); + printf ("\n"); + + return 0; +} + +#endif + +/* The fixed tables generated by the #ifdef'ed out main function + above. */ + +static const struct elf_zstd_fse_baseline_entry elf_zstd_lit_table[64] = +{ + { 0, 0, 4, 0 }, { 0, 0, 4, 16 }, { 1, 0, 5, 32 }, + { 3, 0, 5, 0 }, { 4, 0, 5, 0 }, { 6, 0, 5, 0 }, + { 7, 0, 5, 0 }, { 9, 0, 5, 0 }, { 10, 0, 5, 0 }, + { 12, 0, 5, 0 }, { 14, 0, 6, 0 }, { 16, 1, 5, 0 }, + { 20, 1, 5, 0 }, { 22, 1, 5, 0 }, { 28, 2, 5, 0 }, + { 32, 3, 5, 0 }, { 48, 4, 5, 0 }, { 64, 6, 5, 32 }, + { 128, 7, 5, 0 }, { 256, 8, 6, 0 }, { 1024, 10, 6, 0 }, + { 4096, 12, 6, 0 }, { 0, 0, 4, 32 }, { 1, 0, 4, 0 }, + { 2, 0, 5, 0 }, { 4, 0, 5, 32 }, { 5, 0, 5, 0 }, + { 7, 0, 5, 32 }, { 8, 0, 5, 0 }, { 10, 0, 5, 32 }, + { 11, 0, 5, 0 }, { 13, 0, 6, 0 }, { 16, 1, 5, 32 }, + { 18, 1, 5, 0 }, { 22, 1, 5, 32 }, { 24, 2, 5, 0 }, + { 32, 3, 5, 32 }, { 40, 3, 5, 0 }, { 64, 6, 4, 0 }, + { 64, 6, 4, 16 }, { 128, 7, 5, 32 }, { 512, 9, 6, 0 }, + { 2048, 11, 6, 0 }, { 0, 0, 4, 48 }, { 1, 0, 4, 16 }, + { 2, 0, 5, 32 }, { 3, 0, 5, 32 }, { 5, 0, 5, 32 }, + { 6, 0, 5, 32 }, { 8, 0, 5, 32 }, { 9, 0, 5, 32 }, + { 11, 0, 5, 32 }, { 12, 0, 5, 32 }, { 15, 0, 6, 0 }, + { 18, 1, 5, 32 }, { 20, 1, 5, 32 }, { 24, 2, 5, 32 }, + { 28, 2, 5, 32 }, { 40, 3, 5, 32 }, { 48, 4, 5, 32 }, + { 65536, 16, 6, 0 }, { 32768, 15, 6, 0 }, { 16384, 14, 6, 0 }, + { 8192, 13, 6, 0 }, +}; + +static const struct elf_zstd_fse_baseline_entry elf_zstd_match_table[64] = +{ + { 3, 0, 6, 0 }, { 4, 0, 4, 0 }, { 5, 0, 5, 32 }, + { 6, 0, 5, 0 }, { 8, 0, 5, 0 }, { 9, 0, 5, 0 }, + { 11, 0, 5, 0 }, { 13, 0, 6, 0 }, { 16, 0, 6, 0 }, + { 19, 0, 6, 0 }, { 22, 0, 6, 0 }, { 25, 0, 6, 0 }, + { 28, 0, 6, 0 }, { 31, 0, 6, 0 }, { 34, 0, 6, 0 }, + { 37, 1, 6, 0 }, { 41, 1, 6, 0 }, { 47, 2, 6, 0 }, + { 59, 3, 6, 0 }, { 83, 4, 6, 0 }, { 131, 7, 6, 0 }, + { 515, 9, 6, 0 }, { 4, 0, 4, 16 }, { 5, 0, 4, 0 }, + { 6, 0, 5, 32 }, { 7, 0, 5, 0 }, { 9, 0, 5, 32 }, + { 10, 0, 5, 0 }, { 12, 0, 6, 0 }, { 15, 0, 6, 0 }, + { 18, 0, 6, 0 }, { 21, 0, 6, 0 }, { 24, 0, 6, 0 }, + { 27, 0, 6, 0 }, { 30, 0, 6, 0 }, { 33, 0, 6, 0 }, + { 35, 1, 6, 0 }, { 39, 1, 6, 0 }, { 43, 2, 6, 0 }, + { 51, 3, 6, 0 }, { 67, 4, 6, 0 }, { 99, 5, 6, 0 }, + { 259, 8, 6, 0 }, { 4, 0, 4, 32 }, { 4, 0, 4, 48 }, + { 5, 0, 4, 16 }, { 7, 0, 5, 32 }, { 8, 0, 5, 32 }, + { 10, 0, 5, 32 }, { 11, 0, 5, 32 }, { 14, 0, 6, 0 }, + { 17, 0, 6, 0 }, { 20, 0, 6, 0 }, { 23, 0, 6, 0 }, + { 26, 0, 6, 0 }, { 29, 0, 6, 0 }, { 32, 0, 6, 0 }, + { 65539, 16, 6, 0 }, { 32771, 15, 6, 0 }, { 16387, 14, 6, 0 }, + { 8195, 13, 6, 0 }, { 4099, 12, 6, 0 }, { 2051, 11, 6, 0 }, + { 1027, 10, 6, 0 }, +}; + +static const struct elf_zstd_fse_baseline_entry elf_zstd_offset_table[32] = +{ + { 1, 0, 5, 0 }, { 61, 6, 4, 0 }, { 509, 9, 5, 0 }, + { 32765, 15, 5, 0 }, { 2097149, 21, 5, 0 }, { 5, 3, 5, 0 }, + { 125, 7, 4, 0 }, { 4093, 12, 5, 0 }, { 262141, 18, 5, 0 }, + { 8388605, 23, 5, 0 }, { 29, 5, 5, 0 }, { 253, 8, 4, 0 }, + { 16381, 14, 5, 0 }, { 1048573, 20, 5, 0 }, { 1, 2, 5, 0 }, + { 125, 7, 4, 16 }, { 2045, 11, 5, 0 }, { 131069, 17, 5, 0 }, + { 4194301, 22, 5, 0 }, { 13, 4, 5, 0 }, { 253, 8, 4, 16 }, + { 8189, 13, 5, 0 }, { 524285, 19, 5, 0 }, { 2, 1, 5, 0 }, + { 61, 6, 4, 16 }, { 1021, 10, 5, 0 }, { 65533, 16, 5, 0 }, + { 268435453, 28, 5, 0 }, { 134217725, 27, 5, 0 }, { 67108861, 26, 5, 0 }, + { 33554429, 25, 5, 0 }, { 16777213, 24, 5, 0 }, +}; + +/* Read a zstd Huffman table and build the decoding table in *TABLE, reading + and updating *PPIN. This sets *PTABLE_BITS to the number of bits of the + table, such that the table length is 1 << *TABLE_BITS. ZDEBUG_TABLE is + scratch space; it must be enough for 512 uint16_t values + 256 32-bit values + (2048 bytes). Returns 1 on success, 0 on error. */ + +static int +elf_zstd_read_huff (const unsigned char **ppin, const unsigned char *pinend, + uint16_t *zdebug_table, uint16_t *table, int *ptable_bits) +{ + const unsigned char *pin; + unsigned char hdr; + unsigned char *weights; + size_t count; + uint32_t *weight_mark; + size_t i; + uint32_t weight_mask; + size_t table_bits; + + pin = *ppin; + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + hdr = *pin; + ++pin; + + weights = (unsigned char *) zdebug_table; + + if (hdr < 128) + { + /* Table is compressed using FSE. */ + + struct elf_zstd_fse_entry *fse_table; + int fse_table_bits; + uint16_t *scratch; + const unsigned char *pfse; + const unsigned char *pback; + uint64_t val; + unsigned int bits; + unsigned int state1, state2; + + /* SCRATCH is used temporarily by elf_zstd_read_fse. It overlaps + WEIGHTS. */ + scratch = zdebug_table; + fse_table = (struct elf_zstd_fse_entry *) (scratch + 512); + fse_table_bits = 6; + + pfse = pin; + if (!elf_zstd_read_fse (&pfse, pinend, scratch, 255, fse_table, + &fse_table_bits)) + return 0; + + if (unlikely (pin + hdr > pinend)) + { + elf_uncompress_failed (); + return 0; + } + + /* We no longer need SCRATCH. Start recording weights. We need up to + 256 bytes of weights and 64 bytes of rank counts, so it won't overlap + FSE_TABLE. */ + + pback = pin + hdr - 1; + + if (!elf_fetch_backward_init (&pback, pfse, &val, &bits)) + return 0; + + bits -= fse_table_bits; + state1 = (val >> bits) & ((1U << fse_table_bits) - 1); + bits -= fse_table_bits; + state2 = (val >> bits) & ((1U << fse_table_bits) - 1); + + /* There are two independent FSE streams, tracked by STATE1 and STATE2. + We decode them alternately. */ + + count = 0; + while (1) + { + struct elf_zstd_fse_entry *pt; + uint64_t v; + + pt = &fse_table[state1]; + + if (unlikely (pin < pinend) && bits < pt->bits) + { + if (unlikely (count >= 254)) + { + elf_uncompress_failed (); + return 0; + } + weights[count] = (unsigned char) pt->symbol; + weights[count + 1] = (unsigned char) fse_table[state2].symbol; + count += 2; + break; + } + + if (unlikely (pt->bits == 0)) + v = 0; + else + { + if (!elf_fetch_bits_backward (&pback, pfse, &val, &bits)) + return 0; + + bits -= pt->bits; + v = (val >> bits) & (((uint64_t)1 << pt->bits) - 1); + } + + state1 = pt->base + v; + + if (unlikely (count >= 255)) + { + elf_uncompress_failed (); + return 0; + } + + weights[count] = pt->symbol; + ++count; + + pt = &fse_table[state2]; + + if (unlikely (pin < pinend && bits < pt->bits)) + { + if (unlikely (count >= 254)) + { + elf_uncompress_failed (); + return 0; + } + weights[count] = (unsigned char) pt->symbol; + weights[count + 1] = (unsigned char) fse_table[state1].symbol; + count += 2; + break; + } + + if (unlikely (pt->bits == 0)) + v = 0; + else + { + if (!elf_fetch_bits_backward (&pback, pfse, &val, &bits)) + return 0; + + bits -= pt->bits; + v = (val >> bits) & (((uint64_t)1 << pt->bits) - 1); + } + + state2 = pt->base + v; + + if (unlikely (count >= 255)) + { + elf_uncompress_failed (); + return 0; + } + + weights[count] = pt->symbol; + ++count; + } + + pin += hdr; + } + else + { + /* Table is not compressed. Each weight is 4 bits. */ + + count = hdr - 127; + if (unlikely (pin + ((count + 1) / 2) >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + for (i = 0; i < count; i += 2) + { + unsigned char b; + + b = *pin; + ++pin; + weights[i] = b >> 4; + weights[i + 1] = b & 0xf; + } + } + + weight_mark = (uint32_t *) (weights + 256); + memset (weight_mark, 0, 13 * sizeof (uint32_t)); + weight_mask = 0; + for (i = 0; i < count; ++i) + { + unsigned char w; + + w = weights[i]; + if (unlikely (w > 12)) + { + elf_uncompress_failed (); + return 0; + } + ++weight_mark[w]; + if (w > 0) + weight_mask += 1U << (w - 1); + } + if (unlikely (weight_mask == 0)) + { + elf_uncompress_failed (); + return 0; + } + + table_bits = 32 - __builtin_clz (weight_mask); + if (unlikely (table_bits > 11)) + { + elf_uncompress_failed (); + return 0; + } + + /* Work out the last weight value, which is omitted because the weights must + sum to a power of two. */ + { + uint32_t left; + uint32_t high_bit; + + left = ((uint32_t)1 << table_bits) - weight_mask; + if (left == 0) + { + elf_uncompress_failed (); + return 0; + } + high_bit = 31 - __builtin_clz (left); + if (((uint32_t)1 << high_bit) != left) + { + elf_uncompress_failed (); + return 0; + } + + if (unlikely (count >= 256)) + { + elf_uncompress_failed (); + return 0; + } + + weights[count] = high_bit + 1; + ++count; + ++weight_mark[high_bit + 1]; + } + + if (weight_mark[1] < 2 || (weight_mark[1] & 1) != 0) + { + elf_uncompress_failed (); + return 0; + } + + /* Change WEIGHT_MARK from a count of weights to the index of the first + symbol for that weight. We shift the indexes to also store how many we + have seen so far, below. */ + { + uint32_t next; + + next = 0; + for (i = 0; i < table_bits; ++i) + { + uint32_t cur; + + cur = next; + next += weight_mark[i + 1] << i; + weight_mark[i + 1] = cur; + } + } + + for (i = 0; i < count; ++i) + { + unsigned char weight; + uint32_t length; + uint16_t tval; + size_t start; + uint32_t j; + + weight = weights[i]; + if (weight == 0) + continue; + + length = 1U << (weight - 1); + tval = (i << 8) | (table_bits + 1 - weight); + start = weight_mark[weight]; + for (j = 0; j < length; ++j) + table[start + j] = tval; + weight_mark[weight] += length; + } + + *ppin = pin; + *ptable_bits = (int)table_bits; + + return 1; +} + +/* Read and decompress the literals and store them ending at POUTEND. This + works because we are going to use all the literals in the output, so they + must fit into the output buffer. HUFFMAN_TABLE, and PHUFFMAN_TABLE_BITS + store the Huffman table across calls. SCRATCH is used to read a Huffman + table. Store the start of the decompressed literals in *PPLIT. Update + *PPIN. Return 1 on success, 0 on error. */ + +static int +elf_zstd_read_literals (const unsigned char **ppin, + const unsigned char *pinend, + unsigned char *pout, + unsigned char *poutend, + uint16_t *scratch, + uint16_t *huffman_table, + int *phuffman_table_bits, + unsigned char **pplit) +{ + const unsigned char *pin; + unsigned char *plit; + unsigned char hdr; + uint32_t regenerated_size; + uint32_t compressed_size; + int streams; + uint32_t total_streams_size; + unsigned int huffman_table_bits; + uint64_t huffman_mask; + + pin = *ppin; + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + hdr = *pin; + ++pin; + + if ((hdr & 3) == 0 || (hdr & 3) == 1) + { + int raw; + + /* Raw_Literals_Block or RLE_Literals_Block */ + + raw = (hdr & 3) == 0; + + switch ((hdr >> 2) & 3) + { + case 0: case 2: + regenerated_size = hdr >> 3; + break; + case 1: + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + regenerated_size = (hdr >> 4) + ((uint32_t)(*pin) << 4); + ++pin; + break; + case 3: + if (unlikely (pin + 1 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + regenerated_size = ((hdr >> 4) + + ((uint32_t)*pin << 4) + + ((uint32_t)pin[1] << 12)); + pin += 2; + break; + default: + elf_uncompress_failed (); + return 0; + } + + if (unlikely ((size_t)(poutend - pout) < regenerated_size)) + { + elf_uncompress_failed (); + return 0; + } + + plit = poutend - regenerated_size; + + if (raw) + { + if (unlikely (pin + regenerated_size >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + memcpy (plit, pin, regenerated_size); + pin += regenerated_size; + } + else + { + if (pin >= pinend) + { + elf_uncompress_failed (); + return 0; + } + memset (plit, *pin, regenerated_size); + ++pin; + } + + *ppin = pin; + *pplit = plit; + + return 1; + } + + /* Compressed_Literals_Block or Treeless_Literals_Block */ + + switch ((hdr >> 2) & 3) + { + case 0: case 1: + if (unlikely (pin + 1 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + regenerated_size = (hdr >> 4) | ((uint32_t)(*pin & 0x3f) << 4); + compressed_size = (uint32_t)*pin >> 6 | ((uint32_t)pin[1] << 2); + pin += 2; + streams = ((hdr >> 2) & 3) == 0 ? 1 : 4; + break; + case 2: + if (unlikely (pin + 2 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + regenerated_size = (((uint32_t)hdr >> 4) + | ((uint32_t)*pin << 4) + | (((uint32_t)pin[1] & 3) << 12)); + compressed_size = (((uint32_t)pin[1] >> 2) + | ((uint32_t)pin[2] << 6)); + pin += 3; + streams = 4; + break; + case 3: + if (unlikely (pin + 3 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + regenerated_size = (((uint32_t)hdr >> 4) + | ((uint32_t)*pin << 4) + | (((uint32_t)pin[1] & 0x3f) << 12)); + compressed_size = (((uint32_t)pin[1] >> 6) + | ((uint32_t)pin[2] << 2) + | ((uint32_t)pin[3] << 10)); + pin += 4; + streams = 4; + break; + default: + elf_uncompress_failed (); + return 0; + } + + if (unlikely (pin + compressed_size > pinend)) + { + elf_uncompress_failed (); + return 0; + } + + pinend = pin + compressed_size; + *ppin = pinend; + + if (unlikely ((size_t)(poutend - pout) < regenerated_size)) + { + elf_uncompress_failed (); + return 0; + } + + plit = poutend - regenerated_size; + + *pplit = plit; + + total_streams_size = compressed_size; + if ((hdr & 3) == 2) + { + const unsigned char *ptable; + + /* Compressed_Literals_Block. Read Huffman tree. */ + + ptable = pin; + if (!elf_zstd_read_huff (&ptable, pinend, scratch, huffman_table, + phuffman_table_bits)) + return 0; + + if (unlikely (total_streams_size < (size_t)(ptable - pin))) + { + elf_uncompress_failed (); + return 0; + } + + total_streams_size -= ptable - pin; + pin = ptable; + } + else + { + /* Treeless_Literals_Block. Reuse previous Huffman tree. */ + if (unlikely (*phuffman_table_bits == 0)) + { + elf_uncompress_failed (); + return 0; + } + } + + /* Decompress COMPRESSED_SIZE bytes of data at PIN using the huffman table, + storing REGENERATED_SIZE bytes of decompressed data at PLIT. */ + + huffman_table_bits = (unsigned int)*phuffman_table_bits; + huffman_mask = ((uint64_t)1 << huffman_table_bits) - 1; + + if (streams == 1) + { + const unsigned char *pback; + const unsigned char *pbackend; + uint64_t val; + unsigned int bits; + uint32_t i; + + pback = pin + total_streams_size - 1; + pbackend = pin; + if (!elf_fetch_backward_init (&pback, pbackend, &val, &bits)) + return 0; + + /* This is one of the inner loops of the decompression algorithm, so we + put some effort into optimization. We can't get more than 64 bytes + from a single call to elf_fetch_bits_backward, and we can't subtract + more than 11 bits at a time. */ + + if (regenerated_size >= 64) + { + unsigned char *plitstart; + unsigned char *plitstop; + + plitstart = plit; + plitstop = plit + regenerated_size - 64; + while (plit < plitstop) + { + uint16_t t; + + if (!elf_fetch_bits_backward (&pback, pbackend, &val, &bits)) + return 0; + + if (bits < 16) + break; + + while (bits >= 33) + { + t = huffman_table[(val >> (bits - huffman_table_bits)) + & huffman_mask]; + *plit = t >> 8; + ++plit; + bits -= t & 0xff; + + t = huffman_table[(val >> (bits - huffman_table_bits)) + & huffman_mask]; + *plit = t >> 8; + ++plit; + bits -= t & 0xff; + + t = huffman_table[(val >> (bits - huffman_table_bits)) + & huffman_mask]; + *plit = t >> 8; + ++plit; + bits -= t & 0xff; + } + + while (bits > 11) + { + t = huffman_table[(val >> (bits - huffman_table_bits)) + & huffman_mask]; + *plit = t >> 8; + ++plit; + bits -= t & 0xff; + } + } + + regenerated_size -= plit - plitstart; + } + + for (i = 0; i < regenerated_size; ++i) + { + uint16_t t; + + if (!elf_fetch_bits_backward (&pback, pbackend, &val, &bits)) + return 0; + + if (unlikely (bits < huffman_table_bits)) + { + t = huffman_table[(val << (huffman_table_bits - bits)) + & huffman_mask]; + if (unlikely (bits < (t & 0xff))) + { + elf_uncompress_failed (); + return 0; + } + } + else + t = huffman_table[(val >> (bits - huffman_table_bits)) + & huffman_mask]; + + *plit = t >> 8; + ++plit; + bits -= t & 0xff; + } + + return 1; + } + + { + uint32_t stream_size1, stream_size2, stream_size3, stream_size4; + uint32_t tot; + const unsigned char *pback1, *pback2, *pback3, *pback4; + const unsigned char *pbackend1, *pbackend2, *pbackend3, *pbackend4; + uint64_t val1, val2, val3, val4; + unsigned int bits1, bits2, bits3, bits4; + unsigned char *plit1, *plit2, *plit3, *plit4; + uint32_t regenerated_stream_size; + uint32_t regenerated_stream_size4; + uint16_t t1, t2, t3, t4; + uint32_t i; + uint32_t limit; + + /* Read jump table. */ + if (unlikely (pin + 5 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + stream_size1 = (uint32_t)*pin | ((uint32_t)pin[1] << 8); + pin += 2; + stream_size2 = (uint32_t)*pin | ((uint32_t)pin[1] << 8); + pin += 2; + stream_size3 = (uint32_t)*pin | ((uint32_t)pin[1] << 8); + pin += 2; + tot = stream_size1 + stream_size2 + stream_size3; + if (unlikely (tot > total_streams_size - 6)) + { + elf_uncompress_failed (); + return 0; + } + stream_size4 = total_streams_size - 6 - tot; + + pback1 = pin + stream_size1 - 1; + pbackend1 = pin; + + pback2 = pback1 + stream_size2; + pbackend2 = pback1 + 1; + + pback3 = pback2 + stream_size3; + pbackend3 = pback2 + 1; + + pback4 = pback3 + stream_size4; + pbackend4 = pback3 + 1; + + if (!elf_fetch_backward_init (&pback1, pbackend1, &val1, &bits1)) + return 0; + if (!elf_fetch_backward_init (&pback2, pbackend2, &val2, &bits2)) + return 0; + if (!elf_fetch_backward_init (&pback3, pbackend3, &val3, &bits3)) + return 0; + if (!elf_fetch_backward_init (&pback4, pbackend4, &val4, &bits4)) + return 0; + + regenerated_stream_size = (regenerated_size + 3) / 4; + + plit1 = plit; + plit2 = plit1 + regenerated_stream_size; + plit3 = plit2 + regenerated_stream_size; + plit4 = plit3 + regenerated_stream_size; + + regenerated_stream_size4 = regenerated_size - regenerated_stream_size * 3; + + /* We can't get more than 64 literal bytes from a single call to + elf_fetch_bits_backward. The fourth stream can be up to 3 bytes less, + so use as the limit. */ + + limit = regenerated_stream_size4 <= 64 ? 0 : regenerated_stream_size4 - 64; + i = 0; + while (i < limit) + { + if (!elf_fetch_bits_backward (&pback1, pbackend1, &val1, &bits1)) + return 0; + if (!elf_fetch_bits_backward (&pback2, pbackend2, &val2, &bits2)) + return 0; + if (!elf_fetch_bits_backward (&pback3, pbackend3, &val3, &bits3)) + return 0; + if (!elf_fetch_bits_backward (&pback4, pbackend4, &val4, &bits4)) + return 0; + + /* We can't subtract more than 11 bits at a time. */ + + do + { + t1 = huffman_table[(val1 >> (bits1 - huffman_table_bits)) + & huffman_mask]; + t2 = huffman_table[(val2 >> (bits2 - huffman_table_bits)) + & huffman_mask]; + t3 = huffman_table[(val3 >> (bits3 - huffman_table_bits)) + & huffman_mask]; + t4 = huffman_table[(val4 >> (bits4 - huffman_table_bits)) + & huffman_mask]; + + *plit1 = t1 >> 8; + ++plit1; + bits1 -= t1 & 0xff; + + *plit2 = t2 >> 8; + ++plit2; + bits2 -= t2 & 0xff; + + *plit3 = t3 >> 8; + ++plit3; + bits3 -= t3 & 0xff; + + *plit4 = t4 >> 8; + ++plit4; + bits4 -= t4 & 0xff; + + ++i; + } + while (bits1 > 11 && bits2 > 11 && bits3 > 11 && bits4 > 11); + } + + while (i < regenerated_stream_size) + { + int use4; + + use4 = i < regenerated_stream_size4; + + if (!elf_fetch_bits_backward (&pback1, pbackend1, &val1, &bits1)) + return 0; + if (!elf_fetch_bits_backward (&pback2, pbackend2, &val2, &bits2)) + return 0; + if (!elf_fetch_bits_backward (&pback3, pbackend3, &val3, &bits3)) + return 0; + if (use4) + { + if (!elf_fetch_bits_backward (&pback4, pbackend4, &val4, &bits4)) + return 0; + } + + if (unlikely (bits1 < huffman_table_bits)) + { + t1 = huffman_table[(val1 << (huffman_table_bits - bits1)) + & huffman_mask]; + if (unlikely (bits1 < (t1 & 0xff))) + { + elf_uncompress_failed (); + return 0; + } + } + else + t1 = huffman_table[(val1 >> (bits1 - huffman_table_bits)) + & huffman_mask]; + + if (unlikely (bits2 < huffman_table_bits)) + { + t2 = huffman_table[(val2 << (huffman_table_bits - bits2)) + & huffman_mask]; + if (unlikely (bits2 < (t2 & 0xff))) + { + elf_uncompress_failed (); + return 0; + } + } + else + t2 = huffman_table[(val2 >> (bits2 - huffman_table_bits)) + & huffman_mask]; + + if (unlikely (bits3 < huffman_table_bits)) + { + t3 = huffman_table[(val3 << (huffman_table_bits - bits3)) + & huffman_mask]; + if (unlikely (bits3 < (t3 & 0xff))) + { + elf_uncompress_failed (); + return 0; + } + } + else + t3 = huffman_table[(val3 >> (bits3 - huffman_table_bits)) + & huffman_mask]; + + if (use4) + { + if (unlikely (bits4 < huffman_table_bits)) + { + t4 = huffman_table[(val4 << (huffman_table_bits - bits4)) + & huffman_mask]; + if (unlikely (bits4 < (t4 & 0xff))) + { + elf_uncompress_failed (); + return 0; + } + } + else + t4 = huffman_table[(val4 >> (bits4 - huffman_table_bits)) + & huffman_mask]; + + *plit4 = t4 >> 8; + ++plit4; + bits4 -= t4 & 0xff; + } + + *plit1 = t1 >> 8; + ++plit1; + bits1 -= t1 & 0xff; + + *plit2 = t2 >> 8; + ++plit2; + bits2 -= t2 & 0xff; + + *plit3 = t3 >> 8; + ++plit3; + bits3 -= t3 & 0xff; + + ++i; + } + } + + return 1; +} + +/* The information used to decompress a sequence code, which can be a literal + length, an offset, or a match length. */ + +struct elf_zstd_seq_decode +{ + const struct elf_zstd_fse_baseline_entry *table; + int table_bits; +}; + +/* Unpack a sequence code compression mode. */ + +static int +elf_zstd_unpack_seq_decode (int mode, + const unsigned char **ppin, + const unsigned char *pinend, + const struct elf_zstd_fse_baseline_entry *predef, + int predef_bits, + uint16_t *scratch, + int maxidx, + struct elf_zstd_fse_baseline_entry *table, + int table_bits, + int (*conv)(const struct elf_zstd_fse_entry *, + int, + struct elf_zstd_fse_baseline_entry *), + struct elf_zstd_seq_decode *decode) +{ + switch (mode) + { + case 0: + decode->table = predef; + decode->table_bits = predef_bits; + break; + + case 1: + { + struct elf_zstd_fse_entry entry; + + if (unlikely (*ppin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + entry.symbol = **ppin; + ++*ppin; + entry.bits = 0; + entry.base = 0; + decode->table_bits = 0; + if (!conv (&entry, 0, table)) + return 0; + } + break; + + case 2: + { + struct elf_zstd_fse_entry *fse_table; + + /* We use the same space for the simple FSE table and the baseline + table. */ + fse_table = (struct elf_zstd_fse_entry *)table; + decode->table_bits = table_bits; + if (!elf_zstd_read_fse (ppin, pinend, scratch, maxidx, fse_table, + &decode->table_bits)) + return 0; + if (!conv (fse_table, decode->table_bits, table)) + return 0; + decode->table = table; + } + break; + + case 3: + if (unlikely (decode->table_bits == -1)) + { + elf_uncompress_failed (); + return 0; + } + break; + + default: + elf_uncompress_failed (); + return 0; + } + + return 1; +} + +/* Decompress a zstd stream from PIN/SIN to POUT/SOUT. Code based on RFC 8878. + Return 1 on success, 0 on error. */ + +static int +elf_zstd_decompress (const unsigned char *pin, size_t sin, + unsigned char *zdebug_table, unsigned char *pout, + size_t sout) +{ + const unsigned char *pinend; + unsigned char *poutstart; + unsigned char *poutend; + struct elf_zstd_seq_decode literal_decode; + struct elf_zstd_fse_baseline_entry *literal_fse_table; + struct elf_zstd_seq_decode match_decode; + struct elf_zstd_fse_baseline_entry *match_fse_table; + struct elf_zstd_seq_decode offset_decode; + struct elf_zstd_fse_baseline_entry *offset_fse_table; + uint16_t *huffman_table; + int huffman_table_bits; + uint32_t repeated_offset1; + uint32_t repeated_offset2; + uint32_t repeated_offset3; + uint16_t *scratch; + unsigned char hdr; + int has_checksum; + uint64_t content_size; + int last_block; + + pinend = pin + sin; + poutstart = pout; + poutend = pout + sout; + + literal_decode.table = NULL; + literal_decode.table_bits = -1; + literal_fse_table = ((struct elf_zstd_fse_baseline_entry *) + (zdebug_table + ZSTD_TABLE_LITERAL_FSE_OFFSET)); + + match_decode.table = NULL; + match_decode.table_bits = -1; + match_fse_table = ((struct elf_zstd_fse_baseline_entry *) + (zdebug_table + ZSTD_TABLE_MATCH_FSE_OFFSET)); + + offset_decode.table = NULL; + offset_decode.table_bits = -1; + offset_fse_table = ((struct elf_zstd_fse_baseline_entry *) + (zdebug_table + ZSTD_TABLE_OFFSET_FSE_OFFSET)); + huffman_table = ((uint16_t *) + (zdebug_table + ZSTD_TABLE_HUFFMAN_OFFSET)); + huffman_table_bits = 0; + scratch = ((uint16_t *) + (zdebug_table + ZSTD_TABLE_WORK_OFFSET)); + + repeated_offset1 = 1; + repeated_offset2 = 4; + repeated_offset3 = 8; + + if (unlikely (sin < 4)) + { + elf_uncompress_failed (); + return 0; + } + + /* These values are the zstd magic number. */ + if (unlikely (pin[0] != 0x28 + || pin[1] != 0xb5 + || pin[2] != 0x2f + || pin[3] != 0xfd)) + { + elf_uncompress_failed (); + return 0; + } + + pin += 4; + + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + + hdr = *pin++; + + /* We expect a single frame. */ + if (unlikely ((hdr & (1 << 5)) == 0)) + { + elf_uncompress_failed (); + return 0; + } + /* Reserved bit must be zero. */ + if (unlikely ((hdr & (1 << 3)) != 0)) + { + elf_uncompress_failed (); + return 0; + } + /* We do not expect a dictionary. */ + if (unlikely ((hdr & 3) != 0)) + { + elf_uncompress_failed (); + return 0; + } + has_checksum = (hdr & (1 << 2)) != 0; + switch (hdr >> 6) + { + case 0: + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + content_size = (uint64_t) *pin++; + break; + case 1: + if (unlikely (pin + 1 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + content_size = (((uint64_t) pin[0]) | (((uint64_t) pin[1]) << 8)) + 256; + pin += 2; + break; + case 2: + if (unlikely (pin + 3 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + content_size = ((uint64_t) pin[0] + | (((uint64_t) pin[1]) << 8) + | (((uint64_t) pin[2]) << 16) + | (((uint64_t) pin[3]) << 24)); + pin += 4; + break; + case 3: + if (unlikely (pin + 7 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + content_size = ((uint64_t) pin[0] + | (((uint64_t) pin[1]) << 8) + | (((uint64_t) pin[2]) << 16) + | (((uint64_t) pin[3]) << 24) + | (((uint64_t) pin[4]) << 32) + | (((uint64_t) pin[5]) << 40) + | (((uint64_t) pin[6]) << 48) + | (((uint64_t) pin[7]) << 56)); + pin += 8; + break; + default: + elf_uncompress_failed (); + return 0; + } + + if (unlikely (content_size != (size_t) content_size + || (size_t) content_size != sout)) + { + elf_uncompress_failed (); + return 0; + } + + last_block = 0; + while (!last_block) + { + uint32_t block_hdr; + int block_type; + uint32_t block_size; + + if (unlikely (pin + 2 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + block_hdr = ((uint32_t) pin[0] + | (((uint32_t) pin[1]) << 8) + | (((uint32_t) pin[2]) << 16)); + pin += 3; + + last_block = block_hdr & 1; + block_type = (block_hdr >> 1) & 3; + block_size = block_hdr >> 3; + + switch (block_type) + { + case 0: + /* Raw_Block */ + if (unlikely ((size_t) block_size > (size_t) (pinend - pin))) + { + elf_uncompress_failed (); + return 0; + } + if (unlikely ((size_t) block_size > (size_t) (poutend - pout))) + { + elf_uncompress_failed (); + return 0; + } + memcpy (pout, pin, block_size); + pout += block_size; + pin += block_size; + break; + + case 1: + /* RLE_Block */ + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + if (unlikely ((size_t) block_size > (size_t) (poutend - pout))) + { + elf_uncompress_failed (); + return 0; + } + memset (pout, *pin, block_size); + pout += block_size; + pin++; + break; + + case 2: + { + const unsigned char *pblockend; + unsigned char *plitstack; + unsigned char *plit; + uint32_t literal_count; + unsigned char seq_hdr; + size_t seq_count; + size_t seq; + const unsigned char *pback; + uint64_t val; + unsigned int bits; + unsigned int literal_state; + unsigned int offset_state; + unsigned int match_state; + + /* Compressed_Block */ + if (unlikely ((size_t) block_size > (size_t) (pinend - pin))) + { + elf_uncompress_failed (); + return 0; + } + + pblockend = pin + block_size; + + /* Read the literals into the end of the output space, and leave + PLIT pointing at them. */ + + if (!elf_zstd_read_literals (&pin, pblockend, pout, poutend, + scratch, huffman_table, + &huffman_table_bits, + &plitstack)) + return 0; + plit = plitstack; + literal_count = poutend - plit; + + seq_hdr = *pin; + pin++; + if (seq_hdr < 128) + seq_count = seq_hdr; + else if (seq_hdr < 255) + { + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + seq_count = ((seq_hdr - 128) << 8) + *pin; + pin++; + } + else + { + if (unlikely (pin + 1 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + seq_count = *pin + (pin[1] << 8) + 0x7f00; + pin += 2; + } + + if (seq_count > 0) + { + int (*pfn)(const struct elf_zstd_fse_entry *, + int, struct elf_zstd_fse_baseline_entry *); + + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + seq_hdr = *pin; + ++pin; + + pfn = elf_zstd_make_literal_baseline_fse; + if (!elf_zstd_unpack_seq_decode ((seq_hdr >> 6) & 3, + &pin, pinend, + &elf_zstd_lit_table[0], 6, + scratch, 35, + literal_fse_table, 9, pfn, + &literal_decode)) + return 0; + + pfn = elf_zstd_make_offset_baseline_fse; + if (!elf_zstd_unpack_seq_decode ((seq_hdr >> 4) & 3, + &pin, pinend, + &elf_zstd_offset_table[0], 5, + scratch, 31, + offset_fse_table, 8, pfn, + &offset_decode)) + return 0; + + pfn = elf_zstd_make_match_baseline_fse; + if (!elf_zstd_unpack_seq_decode ((seq_hdr >> 2) & 3, + &pin, pinend, + &elf_zstd_match_table[0], 6, + scratch, 52, + match_fse_table, 9, pfn, + &match_decode)) + return 0; + } + + pback = pblockend - 1; + if (!elf_fetch_backward_init (&pback, pin, &val, &bits)) + return 0; + + bits -= literal_decode.table_bits; + literal_state = ((val >> bits) + & ((1U << literal_decode.table_bits) - 1)); + + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= offset_decode.table_bits; + offset_state = ((val >> bits) + & ((1U << offset_decode.table_bits) - 1)); + + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= match_decode.table_bits; + match_state = ((val >> bits) + & ((1U << match_decode.table_bits) - 1)); + + seq = 0; + while (1) + { + const struct elf_zstd_fse_baseline_entry *pt; + uint32_t offset_basebits; + uint32_t offset_baseline; + uint32_t offset_bits; + uint32_t offset_base; + uint32_t offset; + uint32_t match_baseline; + uint32_t match_bits; + uint32_t match_base; + uint32_t match; + uint32_t literal_baseline; + uint32_t literal_bits; + uint32_t literal_base; + uint32_t literal; + uint32_t need; + uint32_t add; + + pt = &offset_decode.table[offset_state]; + offset_basebits = pt->basebits; + offset_baseline = pt->baseline; + offset_bits = pt->bits; + offset_base = pt->base; + + /* This case can be more than 16 bits, which is all that + elf_fetch_bits_backward promises. */ + need = offset_basebits; + add = 0; + if (unlikely (need > 16)) + { + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= 16; + add = (val >> bits) & ((1U << 16) - 1); + need -= 16; + add <<= need; + } + if (need > 0) + { + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= need; + add += (val >> bits) & ((1U << need) - 1); + } + + offset = offset_baseline + add; + + pt = &match_decode.table[match_state]; + need = pt->basebits; + match_baseline = pt->baseline; + match_bits = pt->bits; + match_base = pt->base; + + add = 0; + if (need > 0) + { + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= need; + add = (val >> bits) & ((1U << need) - 1); + } + + match = match_baseline + add; + + pt = &literal_decode.table[literal_state]; + need = pt->basebits; + literal_baseline = pt->baseline; + literal_bits = pt->bits; + literal_base = pt->base; + + add = 0; + if (need > 0) + { + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= need; + add = (val >> bits) & ((1U << need) - 1); + } + + literal = literal_baseline + add; + + /* See the comment in elf_zstd_make_offset_baseline_fse. */ + if (offset_basebits > 1) + { + repeated_offset3 = repeated_offset2; + repeated_offset2 = repeated_offset1; + repeated_offset1 = offset; + } + else + { + if (unlikely (literal == 0)) + ++offset; + switch (offset) + { + case 1: + offset = repeated_offset1; + break; + case 2: + offset = repeated_offset2; + repeated_offset2 = repeated_offset1; + repeated_offset1 = offset; + break; + case 3: + offset = repeated_offset3; + repeated_offset3 = repeated_offset2; + repeated_offset2 = repeated_offset1; + repeated_offset1 = offset; + break; + case 4: + offset = repeated_offset1 - 1; + repeated_offset3 = repeated_offset2; + repeated_offset2 = repeated_offset1; + repeated_offset1 = offset; + break; + } + } + + ++seq; + if (seq < seq_count) + { + uint32_t v; + + /* Update the three states. */ + + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + + need = literal_bits; + bits -= need; + v = (val >> bits) & (((uint32_t)1 << need) - 1); + + literal_state = literal_base + v; + + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + + need = match_bits; + bits -= need; + v = (val >> bits) & (((uint32_t)1 << need) - 1); + + match_state = match_base + v; + + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + + need = offset_bits; + bits -= need; + v = (val >> bits) & (((uint32_t)1 << need) - 1); + + offset_state = offset_base + v; + } + + /* The next sequence is now in LITERAL, OFFSET, MATCH. */ + + /* Copy LITERAL bytes from the literals. */ + + if (unlikely ((size_t)(poutend - pout) < literal)) + { + elf_uncompress_failed (); + return 0; + } + + if (unlikely (literal_count < literal)) + { + elf_uncompress_failed (); + return 0; + } + + literal_count -= literal; + + /* Often LITERAL is small, so handle small cases quickly. */ + switch (literal) + { + case 8: + *pout++ = *plit++; + /* FALLTHROUGH */ + case 7: + *pout++ = *plit++; + /* FALLTHROUGH */ + case 6: + *pout++ = *plit++; + /* FALLTHROUGH */ + case 5: + *pout++ = *plit++; + /* FALLTHROUGH */ + case 4: + *pout++ = *plit++; + /* FALLTHROUGH */ + case 3: + *pout++ = *plit++; + /* FALLTHROUGH */ + case 2: + *pout++ = *plit++; + /* FALLTHROUGH */ + case 1: + *pout++ = *plit++; + break; + + case 0: + break; + + default: + if (unlikely ((size_t)(plit - pout) < literal)) + { + uint32_t move; + + move = plit - pout; + while (literal > move) + { + memcpy (pout, plit, move); + pout += move; + plit += move; + literal -= move; + } + } + + memcpy (pout, plit, literal); + pout += literal; + plit += literal; + } + + if (match > 0) + { + /* Copy MATCH bytes from the decoded output at OFFSET. */ + + if (unlikely ((size_t)(poutend - pout) < match)) + { + elf_uncompress_failed (); + return 0; + } + + if (unlikely ((size_t)(pout - poutstart) < offset)) + { + elf_uncompress_failed (); + return 0; + } + + if (offset >= match) + { + memcpy (pout, pout - offset, match); + pout += match; + } + else + { + while (match > 0) + { + uint32_t copy; + + copy = match < offset ? match : offset; + memcpy (pout, pout - offset, copy); + match -= copy; + pout += copy; + } + } + } + + if (unlikely (seq >= seq_count)) + { + /* Copy remaining literals. */ + if (literal_count > 0 && plit != pout) + { + if (unlikely ((size_t)(poutend - pout) + < literal_count)) + { + elf_uncompress_failed (); + return 0; + } + + if ((size_t)(plit - pout) < literal_count) + { + uint32_t move; + + move = plit - pout; + while (literal_count > move) + { + memcpy (pout, plit, move); + pout += move; + plit += move; + literal_count -= move; + } + } + + memcpy (pout, plit, literal_count); + } + + pout += literal_count; + + break; + } + } + + pin = pblockend; + } + break; + + case 3: + default: + elf_uncompress_failed (); + return 0; + } + } + + if (has_checksum) + { + if (unlikely (pin + 4 > pinend)) + { + elf_uncompress_failed (); + return 0; + } + + /* We don't currently verify the checksum. Currently running GNU ld with + --compress-debug-sections=zstd does not seem to generate a + checksum. */ + + pin += 4; + } + + if (pin != pinend) + { + elf_uncompress_failed (); + return 0; + } + + return 1; +} + +#define ZDEBUG_TABLE_SIZE \ + (ZLIB_TABLE_SIZE > ZSTD_TABLE_SIZE ? ZLIB_TABLE_SIZE : ZSTD_TABLE_SIZE) + /* Uncompress the old compressed debug format, the one emitted by --compress-debug-sections=zlib-gnu. The compressed data is in COMPRESSED / COMPRESSED_SIZE, and the function writes to @@ -2628,6 +5094,8 @@ elf_uncompress_chdr (struct backtrace_state *state, unsigned char **uncompressed, size_t *uncompressed_size) { const b_elf_chdr *chdr; + char *alc; + size_t alc_len; unsigned char *po; *uncompressed = NULL; @@ -2639,31 +5107,50 @@ elf_uncompress_chdr (struct backtrace_state *state, chdr = (const b_elf_chdr *) compressed; - if (chdr->ch_type != ELFCOMPRESS_ZLIB) - { - /* Unsupported compression algorithm. */ - return 1; - } - + alc = NULL; + alc_len = 0; if (*uncompressed != NULL && *uncompressed_size >= chdr->ch_size) po = *uncompressed; else { - po = (unsigned char *) backtrace_alloc (state, chdr->ch_size, - error_callback, data); - if (po == NULL) + alc_len = chdr->ch_size; + alc = (char*)backtrace_alloc (state, alc_len, error_callback, data); + if (alc == NULL) return 0; + po = (unsigned char *) alc; } - if (!elf_zlib_inflate_and_verify (compressed + sizeof (b_elf_chdr), - compressed_size - sizeof (b_elf_chdr), - zdebug_table, po, chdr->ch_size)) - return 1; + switch (chdr->ch_type) + { + case ELFCOMPRESS_ZLIB: + if (!elf_zlib_inflate_and_verify (compressed + sizeof (b_elf_chdr), + compressed_size - sizeof (b_elf_chdr), + zdebug_table, po, chdr->ch_size)) + goto skip; + break; + + case ELFCOMPRESS_ZSTD: + if (!elf_zstd_decompress (compressed + sizeof (b_elf_chdr), + compressed_size - sizeof (b_elf_chdr), + (unsigned char *)zdebug_table, po, + chdr->ch_size)) + goto skip; + break; + + default: + /* Unsupported compression algorithm. */ + goto skip; + } *uncompressed = po; *uncompressed_size = chdr->ch_size; return 1; + + skip: + if (alc != NULL && alc_len > 0) + backtrace_free (state, alc, alc_len, error_callback, data); + return 1; } /* This function is a hook for testing the zlib support. It is only @@ -2692,6 +5179,31 @@ backtrace_uncompress_zdebug (struct backtrace_state *state, return ret; } +/* This function is a hook for testing the zstd support. It is only used by + tests. */ + +int +backtrace_uncompress_zstd (struct backtrace_state *state, + const unsigned char *compressed, + size_t compressed_size, + backtrace_error_callback error_callback, + void *data, unsigned char *uncompressed, + size_t uncompressed_size) +{ + unsigned char *zdebug_table; + int ret; + + zdebug_table = ((unsigned char *) backtrace_alloc (state, ZDEBUG_TABLE_SIZE, + error_callback, data)); + if (zdebug_table == NULL) + return 0; + ret = elf_zstd_decompress (compressed, compressed_size, + zdebug_table, uncompressed, uncompressed_size); + backtrace_free (state, zdebug_table, ZDEBUG_TABLE_SIZE, + error_callback, data); + return ret; +} + /* Number of LZMA states. */ #define LZMA_STATES (12) @@ -4688,7 +7200,7 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, if (zdebug_table == NULL) { zdebug_table = ((uint16_t *) - backtrace_alloc (state, ZDEBUG_TABLE_SIZE, + backtrace_alloc (state, ZLIB_TABLE_SIZE, error_callback, data)); if (zdebug_table == NULL) goto fail; @@ -4714,8 +7226,15 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, } } + if (zdebug_table != NULL) + { + backtrace_free (state, zdebug_table, ZLIB_TABLE_SIZE, + error_callback, data); + zdebug_table = NULL; + } + /* Uncompress the official ELF format - (--compress-debug-sections=zlib-gabi). */ + (--compress-debug-sections=zlib-gabi, --compress-debug-sections=zstd). */ for (i = 0; i < (int) DEBUG_MAX; ++i) { unsigned char *uncompressed_data; diff --git a/Source/ThirdParty/tracy/libbacktrace/internal.hpp b/Source/ThirdParty/tracy/libbacktrace/internal.hpp index 96c097e02..f871844b6 100644 --- a/Source/ThirdParty/tracy/libbacktrace/internal.hpp +++ b/Source/ThirdParty/tracy/libbacktrace/internal.hpp @@ -371,6 +371,15 @@ extern int backtrace_uncompress_zdebug (struct backtrace_state *, unsigned char **uncompressed, size_t *uncompressed_size); +/* A test-only hook for elf_zstd_decompress. */ + +extern int backtrace_uncompress_zstd (struct backtrace_state *, + const unsigned char *compressed, + size_t compressed_size, + backtrace_error_callback, void *data, + unsigned char *uncompressed, + size_t uncompressed_size); + /* A test-only hook for elf_uncompress_lzma. */ extern int backtrace_uncompress_lzma (struct backtrace_state *, diff --git a/Source/ThirdParty/tracy/tracy/Tracy.hpp b/Source/ThirdParty/tracy/tracy/Tracy.hpp index 8ef26d59e..e9c943d2f 100644 --- a/Source/ThirdParty/tracy/tracy/Tracy.hpp +++ b/Source/ThirdParty/tracy/tracy/Tracy.hpp @@ -1,6 +1,18 @@ #ifndef __TRACY_HPP__ #define __TRACY_HPP__ +#ifndef TracyFunction +# define TracyFunction __FUNCTION__ +#endif + +#ifndef TracyFile +# define TracyFile __FILE__ +#endif + +#ifndef TracyLine +# define TracyLine __LINE__ +#endif + #ifndef TRACY_ENABLE #define ZoneNamed(x,y) @@ -80,6 +92,8 @@ #define TracySourceCallbackRegister(x,y) #define TracyParameterRegister(x,y) #define TracyParameterSetup(x,y,z,w) +#define TracyIsConnected false +#define TracySetProgramName(x) #define TracyFiberEnter(x) #define TracyFiberLeave @@ -124,21 +138,21 @@ public: } #if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active ) -# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active ) -# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active ) -# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active ) +# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) +# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) +# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) +# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) -# define ZoneTransient( varname, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, TRACY_CALLSTACK, active ) -# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), TRACY_CALLSTACK, active ) +# define ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, TRACY_CALLSTACK, active ) +# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active ) #else -# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active ) -# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active ) -# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active ) -# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active ) +# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active ) +# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active ) +# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active ) +# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active ) -# define ZoneTransient( varname, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, active ) -# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), active ) +# define ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, active ) +# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), active ) #endif #define ZoneScoped ZoneNamed( ___tracy_scoped_zone, true ) @@ -164,13 +178,13 @@ public: #define FrameImage( image, width, height, offset, flip ) tracy::Profiler::SendFrameImage( image, width, height, offset, flip ) -#define TracyLockable( type, varname ) tracy::Lockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() } -#define TracyLockableN( type, varname, desc ) tracy::Lockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() } -#define TracySharedLockable( type, varname ) tracy::SharedLockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() } -#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() } +#define TracyLockable( type, varname ) tracy::Lockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, TracyFile, TracyLine, 0 }; return &srcloc; }() } +#define TracyLockableN( type, varname, desc ) tracy::Lockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, TracyFile, TracyLine, 0 }; return &srcloc; }() } +#define TracySharedLockable( type, varname ) tracy::SharedLockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, TracyFile, TracyLine, 0 }; return &srcloc; }() } +#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, TracyFile, TracyLine, 0 }; return &srcloc; }() } #define LockableBase( type ) tracy::Lockable #define SharedLockableBase( type ) tracy::SharedLockable -#define LockMark( varname ) static constexpr tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; varname.Mark( &__tracy_lock_location_##varname ) +#define LockMark( varname ) static constexpr tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; varname.Mark( &__tracy_lock_location_##varname ) #define LockableName( varname, txt, size ) varname.CustomName( txt, size ) #define TracyPlot( name, val ) tracy::Profiler::PlotData( name, val ) @@ -211,13 +225,13 @@ public: #endif #ifdef TRACY_HAS_CALLSTACK -# define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active ) -# define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active ) -# define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active ) -# define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active ) +# define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) +# define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) +# define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) +# define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) -# define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, depth, active ) -# define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), depth, active ) +# define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, depth, active ) +# define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active ) # define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true ) # define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true ) @@ -272,6 +286,7 @@ public: #define TracyParameterRegister( cb, data ) tracy::Profiler::ParameterRegister( cb, data ) #define TracyParameterSetup( idx, name, isBool, val ) tracy::Profiler::ParameterSetup( idx, name, isBool, val ) #define TracyIsConnected tracy::GetProfiler().IsConnected() +#define TracySetProgramName( name ) tracy::GetProfiler().SetProgramName( name ); #ifdef TRACY_FIBERS # define TracyFiberEnter( fiber ) tracy::Profiler::EnterFiber( fiber )