mirror of
https://github.com/wolfpld/tracy
synced 2025-04-29 12:23:53 +00:00
Drop support for CPU id queries.
This commit is contained in:
parent
0b944c88bb
commit
8aa0be39d5
40
TracyLua.hpp
40
TracyLua.hpp
@ -230,13 +230,7 @@ static inline int LuaZoneBeginS( lua_State* L )
|
|||||||
auto& tail = token->get_tail_index();
|
auto& tail = token->get_tail_index();
|
||||||
auto item = token->enqueue_begin( magic );
|
auto item = token->enqueue_begin( magic );
|
||||||
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLocCallstack );
|
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLocCallstack );
|
||||||
#ifdef TRACY_RDTSCP_OPT
|
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) );
|
|
||||||
#else
|
|
||||||
uint32_t cpu;
|
|
||||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) );
|
|
||||||
MemWrite( &item->zoneBegin.cpu, cpu );
|
|
||||||
#endif
|
|
||||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)ptr );
|
MemWrite( &item->zoneBegin.srcloc, (uint64_t)ptr );
|
||||||
tail.store( magic + 1, std::memory_order_release );
|
tail.store( magic + 1, std::memory_order_release );
|
||||||
|
|
||||||
@ -295,13 +289,7 @@ static inline int LuaZoneBeginNS( lua_State* L )
|
|||||||
auto& tail = token->get_tail_index();
|
auto& tail = token->get_tail_index();
|
||||||
auto item = token->enqueue_begin( magic );
|
auto item = token->enqueue_begin( magic );
|
||||||
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLocCallstack );
|
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLocCallstack );
|
||||||
#ifdef TRACY_RDTSCP_OPT
|
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) );
|
|
||||||
#else
|
|
||||||
uint32_t cpu;
|
|
||||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) );
|
|
||||||
MemWrite( &item->zoneBegin.cpu, cpu );
|
|
||||||
#endif
|
|
||||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)ptr );
|
MemWrite( &item->zoneBegin.srcloc, (uint64_t)ptr );
|
||||||
tail.store( magic + 1, std::memory_order_release );
|
tail.store( magic + 1, std::memory_order_release );
|
||||||
|
|
||||||
@ -360,13 +348,7 @@ static inline int LuaZoneBegin( lua_State* L )
|
|||||||
auto& tail = token->get_tail_index();
|
auto& tail = token->get_tail_index();
|
||||||
auto item = token->enqueue_begin( magic );
|
auto item = token->enqueue_begin( magic );
|
||||||
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLoc );
|
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLoc );
|
||||||
#ifdef TRACY_RDTSCP_OPT
|
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) );
|
|
||||||
#else
|
|
||||||
uint32_t cpu;
|
|
||||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) );
|
|
||||||
MemWrite( &item->zoneBegin.cpu, cpu );
|
|
||||||
#endif
|
|
||||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)ptr );
|
MemWrite( &item->zoneBegin.srcloc, (uint64_t)ptr );
|
||||||
tail.store( magic + 1, std::memory_order_release );
|
tail.store( magic + 1, std::memory_order_release );
|
||||||
return 0;
|
return 0;
|
||||||
@ -421,13 +403,7 @@ static inline int LuaZoneBeginN( lua_State* L )
|
|||||||
auto& tail = token->get_tail_index();
|
auto& tail = token->get_tail_index();
|
||||||
auto item = token->enqueue_begin( magic );
|
auto item = token->enqueue_begin( magic );
|
||||||
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLoc );
|
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLoc );
|
||||||
#ifdef TRACY_RDTSCP_OPT
|
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) );
|
|
||||||
#else
|
|
||||||
uint32_t cpu;
|
|
||||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) );
|
|
||||||
MemWrite( &item->zoneBegin.cpu, cpu );
|
|
||||||
#endif
|
|
||||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)ptr );
|
MemWrite( &item->zoneBegin.srcloc, (uint64_t)ptr );
|
||||||
tail.store( magic + 1, std::memory_order_release );
|
tail.store( magic + 1, std::memory_order_release );
|
||||||
return 0;
|
return 0;
|
||||||
@ -452,13 +428,7 @@ static inline int LuaZoneEnd( lua_State* L )
|
|||||||
auto& tail = token->get_tail_index();
|
auto& tail = token->get_tail_index();
|
||||||
auto item = token->enqueue_begin( magic );
|
auto item = token->enqueue_begin( magic );
|
||||||
MemWrite( &item->hdr.type, QueueType::ZoneEnd );
|
MemWrite( &item->hdr.type, QueueType::ZoneEnd );
|
||||||
#ifdef TRACY_RDTSCP_OPT
|
MemWrite( &item->zoneEnd.time, Profiler::GetTime() );
|
||||||
MemWrite( &item->zoneEnd.time, Profiler::GetTime( item->zoneEnd.cpu ) );
|
|
||||||
#else
|
|
||||||
uint32_t cpu;
|
|
||||||
MemWrite( &item->zoneEnd.time, Profiler::GetTime( cpu ) );
|
|
||||||
MemWrite( &item->zoneEnd.cpu, cpu );
|
|
||||||
#endif
|
|
||||||
tail.store( magic + 1, std::memory_order_release );
|
tail.store( magic + 1, std::memory_order_release );
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -2173,13 +2173,7 @@ void Profiler::CalibrateDelay()
|
|||||||
auto& tail = ptoken->get_tail_index();
|
auto& tail = ptoken->get_tail_index();
|
||||||
auto item = ptoken->enqueue_begin( magic );
|
auto item = ptoken->enqueue_begin( magic );
|
||||||
MemWrite( &item->hdr.type, QueueType::ZoneBegin );
|
MemWrite( &item->hdr.type, QueueType::ZoneBegin );
|
||||||
#ifdef TRACY_RDTSCP_OPT
|
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) );
|
|
||||||
#else
|
|
||||||
uint32_t cpu;
|
|
||||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) );
|
|
||||||
MemWrite( &item->zoneBegin.cpu, cpu );
|
|
||||||
#endif
|
|
||||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)&__tracy_source_location );
|
MemWrite( &item->zoneBegin.srcloc, (uint64_t)&__tracy_source_location );
|
||||||
tail.store( magic + 1, std::memory_order_release );
|
tail.store( magic + 1, std::memory_order_release );
|
||||||
}
|
}
|
||||||
@ -2188,13 +2182,7 @@ void Profiler::CalibrateDelay()
|
|||||||
auto& tail = ptoken->get_tail_index();
|
auto& tail = ptoken->get_tail_index();
|
||||||
auto item = ptoken->enqueue_begin( magic );
|
auto item = ptoken->enqueue_begin( magic );
|
||||||
MemWrite( &item->hdr.type, QueueType::ZoneEnd );
|
MemWrite( &item->hdr.type, QueueType::ZoneEnd );
|
||||||
#ifdef TRACY_RDTSCP_OPT
|
MemWrite( &item->zoneEnd.time, GetTime() );
|
||||||
MemWrite( &item->zoneEnd.time, GetTime( item->zoneEnd.cpu ) );
|
|
||||||
#else
|
|
||||||
uint32_t cpu;
|
|
||||||
MemWrite( &item->zoneEnd.time, GetTime( cpu ) );
|
|
||||||
MemWrite( &item->zoneEnd.cpu, cpu );
|
|
||||||
#endif
|
|
||||||
tail.store( magic + 1, std::memory_order_release );
|
tail.store( magic + 1, std::memory_order_release );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2213,13 +2201,7 @@ void Profiler::CalibrateDelay()
|
|||||||
auto& tail = ptoken->get_tail_index();
|
auto& tail = ptoken->get_tail_index();
|
||||||
auto item = ptoken->enqueue_begin( magic );
|
auto item = ptoken->enqueue_begin( magic );
|
||||||
MemWrite( &item->hdr.type, QueueType::ZoneBegin );
|
MemWrite( &item->hdr.type, QueueType::ZoneBegin );
|
||||||
#ifdef TRACY_RDTSCP_OPT
|
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) );
|
|
||||||
#else
|
|
||||||
uint32_t cpu;
|
|
||||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) );
|
|
||||||
MemWrite( &item->zoneBegin.cpu, cpu );
|
|
||||||
#endif
|
|
||||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)&__tracy_source_location );
|
MemWrite( &item->zoneBegin.srcloc, (uint64_t)&__tracy_source_location );
|
||||||
tail.store( magic + 1, std::memory_order_release );
|
tail.store( magic + 1, std::memory_order_release );
|
||||||
}
|
}
|
||||||
@ -2228,13 +2210,7 @@ void Profiler::CalibrateDelay()
|
|||||||
auto& tail = ptoken->get_tail_index();
|
auto& tail = ptoken->get_tail_index();
|
||||||
auto item = ptoken->enqueue_begin( magic );
|
auto item = ptoken->enqueue_begin( magic );
|
||||||
MemWrite( &item->hdr.type, QueueType::ZoneEnd );
|
MemWrite( &item->hdr.type, QueueType::ZoneEnd );
|
||||||
#ifdef TRACY_RDTSCP_OPT
|
MemWrite( &item->zoneEnd.time, GetTime() );
|
||||||
MemWrite( &item->zoneEnd.time, GetTime( item->zoneEnd.cpu ) );
|
|
||||||
#else
|
|
||||||
uint32_t cpu;
|
|
||||||
MemWrite( &item->zoneEnd.time, GetTime( cpu ) );
|
|
||||||
MemWrite( &item->zoneEnd.cpu, cpu );
|
|
||||||
#endif
|
|
||||||
tail.store( magic + 1, std::memory_order_release );
|
tail.store( magic + 1, std::memory_order_release );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2366,13 +2342,7 @@ TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_da
|
|||||||
auto& tail = token->get_tail_index();
|
auto& tail = token->get_tail_index();
|
||||||
auto item = token->enqueue_begin( magic );
|
auto item = token->enqueue_begin( magic );
|
||||||
tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneBegin );
|
tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneBegin );
|
||||||
#ifdef TRACY_RDTSCP_OPT
|
tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
|
||||||
tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime( item->zoneBegin.cpu ) );
|
|
||||||
#else
|
|
||||||
uint32_t cpu;
|
|
||||||
tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime( cpu ) );
|
|
||||||
tracy::MemWrite( &item->zoneBegin.cpu, cpu );
|
|
||||||
#endif
|
|
||||||
tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
|
tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
|
||||||
tail.store( magic + 1, std::memory_order_release );
|
tail.store( magic + 1, std::memory_order_release );
|
||||||
}
|
}
|
||||||
@ -2408,13 +2378,7 @@ TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_l
|
|||||||
auto& tail = token->get_tail_index();
|
auto& tail = token->get_tail_index();
|
||||||
auto item = token->enqueue_begin( magic );
|
auto item = token->enqueue_begin( magic );
|
||||||
tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneBeginCallstack );
|
tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneBeginCallstack );
|
||||||
#ifdef TRACY_RDTSCP_OPT
|
tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
|
||||||
tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime( item->zoneBegin.cpu ) );
|
|
||||||
#else
|
|
||||||
uint32_t cpu;
|
|
||||||
tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime( cpu ) );
|
|
||||||
tracy::MemWrite( &item->zoneBegin.cpu, cpu );
|
|
||||||
#endif
|
|
||||||
tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
|
tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
|
||||||
tail.store( magic + 1, std::memory_order_release );
|
tail.store( magic + 1, std::memory_order_release );
|
||||||
}
|
}
|
||||||
@ -2443,13 +2407,7 @@ void ___tracy_emit_zone_end( TracyCZoneCtx ctx )
|
|||||||
auto& tail = token->get_tail_index();
|
auto& tail = token->get_tail_index();
|
||||||
auto item = token->enqueue_begin( magic );
|
auto item = token->enqueue_begin( magic );
|
||||||
tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneEnd );
|
tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneEnd );
|
||||||
#ifdef TRACY_RDTSCP_OPT
|
tracy::MemWrite( &item->zoneEnd.time, tracy::Profiler::GetTime() );
|
||||||
tracy::MemWrite( &item->zoneEnd.time, tracy::Profiler::GetTime( item->zoneEnd.cpu ) );
|
|
||||||
#else
|
|
||||||
uint32_t cpu;
|
|
||||||
tracy::MemWrite( &item->zoneEnd.time, tracy::Profiler::GetTime( cpu ) );
|
|
||||||
tracy::MemWrite( &item->zoneEnd.cpu, cpu );
|
|
||||||
#endif
|
|
||||||
tail.store( magic + 1, std::memory_order_release );
|
tail.store( magic + 1, std::memory_order_release );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -27,11 +27,6 @@
|
|||||||
|
|
||||||
#if defined _WIN32 || defined __CYGWIN__ || ( ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) && !defined __ANDROID__ ) || __ARM_ARCH >= 6
|
#if defined _WIN32 || defined __CYGWIN__ || ( ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) && !defined __ANDROID__ ) || __ARM_ARCH >= 6
|
||||||
# define TRACY_HW_TIMER
|
# define TRACY_HW_TIMER
|
||||||
# if defined _WIN32 || defined __CYGWIN__
|
|
||||||
// Enable optimization for MSVC __rdtscp() intrin, saving one LHS of a cpu value on the stack.
|
|
||||||
// This comes at the cost of an unaligned memory write.
|
|
||||||
# define TRACY_RDTSCP_OPT
|
|
||||||
# endif
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef TracyConcat
|
#ifndef TracyConcat
|
||||||
@ -103,29 +98,6 @@ public:
|
|||||||
Profiler();
|
Profiler();
|
||||||
~Profiler();
|
~Profiler();
|
||||||
|
|
||||||
static tracy_force_inline int64_t GetTime( uint32_t& cpu )
|
|
||||||
{
|
|
||||||
#ifdef TRACY_HW_TIMER
|
|
||||||
# if TARGET_OS_IOS == 1
|
|
||||||
cpu = 0xFFFFFFFF;
|
|
||||||
return mach_absolute_time();
|
|
||||||
# elif __ARM_ARCH >= 6
|
|
||||||
cpu = 0xFFFFFFFF;
|
|
||||||
return GetTimeImpl();
|
|
||||||
# elif defined _WIN32 || defined __CYGWIN__
|
|
||||||
const auto t = int64_t( __rdtscp( &cpu ) );
|
|
||||||
return t;
|
|
||||||
# elif defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
|
|
||||||
uint32_t eax, edx;
|
|
||||||
asm volatile ( "rdtscp" : "=a" (eax), "=d" (edx), "=c" (cpu) :: );
|
|
||||||
return ( uint64_t( edx ) << 32 ) + uint64_t( eax );
|
|
||||||
# endif
|
|
||||||
#else
|
|
||||||
cpu = 0xFFFFFFFF;
|
|
||||||
return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
static tracy_force_inline int64_t GetTime()
|
static tracy_force_inline int64_t GetTime()
|
||||||
{
|
{
|
||||||
#ifdef TRACY_HW_TIMER
|
#ifdef TRACY_HW_TIMER
|
||||||
|
@ -29,13 +29,7 @@ public:
|
|||||||
auto& tail = token->get_tail_index();
|
auto& tail = token->get_tail_index();
|
||||||
auto item = token->enqueue_begin( magic );
|
auto item = token->enqueue_begin( magic );
|
||||||
MemWrite( &item->hdr.type, QueueType::ZoneBegin );
|
MemWrite( &item->hdr.type, QueueType::ZoneBegin );
|
||||||
#ifdef TRACY_RDTSCP_OPT
|
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) );
|
|
||||||
#else
|
|
||||||
uint32_t cpu;
|
|
||||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) );
|
|
||||||
MemWrite( &item->zoneBegin.cpu, cpu );
|
|
||||||
#endif
|
|
||||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
|
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
|
||||||
tail.store( magic + 1, std::memory_order_release );
|
tail.store( magic + 1, std::memory_order_release );
|
||||||
}
|
}
|
||||||
@ -54,13 +48,7 @@ public:
|
|||||||
auto& tail = token->get_tail_index();
|
auto& tail = token->get_tail_index();
|
||||||
auto item = token->enqueue_begin( magic );
|
auto item = token->enqueue_begin( magic );
|
||||||
MemWrite( &item->hdr.type, QueueType::ZoneBeginCallstack );
|
MemWrite( &item->hdr.type, QueueType::ZoneBeginCallstack );
|
||||||
#ifdef TRACY_RDTSCP_OPT
|
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) );
|
|
||||||
#else
|
|
||||||
uint32_t cpu;
|
|
||||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) );
|
|
||||||
MemWrite( &item->zoneBegin.cpu, cpu );
|
|
||||||
#endif
|
|
||||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
|
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
|
||||||
tail.store( magic + 1, std::memory_order_release );
|
tail.store( magic + 1, std::memory_order_release );
|
||||||
|
|
||||||
@ -78,13 +66,7 @@ public:
|
|||||||
auto& tail = token->get_tail_index();
|
auto& tail = token->get_tail_index();
|
||||||
auto item = token->enqueue_begin( magic );
|
auto item = token->enqueue_begin( magic );
|
||||||
MemWrite( &item->hdr.type, QueueType::ZoneEnd );
|
MemWrite( &item->hdr.type, QueueType::ZoneEnd );
|
||||||
#ifdef TRACY_RDTSCP_OPT
|
MemWrite( &item->zoneEnd.time, Profiler::GetTime() );
|
||||||
MemWrite( &item->zoneEnd.time, Profiler::GetTime( item->zoneEnd.cpu ) );
|
|
||||||
#else
|
|
||||||
uint32_t cpu;
|
|
||||||
MemWrite( &item->zoneEnd.time, Profiler::GetTime( cpu ) );
|
|
||||||
MemWrite( &item->zoneEnd.cpu, cpu );
|
|
||||||
#endif
|
|
||||||
tail.store( magic + 1, std::memory_order_release );
|
tail.store( magic + 1, std::memory_order_release );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
namespace tracy
|
namespace tracy
|
||||||
{
|
{
|
||||||
|
|
||||||
enum : uint32_t { ProtocolVersion = 15 };
|
enum : uint32_t { ProtocolVersion = 16 };
|
||||||
enum : uint32_t { BroadcastVersion = 0 };
|
enum : uint32_t { BroadcastVersion = 0 };
|
||||||
|
|
||||||
using lz4sz_t = uint32_t;
|
using lz4sz_t = uint32_t;
|
||||||
|
@ -79,13 +79,11 @@ struct QueueZoneBegin
|
|||||||
{
|
{
|
||||||
int64_t time;
|
int64_t time;
|
||||||
uint64_t srcloc; // ptr
|
uint64_t srcloc; // ptr
|
||||||
uint32_t cpu;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct QueueZoneEnd
|
struct QueueZoneEnd
|
||||||
{
|
{
|
||||||
int64_t time;
|
int64_t time;
|
||||||
uint32_t cpu;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct QueueZoneValidation
|
struct QueueZoneValidation
|
||||||
|
@ -167,25 +167,6 @@ Tracy tries to achieve maximum possible timer resolution, to make measurements a
|
|||||||
|
|
||||||
Time values read from the registers are specified in indeterminate units. This is corrected by making two separate measurements of \texttt{GetTime()} and \texttt{std::high\_resolution\_clock} readings, which can then be used to calculate a correction multiplier (\texttt{Profiler::CalibrateTimer()}). To save cycles, the client sends raw readings to the server, which can then apply the correction.
|
Time values read from the registers are specified in indeterminate units. This is corrected by making two separate measurements of \texttt{GetTime()} and \texttt{std::high\_resolution\_clock} readings, which can then be used to calculate a correction multiplier (\texttt{Profiler::CalibrateTimer()}). To save cycles, the client sends raw readings to the server, which can then apply the correction.
|
||||||
|
|
||||||
\subsubsection{CPU core queries}
|
|
||||||
|
|
||||||
One of the curiosities of the \texttt{rdtscp} instruction is that it returns the identifier of the CPU core (\texttt{IA32\_TSC\_AUX} signature) on which it is executed. This is exposed through the \texttt{GetTime(uint32\_t\& cpu)} function. If CPU query is not available, the \texttt{cpu} value will be set to $0xFFFFFFFF$.
|
|
||||||
|
|
||||||
\begin{bclogo}[
|
|
||||||
noborder=true,
|
|
||||||
couleur=black!5,
|
|
||||||
logo=\bclampe
|
|
||||||
]{Improvement opportunity}
|
|
||||||
\begin{itemize}
|
|
||||||
\item The CPU core readings are currently very underused by the profiler. It might be profitable to figure out how this information can be used and how to present it to the user.
|
|
||||||
\item Current implementation doesn't handle the CPU identifier correctly, as it is assumed that only a single CPU is present in the system and the value is cast to 8 bits. In reality the format of the identifier is OS-specific and can include NUMA node identifier in the higher bits, for example: \url{https://elixir.bootlin.com/linux/v3.13/source/arch/x86/kernel/vsyscall_64.c#L330}.
|
|
||||||
\end{itemize}
|
|
||||||
\end{bclogo}
|
|
||||||
|
|
||||||
\paragraph{MSVC codegen issues}
|
|
||||||
|
|
||||||
The Microsoft compiler is very insistent on writing the CPU core identifier to the memory address which was specified in the intrinsic call, even if the value should be kept in a register. To fix this issue a separate code path is used, which is enabled by the \texttt{TRACY\_RDTSCP\_OPT} macro. In its case the value will be stored at the required address, omitting the store-load-store, but at the cost of breaking alignment requirements, described in section~\ref{accessingdata}, which technically introduce undefined behavior.
|
|
||||||
|
|
||||||
\subsubsection{Misinformation about \texttt{rdtscp}}
|
\subsubsection{Misinformation about \texttt{rdtscp}}
|
||||||
|
|
||||||
In various internet sources you can find warnings that the \texttt{rdtscp} readings are not reliable, can vary between CPU cores, or can be affected by the CPU frequency adjustments. While this was sound advice a long time ago, it is no longer valid since the Intel Sandy Bridge microarchitecture (released in 2011) introduced \emph{invariant TSC}. Tracy will check for this feature and refuse to run if it is not found\footnote{Invariant TSC might not be available in specific scenarios, e.g. in some virtual environments. You may set the environment variable \texttt{TRACY\_NO\_INVARIANT\_CHECK=1} to skip this check, \emph{only if you know what you are doing}.}.
|
In various internet sources you can find warnings that the \texttt{rdtscp} readings are not reliable, can vary between CPU cores, or can be affected by the CPU frequency adjustments. While this was sound advice a long time ago, it is no longer valid since the Intel Sandy Bridge microarchitecture (released in 2011) introduced \emph{invariant TSC}. Tracy will check for this feature and refuse to run if it is not found\footnote{Invariant TSC might not be available in specific scenarios, e.g. in some virtual environments. You may set the environment variable \texttt{TRACY\_NO\_INVARIANT\_CHECK=1} to skip this check, \emph{only if you know what you are doing}.}.
|
||||||
|
@ -77,8 +77,6 @@ struct ZoneEvent
|
|||||||
int64_t start;
|
int64_t start;
|
||||||
int64_t end;
|
int64_t end;
|
||||||
int32_t srcloc;
|
int32_t srcloc;
|
||||||
int8_t cpu_start;
|
|
||||||
int8_t cpu_end;
|
|
||||||
StringIdx text;
|
StringIdx text;
|
||||||
uint32_t callstack;
|
uint32_t callstack;
|
||||||
StringIdx name;
|
StringIdx name;
|
||||||
|
@ -7,7 +7,7 @@ namespace Version
|
|||||||
{
|
{
|
||||||
enum { Major = 0 };
|
enum { Major = 0 };
|
||||||
enum { Minor = 5 };
|
enum { Minor = 5 };
|
||||||
enum { Patch = 0 };
|
enum { Patch = 1 };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -10761,19 +10761,6 @@ void View::ZoneTooltip( const ZoneEvent& ev )
|
|||||||
ImGui::SameLine();
|
ImGui::SameLine();
|
||||||
ImGui::TextDisabled( "(%.2f%%)", 100.f * selftime / ztime );
|
ImGui::TextDisabled( "(%.2f%%)", 100.f * selftime / ztime );
|
||||||
}
|
}
|
||||||
if( ev.cpu_start >= 0 )
|
|
||||||
{
|
|
||||||
TextDisabledUnformatted( "CPU:" );
|
|
||||||
ImGui::SameLine();
|
|
||||||
if( ev.end < 0 || ev.cpu_start == ev.cpu_end )
|
|
||||||
{
|
|
||||||
ImGui::Text( "%i", ev.cpu_start );
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
ImGui::Text( "%i -> %i", ev.cpu_start, ev.cpu_end );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if( ev.text.active )
|
if( ev.text.active )
|
||||||
{
|
{
|
||||||
ImGui::NewLine();
|
ImGui::NewLine();
|
||||||
|
@ -731,6 +731,11 @@ Worker::Worker( FileRead& f, EventType::Type eventMask )
|
|||||||
{
|
{
|
||||||
ReadTimelinePre042( f, td->timeline, CompressThread( tid ), tsz, fileVer );
|
ReadTimelinePre042( f, td->timeline, CompressThread( tid ), tsz, fileVer );
|
||||||
}
|
}
|
||||||
|
else if( fileVer <= FileVersion( 0, 5, 0 ) )
|
||||||
|
{
|
||||||
|
int64_t refTime = 0;
|
||||||
|
ReadTimelinePre051( f, td->timeline, CompressThread( tid ), tsz, refTime, fileVer );
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int64_t refTime = 0;
|
int64_t refTime = 0;
|
||||||
@ -2626,8 +2631,6 @@ void Worker::ProcessZoneBeginImpl( ZoneEvent* zone, const QueueZoneBegin& ev )
|
|||||||
zone->start = TscTime( ev.time );
|
zone->start = TscTime( ev.time );
|
||||||
zone->end = -1;
|
zone->end = -1;
|
||||||
zone->srcloc = ShrinkSourceLocation( ev.srcloc );
|
zone->srcloc = ShrinkSourceLocation( ev.srcloc );
|
||||||
assert( ev.cpu == 0xFFFFFFFF || ev.cpu <= std::numeric_limits<int8_t>::max() );
|
|
||||||
zone->cpu_start = ev.cpu == 0xFFFFFFFF ? -1 : (int8_t)ev.cpu;
|
|
||||||
zone->callstack = 0;
|
zone->callstack = 0;
|
||||||
zone->child = -1;
|
zone->child = -1;
|
||||||
|
|
||||||
@ -2660,8 +2663,6 @@ void Worker::ProcessZoneBeginAllocSrcLocImpl( ZoneEvent* zone, const QueueZoneBe
|
|||||||
zone->start = TscTime( ev.time );
|
zone->start = TscTime( ev.time );
|
||||||
zone->end = -1;
|
zone->end = -1;
|
||||||
zone->srcloc = it->second;
|
zone->srcloc = it->second;
|
||||||
assert( ev.cpu == 0xFFFFFFFF || ev.cpu <= std::numeric_limits<int8_t>::max() );
|
|
||||||
zone->cpu_start = ev.cpu == 0xFFFFFFFF ? -1 : (int8_t)ev.cpu;
|
|
||||||
zone->callstack = 0;
|
zone->callstack = 0;
|
||||||
zone->child = -1;
|
zone->child = -1;
|
||||||
|
|
||||||
@ -2711,8 +2712,6 @@ void Worker::ProcessZoneEnd( const QueueZoneEnd& ev )
|
|||||||
auto zone = stack.back_and_pop();
|
auto zone = stack.back_and_pop();
|
||||||
assert( zone->end == -1 );
|
assert( zone->end == -1 );
|
||||||
zone->end = TscTime( ev.time );
|
zone->end = TscTime( ev.time );
|
||||||
assert( ev.cpu == 0xFFFFFFFF || ev.cpu <= std::numeric_limits<int8_t>::max() );
|
|
||||||
zone->cpu_end = ev.cpu == 0xFFFFFFFF ? -1 : (int8_t)ev.cpu;
|
|
||||||
assert( zone->end >= zone->start );
|
assert( zone->end >= zone->start );
|
||||||
|
|
||||||
m_data.lastTime = std::max( m_data.lastTime, zone->end );
|
m_data.lastTime = std::max( m_data.lastTime, zone->end );
|
||||||
@ -3800,6 +3799,24 @@ void Worker::ReadTimelinePre042( FileRead& f, ZoneEvent* zone, uint16_t thread,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Worker::ReadTimelinePre051( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime, int fileVer )
|
||||||
|
{
|
||||||
|
uint64_t sz;
|
||||||
|
f.Read( sz );
|
||||||
|
if( sz == 0 )
|
||||||
|
{
|
||||||
|
zone->child = -1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
zone->child = m_data.zoneChildren.size();
|
||||||
|
m_data.zoneChildren.push_back( Vector<ZoneEvent*>() );
|
||||||
|
Vector<ZoneEvent*> tmp;
|
||||||
|
ReadTimelinePre051( f, tmp, thread, sz, refTime, fileVer );
|
||||||
|
m_data.zoneChildren[zone->child] = std::move( tmp );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Worker::ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime )
|
void Worker::ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime )
|
||||||
{
|
{
|
||||||
uint64_t sz;
|
uint64_t sz;
|
||||||
@ -3891,7 +3908,7 @@ void Worker::ReadTimeline( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread
|
|||||||
{
|
{
|
||||||
s_loadProgress.subProgress.fetch_add( 1, std::memory_order_relaxed );
|
s_loadProgress.subProgress.fetch_add( 1, std::memory_order_relaxed );
|
||||||
// Use zone->end as scratch buffer for zone start time offset.
|
// Use zone->end as scratch buffer for zone start time offset.
|
||||||
f.Read( &zone->end, sizeof( zone->end ) + sizeof( zone->srcloc ) + sizeof( zone->cpu_start ) + sizeof( zone->cpu_end ) + sizeof( zone->text ) + sizeof( zone->callstack ) + sizeof( zone->name ) );
|
f.Read( &zone->end, sizeof( zone->end ) + sizeof( zone->srcloc ) + sizeof( zone->text ) + sizeof( zone->callstack ) + sizeof( zone->name ) );
|
||||||
refTime += zone->end;
|
refTime += zone->end;
|
||||||
zone->start = refTime;
|
zone->start = refTime;
|
||||||
ReadTimeline( f, zone, thread, refTime );
|
ReadTimeline( f, zone, thread, refTime );
|
||||||
@ -3915,7 +3932,9 @@ void Worker::ReadTimelinePre042( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t
|
|||||||
s_loadProgress.subProgress.fetch_add( 1, std::memory_order_relaxed );
|
s_loadProgress.subProgress.fetch_add( 1, std::memory_order_relaxed );
|
||||||
auto zone = m_slab.Alloc<ZoneEvent>();
|
auto zone = m_slab.Alloc<ZoneEvent>();
|
||||||
vec[i] = zone;
|
vec[i] = zone;
|
||||||
f.Read( zone, sizeof( ZoneEvent ) - sizeof( ZoneEvent::child ) );
|
f.Read( &zone->start, sizeof( zone->start ) + sizeof( zone->end ) + sizeof( zone->srcloc ) );
|
||||||
|
f.Skip( 2 );
|
||||||
|
f.Read( &zone->text, sizeof( zone->text ) + sizeof( zone->callstack ) + sizeof( zone->name ) );
|
||||||
ReadTimelinePre042( f, zone, thread, fileVer );
|
ReadTimelinePre042( f, zone, thread, fileVer );
|
||||||
#ifdef TRACY_NO_STATISTICS
|
#ifdef TRACY_NO_STATISTICS
|
||||||
ReadTimelineUpdateStatistics( zone, thread );
|
ReadTimelineUpdateStatistics( zone, thread );
|
||||||
@ -3923,6 +3942,37 @@ void Worker::ReadTimelinePre042( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Worker::ReadTimelinePre051( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime, int fileVer )
|
||||||
|
{
|
||||||
|
assert( fileVer <= FileVersion( 0, 5, 0 ) );
|
||||||
|
assert( size != 0 );
|
||||||
|
vec.reserve_exact( size, m_slab );
|
||||||
|
m_data.zonesCnt += size;
|
||||||
|
auto zone = (ZoneEvent*)m_slab.AllocBig( sizeof( ZoneEvent ) * size );
|
||||||
|
auto zptr = zone;
|
||||||
|
auto vptr = vec.data();
|
||||||
|
for( uint64_t i=0; i<size; i++ )
|
||||||
|
{
|
||||||
|
*vptr++ = zptr++;
|
||||||
|
}
|
||||||
|
do
|
||||||
|
{
|
||||||
|
s_loadProgress.subProgress.fetch_add( 1, std::memory_order_relaxed );
|
||||||
|
// Use zone->end as scratch buffer for zone start time offset.
|
||||||
|
f.Read( &zone->end, sizeof( zone->end ) + sizeof( zone->srcloc ) );
|
||||||
|
f.Skip( 2 );
|
||||||
|
f.Read( &zone->text, sizeof( zone->text ) + sizeof( zone->callstack ) + sizeof( zone->name ) );
|
||||||
|
refTime += zone->end;
|
||||||
|
zone->start = refTime;
|
||||||
|
ReadTimelinePre051( f, zone, thread, refTime, fileVer );
|
||||||
|
zone->end = ReadTimeOffset( f, refTime );
|
||||||
|
#ifdef TRACY_NO_STATISTICS
|
||||||
|
ReadTimelineUpdateStatistics( zone, thread );
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
while( ++zone != zptr );
|
||||||
|
}
|
||||||
|
|
||||||
void Worker::ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime )
|
void Worker::ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime )
|
||||||
{
|
{
|
||||||
assert( size != 0 );
|
assert( size != 0 );
|
||||||
@ -4314,8 +4364,6 @@ void Worker::WriteTimeline( FileWrite& f, const Vector<ZoneEvent*>& vec, int64_t
|
|||||||
{
|
{
|
||||||
WriteTimeOffset( f, refTime, v->start );
|
WriteTimeOffset( f, refTime, v->start );
|
||||||
f.Write( &v->srcloc, sizeof( v->srcloc ) );
|
f.Write( &v->srcloc, sizeof( v->srcloc ) );
|
||||||
f.Write( &v->cpu_start, sizeof( v->cpu_start ) );
|
|
||||||
f.Write( &v->cpu_end, sizeof( v->cpu_end ) );
|
|
||||||
f.Write( &v->text, sizeof( v->text ) );
|
f.Write( &v->text, sizeof( v->text ) );
|
||||||
f.Write( &v->callstack, sizeof( v->callstack ) );
|
f.Write( &v->callstack, sizeof( v->callstack ) );
|
||||||
f.Write( &v->name, sizeof( v->name ) );
|
f.Write( &v->name, sizeof( v->name ) );
|
||||||
|
@ -475,6 +475,7 @@ private:
|
|||||||
|
|
||||||
tracy_force_inline void ReadTimeline( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime );
|
tracy_force_inline void ReadTimeline( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime );
|
||||||
tracy_force_inline void ReadTimelinePre042( FileRead& f, ZoneEvent* zone, uint16_t thread, int fileVer );
|
tracy_force_inline void ReadTimelinePre042( FileRead& f, ZoneEvent* zone, uint16_t thread, int fileVer );
|
||||||
|
tracy_force_inline void ReadTimelinePre051( FileRead& f, ZoneEvent* zone, uint16_t thread, int64_t& refTime, int fileVer );
|
||||||
tracy_force_inline void ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime );
|
tracy_force_inline void ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime );
|
||||||
tracy_force_inline void ReadTimelinePre044( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int fileVer );
|
tracy_force_inline void ReadTimelinePre044( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int fileVer );
|
||||||
|
|
||||||
@ -482,6 +483,7 @@ private:
|
|||||||
|
|
||||||
void ReadTimeline( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime );
|
void ReadTimeline( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime );
|
||||||
void ReadTimelinePre042( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int fileVer );
|
void ReadTimelinePre042( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int fileVer );
|
||||||
|
void ReadTimelinePre051( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, uint64_t size, int64_t& refTime, int fileVer );
|
||||||
void ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime );
|
void ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime );
|
||||||
void ReadTimelinePre044( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int fileVer );
|
void ReadTimelinePre044( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int fileVer );
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user