1
0
mirror of https://github.com/wolfpld/tracy synced 2025-01-15 20:08:00 +00:00

Compare commits

...

12 Commits

Author SHA1 Message Date
Bartosz Taudul
a001683a77
Update NEWS. 2021-04-29 21:05:36 +02:00
Bartosz Taudul
86ff14ad7e
Update manual. 2021-04-29 21:05:10 +02:00
Bartosz Taudul
eb7d220eea
Added support for TRACY_NO_FRAME_IMAGE define. 2021-04-29 20:55:16 +02:00
Bartosz Taudul
de5f258b03
Display complementary assembly address tooltips. 2021-04-29 20:33:10 +02:00
Bartosz Taudul
854439cdb1
Display jump target in asm instruction tooltip. 2021-04-29 20:27:32 +02:00
Bartosz Taudul
ac9b97319a
Extract jump name retrieval. 2021-04-29 20:08:18 +02:00
Bartosz Taudul
05894f6f14
Shuffle code around to avoid error C2712. 2021-04-29 19:10:30 +02:00
Bartosz Taudul
251f331f99
Update NEWS. 2021-04-29 18:24:37 +02:00
Bartosz Taudul
56f0bdd571
ARM doesn't follow x64 canonical address requirements. 2021-04-29 18:24:37 +02:00
Bartosz Taudul
505656df5a
Trace frame count may be zero. 2021-04-29 18:24:37 +02:00
Bartosz Taudul
7cfaaf6310
Exactly known latency shouldn't vary. 2021-04-29 18:24:37 +02:00
Bartosz Taudul
9bc3afabe7
Add robin hood natvis.
https://gist.github.com/ikrima/1184c75b979cbfa655c0883c0d4ab068
2021-04-29 18:24:34 +02:00
8 changed files with 266 additions and 99 deletions

3
NEWS
View File

@ -13,6 +13,9 @@ v0.x.x (xxxx-xx-xx)
- Manually disconnecting from the server will no longer display an erroneous
warning message.
- Added ability to display sample time spent in child function calls.
- Fixed an issue which may have prevented sampling on ARM64.
- Added the TRACY_NO_FRAME_IMAGE macro to disable the frame image compression
thread.
v0.7.7 (2021-04-01)

View File

@ -940,7 +940,9 @@ enum { QueuePrealloc = 256 * 1024 };
static Profiler* s_instance = nullptr;
static Thread* s_thread;
#ifndef TRACY_NO_FRAME_IMAGE
static Thread* s_compressThread;
#endif
#ifdef TRACY_HAS_SYSTEM_TRACING
static Thread* s_sysTraceThread = nullptr;
@ -1203,8 +1205,10 @@ Profiler::Profiler()
, m_lz4Buf( (char*)tracy_malloc( LZ4Size + sizeof( lz4sz_t ) ) )
, m_serialQueue( 1024*1024 )
, m_serialDequeue( 1024*1024 )
#ifndef TRACY_NO_FRAME_IMAGE
, m_fiQueue( 16 )
, m_fiDequeue( 16 )
#endif
, m_frameCount( 0 )
, m_isConnected( false )
#ifdef TRACY_ON_DEMAND
@ -1254,8 +1258,10 @@ void Profiler::SpawnWorkerThreads()
s_thread = (Thread*)tracy_malloc( sizeof( Thread ) );
new(s_thread) Thread( LaunchWorker, this );
#ifndef TRACY_NO_FRAME_IMAGE
s_compressThread = (Thread*)tracy_malloc( sizeof( Thread ) );
new(s_compressThread) Thread( LaunchCompressWorker, this );
#endif
#ifdef TRACY_HAS_SYSTEM_TRACING
if( SysTraceStart( m_samplingPeriod ) )
@ -1307,8 +1313,11 @@ Profiler::~Profiler()
}
#endif
#ifndef TRACY_NO_FRAME_IMAGE
s_compressThread->~Thread();
tracy_free( s_compressThread );
#endif
s_thread->~Thread();
tracy_free( s_thread );
@ -1808,6 +1817,7 @@ void Profiler::Worker()
}
}
#ifndef TRACY_NO_FRAME_IMAGE
void Profiler::CompressWorker()
{
ThreadExitHandler threadExitHandler;
@ -1874,6 +1884,7 @@ void Profiler::CompressWorker()
}
}
}
#endif
static void FreeAssociatedMemory( const QueueItem& item )
{

View File

@ -213,11 +213,12 @@ public:
static tracy_force_inline void SendFrameImage( const void* image, uint16_t w, uint16_t h, uint8_t offset, bool flip )
{
#ifndef TRACY_NO_FRAME_IMAGE
auto& profiler = GetProfiler();
assert( profiler.m_frameCount.load( std::memory_order_relaxed ) < std::numeric_limits<uint32_t>::max() );
#ifdef TRACY_ON_DEMAND
# ifdef TRACY_ON_DEMAND
if( !profiler.IsConnected() ) return;
#endif
# endif
const auto sz = size_t( w ) * size_t( h ) * 4;
auto ptr = (char*)tracy_malloc( sz );
memcpy( ptr, image, sz );
@ -231,6 +232,7 @@ public:
fi->flip = flip;
profiler.m_fiQueue.commit_next();
profiler.m_fiLock.unlock();
#endif
}
static tracy_force_inline void PlotData( const char* name, int64_t val )
@ -642,8 +644,10 @@ private:
static void LaunchWorker( void* ptr ) { ((Profiler*)ptr)->Worker(); }
void Worker();
#ifndef TRACY_NO_FRAME_IMAGE
static void LaunchCompressWorker( void* ptr ) { ((Profiler*)ptr)->CompressWorker(); }
void CompressWorker();
#endif
void ClearQueues( tracy::moodycamel::ConsumerToken& token );
void ClearSerial();
@ -790,8 +794,10 @@ private:
FastVector<QueueItem> m_serialQueue, m_serialDequeue;
TracyMutex m_serialLock;
#ifndef TRACY_NO_FRAME_IMAGE
FastVector<FrameImageQueueItem> m_fiQueue, m_fiDequeue;
TracyMutex m_fiLock;
#endif
std::atomic<uint64_t> m_frameCount;
std::atomic<bool> m_isConnected;

View File

@ -733,54 +733,59 @@ static void SetupSampling( int64_t& samplingPeriod )
s_ring[i].Read( &cnt, offset, sizeof( uint64_t ) );
offset += sizeof( uint64_t );
auto trace = (uint64_t*)tracy_malloc( ( 1 + cnt ) * sizeof( uint64_t ) );
s_ring[i].Read( trace+1, offset, sizeof( uint64_t ) * cnt );
if( cnt > 0 )
{
auto trace = (uint64_t*)tracy_malloc( ( 1 + cnt ) * sizeof( uint64_t ) );
s_ring[i].Read( trace+1, offset, sizeof( uint64_t ) * cnt );
// remove non-canonical pointers
do
{
const auto test = (int64_t)trace[cnt];
const auto m1 = test >> 63;
const auto m2 = test >> 47;
if( m1 == m2 ) break;
}
while( --cnt > 0 );
for( uint64_t j=1; j<cnt; j++ )
{
const auto test = (int64_t)trace[j];
const auto m1 = test >> 63;
const auto m2 = test >> 47;
if( m1 != m2 ) trace[j] = 0;
}
// skip kernel frames
uint64_t j;
for( j=0; j<cnt; j++ )
{
if( (int64_t)trace[j+1] >= 0 ) break;
}
if( j == cnt )
{
tracy_free( trace );
}
else
{
if( j > 0 )
#if defined __x86_64__ || defined _M_X64
// remove non-canonical pointers
do
{
cnt -= j;
memmove( trace+1, trace+1+j, sizeof( uint64_t ) * cnt );
const auto test = (int64_t)trace[cnt];
const auto m1 = test >> 63;
const auto m2 = test >> 47;
if( m1 == m2 ) break;
}
while( --cnt > 0 );
for( uint64_t j=1; j<cnt; j++ )
{
const auto test = (int64_t)trace[j];
const auto m1 = test >> 63;
const auto m2 = test >> 47;
if( m1 != m2 ) trace[j] = 0;
}
memcpy( trace, &cnt, sizeof( uint64_t ) );
#if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
t0 = s_ring[i].ConvertTimeToTsc( t0 );
#endif
TracyLfqPrepare( QueueType::CallstackSample );
MemWrite( &item->callstackSampleFat.time, t0 );
MemWrite( &item->callstackSampleFat.thread, (uint64_t)tid );
MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace );
TracyLfqCommit;
// skip kernel frames
uint64_t j;
for( j=0; j<cnt; j++ )
{
if( (int64_t)trace[j+1] >= 0 ) break;
}
if( j == cnt )
{
tracy_free( trace );
}
else
{
if( j > 0 )
{
cnt -= j;
memmove( trace+1, trace+1+j, sizeof( uint64_t ) * cnt );
}
memcpy( trace, &cnt, sizeof( uint64_t ) );
#if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
t0 = s_ring[i].ConvertTimeToTsc( t0 );
#endif
TracyLfqPrepare( QueueType::CallstackSample );
MemWrite( &item->callstackSampleFat.time, t0 );
MemWrite( &item->callstackSampleFat.thread, (uint64_t)tid );
MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace );
TracyLfqCommit;
}
}
}
}

View File

@ -99,6 +99,29 @@ std::atomic<ThreadNameData*>& GetThreadNameData();
TRACY_API void InitRPMallocThread();
#endif
#ifdef _MSC_VER
// Argument block passed to RaiseException() by ThreadNameMsvcMagic().
// Packing is pinned to 8 for this struct only; the matching pop below
// restores the previous setting.
# pragma pack( push, 8 )
struct THREADNAME_INFO
{
// SetThreadName() sets this to 0x1000.
DWORD dwType;
// Thread name string; SetThreadName() stores its `name` argument here.
LPCSTR szName;
// SetThreadName() fills this with GetCurrentThreadId().
DWORD dwThreadID;
// SetThreadName() sets this to 0.
DWORD dwFlags;
};
# pragma pack(pop)
void ThreadNameMsvcMagic( const THREADNAME_INFO& info )
{
__try
{
RaiseException( 0x406D1388, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info );
}
__except(EXCEPTION_EXECUTE_HANDLER)
{
}
}
#endif
TRACY_API void SetThreadName( const char* name )
{
#if defined _WIN32 || defined __CYGWIN__
@ -112,31 +135,12 @@ TRACY_API void SetThreadName( const char* name )
else
{
# if defined _MSC_VER
const DWORD MS_VC_EXCEPTION=0x406D1388;
# pragma pack( push, 8 )
struct THREADNAME_INFO
{
DWORD dwType;
LPCSTR szName;
DWORD dwThreadID;
DWORD dwFlags;
};
# pragma pack(pop)
DWORD ThreadId = GetCurrentThreadId();
THREADNAME_INFO info;
info.dwType = 0x1000;
info.szName = name;
info.dwThreadID = ThreadId;
info.dwThreadID = GetCurrentThreadId();
info.dwFlags = 0;
__try
{
RaiseException( MS_VC_EXCEPTION, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info );
}
__except(EXCEPTION_EXECUTE_HANDLER)
{
}
ThreadNameMsvcMagic( info );
# endif
}
#elif defined _GNU_SOURCE && !defined __EMSCRIPTEN__ && !defined __CYGWIN__

View File

@ -914,6 +914,7 @@ logo=\bcattention
]{Caveats}
\begin{itemize}
\item Frame images are compressed on a second client profiler thread\footnote{A small part of the compression task is performed on the server.}, to reduce memory usage of queued images. This might have an impact on the performance of the profiled application.
\item This second thread will be periodically woken up, even if there are no frame images to compress\footnote{This way of doing things is required to prevent a deadlock in specific circumstances.}. If you are not using the frame image capture functionality and you don't wish this thread to be running, you can define the \texttt{TRACY\_NO\_FRAME\_IMAGE} macro.
\item Due to implementation details of the network buffer, a single frame image cannot be greater than 256 KB after compression. Note that a $960\times540$ image fits in this limit.
\end{itemize}
\end{bclogo}

View File

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
<Type Name="tracy::Vector&lt;*&gt;">
<DisplayString>{{ size={m_size} }}</DisplayString>
@ -44,4 +44,67 @@
<Type Name="tracy::Int48">
<DisplayString>{{ value={int64_t( uint64_t(m_val[0]) | (uint64_t(m_val[1])&lt;&lt;8) | (uint64_t(m_val[2])&lt;&lt;16) | (uint64_t(m_val[3])&lt;&lt;24) | (uint64_t(m_val[4])&lt;&lt;32) | (uint64_t(m_val[5])&lt;&lt;40) )} }}</DisplayString>
</Type>
<Type Name="tracy::detail::Table&lt;*,*,*,*,*,*&gt;">
  <!--
    Visualizer for the robin hood hash table (tracy::detail::Table).

    Template arguments:
      $T1 = bool IsFlat
      $T2 = size_t MaxLoadFactor100
      $T3 = typename Key
      $T4 = typename T
      $T5 = typename Hash
      $T6 = typename KeyEqual

    The [elements] view walks mInfo and mKeyVals in lockstep. An all-zero
    size_t word of info bytes is skipped in one step; the trailing-zero count
    of the first non-zero word then gives the byte offset of the next entry.
    Iteration stops once itKeyVals reaches mInfo.
  -->
  <!-- <DisplayString>{map}</DisplayString> -->
  <Expand>
    <Synthetic Name="[elements]">
      <DisplayString>{{size={mNumElements}}}</DisplayString>
      <Expand>
        <CustomListItems MaxItemsPerView="5000" >
          <Variable Name="itKeyVals" InitialValue="mKeyVals " />
          <Variable Name="itInfo" InitialValue="mInfo " />
          <Variable Name="itEndKeyVals" InitialValue="(void *)mInfo " />
          <Variable Name="n" InitialValue="0ULL " />
          <Variable Name="inc" InitialValue="(unsigned long)0" />
          <Size>mNumElements</Size>
          <Loop>
            <!-- Fast forward: skip info words that contain no occupied slot -->
            <Exec>n = *((size_t*)itInfo)</Exec>
            <Loop>
              <Break Condition="n != 0" />
              <Exec>itInfo += sizeof(size_t)</Exec>
              <Exec>itKeyVals += sizeof(size_t)</Exec>
              <!-- Reload the info word at the new position; without this the
                   break condition above can never become true once a zero
                   word has been read. -->
              <Exec>n = *((size_t*)itInfo)</Exec>
            </Loop>
            <!-- Count Trailing Zeros -->
            <Exec>
              inc = n == 0
              ? 64
              : (
              63
              - (((n &amp; (~n + 1)) &amp; 0x00000000FFFFFFFF) ? 32 : 0)
              - (((n &amp; (~n + 1)) &amp; 0x0000FFFF0000FFFF) ? 16 : 0)
              - (((n &amp; (~n + 1)) &amp; 0x00FF00FF00FF00FF) ? 8 : 0)
              - (((n &amp; (~n + 1)) &amp; 0x0F0F0F0F0F0F0F0F) ? 4 : 0)
              - (((n &amp; (~n + 1)) &amp; 0x3333333333333333) ? 2 : 0)
              - (((n &amp; (~n + 1)) &amp; 0x5555555555555555) ? 1 : 0)
              )
            </Exec>
            <!-- Convert the bit index into a byte (slot) offset -->
            <Exec>itInfo += inc / 8</Exec>
            <Exec>itKeyVals += inc / 8</Exec>
            <!-- Fast forward -->
            <Break Condition="(void*)itKeyVals == itEndKeyVals" />
            <Item Name="[{itKeyVals-&gt;mData.first}]">itKeyVals-&gt;mData.second</Item>
            <!-- <Item>itKeyVals-&gt;mData</Item> -->
            <Exec>itInfo++ </Exec>
            <Exec>itKeyVals++</Exec>
          </Loop>
        </CustomListItems>
      </Expand>
    </Synthetic>
    <Item Name="[load_factor]" >float(mNumElements) / float(mMask + 1)</Item>
    <Item Name="[max_load_factor]">$T2</Item>
    <Item Name="[IsFlat]" >$T1</Item>
    <Item Name="[hash_function]" >*(WrapHash&lt;$T5&gt;*)this,nd</Item>
    <Item Name="[key_eq]" >*(WrapKeyEqual&lt;$T6&gt;*)this,nd</Item>
  </Expand>
</Type>
</AutoVisualizer>

View File

@ -2723,13 +2723,42 @@ void SourceView::RenderAsmLine( AsmLine& line, const AddrStat& ipcnt, const Addr
{
TextDisabledUnformatted( buf );
}
if( ImGui::IsItemClicked( 0 ) )
if( ImGui::IsItemHovered() )
{
m_asmCountBase = asmIdx;
}
else if( ImGui::IsItemClicked( 1 ) )
{
m_asmCountBase = -1;
if( m_font ) ImGui::PopFont();
ImGui::BeginTooltip();
if( m_asmCountBase >= 0 )
{
TextDisabledUnformatted( "Absolute address:" );
ImGui::SameLine();
ImGui::Text( "%" PRIx64, line.addr );
TextDisabledUnformatted( "Relative address:" );
ImGui::SameLine();
ImGui::Text( "+%" PRIx64, line.addr - m_baseAddr );
}
else if( m_asmRelative )
{
TextDisabledUnformatted( "Absolute address:" );
ImGui::SameLine();
ImGui::Text( "%" PRIx64, line.addr );
}
else
{
TextDisabledUnformatted( "Relative address:" );
ImGui::SameLine();
ImGui::Text( "+%" PRIx64, line.addr - m_baseAddr );
}
ImGui::EndTooltip();
if( m_font ) ImGui::PushFont( m_font );
if( ImGui::IsItemClicked( 0 ) )
{
m_asmCountBase = asmIdx;
}
else if( ImGui::IsItemClicked( 1 ) )
{
m_asmCountBase = -1;
}
}
const auto stw = ImGui::CalcTextSize( " " ).x;
@ -3025,6 +3054,17 @@ void SourceView::RenderAsmLine( AsmLine& line, const AddrStat& ipcnt, const Addr
ImGui::TextUnformatted( buf );
}
uint32_t jumpOffset;
uint64_t jumpBase;
const char* jumpName = nullptr;
if( line.jumpAddr != 0 )
{
jumpOffset = 0;
jumpBase = worker.GetSymbolForAddress( line.jumpAddr, jumpOffset );
auto jumpSym = jumpBase == 0 ? worker.GetSymbolData( line.jumpAddr ) : worker.GetSymbolData( jumpBase );
if( jumpSym ) jumpName = worker.GetString( jumpSym->name );
}
if( ImGui::IsItemHovered() )
{
if( asmVar )
@ -3032,16 +3072,31 @@ void SourceView::RenderAsmLine( AsmLine& line, const AddrStat& ipcnt, const Addr
const auto& var = *asmVar;
if( m_font ) ImGui::PopFont();
ImGui::BeginTooltip();
if( opdesc != 0 )
if( jumpName || opdesc != 0 )
{
ImGui::TextUnformatted( OpDescList[opdesc] );
if( opdesc != 0 ) ImGui::TextUnformatted( OpDescList[opdesc] );
if( jumpName )
{
if( jumpBase == m_baseAddr )
{
TextDisabledUnformatted( "Local target:" );
}
else
{
TextDisabledUnformatted( "External target:" );
}
ImGui::SameLine();
ImGui::Text( "%s+%" PRIu32, jumpName, jumpOffset );
}
ImGui::Separator();
}
TextFocused( "Throughput:", RealToString( var.tp ) );
ImGui::SameLine();
TextDisabledUnformatted( "(cycles per instruction, lower is better)" );
if( var.maxlat >= 0 )
{
bool exact = false;
TextDisabledUnformatted( "Latency:" );
ImGui::SameLine();
if( var.minlat == var.maxlat && var.minbound == var.maxbound )
@ -3053,6 +3108,7 @@ void SourceView::RenderAsmLine( AsmLine& line, const AddrStat& ipcnt, const Addr
else
{
ImGui::TextUnformatted( RealToString( var.minlat ) );
exact = true;
}
}
else
@ -3076,7 +3132,14 @@ void SourceView::RenderAsmLine( AsmLine& line, const AddrStat& ipcnt, const Addr
}
}
ImGui::SameLine();
TextDisabledUnformatted( "(cycles in execution, may vary by used output)" );
if( exact )
{
TextDisabledUnformatted( "(cycles in execution)" );
}
else
{
TextDisabledUnformatted( "(cycles in execution, may vary by used output)" );
}
}
TextFocused( "\xce\xbcops:", RealToString( var.uops ) );
if( var.port != -1 ) TextFocused( "Ports:", PortList[var.port] );
@ -3134,6 +3197,23 @@ void SourceView::RenderAsmLine( AsmLine& line, const AddrStat& ipcnt, const Addr
ImGui::EndTooltip();
if( m_font ) ImGui::PushFont( m_font );
}
else if( jumpName )
{
if( m_font ) ImGui::PopFont();
ImGui::BeginTooltip();
if( jumpBase == m_baseAddr )
{
TextDisabledUnformatted( "Local target:" );
}
else
{
TextDisabledUnformatted( "External target:" );
}
ImGui::SameLine();
ImGui::Text( "%s+%" PRIu32, jumpName, jumpOffset );
ImGui::EndTooltip();
if( m_font ) ImGui::PushFont( m_font );
}
if( m_cpuArch == CpuArchX86 || m_cpuArch == CpuArchX64 )
{
if( line.readX86[0] != RegsX86::invalid || line.writeX86[0] != RegsX86::invalid )
@ -3265,35 +3345,29 @@ void SourceView::RenderAsmLine( AsmLine& line, const AddrStat& ipcnt, const Addr
TextColoredUnformatted( ImVec4( 0.5f, 0.5, 1, 1 ), "}" );
}
if( line.jumpAddr != 0 )
if( jumpName )
{
uint32_t offset = 0;
const auto base = worker.GetSymbolForAddress( line.jumpAddr, offset );
auto sym = base == 0 ? worker.GetSymbolData( line.jumpAddr ) : worker.GetSymbolData( base );
if( sym )
ImGui::SameLine();
ImGui::Spacing();
ImGui::SameLine();
if( jumpBase == m_baseAddr )
{
ImGui::SameLine();
ImGui::Spacing();
ImGui::SameLine();
if( base == m_baseAddr )
ImGui::TextDisabled( "-> [%s+%" PRIu32"]", jumpName, jumpOffset );
if( ImGui::IsItemHovered() )
{
ImGui::TextDisabled( "-> [%s+%" PRIu32"]", worker.GetString( sym->name ), offset );
if( ImGui::IsItemHovered() )
m_highlightAddr = line.jumpAddr;
if( ImGui::IsItemClicked() )
{
m_highlightAddr = line.jumpAddr;
if( ImGui::IsItemClicked() )
{
m_targetAddr = line.jumpAddr;
m_selectedAddresses.clear();
m_selectedAddresses.emplace( line.jumpAddr );
}
m_targetAddr = line.jumpAddr;
m_selectedAddresses.clear();
m_selectedAddresses.emplace( line.jumpAddr );
}
}
else
{
ImGui::TextDisabled( "[%s+%" PRIu32"]", worker.GetString( sym->name ), offset );
if( ImGui::IsItemClicked() ) jumpOut = line.jumpAddr;
}
}
else
{
ImGui::TextDisabled( "[%s+%" PRIu32"]", jumpName, jumpOffset );
if( ImGui::IsItemClicked() ) jumpOut = line.jumpAddr;
}
}