1
0
mirror of https://github.com/wolfpld/tracy synced 2025-04-29 04:23:51 +00:00

Make ring buffer size adjustable.

If call stack capture is enabled for context switch data, the 64KB buffer is
too small to work without overruns. However, if the default buffer size is
increased, then the maximum locked memory limit is hit.

This change keeps the small buffer size for all the buffers that may be used
without elevated privileges. The context switch buffer is bigger, but it does
not need to obey the limits, because the application must be running as root
for it to be used.
This commit is contained in:
Bartosz Taudul 2021-12-21 15:48:40 +01:00
parent db64a5fa7e
commit 5741bcfd32
No known key found for this signature in database
GPG Key ID: B7FE2008B7575DF3
2 changed files with 28 additions and 26 deletions

View File

@ -3,19 +3,19 @@
namespace tracy
{
template<size_t Size>
class RingBuffer
{
public:
RingBuffer( int fd, int id, int cpu = -1 )
: m_id( id )
RingBuffer( int size, int fd, int id, int cpu = -1 )
: m_size( size )
, m_id( id )
, m_cpu( cpu )
, m_fd( fd )
{
const auto pageSize = uint32_t( getpagesize() );
assert( Size >= pageSize );
assert( __builtin_popcount( Size ) == 1 );
m_mapSize = Size + pageSize;
assert( size >= pageSize );
assert( __builtin_popcount( size ) == 1 );
m_mapSize = size + pageSize;
auto mapAddr = mmap( nullptr, m_mapSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 );
if( mapAddr == MAP_FAILED )
{
@ -66,16 +66,18 @@ public:
void Read( void* dst, uint64_t offset, uint64_t cnt )
{
auto src = ( m_tail + offset ) % Size;
if( src + cnt <= Size )
const auto size = m_size;
auto src = ( m_tail + offset ) % size;
if( src + cnt <= size )
{
memcpy( dst, m_buffer + src, cnt );
}
else
{
const auto s0 = Size - src;
memcpy( dst, m_buffer + src, s0 );
memcpy( (char*)dst + s0, m_buffer, cnt - s0 );
const auto s0 = size - src;
const auto buf = m_buffer;
memcpy( dst, buf + src, s0 );
memcpy( (char*)dst + s0, buf, cnt - s0 );
}
}
@ -115,6 +117,7 @@ private:
std::atomic_store_explicit( (volatile std::atomic<uint64_t>*)&m_metadata->data_tail, m_tail, std::memory_order_release );
}
int m_size;
uint64_t m_tail;
char* m_buffer;
int m_id;

View File

@ -649,8 +649,7 @@ static int s_numCpus = 0;
static int s_numBuffers = 0;
static int s_ctxBufferIdx = 0;
static constexpr size_t RingBufSize = 256*1024;
static RingBuffer<RingBufSize>* s_ring = nullptr;
static RingBuffer* s_ring = nullptr;
static const int ThreadHashSize = 4 * 1024;
static uint32_t s_threadHash[ThreadHashSize] = {};
@ -890,7 +889,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
2 // context switches + wakeups
);
s_numCpus = (int)std::thread::hardware_concurrency();
s_ring = (RingBuffer<RingBufSize>*)tracy_malloc( sizeof( RingBuffer<RingBufSize> ) * maxNumBuffers );
s_ring = (RingBuffer*)tracy_malloc( sizeof( RingBuffer ) * maxNumBuffers );
s_numBuffers = 0;
// software sampling
@ -923,13 +922,13 @@ bool SysTraceStart( int64_t& samplingPeriod )
fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd == -1 )
{
for( int j=0; j<s_numBuffers; j++ ) s_ring[j].~RingBuffer<RingBufSize>();
for( int j=0; j<s_numBuffers; j++ ) s_ring[j].~RingBuffer();
tracy_free( s_ring );
return false;
}
TracyDebug( " No access to kernel samples\n" );
}
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCallstack );
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventCallstack );
s_numBuffers++;
TracyDebug( " Core %i ok\n", i );
}
@ -960,7 +959,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCpuCycles );
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventCpuCycles );
s_numBuffers++;
TracyDebug( " Core %i ok\n", i );
}
@ -972,7 +971,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventInstructionsRetired );
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventInstructionsRetired );
s_numBuffers++;
TracyDebug( " Core %i ok\n", i );
}
@ -994,7 +993,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCacheReference );
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventCacheReference );
s_numBuffers++;
TracyDebug( " Core %i ok\n", i );
}
@ -1006,7 +1005,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCacheMiss );
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventCacheMiss );
s_numBuffers++;
TracyDebug( " Core %i ok\n", i );
}
@ -1023,7 +1022,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventBranchRetired );
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventBranchRetired );
s_numBuffers++;
TracyDebug( " Core %i ok\n", i );
}
@ -1035,7 +1034,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventBranchMiss );
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventBranchMiss );
s_numBuffers++;
TracyDebug( " Core %i ok\n", i );
}
@ -1069,7 +1068,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventContextSwitch, i );
new( s_ring+s_numBuffers ) RingBuffer( 256*1024, fd, EventContextSwitch, i );
s_numBuffers++;
TracyDebug( " Core %i ok\n", i );
}
@ -1086,7 +1085,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventWakeup, i );
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventWakeup, i );
s_numBuffers++;
TracyDebug( " Core %i ok\n", i );
}
@ -1105,7 +1104,7 @@ void SysTraceStop()
traceActive.store( false, std::memory_order_relaxed );
}
static uint64_t* GetCallstackBlock( uint64_t cnt, RingBuffer<RingBufSize>& ring, uint64_t offset )
static uint64_t* GetCallstackBlock( uint64_t cnt, RingBuffer& ring, uint64_t offset )
{
auto trace = (uint64_t*)tracy_malloc_fast( ( 1 + cnt ) * sizeof( uint64_t ) );
ring.Read( trace+1, offset, sizeof( uint64_t ) * cnt );
@ -1459,7 +1458,7 @@ void SysTraceWorker( void* ptr )
}
}
for( int i=0; i<s_numBuffers; i++ ) s_ring[i].~RingBuffer<RingBufSize>();
for( int i=0; i<s_numBuffers; i++ ) s_ring[i].~RingBuffer();
tracy_free_fast( s_ring );
}