Mirror of https://github.com/wolfpld/tracy, synced 2025-04-29 04:23:51 +00:00
Make ring buffer size adjustable.
If call stack capture is enabled for context switch data, the 64KB buffer is too small to work without overruns. However, if the default buffer size is increased, the maximum locked memory limit is hit. This change keeps the small buffer size for all buffers that may be used without escalated privileges. The context switch buffer is larger, but it does not need to obey the limit: for it to be used at all, the application must be running as root.
This commit is contained in:
parent db64a5fa7e
commit 5741bcfd32
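The tradeoff described in the message comes from how Linux accounts perf ring buffers: for unprivileged processes, each mmap'd buffer is charged against the locked-memory budget (RLIMIT_MEMLOCK, plus a per-user allowance controlled by kernel.perf_event_mlock_kb), so per-core buffers multiply quickly. Below is a standalone sketch, not part of this commit, that estimates how many small buffers fit under the soft limit; the exact accounting varies by kernel version and settings.

// Hedged estimate only: real accounting also includes the
// kernel.perf_event_mlock_kb per-user allowance and kernel-specific rules.
#include <cstdio>
#include <sys/resource.h>
#include <unistd.h>

int main()
{
    rlimit lim;
    if( getrlimit( RLIMIT_MEMLOCK, &lim ) != 0 ) return 1;
    const long pageSize = sysconf( _SC_PAGESIZE );
    // Each perf mmap is one metadata page plus a power-of-two data area;
    // 64*1024 mirrors the small buffer size this commit keeps.
    const size_t perBuffer = 64*1024 + (size_t)pageSize;
    printf( "RLIMIT_MEMLOCK soft limit: %llu bytes\n", (unsigned long long)lim.rlim_cur );
    printf( "~%llu buffers before hitting it\n", (unsigned long long)( lim.rlim_cur / perBuffer ) );
    return 0;
}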
@@ -3,19 +3,19 @@
 namespace tracy
 {
 
-template<size_t Size>
 class RingBuffer
 {
 public:
-    RingBuffer( int fd, int id, int cpu = -1 )
-        : m_id( id )
+    RingBuffer( int size, int fd, int id, int cpu = -1 )
+        : m_size( size )
+        , m_id( id )
         , m_cpu( cpu )
         , m_fd( fd )
     {
         const auto pageSize = uint32_t( getpagesize() );
-        assert( Size >= pageSize );
-        assert( __builtin_popcount( Size ) == 1 );
-        m_mapSize = Size + pageSize;
+        assert( size >= pageSize );
+        assert( __builtin_popcount( size ) == 1 );
+        m_mapSize = size + pageSize;
         auto mapAddr = mmap( nullptr, m_mapSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 );
         if( mapAddr == MAP_FAILED )
         {
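The two asserts carry the kernel's contract for perf ring buffers: the data area must be at least one page and a power of two, and the mapping then adds one metadata page, hence m_mapSize = size + pageSize. A hypothetical helper, not present in Tracy, that turns an arbitrary request into a size passing both asserts:

#include <cstddef>

// Hypothetical: round `requested` up to the nearest power of two that is
// at least one page, satisfying both constructor asserts.
static size_t ValidRingBufferSize( size_t requested, size_t pageSize )
{
    size_t size = pageSize;          // page size is itself a power of two
    while( size < requested ) size <<= 1;
    return size;
}

For example, ValidRingBufferSize( 100*1024, 4096 ) yields 128*1024.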
@@ -66,16 +66,18 @@ public:
 
     void Read( void* dst, uint64_t offset, uint64_t cnt )
     {
-        auto src = ( m_tail + offset ) % Size;
-        if( src + cnt <= Size )
+        const auto size = m_size;
+        auto src = ( m_tail + offset ) % size;
+        if( src + cnt <= size )
         {
             memcpy( dst, m_buffer + src, cnt );
         }
         else
         {
-            const auto s0 = Size - src;
-            memcpy( dst, m_buffer + src, s0 );
-            memcpy( (char*)dst + s0, m_buffer, cnt - s0 );
+            const auto s0 = size - src;
+            const auto buf = m_buffer;
+            memcpy( dst, buf + src, s0 );
+            memcpy( (char*)dst + s0, buf, cnt - s0 );
         }
     }
 
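Besides swapping the compile-time Size for the new m_size member, the rewritten Read() hoists the member loads into locals; presumably this lets the compiler keep them in registers, since after each memcpy through a void* destination it could otherwise be forced to reload the members. A self-contained sketch of the same split-read logic, with illustrative names rather than Tracy's:

#include <cstdint>
#include <cstring>

// Read cnt bytes starting at (tail + offset) from a power-of-two ring.
static void RingRead( void* dst, const char* buffer, uint64_t size,
                      uint64_t tail, uint64_t offset, uint64_t cnt )
{
    const auto src = ( tail + offset ) % size;
    if( src + cnt <= size )
    {
        memcpy( dst, buffer + src, cnt );               // contiguous read
    }
    else
    {
        const auto s0 = size - src;                     // bytes to buffer end
        memcpy( dst, buffer + src, s0 );
        memcpy( (char*)dst + s0, buffer, cnt - s0 );    // wrapped remainder
    }
}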
@@ -115,6 +117,7 @@ private:
         std::atomic_store_explicit( (volatile std::atomic<uint64_t>*)&m_metadata->data_tail, m_tail, std::memory_order_release );
     }
 
+    int m_size;
     uint64_t m_tail;
     char* m_buffer;
     int m_id;
@@ -649,8 +649,7 @@ static int s_numCpus = 0;
 static int s_numBuffers = 0;
 static int s_ctxBufferIdx = 0;
 
-static constexpr size_t RingBufSize = 256*1024;
-static RingBuffer<RingBufSize>* s_ring = nullptr;
+static RingBuffer* s_ring = nullptr;
 
 static const int ThreadHashSize = 4 * 1024;
 static uint32_t s_threadHash[ThreadHashSize] = {};
@@ -890,7 +889,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
         2 // context switches + wakeups
     );
     s_numCpus = (int)std::thread::hardware_concurrency();
-    s_ring = (RingBuffer<RingBufSize>*)tracy_malloc( sizeof( RingBuffer<RingBufSize> ) * maxNumBuffers );
+    s_ring = (RingBuffer*)tracy_malloc( sizeof( RingBuffer ) * maxNumBuffers );
     s_numBuffers = 0;
 
     // software sampling
@@ -923,13 +922,13 @@ bool SysTraceStart( int64_t& samplingPeriod )
             fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
             if( fd == -1 )
             {
-                for( int j=0; j<s_numBuffers; j++ ) s_ring[j].~RingBuffer<RingBufSize>();
+                for( int j=0; j<s_numBuffers; j++ ) s_ring[j].~RingBuffer();
                 tracy_free( s_ring );
                 return false;
             }
             TracyDebug( "  No access to kernel samples\n" );
         }
-        new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCallstack );
+        new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventCallstack );
         s_numBuffers++;
         TracyDebug( "  Core %i ok\n", i );
     }
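s_ring is raw storage from tracy_malloc, so elements are brought to life with placement new and torn down with explicit destructor calls; dropping the template also simplifies the destructor spelling from ~RingBuffer<RingBufSize>() to ~RingBuffer(). A minimal sketch of that pattern under assumed names (Buffer stands in for RingBuffer, plain malloc for tracy_malloc):

#include <cstdlib>
#include <new>

struct Buffer
{
    Buffer( int size, int fd ) : m_size( size ), m_fd( fd ) {}
    int m_size, m_fd;
};

int main()
{
    const int maxNum = 4;
    auto bufs = (Buffer*)malloc( sizeof( Buffer ) * maxNum );   // no ctors run
    int num = 0;
    // Construct in place; a runtime size argument means each element can
    // differ, which the old compile-time template parameter could not express.
    for( int i=0; i<maxNum; i++ ) new( bufs+num++ ) Buffer( 64*1024, -1 );
    for( int j=0; j<num; j++ ) bufs[j].~Buffer();               // explicit dtors
    free( bufs );
    return 0;
}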
@@ -960,7 +959,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
             const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
             if( fd != -1 )
             {
-                new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCpuCycles );
+                new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventCpuCycles );
                 s_numBuffers++;
                 TracyDebug( "  Core %i ok\n", i );
             }
@@ -972,7 +971,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
             const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
             if( fd != -1 )
             {
-                new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventInstructionsRetired );
+                new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventInstructionsRetired );
                 s_numBuffers++;
                 TracyDebug( "  Core %i ok\n", i );
             }
@@ -994,7 +993,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
             const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
             if( fd != -1 )
             {
-                new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCacheReference );
+                new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventCacheReference );
                 s_numBuffers++;
                 TracyDebug( "  Core %i ok\n", i );
             }
@@ -1006,7 +1005,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
             const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
             if( fd != -1 )
             {
-                new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCacheMiss );
+                new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventCacheMiss );
                 s_numBuffers++;
                 TracyDebug( "  Core %i ok\n", i );
             }
@@ -1023,7 +1022,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
             const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
             if( fd != -1 )
             {
-                new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventBranchRetired );
+                new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventBranchRetired );
                 s_numBuffers++;
                 TracyDebug( "  Core %i ok\n", i );
             }
@@ -1035,7 +1034,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
             const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
             if( fd != -1 )
             {
-                new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventBranchMiss );
+                new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventBranchMiss );
                 s_numBuffers++;
                 TracyDebug( "  Core %i ok\n", i );
             }
@@ -1069,7 +1068,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
             const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
             if( fd != -1 )
             {
-                new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventContextSwitch, i );
+                new( s_ring+s_numBuffers ) RingBuffer( 256*1024, fd, EventContextSwitch, i );
                 s_numBuffers++;
                 TracyDebug( "  Core %i ok\n", i );
             }
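This is the buffer the commit grows to 256*1024: passing pid == -1 with a fixed CPU observes every process on that core, which already demands root (or a permissive perf_event_paranoid), and such privileged use is what exempts the buffer from the unprivileged locked-memory concern. A hedged sketch of opening such an event; the exact attribute setup Tracy uses may differ:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>

// Open a per-CPU, all-processes context switch event (requires privileges).
static int OpenContextSwitchEvent( int cpu )
{
    perf_event_attr pe = {};
    pe.type = PERF_TYPE_SOFTWARE;
    pe.size = sizeof( pe );
    pe.config = PERF_COUNT_SW_DUMMY;   // no counter; only the records matter
    pe.sample_period = 1;
    pe.context_switch = 1;             // emit PERF_RECORD_SWITCH_CPU_WIDE
    pe.disabled = 1;
    // pid == -1, cpu fixed: trace everything scheduled on this core.
    return (int)syscall( SYS_perf_event_open, &pe, -1, cpu, -1, PERF_FLAG_FD_CLOEXEC );
}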
@@ -1086,7 +1085,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
             const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
             if( fd != -1 )
             {
-                new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventWakeup, i );
+                new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventWakeup, i );
                 s_numBuffers++;
                 TracyDebug( "  Core %i ok\n", i );
             }
@@ -1105,7 +1104,7 @@ void SysTraceStop()
     traceActive.store( false, std::memory_order_relaxed );
 }
 
-static uint64_t* GetCallstackBlock( uint64_t cnt, RingBuffer<RingBufSize>& ring, uint64_t offset )
+static uint64_t* GetCallstackBlock( uint64_t cnt, RingBuffer& ring, uint64_t offset )
 {
     auto trace = (uint64_t*)tracy_malloc_fast( ( 1 + cnt ) * sizeof( uint64_t ) );
     ring.Read( trace+1, offset, sizeof( uint64_t ) * cnt );
@@ -1459,7 +1458,7 @@ void SysTraceWorker( void* ptr )
         }
     }
 
-    for( int i=0; i<s_numBuffers; i++ ) s_ring[i].~RingBuffer<RingBufSize>();
+    for( int i=0; i<s_numBuffers; i++ ) s_ring[i].~RingBuffer();
     tracy_free_fast( s_ring );
 }
 