1
0
mirror of https://github.com/wolfpld/tracy synced 2025-04-29 04:23:51 +00:00

Add transient OpenGL zones.

This commit is contained in:
Bartosz Taudul 2021-01-15 20:13:09 +01:00
parent 69372c2dcb
commit 5a8d30ddc3
4 changed files with 102 additions and 5 deletions

View File

@ -12,12 +12,14 @@
#define TracyGpuNamedZoneC(x,y,z,w) #define TracyGpuNamedZoneC(x,y,z,w)
#define TracyGpuZone(x) #define TracyGpuZone(x)
#define TracyGpuZoneC(x,y) #define TracyGpuZoneC(x,y)
#define TracyGpuZoneTransient(x,y,z)
#define TracyGpuCollect #define TracyGpuCollect
#define TracyGpuNamedZoneS(x,y,z,w) #define TracyGpuNamedZoneS(x,y,z,w)
#define TracyGpuNamedZoneCS(x,y,z,w,a) #define TracyGpuNamedZoneCS(x,y,z,w,a)
#define TracyGpuZoneS(x,y) #define TracyGpuZoneS(x,y)
#define TracyGpuZoneCS(x,y,z) #define TracyGpuZoneCS(x,y,z)
#define TracyGpuZoneTransientS(x,y,z,w)
namespace tracy namespace tracy
{ {
@ -56,11 +58,13 @@ public:
# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active ); # define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyGpuZone( name ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, TRACY_CALLSTACK, true ) # define TracyGpuZone( name ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, TRACY_CALLSTACK, true )
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true ) # define TracyGpuZoneC( name, color ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true )
# define TracyGpuZoneTransient( varname, name, active ) tracy::GpuCtxScope varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), TRACY_CALLSTACK, active );
#else #else
# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active ); # define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active ); # define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
# define TracyGpuZone( name ) TracyGpuNamedZone( ___tracy_gpu_zone, name, true ) # define TracyGpuZone( name ) TracyGpuNamedZone( ___tracy_gpu_zone, name, true )
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color, true ) # define TracyGpuZoneC( name, color ) TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color, true )
# define TracyGpuZoneTransient( varname, name, active ) tracy::GpuCtxScope varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), active );
#endif #endif
#define TracyGpuCollect tracy::GetGpuCtx().ptr->Collect(); #define TracyGpuCollect tracy::GetGpuCtx().ptr->Collect();
@ -69,11 +73,13 @@ public:
# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active ); # define TracyGpuNamedZoneCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
# define TracyGpuZoneS( name, depth ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth, true ) # define TracyGpuZoneS( name, depth ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth, true )
# define TracyGpuZoneCS( name, color, depth ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth, true ) # define TracyGpuZoneCS( name, color, depth ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth, true )
# define TracyGpuZoneTransientS( varname, name, depth, active ) tracy::GpuCtxScope varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), depth, active );
#else #else
# define TracyGpuNamedZoneS( varname, name, depth, active ) TracyGpuNamedZone( varname, name, active ) # define TracyGpuNamedZoneS( varname, name, depth, active ) TracyGpuNamedZone( varname, name, active )
# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) TracyGpuNamedZoneC( varname, name, color, active ) # define TracyGpuNamedZoneCS( varname, name, color, depth, active ) TracyGpuNamedZoneC( varname, name, color, active )
# define TracyGpuZoneS( name, depth ) TracyGpuZone( name ) # define TracyGpuZoneS( name, depth ) TracyGpuZone( name )
# define TracyGpuZoneCS( name, color, depth ) TracyGpuZoneC( name, color ) # define TracyGpuZoneCS( name, color, depth ) TracyGpuZoneC( name, color )
# define TracyGpuZoneTransientS( varname, name, depth, active ) TracyGpuZoneTransient( varname, name, active )
#endif #endif
namespace tracy namespace tracy
@ -196,10 +202,10 @@ public:
TracyLfqPrepare( QueueType::GpuZoneBegin ); TracyLfqPrepare( QueueType::GpuZoneBegin );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) ); memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() ); MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
TracyLfqCommit; TracyLfqCommit;
} }
@ -220,10 +226,57 @@ public:
const auto thread = GetThreadHandle(); const auto thread = GetThreadHandle();
TracyLfqPrepare( QueueType::GpuZoneBeginCallstack ); TracyLfqPrepare( QueueType::GpuZoneBeginCallstack );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
MemWrite( &item->gpuZoneBegin.thread, thread ); MemWrite( &item->gpuZoneBegin.thread, thread );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() ); MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
TracyLfqCommit;
}
tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
const auto queryId = GetGpuCtx().ptr->NextQueryId();
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLoc );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
TracyLfqCommit;
}
tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
const auto queryId = GetGpuCtx().ptr->NextQueryId();
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
GetProfiler().SendCallstack( depth );
const auto thread = GetThreadHandle();
TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLocCallstack );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.thread, thread );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
TracyLfqCommit; TracyLfqCommit;
} }

View File

@ -1771,6 +1771,13 @@ static void FreeAssociatedMemory( const QueueItem& item )
ptr = MemRead<uint64_t>( &item.zoneBegin.srcloc ); ptr = MemRead<uint64_t>( &item.zoneBegin.srcloc );
tracy_free( (void*)ptr ); tracy_free( (void*)ptr );
break; break;
case QueueType::GpuZoneBeginAllocSrcLoc:
case QueueType::GpuZoneBeginAllocSrcLocCallstack:
case QueueType::GpuZoneBeginAllocSrcLocSerial:
case QueueType::GpuZoneBeginAllocSrcLocCallstackSerial:
ptr = MemRead<uint64_t>( &item.gpuZoneBegin.srcloc );
tracy_free( (void*)ptr );
break;
case QueueType::CallstackMemory: case QueueType::CallstackMemory:
case QueueType::Callstack: case QueueType::Callstack:
ptr = MemRead<uint64_t>( &item.callstackFat.ptr ); ptr = MemRead<uint64_t>( &item.callstackFat.ptr );
@ -1976,6 +1983,18 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
MemWrite( &item->gpuZoneBegin.cpuTime, dt ); MemWrite( &item->gpuZoneBegin.cpuTime, dt );
break; break;
} }
case QueueType::GpuZoneBeginAllocSrcLoc:
case QueueType::GpuZoneBeginAllocSrcLocCallstack:
{
int64_t t = MemRead<int64_t>( &item->gpuZoneBegin.cpuTime );
int64_t dt = t - refThread;
refThread = t;
MemWrite( &item->gpuZoneBegin.cpuTime, dt );
ptr = MemRead<uint64_t>( &item->gpuZoneBegin.srcloc );
SendSourceLocationPayload( ptr );
tracy_free( (void*)ptr );
break;
}
case QueueType::GpuZoneEnd: case QueueType::GpuZoneEnd:
{ {
int64_t t = MemRead<int64_t>( &item->gpuZoneEnd.cpuTime ); int64_t t = MemRead<int64_t>( &item->gpuZoneEnd.cpuTime );
@ -2206,6 +2225,18 @@ Profiler::DequeueStatus Profiler::DequeueSerial()
MemWrite( &item->gpuZoneBegin.cpuTime, dt ); MemWrite( &item->gpuZoneBegin.cpuTime, dt );
break; break;
} }
case QueueType::GpuZoneBeginAllocSrcLocSerial:
case QueueType::GpuZoneBeginAllocSrcLocCallstackSerial:
{
int64_t t = MemRead<int64_t>( &item->gpuZoneBegin.cpuTime );
int64_t dt = t - refSerial;
refSerial = t;
MemWrite( &item->gpuZoneBegin.cpuTime, dt );
ptr = MemRead<uint64_t>( &item->gpuZoneBegin.srcloc );
SendSourceLocationPayload( ptr );
tracy_free( (void*)ptr );
break;
}
case QueueType::GpuZoneEndSerial: case QueueType::GpuZoneEndSerial:
{ {
int64_t t = MemRead<int64_t>( &item->gpuZoneEnd.cpuTime ); int64_t t = MemRead<int64_t>( &item->gpuZoneEnd.cpuTime );

View File

@ -9,7 +9,7 @@ namespace tracy
constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; } constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; }
enum : uint32_t { ProtocolVersion = 43 }; enum : uint32_t { ProtocolVersion = 44 };
enum : uint16_t { BroadcastVersion = 2 }; enum : uint16_t { BroadcastVersion = 2 };
using lz4sz_t = uint32_t; using lz4sz_t = uint32_t;

View File

@ -42,9 +42,13 @@ enum class QueueType : uint8_t
MemFreeCallstackNamed, MemFreeCallstackNamed,
GpuZoneBegin, GpuZoneBegin,
GpuZoneBeginCallstack, GpuZoneBeginCallstack,
GpuZoneBeginAllocSrcLoc,
GpuZoneBeginAllocSrcLocCallstack,
GpuZoneEnd, GpuZoneEnd,
GpuZoneBeginSerial, GpuZoneBeginSerial,
GpuZoneBeginCallstackSerial, GpuZoneBeginCallstackSerial,
GpuZoneBeginAllocSrcLocSerial,
GpuZoneBeginAllocSrcLocCallstackSerial,
GpuZoneEndSerial, GpuZoneEndSerial,
PlotData, PlotData,
ContextSwitch, ContextSwitch,
@ -317,15 +321,19 @@ struct QueueGpuNewContext
GpuContextType type; GpuContextType type;
}; };
struct QueueGpuZoneBegin struct QueueGpuZoneBeginLean
{ {
int64_t cpuTime; int64_t cpuTime;
uint64_t srcloc;
uint64_t thread; uint64_t thread;
uint16_t queryId; uint16_t queryId;
uint8_t context; uint8_t context;
}; };
struct QueueGpuZoneBegin : public QueueGpuZoneBeginLean
{
uint64_t srcloc;
};
struct QueueGpuZoneEnd struct QueueGpuZoneEnd
{ {
int64_t cpuTime; int64_t cpuTime;
@ -522,6 +530,7 @@ struct QueueItem
QueueMessageColorFat messageColorFat; QueueMessageColorFat messageColorFat;
QueueGpuNewContext gpuNewContext; QueueGpuNewContext gpuNewContext;
QueueGpuZoneBegin gpuZoneBegin; QueueGpuZoneBegin gpuZoneBegin;
QueueGpuZoneBeginLean gpuZoneBeginLean;
QueueGpuZoneEnd gpuZoneEnd; QueueGpuZoneEnd gpuZoneEnd;
QueueGpuTime gpuTime; QueueGpuTime gpuTime;
QueueGpuCalibration gpuCalibration; QueueGpuCalibration gpuCalibration;
@ -586,9 +595,13 @@ static constexpr size_t QueueDataSize[] = {
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack, named sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack, named
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ),
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// allocated source location
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// allocated source location, callstack
sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ), sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ),
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // serial sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // serial
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // serial, callstack sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // serial, callstack
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// serial, allocated source location
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// serial, allocated source location, callstack
sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ), // serial sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ), // serial
sizeof( QueueHeader ) + sizeof( QueuePlotData ), sizeof( QueueHeader ) + sizeof( QueuePlotData ),
sizeof( QueueHeader ) + sizeof( QueueContextSwitch ), sizeof( QueueHeader ) + sizeof( QueueContextSwitch ),