diff --git a/TracyOpenGL.hpp b/TracyOpenGL.hpp index cfa057a3..9ecc000f 100644 --- a/TracyOpenGL.hpp +++ b/TracyOpenGL.hpp @@ -12,12 +12,14 @@ #define TracyGpuNamedZoneC(x,y,z,w) #define TracyGpuZone(x) #define TracyGpuZoneC(x,y) +#define TracyGpuZoneTransient(x,y,z) #define TracyGpuCollect #define TracyGpuNamedZoneS(x,y,z,w) #define TracyGpuNamedZoneCS(x,y,z,w,a) #define TracyGpuZoneS(x,y) #define TracyGpuZoneCS(x,y,z) +#define TracyGpuZoneTransientS(x,y,z,w) namespace tracy { @@ -56,11 +58,13 @@ public: # define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active ); # define TracyGpuZone( name ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, TRACY_CALLSTACK, true ) # define TracyGpuZoneC( name, color ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true ) +# define TracyGpuZoneTransient( varname, name, active ) tracy::GpuCtxScope varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), TRACY_CALLSTACK, active ); #else # define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active ); # define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active ); # define TracyGpuZone( name ) TracyGpuNamedZone( ___tracy_gpu_zone, name, true ) # define TracyGpuZoneC( name, color ) TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color, true ) +# define TracyGpuZoneTransient( varname, name, active ) tracy::GpuCtxScope varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), active ); #endif #define TracyGpuCollect tracy::GetGpuCtx().ptr->Collect(); @@ -69,11 +73,13 @@ public: # define TracyGpuNamedZoneCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active ); # define TracyGpuZoneS( name, depth ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth, true ) # define TracyGpuZoneCS( name, color, depth ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth, true ) +# define TracyGpuZoneTransientS( varname, name, depth, active ) tracy::GpuCtxScope varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), depth, active ); #else # define TracyGpuNamedZoneS( varname, name, depth, active ) TracyGpuNamedZone( varname, name, active ) # define TracyGpuNamedZoneCS( varname, name, color, depth, active ) TracyGpuNamedZoneC( varname, name, color, active ) # define TracyGpuZoneS( name, depth ) TracyGpuZone( name ) # define TracyGpuZoneCS( name, color, depth ) TracyGpuZoneC( name, color ) +# define TracyGpuZoneTransientS( varname, name, depth, active ) TracyGpuZoneTransient( varname, name, active ) #endif namespace tracy @@ -196,10 +202,10 @@ public: TracyLfqPrepare( QueueType::GpuZoneBegin ); MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); - MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) ); MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() ); + MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); TracyLfqCommit; } @@ -220,10 +226,57 @@ public: const auto thread = GetThreadHandle(); TracyLfqPrepare( QueueType::GpuZoneBeginCallstack ); MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); - MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); MemWrite( &item->gpuZoneBegin.thread, thread ); MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() ); + MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); + TracyLfqCommit; + } + + tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( !m_active ) return; + + const auto queryId = GetGpuCtx().ptr->NextQueryId(); + glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP ); + + TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLoc ); + const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() ); + MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); + TracyLfqCommit; + } + + tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( !m_active ) return; + + const auto queryId = GetGpuCtx().ptr->NextQueryId(); + glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP ); + + GetProfiler().SendCallstack( depth ); + + const auto thread = GetThreadHandle(); + TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLocCallstack ); + const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.thread, thread ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() ); + MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); TracyLfqCommit; } diff --git a/client/TracyProfiler.cpp b/client/TracyProfiler.cpp index 01f0c8f6..7f56b851 100644 --- a/client/TracyProfiler.cpp +++ b/client/TracyProfiler.cpp @@ -1771,6 +1771,13 @@ static void FreeAssociatedMemory( const QueueItem& item ) ptr = MemRead( &item.zoneBegin.srcloc ); tracy_free( (void*)ptr ); break; + case QueueType::GpuZoneBeginAllocSrcLoc: + case QueueType::GpuZoneBeginAllocSrcLocCallstack: + case QueueType::GpuZoneBeginAllocSrcLocSerial: + case QueueType::GpuZoneBeginAllocSrcLocCallstackSerial: + ptr = MemRead( &item.gpuZoneBegin.srcloc ); + tracy_free( (void*)ptr ); + break; case QueueType::CallstackMemory: case QueueType::Callstack: ptr = MemRead( &item.callstackFat.ptr ); @@ -1976,6 +1983,18 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) MemWrite( &item->gpuZoneBegin.cpuTime, dt ); break; } + case QueueType::GpuZoneBeginAllocSrcLoc: + case QueueType::GpuZoneBeginAllocSrcLocCallstack: + { + int64_t t = MemRead( &item->gpuZoneBegin.cpuTime ); + int64_t dt = t - refThread; + refThread = t; + MemWrite( &item->gpuZoneBegin.cpuTime, dt ); + ptr = MemRead( &item->gpuZoneBegin.srcloc ); + SendSourceLocationPayload( ptr ); + tracy_free( (void*)ptr ); + break; + } case QueueType::GpuZoneEnd: { int64_t t = MemRead( &item->gpuZoneEnd.cpuTime ); @@ -2206,6 +2225,18 @@ Profiler::DequeueStatus Profiler::DequeueSerial() MemWrite( &item->gpuZoneBegin.cpuTime, dt ); break; } + case QueueType::GpuZoneBeginAllocSrcLocSerial: + case QueueType::GpuZoneBeginAllocSrcLocCallstackSerial: + { + int64_t t = MemRead( &item->gpuZoneBegin.cpuTime ); + int64_t dt = t - refSerial; + refSerial = t; + MemWrite( &item->gpuZoneBegin.cpuTime, dt ); + ptr = MemRead( &item->gpuZoneBegin.srcloc ); + SendSourceLocationPayload( ptr ); + tracy_free( (void*)ptr ); + break; + } case QueueType::GpuZoneEndSerial: { int64_t t = MemRead( &item->gpuZoneEnd.cpuTime ); diff --git a/common/TracyProtocol.hpp b/common/TracyProtocol.hpp index 9deb3539..f267edca 100644 --- a/common/TracyProtocol.hpp +++ b/common/TracyProtocol.hpp @@ -9,7 +9,7 @@ namespace tracy constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; } -enum : uint32_t { ProtocolVersion = 43 }; +enum : uint32_t { ProtocolVersion = 44 }; enum : uint16_t { BroadcastVersion = 2 }; using lz4sz_t = uint32_t; diff --git a/common/TracyQueue.hpp b/common/TracyQueue.hpp index 0d4f6352..d30b6a22 100644 --- a/common/TracyQueue.hpp +++ b/common/TracyQueue.hpp @@ -42,9 +42,13 @@ enum class QueueType : uint8_t MemFreeCallstackNamed, GpuZoneBegin, GpuZoneBeginCallstack, + GpuZoneBeginAllocSrcLoc, + GpuZoneBeginAllocSrcLocCallstack, GpuZoneEnd, GpuZoneBeginSerial, GpuZoneBeginCallstackSerial, + GpuZoneBeginAllocSrcLocSerial, + GpuZoneBeginAllocSrcLocCallstackSerial, GpuZoneEndSerial, PlotData, ContextSwitch, @@ -317,15 +321,19 @@ struct QueueGpuNewContext GpuContextType type; }; -struct QueueGpuZoneBegin +struct QueueGpuZoneBeginLean { int64_t cpuTime; - uint64_t srcloc; uint64_t thread; uint16_t queryId; uint8_t context; }; +struct QueueGpuZoneBegin : public QueueGpuZoneBeginLean +{ + uint64_t srcloc; +}; + struct QueueGpuZoneEnd { int64_t cpuTime; @@ -522,6 +530,7 @@ struct QueueItem QueueMessageColorFat messageColorFat; QueueGpuNewContext gpuNewContext; QueueGpuZoneBegin gpuZoneBegin; + QueueGpuZoneBeginLean gpuZoneBeginLean; QueueGpuZoneEnd gpuZoneEnd; QueueGpuTime gpuTime; QueueGpuCalibration gpuCalibration; @@ -586,9 +595,13 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack, named sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack + sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// allocated source location + sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// allocated source location, callstack sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ), sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // serial sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // serial, callstack + sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// serial, allocated source location + sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// serial, allocated source location, callstack sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ), // serial sizeof( QueueHeader ) + sizeof( QueuePlotData ), sizeof( QueueHeader ) + sizeof( QueueContextSwitch ),