From ba61a9ed848351d4f27a944ed0a609fe870ba679 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Thu, 24 Oct 2019 00:04:31 +0200 Subject: [PATCH] Transfer time deltas, not absolute times. This change significantly reduces network bandwidth requirements. Implemented for: - CPU zones, - GPU zones, - locks, - plots, - memory events. --- TracyVulkan.hpp | 6 +-- client/TracyProfiler.cpp | 112 +++++++++++++++++++++++++++++++++++++++ client/TracyProfiler.hpp | 2 + common/TracyProtocol.hpp | 2 +- common/TracyQueue.hpp | 74 ++++++++++++++------------ server/TracyWorker.cpp | 90 +++++++++++++++++++++++-------- server/TracyWorker.hpp | 10 ++-- 7 files changed, 231 insertions(+), 65 deletions(-) diff --git a/TracyVulkan.hpp b/TracyVulkan.hpp index be87eb38..ec4b1eae 100644 --- a/TracyVulkan.hpp +++ b/TracyVulkan.hpp @@ -210,7 +210,7 @@ public: vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, ctx->m_query, queryId ); auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::GpuZoneBegin ); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial ); MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); @@ -234,7 +234,7 @@ public: vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, ctx->m_query, queryId ); auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstack ); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial ); MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); @@ -254,7 +254,7 @@ public: vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_ctx->m_query, queryId ); auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::GpuZoneEnd ); + MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial ); MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() ); MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() ); MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) ); diff --git a/client/TracyProfiler.cpp b/client/TracyProfiler.cpp index 9669333d..fd15686c 100644 --- a/client/TracyProfiler.cpp +++ b/client/TracyProfiler.cpp @@ -1314,6 +1314,7 @@ void Profiler::Worker() m_sock->Send( &welcome, sizeof( welcome ) ); m_threadCtx = 0; + m_refTimeSerial = 0; #ifdef TRACY_ON_DEMAND OnDemandPayloadMessage onDemand; @@ -1674,6 +1675,7 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) MemWrite( &item.threadCtx.thread, threadId ); if( !AppendData( &item, QueueDataSize[(int)QueueType::ThreadContext] ) ) return DequeueStatus::ConnectionLost; m_threadCtx = threadId; + m_refTimeThread = 0; } auto end = m_itemBuf + sz; @@ -1707,10 +1709,16 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) break; case QueueType::ZoneBeginAllocSrcLoc: case QueueType::ZoneBeginAllocSrcLocCallstack: + { + int64_t t = MemRead( &item->zoneBegin.time ); + int64_t dt = t - m_refTimeThread; + m_refTimeThread = t; + MemWrite( &item->zoneBegin.time, dt ); ptr = MemRead( &item->zoneBegin.srcloc ); SendSourceLocationPayload( ptr ); tracy_free( (void*)ptr ); break; + } case QueueType::Callstack: ptr = MemRead( &item->callstack.ptr ); SendCallstackPayload( ptr ); @@ -1735,6 +1743,48 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) tracy_free( (void*)ptr ); break; } + case QueueType::ZoneBegin: + case QueueType::ZoneBeginCallstack: + { + int64_t t = MemRead( &item->zoneBegin.time ); + int64_t dt = t - m_refTimeThread; + m_refTimeThread = t; + MemWrite( &item->zoneBegin.time, dt ); + break; + } + case QueueType::ZoneEnd: + { + int64_t t = MemRead( &item->zoneEnd.time ); + int64_t dt = t - m_refTimeThread; + m_refTimeThread = t; + MemWrite( &item->zoneEnd.time, dt ); + break; + } + case QueueType::GpuZoneBegin: + case QueueType::GpuZoneBeginCallstack: + { + int64_t t = MemRead( &item->gpuZoneBegin.cpuTime ); + int64_t dt = t - m_refTimeThread; + m_refTimeThread = t; + MemWrite( &item->gpuZoneBegin.cpuTime, dt ); + break; + } + case QueueType::GpuZoneEnd: + { + int64_t t = MemRead( &item->gpuZoneEnd.cpuTime ); + int64_t dt = t - m_refTimeThread; + m_refTimeThread = t; + MemWrite( &item->gpuZoneEnd.cpuTime, dt ); + break; + } + case QueueType::PlotData: + { + int64_t t = MemRead( &item->plotData.time ); + int64_t dt = t - m_refTimeThread; + m_refTimeThread = t; + MemWrite( &item->plotData.time, dt ); + break; + } default: assert( false ); break; @@ -1819,6 +1869,68 @@ Profiler::DequeueStatus Profiler::DequeueSerial() SendCallstackPayload( ptr ); tracy_free( (void*)ptr ); break; + case QueueType::LockWait: + case QueueType::LockSharedWait: + { + int64_t t = MemRead( &item->lockWait.time ); + int64_t dt = t - m_refTimeSerial; + m_refTimeSerial = t; + MemWrite( &item->lockWait.time, dt ); + break; + } + case QueueType::LockObtain: + case QueueType::LockSharedObtain: + { + int64_t t = MemRead( &item->lockObtain.time ); + int64_t dt = t - m_refTimeSerial; + m_refTimeSerial = t; + MemWrite( &item->lockObtain.time, dt ); + break; + } + case QueueType::LockRelease: + case QueueType::LockSharedRelease: + { + int64_t t = MemRead( &item->lockRelease.time ); + int64_t dt = t - m_refTimeSerial; + m_refTimeSerial = t; + MemWrite( &item->lockRelease.time, dt ); + break; + } + case QueueType::MemAlloc: + case QueueType::MemAllocCallstack: + { + int64_t t = MemRead( &item->memAlloc.time ); + int64_t dt = t - m_refTimeSerial; + m_refTimeSerial = t; + MemWrite( &item->memAlloc.time, dt ); + break; + } + case QueueType::MemFree: + case QueueType::MemFreeCallstack: + { + int64_t t = MemRead( &item->memFree.time ); + int64_t dt = t - m_refTimeSerial; + m_refTimeSerial = t; + MemWrite( &item->memFree.time, dt ); + break; + } + case QueueType::GpuZoneBeginSerial: + case QueueType::GpuZoneBeginCallstackSerial: + { + int64_t t = MemRead( &item->gpuZoneBegin.cpuTime ); + int64_t dt = t - m_refTimeSerial; + m_refTimeSerial = t; + MemWrite( &item->gpuZoneBegin.cpuTime, dt ); + break; + } + case QueueType::GpuZoneEndSerial: + { + int64_t t = MemRead( &item->gpuZoneEnd.cpuTime ); + int64_t dt = t - m_refTimeSerial; + m_refTimeSerial = t; + MemWrite( &item->gpuZoneEnd.cpuTime, dt ); + break; + } default: assert( false ); break; diff --git a/client/TracyProfiler.hpp b/client/TracyProfiler.hpp index 4bcc59af..75ed8483 100644 --- a/client/TracyProfiler.hpp +++ b/client/TracyProfiler.hpp @@ -543,6 +543,8 @@ private: std::atomic m_zoneId; uint64_t m_threadCtx; + int64_t m_refTimeThread; + int64_t m_refTimeSerial; void* m_stream; // LZ4_stream_t* char* m_buffer; diff --git a/common/TracyProtocol.hpp b/common/TracyProtocol.hpp index 461cce39..967ac78c 100644 --- a/common/TracyProtocol.hpp +++ b/common/TracyProtocol.hpp @@ -9,7 +9,7 @@ namespace tracy { -enum : uint32_t { ProtocolVersion = 18 }; +enum : uint32_t { ProtocolVersion = 19 }; enum : uint32_t { BroadcastVersion = 0 }; using lz4sz_t = uint32_t; diff --git a/common/TracyQueue.hpp b/common/TracyQueue.hpp index 0622c632..b59051d1 100644 --- a/common/TracyQueue.hpp +++ b/common/TracyQueue.hpp @@ -19,14 +19,31 @@ enum class QueueType : uint8_t Callstack, CallstackAlloc, FrameImage, + ZoneBegin, + ZoneBeginCallstack, + ZoneEnd, + LockWait, + LockObtain, + LockRelease, + LockSharedWait, + LockSharedObtain, + LockSharedRelease, + MemAlloc, + MemFree, + MemAllocCallstack, + MemFreeCallstack, + GpuZoneBegin, + GpuZoneBeginCallstack, + GpuZoneEnd, + GpuZoneBeginSerial, + GpuZoneBeginCallstackSerial, + GpuZoneEndSerial, + PlotData, Terminate, KeepAlive, ThreadContext, Crash, CrashReport, - ZoneBegin, - ZoneBeginCallstack, - ZoneEnd, ZoneValidation, FrameMarkMsg, FrameMarkMsgStart, @@ -34,25 +51,11 @@ enum class QueueType : uint8_t SourceLocation, LockAnnounce, LockTerminate, - LockWait, - LockObtain, - LockRelease, - LockSharedWait, - LockSharedObtain, - LockSharedRelease, LockMark, - PlotData, MessageLiteral, MessageLiteralColor, GpuNewContext, - GpuZoneBegin, - GpuZoneBeginCallstack, - GpuZoneEnd, GpuTime, - MemAlloc, - MemFree, - MemAllocCallstack, - MemFreeCallstack, CallstackFrameSize, CallstackFrame, SysTimeReport, @@ -394,15 +397,32 @@ static const size_t QueueDataSize[] = { sizeof( QueueHeader ) + sizeof( QueueCallstack ), sizeof( QueueHeader ) + sizeof( QueueCallstackAlloc ), sizeof( QueueHeader ) + sizeof( QueueFrameImage ), + sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), + sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // callstack + sizeof( QueueHeader ) + sizeof( QueueZoneEnd ), + sizeof( QueueHeader ) + sizeof( QueueLockWait ), + sizeof( QueueHeader ) + sizeof( QueueLockObtain ), + sizeof( QueueHeader ) + sizeof( QueueLockRelease ), + sizeof( QueueHeader ) + sizeof( QueueLockWait ), // shared + sizeof( QueueHeader ) + sizeof( QueueLockObtain ), // shared + sizeof( QueueHeader ) + sizeof( QueueLockRelease ), // shared + sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), + sizeof( QueueHeader ) + sizeof( QueueMemFree ), + sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack + sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack + sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), + sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack + sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ), + sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // serial + sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // serial, callstack + sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ), // serial + sizeof( QueueHeader ) + sizeof( QueuePlotData ), // above items must be first sizeof( QueueHeader ), // terminate sizeof( QueueHeader ), // keep alive sizeof( QueueHeader ) + sizeof( QueueThreadContext ), sizeof( QueueHeader ), // crash sizeof( QueueHeader ) + sizeof( QueueCrashReport ), - sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), - sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // callstack - sizeof( QueueHeader ) + sizeof( QueueZoneEnd ), sizeof( QueueHeader ) + sizeof( QueueZoneValidation ), sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // continuous frames sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // start @@ -410,25 +430,11 @@ static const size_t QueueDataSize[] = { sizeof( QueueHeader ) + sizeof( QueueSourceLocation ), sizeof( QueueHeader ) + sizeof( QueueLockAnnounce ), sizeof( QueueHeader ) + sizeof( QueueLockTerminate ), - sizeof( QueueHeader ) + sizeof( QueueLockWait ), - sizeof( QueueHeader ) + sizeof( QueueLockObtain ), - sizeof( QueueHeader ) + sizeof( QueueLockRelease ), - sizeof( QueueHeader ) + sizeof( QueueLockWait ), // shared - sizeof( QueueHeader ) + sizeof( QueueLockObtain ), // shared - sizeof( QueueHeader ) + sizeof( QueueLockRelease ), // shared sizeof( QueueHeader ) + sizeof( QueueLockMark ), - sizeof( QueueHeader ) + sizeof( QueuePlotData ), sizeof( QueueHeader ) + sizeof( QueueMessage ), // literal sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // literal sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ), - sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), - sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack - sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ), sizeof( QueueHeader ) + sizeof( QueueGpuTime ), - sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), - sizeof( QueueHeader ) + sizeof( QueueMemFree ), - sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack - sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack sizeof( QueueHeader ) + sizeof( QueueCallstackFrameSize ), sizeof( QueueHeader ) + sizeof( QueueCallstackFrame ), sizeof( QueueHeader ) + sizeof( QueueSysTime ), diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index c854d691..5830abe5 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -3268,13 +3268,22 @@ bool Worker::Process( const QueueItem& ev ) ProcessGpuNewContext( ev.gpuNewContext ); break; case QueueType::GpuZoneBegin: - ProcessGpuZoneBegin( ev.gpuZoneBegin ); + ProcessGpuZoneBegin( ev.gpuZoneBegin, false ); break; case QueueType::GpuZoneBeginCallstack: - ProcessGpuZoneBeginCallstack( ev.gpuZoneBegin ); + ProcessGpuZoneBeginCallstack( ev.gpuZoneBegin, false ); break; case QueueType::GpuZoneEnd: - ProcessGpuZoneEnd( ev.gpuZoneEnd ); + ProcessGpuZoneEnd( ev.gpuZoneEnd, false ); + break; + case QueueType::GpuZoneBeginSerial: + ProcessGpuZoneBegin( ev.gpuZoneBegin, true ); + break; + case QueueType::GpuZoneBeginCallstackSerial: + ProcessGpuZoneBeginCallstack( ev.gpuZoneBegin, true ); + break; + case QueueType::GpuZoneEndSerial: + ProcessGpuZoneEnd( ev.gpuZoneEnd, true ); break; case QueueType::GpuTime: ProcessGpuTime( ev.gpuTime ); @@ -3341,13 +3350,15 @@ bool Worker::Process( const QueueItem& ev ) void Worker::ProcessThreadContext( const QueueThreadContext& ev ) { m_threadCtx = ev.thread; + m_refTimeThread = 0; } void Worker::ProcessZoneBeginImpl( ZoneEvent* zone, const QueueZoneBegin& ev ) { CheckSourceLocation( ev.srcloc ); - const auto start = TscTime( ev.time - m_data.baseTime ); + m_refTimeThread += ev.time; + const auto start = TscTime( m_refTimeThread - m_data.baseTime ); zone->SetStart( start ); zone->SetEnd( -1 ); zone->SetSrcLoc( ShrinkSourceLocation( ev.srcloc ) ); @@ -3380,7 +3391,8 @@ void Worker::ProcessZoneBeginAllocSrcLocImpl( ZoneEvent* zone, const QueueZoneBe auto it = m_pendingSourceLocationPayload.find( ev.srcloc ); assert( it != m_pendingSourceLocationPayload.end() ); - const auto start = TscTime( ev.time - m_data.baseTime ); + m_refTimeThread += ev.time; + const auto start = TscTime( m_refTimeThread - m_data.baseTime ); zone->SetStart( start ); zone->SetEnd( -1 ); zone->SetSrcLoc( it->second ); @@ -3432,7 +3444,8 @@ void Worker::ProcessZoneEnd( const QueueZoneEnd& ev ) assert( !stack.empty() ); auto zone = stack.back_and_pop(); assert( zone->End() == -1 ); - zone->SetEnd( TscTime( ev.time - m_data.baseTime ) ); + m_refTimeThread += ev.time; + zone->SetEnd( TscTime( m_refTimeThread - m_data.baseTime ) ); assert( zone->End() >= zone->Start() ); m_data.lastTime = std::max( m_data.lastTime, zone->End() ); @@ -3773,7 +3786,8 @@ void Worker::ProcessLockWait( const QueueLockWait& ev ) } auto lev = ev.type == LockType::Lockable ? m_slab.Alloc() : m_slab.Alloc(); - lev->SetTime( TscTime( ev.time - m_data.baseTime ) ); + m_refTimeSerial += ev.time; + lev->SetTime( TscTime( m_refTimeSerial - m_data.baseTime ) ); lev->SetSrcLoc( 0 ); lev->type = LockEvent::Type::Wait; @@ -3787,7 +3801,8 @@ void Worker::ProcessLockObtain( const QueueLockObtain& ev ) auto& lock = *it->second; auto lev = lock.type == LockType::Lockable ? m_slab.Alloc() : m_slab.Alloc(); - lev->SetTime( TscTime( ev.time - m_data.baseTime ) ); + m_refTimeSerial += ev.time; + lev->SetTime( TscTime( m_refTimeSerial - m_data.baseTime ) ); lev->SetSrcLoc( 0 ); lev->type = LockEvent::Type::Obtain; @@ -3801,7 +3816,8 @@ void Worker::ProcessLockRelease( const QueueLockRelease& ev ) auto& lock = *it->second; auto lev = lock.type == LockType::Lockable ? m_slab.Alloc() : m_slab.Alloc(); - lev->SetTime( TscTime( ev.time - m_data.baseTime ) ); + m_refTimeSerial += ev.time; + lev->SetTime( TscTime( m_refTimeSerial - m_data.baseTime ) ); lev->SetSrcLoc( 0 ); lev->type = LockEvent::Type::Release; @@ -3822,7 +3838,8 @@ void Worker::ProcessLockSharedWait( const QueueLockWait& ev ) assert( ev.type == LockType::SharedLockable ); auto lev = m_slab.Alloc(); - lev->SetTime( TscTime( ev.time - m_data.baseTime ) ); + m_refTimeSerial += ev.time; + lev->SetTime( TscTime( m_refTimeSerial - m_data.baseTime ) ); lev->SetSrcLoc( 0 ); lev->type = LockEvent::Type::WaitShared; @@ -3837,7 +3854,8 @@ void Worker::ProcessLockSharedObtain( const QueueLockObtain& ev ) assert( lock.type == LockType::SharedLockable ); auto lev = m_slab.Alloc(); - lev->SetTime( TscTime( ev.time - m_data.baseTime ) ); + m_refTimeSerial += ev.time; + lev->SetTime( TscTime( m_refTimeSerial - m_data.baseTime ) ); lev->SetSrcLoc( 0 ); lev->type = LockEvent::Type::ObtainShared; @@ -3852,7 +3870,8 @@ void Worker::ProcessLockSharedRelease( const QueueLockRelease& ev ) assert( lock.type == LockType::SharedLockable ); auto lev = m_slab.Alloc(); - lev->SetTime( TscTime( ev.time - m_data.baseTime ) ); + m_refTimeSerial += ev.time; + lev->SetTime( TscTime( m_refTimeSerial - m_data.baseTime ) ); lev->SetSrcLoc( 0 ); lev->type = LockEvent::Type::ReleaseShared; @@ -3900,7 +3919,8 @@ void Worker::ProcessPlotData( const QueuePlotData& ev ) Query( ServerQueryPlotName, name ); } ); - const auto time = TscTime( ev.time - m_data.baseTime ); + m_refTimeThread += ev.time; + const auto time = TscTime( m_refTimeThread - m_data.baseTime ); m_data.lastTime = std::max( m_data.lastTime, time ); switch( ev.type ) { @@ -4005,7 +4025,7 @@ void Worker::ProcessGpuNewContext( const QueueGpuNewContext& ev ) m_gpuCtxMap[ev.context] = gpu; } -void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& ev ) +void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& ev, bool serial ) { m_data.gpuCnt++; @@ -4014,7 +4034,18 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e CheckSourceLocation( ev.srcloc ); - zone->SetCpuStart( TscTime( ev.cpuTime - m_data.baseTime ) ); + int64_t cpuTime; + if( serial ) + { + m_refTimeSerial += ev.cpuTime; + cpuTime = m_refTimeSerial; + } + else + { + m_refTimeThread += ev.cpuTime; + cpuTime = m_refTimeThread; + } + zone->SetCpuStart( TscTime( cpuTime - m_data.baseTime ) ); zone->SetCpuEnd( -1 ); zone->gpuStart = std::numeric_limits::max(); zone->gpuEnd = -1; @@ -4064,23 +4095,23 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e ctx->query[ev.queryId] = zone; } -void Worker::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev ) +void Worker::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev, bool serial ) { auto zone = m_slab.Alloc(); - ProcessGpuZoneBeginImpl( zone, ev ); + ProcessGpuZoneBeginImpl( zone, ev, serial ); } -void Worker::ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev ) +void Worker::ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev, bool serial ) { auto zone = m_slab.Alloc(); - ProcessGpuZoneBeginImpl( zone, ev ); + ProcessGpuZoneBeginImpl( zone, ev, serial ); auto& next = m_nextCallstack[ev.thread]; next.type = NextCallstackType::Gpu; next.gpu = zone; } -void Worker::ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev ) +void Worker::ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev, bool serial ) { auto ctx = m_gpuCtxMap[ev.context]; assert( ctx ); @@ -4094,7 +4125,18 @@ void Worker::ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev ) assert( !ctx->query[ev.queryId] ); ctx->query[ev.queryId] = zone; - zone->SetCpuEnd( TscTime( ev.cpuTime - m_data.baseTime ) ); + int64_t cpuTime; + if( serial ) + { + m_refTimeSerial += ev.cpuTime; + cpuTime = m_refTimeSerial; + } + else + { + m_refTimeThread += ev.cpuTime; + cpuTime = m_refTimeThread; + } + zone->SetCpuEnd( TscTime( cpuTime - m_data.baseTime ) ); m_data.lastTime = std::max( m_data.lastTime, zone->CpuEnd() ); } @@ -4137,7 +4179,8 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev ) void Worker::ProcessMemAlloc( const QueueMemAlloc& ev ) { - const auto time = TscTime( ev.time - m_data.baseTime ); + m_refTimeSerial += ev.time; + const auto time = TscTime( m_refTimeSerial - m_data.baseTime ); m_data.lastTime = std::max( m_data.lastTime, time ); NoticeThread( ev.thread ); @@ -4188,7 +4231,8 @@ bool Worker::ProcessMemFree( const QueueMemFree& ev ) return false; } - const auto time = TscTime( ev.time - m_data.baseTime ); + m_refTimeSerial += ev.time; + const auto time = TscTime( m_refTimeSerial - m_data.baseTime ); m_data.lastTime = std::max( m_data.lastTime, time ); NoticeThread( ev.thread ); diff --git a/server/TracyWorker.hpp b/server/TracyWorker.hpp index c83bc6cf..2b535b56 100644 --- a/server/TracyWorker.hpp +++ b/server/TracyWorker.hpp @@ -435,9 +435,9 @@ private: tracy_force_inline void ProcessMessageLiteralColor( const QueueMessageColor& ev ); tracy_force_inline void ProcessMessageAppInfo( const QueueMessage& ev ); tracy_force_inline void ProcessGpuNewContext( const QueueGpuNewContext& ev ); - tracy_force_inline void ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev ); - tracy_force_inline void ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev ); - tracy_force_inline void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev ); + tracy_force_inline void ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev, bool serial ); + tracy_force_inline void ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev, bool serial ); + tracy_force_inline void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev, bool serial ); tracy_force_inline void ProcessGpuTime( const QueueGpuTime& ev ); tracy_force_inline void ProcessMemAlloc( const QueueMemAlloc& ev ); tracy_force_inline bool ProcessMemFree( const QueueMemFree& ev ); @@ -456,7 +456,7 @@ private: tracy_force_inline void ProcessZoneBeginImpl( ZoneEvent* zone, const QueueZoneBegin& ev ); tracy_force_inline void ProcessZoneBeginAllocSrcLocImpl( ZoneEvent* zone, const QueueZoneBegin& ev ); - tracy_force_inline void ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& ev ); + tracy_force_inline void ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& ev, bool serial ); void ZoneStackFailure( uint64_t thread, const ZoneEvent* ev ); void ZoneEndFailure( uint64_t thread ); @@ -614,6 +614,8 @@ private: size_t m_frameImageBufferSize = 0; uint64_t m_threadCtx = 0; + int64_t m_refTimeThread = 0; + int64_t m_refTimeSerial = 0; }; }