diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp
index abf7a192..1abb975c 100644
--- a/server/TracyWorker.cpp
+++ b/server/TracyWorker.cpp
@@ -4206,6 +4206,12 @@ bool Worker::Process( const QueueItem& ev )
     case QueueType::GpuZoneBeginCallstack:
         ProcessGpuZoneBeginCallstack( ev.gpuZoneBegin, false );
         break;
+    case QueueType::GpuZoneBeginAllocSrcLoc:
+        ProcessGpuZoneBeginAllocSrcLoc( ev.gpuZoneBeginLean, false );
+        break;
+    case QueueType::GpuZoneBeginAllocSrcLocCallstack:
+        ProcessGpuZoneBeginAllocSrcLocCallstack( ev.gpuZoneBeginLean, false );
+        break;
     case QueueType::GpuZoneEnd:
         ProcessGpuZoneEnd( ev.gpuZoneEnd, false );
         break;
@@ -4215,6 +4221,12 @@ bool Worker::Process( const QueueItem& ev )
     case QueueType::GpuZoneBeginCallstackSerial:
         ProcessGpuZoneBeginCallstack( ev.gpuZoneBegin, true );
         break;
+    case QueueType::GpuZoneBeginAllocSrcLocSerial:
+        ProcessGpuZoneBeginAllocSrcLoc( ev.gpuZoneBeginLean, true );
+        break;
+    case QueueType::GpuZoneBeginAllocSrcLocCallstackSerial:
+        ProcessGpuZoneBeginAllocSrcLocCallstack( ev.gpuZoneBeginLean, true );
+        break;
     case QueueType::GpuZoneEndSerial:
         ProcessGpuZoneEnd( ev.gpuZoneEnd, true );
         break;
@@ -5172,14 +5184,29 @@ void Worker::ProcessGpuNewContext( const QueueGpuNewContext& ev )
 }
 
 void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& ev, bool serial )
+{
+    CheckSourceLocation( ev.srcloc );
+    zone->SetSrcLoc( ShrinkSourceLocation( ev.srcloc ) );
+    ProcessGpuZoneBeginImplCommon( zone, ev, serial );
+}
+
+// Begins a GPU zone whose source location comes from m_pendingSourceLocationPayload; the pending payload is reset to 0 afterwards.
+void Worker::ProcessGpuZoneBeginAllocSrcLocImpl( GpuEvent* zone, const QueueGpuZoneBeginLean& ev, bool serial )
+{
+    assert( m_pendingSourceLocationPayload != 0 );
+    zone->SetSrcLoc( m_pendingSourceLocationPayload );
+    ProcessGpuZoneBeginImplCommon( zone, ev, serial );
+    m_pendingSourceLocationPayload = 0;
+}
+
+// GPU zone begin processing shared by ProcessGpuZoneBeginImpl and ProcessGpuZoneBeginAllocSrcLocImpl; does not set the zone's source location.
+void Worker::ProcessGpuZoneBeginImplCommon( GpuEvent* zone, const QueueGpuZoneBeginLean& ev, bool serial )
 {
     m_data.gpuCnt++;
 
     auto ctx = m_gpuCtxMap[ev.context].get();
     assert( ctx );
 
-    CheckSourceLocation( ev.srcloc );
-
     int64_t cpuTime;
     if( serial )
     {
@@ -5196,7 +5223,6 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e
     zone->SetCpuEnd( -1 );
     zone->SetGpuStart( -1 );
     zone->SetGpuEnd( -1 );
-    zone->SetSrcLoc( ShrinkSourceLocation( ev.srcloc ) );
     zone->callstack.SetVal( 0 );
     zone->SetChild( -1 );
 
@@ -5258,6 +5284,24 @@ void Worker::ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev, bool ser
     it->second = 0;
 }
 
+// Allocates a GPU zone from m_slab and begins it using the pending source location payload.
+void Worker::ProcessGpuZoneBeginAllocSrcLoc( const QueueGpuZoneBeginLean& ev, bool serial )
+{
+    auto zone = m_slab.Alloc();
+    ProcessGpuZoneBeginAllocSrcLocImpl( zone, ev, serial );
+}
+
+// Like ProcessGpuZoneBeginAllocSrcLoc, but also assigns the callstack value stored in m_nextCallstack for m_threadCtx and resets that entry to 0.
+void Worker::ProcessGpuZoneBeginAllocSrcLocCallstack( const QueueGpuZoneBeginLean& ev, bool serial )
+{
+    auto zone = m_slab.Alloc();
+    ProcessGpuZoneBeginAllocSrcLocImpl( zone, ev, serial );
+    auto it = m_nextCallstack.find( m_threadCtx );
+    assert( it != m_nextCallstack.end() );
+    zone->callstack.SetVal( it->second );
+    it->second = 0;
+}
+
 void Worker::ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev, bool serial )
 {
     auto ctx = m_gpuCtxMap[ev.context];
diff --git a/server/TracyWorker.hpp b/server/TracyWorker.hpp
index 2fe6ba00..6593c4ee 100644
--- a/server/TracyWorker.hpp
+++ b/server/TracyWorker.hpp
@@ -635,6 +635,8 @@ private:
     tracy_force_inline void ProcessGpuNewContext( const QueueGpuNewContext& ev );
     tracy_force_inline void ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev, bool serial );
     tracy_force_inline void ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev, bool serial );
+    tracy_force_inline void ProcessGpuZoneBeginAllocSrcLoc( const QueueGpuZoneBeginLean& ev, bool serial );
+    tracy_force_inline void ProcessGpuZoneBeginAllocSrcLocCallstack( const QueueGpuZoneBeginLean& ev, bool serial );
     tracy_force_inline void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev, bool serial );
     tracy_force_inline void ProcessGpuTime( const QueueGpuTime& ev );
     tracy_force_inline void ProcessGpuCalibration( const QueueGpuCalibration& ev );
@@ -666,6 +668,8 @@ private:
     tracy_force_inline void ProcessZoneBeginImpl( ZoneEvent* zone, const QueueZoneBegin& ev );
     tracy_force_inline void ProcessZoneBeginAllocSrcLocImpl( ZoneEvent* zone, const QueueZoneBeginLean& ev );
     tracy_force_inline void ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& ev, bool serial );
+    tracy_force_inline void ProcessGpuZoneBeginAllocSrcLocImpl( GpuEvent* zone, const QueueGpuZoneBeginLean& ev, bool serial );
+    tracy_force_inline void ProcessGpuZoneBeginImplCommon( GpuEvent* zone, const QueueGpuZoneBeginLean& ev, bool serial );
     tracy_force_inline MemEvent* ProcessMemAllocImpl( uint64_t memname, MemData& memdata, const QueueMemAlloc& ev );
     tracy_force_inline MemEvent* ProcessMemFreeImpl( uint64_t memname, MemData& memdata, const QueueMemFree& ev );
 