diff --git a/manual/tracy.tex b/manual/tracy.tex index 598f7d86..3a1394bc 100644 --- a/manual/tracy.tex +++ b/manual/tracy.tex @@ -1975,6 +1975,8 @@ GPU zones are ended via \texttt{\_\_\_tracy\_emit\_gpu\_zone\_end}. When the timestamps are fetched from the GPU, they must then be emitted via the \texttt{\_\_\_tracy\_emit\_gpu\_time} function. After all timestamps for a frame are emitted, \texttt{queryIds} may be re-used. +CPU and GPU timestamps may be periodically resynchronized via the \texttt{\_\_\_tracy\_emit\_gpu\_time\_sync} function, which takes a GPU timestamp sampled as close as possible to the moment of the call. This helps to counter timestamp drift and works around errors that compound when GPU timestamps overflow. Note that obtaining such a timestamp requires CPU and GPU synchronization, which will block execution of your application. Do not do this every frame. + To see how you should use this API, you should look at the reference implementation contained in API-specific C++ headers provided by Tracy. For example, to see how to write your instrumentation of OpenGL, you should closely follow the contents of the \texttt{TracyOpenGL.hpp} implementation. 
\subsubsection{Fibers} diff --git a/public/client/TracyProfiler.cpp b/public/client/TracyProfiler.cpp index d89d77b9..799e8876 100644 --- a/public/client/TracyProfiler.cpp +++ b/public/client/TracyProfiler.cpp @@ -4293,6 +4293,15 @@ TRACY_API void ___tracy_emit_gpu_calibration( const struct ___tracy_gpu_calibrat TracyLfqCommitC; } +TRACY_API void ___tracy_emit_gpu_time_sync( const struct ___tracy_gpu_time_sync_data data ) /* Queues a GpuTimeSync item that pairs the caller's data.gpuTime and data.context with a CPU timestamp read from tracy::Profiler::GetTime() inside this call. */ +{ + TracyLfqPrepareC( tracy::QueueType::GpuTimeSync ); /* macro defined elsewhere; the writes below rely on it to introduce the local 'item' */ + tracy::MemWrite( &item->gpuTimeSync.cpuTime, tracy::Profiler::GetTime() ); /* CPU time is sampled here, it is not supplied by the caller */ + tracy::MemWrite( &item->gpuTimeSync.gpuTime, data.gpuTime ); + tracy::MemWrite( &item->gpuTimeSync.context, data.context ); + TracyLfqCommitC; +} + TRACY_API void ___tracy_emit_gpu_zone_begin_serial( const struct ___tracy_gpu_zone_begin_data data ) { auto item = tracy::Profiler::QueueSerial(); @@ -4400,6 +4409,16 @@ TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_c tracy::Profiler::QueueSerialFinish(); } +TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_time_sync_data data ) /* Writes the same payload as ___tracy_emit_gpu_time_sync, but the item comes from Profiler::QueueSerial() and is completed with Profiler::QueueSerialFinish(). */ +{ + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTimeSync ); /* on this path the type tag is stored explicitly in the item header */ + tracy::MemWrite( &item->gpuTimeSync.cpuTime, tracy::Profiler::GetTime() ); /* CPU time is sampled here, it is not supplied by the caller */ + tracy::MemWrite( &item->gpuTimeSync.gpuTime, data.gpuTime ); + tracy::MemWrite( &item->gpuTimeSync.context, data.context ); + tracy::Profiler::QueueSerialFinish(); +} + TRACY_API int ___tracy_connected( void ) { return tracy::GetProfiler().IsConnected(); diff --git a/public/common/TracyProtocol.hpp b/public/common/TracyProtocol.hpp index 5eb1639d..51399b09 100644 --- a/public/common/TracyProtocol.hpp +++ b/public/common/TracyProtocol.hpp @@ -9,7 +9,7 @@ namespace tracy constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; } -enum : uint32_t { ProtocolVersion = 64 }; +enum : uint32_t { ProtocolVersion = 65 }; /* raised from 64 to 65 by the same patch that adds the GpuTimeSync queue item */ enum : uint16_t { BroadcastVersion = 3 }; using 
lz4sz_t = uint32_t; diff --git a/public/common/TracyQueue.hpp b/public/common/TracyQueue.hpp index 051d412a..df886e41 100644 --- a/public/common/TracyQueue.hpp +++ b/public/common/TracyQueue.hpp @@ -70,6 +70,7 @@ enum class QueueType : uint8_t KeepAlive, ThreadContext, GpuCalibration, + GpuTimeSync, /* inserted directly after GpuCalibration; the QueueDataSize entry added by this patch sits at the same relative position */ Crash, CrashReport, ZoneValidation, @@ -453,6 +454,13 @@ struct QueueGpuCalibration uint8_t context; }; +struct QueueGpuTimeSync /* Payload of a GpuTimeSync queue item: one GPU timestamp paired with one CPU timestamp for a single GPU context. */ +{ + int64_t gpuTime; /* GPU timestamp as passed in by the client */ + int64_t cpuTime; /* CPU timestamp taken by the client when the item is emitted */ + uint8_t context; /* id of the GPU context the pair belongs to */ +}; + struct QueueGpuContextName { uint8_t context; @@ -718,6 +726,7 @@ struct QueueItem QueueGpuZoneEnd gpuZoneEnd; QueueGpuTime gpuTime; QueueGpuCalibration gpuCalibration; + QueueGpuTimeSync gpuTimeSync; /* payload member accessed as item->gpuTimeSync by the emitters */ QueueGpuContextName gpuContextName; QueueGpuContextNameFat gpuContextNameFat; QueueMemAlloc memAlloc; @@ -821,6 +830,7 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ), // keep alive sizeof( QueueHeader ) + sizeof( QueueThreadContext ), sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ), + sizeof( QueueHeader ) + sizeof( QueueGpuTimeSync ), /* NOTE(review): table presumably indexed by QueueType, so this entry must stay right after the GpuCalibration one - confirm */ sizeof( QueueHeader ), // crash sizeof( QueueHeader ) + sizeof( QueueCrashReport ), sizeof( QueueHeader ) + sizeof( QueueZoneValidation ), diff --git a/public/tracy/TracyC.h b/public/tracy/TracyC.h index 996889c4..cf23203d 100644 --- a/public/tracy/TracyC.h +++ b/public/tracy/TracyC.h @@ -172,6 +172,11 @@ struct ___tracy_gpu_calibration_data { uint8_t context; }; +struct ___tracy_gpu_time_sync_data { /* Argument of ___tracy_emit_gpu_time_sync() and its _serial variant. */ + int64_t gpuTime; /* GPU timestamp to pair with the CPU time sampled inside the emit call */ + uint8_t context; /* id of the GPU context the timestamp belongs to */ +}; + // Some containers don't support storing const types. // This struct, as visible to user, is immutable, so treat it as if const was declared here. 
typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx; @@ -204,6 +209,7 @@ TRACY_API void ___tracy_emit_gpu_time( const struct ___tracy_gpu_time_data ); TRACY_API void ___tracy_emit_gpu_new_context( const struct ___tracy_gpu_new_context_data ); TRACY_API void ___tracy_emit_gpu_context_name( const struct ___tracy_gpu_context_name_data ); TRACY_API void ___tracy_emit_gpu_calibration( const struct ___tracy_gpu_calibration_data ); +TRACY_API void ___tracy_emit_gpu_time_sync( const struct ___tracy_gpu_time_sync_data ); /* emits a CPU/GPU timestamp resynchronization event for one GPU context */ TRACY_API void ___tracy_emit_gpu_zone_begin_serial( const struct ___tracy_gpu_zone_begin_data ); TRACY_API void ___tracy_emit_gpu_zone_begin_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data ); @@ -214,6 +220,7 @@ TRACY_API void ___tracy_emit_gpu_time_serial( const struct ___tracy_gpu_time_dat TRACY_API void ___tracy_emit_gpu_new_context_serial( const struct ___tracy_gpu_new_context_data ); TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_context_name_data ); TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_calibration_data ); +TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_time_sync_data ); /* same event, emitted through the serial queue like the other _serial entry points */ TRACY_API int ___tracy_connected(void); diff --git a/server/TracyEventDebug.cpp b/server/TracyEventDebug.cpp index c6c02ffa..df750a94 100644 --- a/server/TracyEventDebug.cpp +++ b/server/TracyEventDebug.cpp @@ -195,6 +195,9 @@ void EventDebug( const QueueItem& ev ) case QueueType::GpuCalibration: fprintf( f, "ev %i (GpuCalibration)\n", ev.hdr.idx ); break; + case QueueType::GpuTimeSync: /* debug dump: print only the event index, tagged with the event name */ + fprintf( f, "ev %i (GpuTimeSync)\n", ev.hdr.idx ); + break; case QueueType::Crash: fprintf( f, "ev %i (Crash)\n", ev.hdr.idx ); break; diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index b4110795..90dac69c 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -4589,6 +4589,9 @@ bool Worker::Process( const QueueItem& ev ) case 
QueueType::GpuCalibration: ProcessGpuCalibration( ev.gpuCalibration ); break; + case QueueType::GpuTimeSync: /* dispatch the new event type to its handler */ + ProcessGpuTimeSync( ev.gpuTimeSync ); + break; case QueueType::GpuContextName: ProcessGpuContextName( ev.gpuContextName ); break; @@ -5921,6 +5924,29 @@ void Worker::ProcessGpuCalibration( const QueueGpuCalibration& ev ) ctx->calibratedGpuTime = gpuTime; ctx->calibratedCpuTime = TscTime( ev.cpuTime ); } + +void Worker::ProcessGpuTimeSync( const QueueGpuTimeSync& ev ) /* Handles a GpuTimeSync event: recomputes the context's timeDiff from the paired CPU and GPU timestamps and zeroes its lastGpuTime, overflow and overflowMul fields. */ +{ + auto ctx = m_gpuCtxMap[ev.context]; /* GPU context is looked up by the id carried in the event */ + assert( ctx ); + + int64_t gpuTime; + if( ctx->period == 1.f ) /* a period of exactly 1 means the raw GPU value is used unscaled */ + { + gpuTime = ev.gpuTime; + } + else + { + gpuTime = int64_t( double( ctx->period ) * ev.gpuTime ); // precision loss + } + + const auto cpuTime = TscTime( ev.cpuTime ); /* CPU timestamp is passed through TscTime() before use */ + + ctx->timeDiff = cpuTime - gpuTime; /* new offset: converted CPU time minus scaled GPU time */ + ctx->lastGpuTime = 0; /* NOTE(review): these three fields presumably hold overflow-tracking state used when processing GPU times - confirm where they are read */ + ctx->overflow = 0; + ctx->overflowMul = 0; +} void Worker::ProcessGpuContextName( const QueueGpuContextName& ev ) { diff --git a/server/TracyWorker.hpp b/server/TracyWorker.hpp index e3b3deaf..6ef503c9 100644 --- a/server/TracyWorker.hpp +++ b/server/TracyWorker.hpp @@ -722,6 +722,7 @@ private: tracy_force_inline void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev, bool serial ); tracy_force_inline void ProcessGpuTime( const QueueGpuTime& ev ); tracy_force_inline void ProcessGpuCalibration( const QueueGpuCalibration& ev ); + tracy_force_inline void ProcessGpuTimeSync( const QueueGpuTimeSync& ev ); /* handler for QueueType::GpuTimeSync events */ tracy_force_inline void ProcessGpuContextName( const QueueGpuContextName& ev ); tracy_force_inline MemEvent* ProcessMemAlloc( const QueueMemAlloc& ev ); tracy_force_inline MemEvent* ProcessMemAllocNamed( const QueueMemAlloc& ev );