1
0
mirror of https://github.com/wolfpld/tracy synced 2025-04-29 20:33:52 +00:00

Merge pull request #644 from YaLTeR/gpu-time-sync

Add GpuTimeSync event
This commit is contained in:
Bartosz Taudul 2023-10-23 19:18:38 +02:00 committed by GitHub
commit 4c8944b174
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 69 additions and 1 deletions

View File

@ -1975,6 +1975,8 @@ GPU zones are ended via \texttt{\_\_\_tracy\_emit\_gpu\_zone\_end}.
When the timestamps are fetched from the GPU, they must then be emitted via the \texttt{\_\_\_tracy\_emit\_gpu\_time} function. After all timestamps for a frame are emitted, \texttt{queryIds} may be re-used. When the timestamps are fetched from the GPU, they must then be emitted via the \texttt{\_\_\_tracy\_emit\_gpu\_time} function. After all timestamps for a frame are emitted, \texttt{queryIds} may be re-used.
CPU and GPU timestamps may be periodically resynchronized via the \texttt{\_\_\_tracy\_emit\_gpu\_time\_sync} function, which takes the GPU timestamp closest to the moment of the call. This can help with timestamp drift and work around the compounding effects of GPU timestamp overflow. Note that this requires CPU and GPU synchronization, which will block the execution of your application. Do not do this every frame.
To see how you should use this API, you should look at the reference implementation contained in API-specific C++ headers provided by Tracy. For example, to see how to write your instrumentation of OpenGL, you should closely follow the contents of the \texttt{TracyOpenGL.hpp} implementation. To see how you should use this API, you should look at the reference implementation contained in API-specific C++ headers provided by Tracy. For example, to see how to write your instrumentation of OpenGL, you should closely follow the contents of the \texttt{TracyOpenGL.hpp} implementation.
\subsubsection{Fibers} \subsubsection{Fibers}

View File

@ -4293,6 +4293,15 @@ TRACY_API void ___tracy_emit_gpu_calibration( const struct ___tracy_gpu_calibrat
TracyLfqCommitC; TracyLfqCommitC;
} }
// C API: emits a GpuTimeSync queue event that pairs a caller-supplied GPU
// timestamp with a CPU timestamp sampled inside this function.
//
// data.gpuTime - GPU timestamp provided by the caller.
// data.context - GPU context identifier provided by the caller.
//
// The CPU timestamp is not part of `data`; it is read here with
// tracy::Profiler::GetTime().
TRACY_API void ___tracy_emit_gpu_time_sync( const struct ___tracy_gpu_time_sync_data data )
{
// NOTE(review): `item` is not declared in this function; presumably the
// TracyLfqPrepareC macro introduces it and TracyLfqCommitC publishes it -
// confirm against the macro definitions.
TracyLfqPrepareC( tracy::QueueType::GpuTimeSync );
tracy::MemWrite( &item->gpuTimeSync.cpuTime, tracy::Profiler::GetTime() );
tracy::MemWrite( &item->gpuTimeSync.gpuTime, data.gpuTime );
tracy::MemWrite( &item->gpuTimeSync.context, data.context );
TracyLfqCommitC;
}
TRACY_API void ___tracy_emit_gpu_zone_begin_serial( const struct ___tracy_gpu_zone_begin_data data ) TRACY_API void ___tracy_emit_gpu_zone_begin_serial( const struct ___tracy_gpu_zone_begin_data data )
{ {
auto item = tracy::Profiler::QueueSerial(); auto item = tracy::Profiler::QueueSerial();
@ -4400,6 +4409,16 @@ TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_c
tracy::Profiler::QueueSerialFinish(); tracy::Profiler::QueueSerialFinish();
} }
// C API, serial-queue variant of ___tracy_emit_gpu_time_sync.
//
// Obtains a queue item from tracy::Profiler::QueueSerial(), tags it as
// QueueType::GpuTimeSync, fills in the CPU timestamp (sampled here), the
// caller's GPU timestamp and the GPU context id, and then calls
// tracy::Profiler::QueueSerialFinish().
//
// data.gpuTime - GPU timestamp provided by the caller.
// data.context - GPU context identifier provided by the caller.
TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_time_sync_data data )
{
    auto entry = tracy::Profiler::QueueSerial();
    tracy::MemWrite( &entry->hdr.type, tracy::QueueType::GpuTimeSync );

    // The CPU side of the sync pair is taken here, not supplied by the caller.
    const auto cpuNow = tracy::Profiler::GetTime();
    tracy::MemWrite( &entry->gpuTimeSync.cpuTime, cpuNow );
    tracy::MemWrite( &entry->gpuTimeSync.gpuTime, data.gpuTime );
    tracy::MemWrite( &entry->gpuTimeSync.context, data.context );

    tracy::Profiler::QueueSerialFinish();
}
TRACY_API int ___tracy_connected( void ) TRACY_API int ___tracy_connected( void )
{ {
return tracy::GetProfiler().IsConnected(); return tracy::GetProfiler().IsConnected();

View File

@ -9,7 +9,7 @@ namespace tracy
constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; } constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; }
enum : uint32_t { ProtocolVersion = 64 }; enum : uint32_t { ProtocolVersion = 65 };
enum : uint16_t { BroadcastVersion = 3 }; enum : uint16_t { BroadcastVersion = 3 };
using lz4sz_t = uint32_t; using lz4sz_t = uint32_t;

View File

@ -70,6 +70,7 @@ enum class QueueType : uint8_t
KeepAlive, KeepAlive,
ThreadContext, ThreadContext,
GpuCalibration, GpuCalibration,
GpuTimeSync,
Crash, Crash,
CrashReport, CrashReport,
ZoneValidation, ZoneValidation,
@ -453,6 +454,13 @@ struct QueueGpuCalibration
uint8_t context; uint8_t context;
}; };
// Payload of a QueueType::GpuTimeSync event: one GPU timestamp paired with
// the CPU timestamp taken when the event was emitted.
struct QueueGpuTimeSync
{
// GPU timestamp, copied from ___tracy_gpu_time_sync_data::gpuTime by the emit functions.
int64_t gpuTime;
// CPU timestamp, read with tracy::Profiler::GetTime() by the emit functions.
int64_t cpuTime;
// GPU context id; Worker::ProcessGpuTimeSync uses it to index m_gpuCtxMap.
uint8_t context;
};
struct QueueGpuContextName struct QueueGpuContextName
{ {
uint8_t context; uint8_t context;
@ -718,6 +726,7 @@ struct QueueItem
QueueGpuZoneEnd gpuZoneEnd; QueueGpuZoneEnd gpuZoneEnd;
QueueGpuTime gpuTime; QueueGpuTime gpuTime;
QueueGpuCalibration gpuCalibration; QueueGpuCalibration gpuCalibration;
QueueGpuTimeSync gpuTimeSync;
QueueGpuContextName gpuContextName; QueueGpuContextName gpuContextName;
QueueGpuContextNameFat gpuContextNameFat; QueueGpuContextNameFat gpuContextNameFat;
QueueMemAlloc memAlloc; QueueMemAlloc memAlloc;
@ -821,6 +830,7 @@ static constexpr size_t QueueDataSize[] = {
sizeof( QueueHeader ), // keep alive sizeof( QueueHeader ), // keep alive
sizeof( QueueHeader ) + sizeof( QueueThreadContext ), sizeof( QueueHeader ) + sizeof( QueueThreadContext ),
sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ), sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ),
sizeof( QueueHeader ) + sizeof( QueueGpuTimeSync ),
sizeof( QueueHeader ), // crash sizeof( QueueHeader ), // crash
sizeof( QueueHeader ) + sizeof( QueueCrashReport ), sizeof( QueueHeader ) + sizeof( QueueCrashReport ),
sizeof( QueueHeader ) + sizeof( QueueZoneValidation ), sizeof( QueueHeader ) + sizeof( QueueZoneValidation ),

View File

@ -172,6 +172,11 @@ struct ___tracy_gpu_calibration_data {
uint8_t context; uint8_t context;
}; };
// Argument for ___tracy_emit_gpu_time_sync() and ___tracy_emit_gpu_time_sync_serial().
struct ___tracy_gpu_time_sync_data {
// GPU timestamp to be paired with the CPU time that the emit function samples itself.
int64_t gpuTime;
// Identifier of the GPU context this timestamp belongs to.
uint8_t context;
};
// Some containers don't support storing const types. // Some containers don't support storing const types.
// This struct, as visible to user, is immutable, so treat it as if const was declared here. // This struct, as visible to user, is immutable, so treat it as if const was declared here.
typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx; typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx;
@ -204,6 +209,7 @@ TRACY_API void ___tracy_emit_gpu_time( const struct ___tracy_gpu_time_data );
TRACY_API void ___tracy_emit_gpu_new_context( const struct ___tracy_gpu_new_context_data ); TRACY_API void ___tracy_emit_gpu_new_context( const struct ___tracy_gpu_new_context_data );
TRACY_API void ___tracy_emit_gpu_context_name( const struct ___tracy_gpu_context_name_data ); TRACY_API void ___tracy_emit_gpu_context_name( const struct ___tracy_gpu_context_name_data );
TRACY_API void ___tracy_emit_gpu_calibration( const struct ___tracy_gpu_calibration_data ); TRACY_API void ___tracy_emit_gpu_calibration( const struct ___tracy_gpu_calibration_data );
TRACY_API void ___tracy_emit_gpu_time_sync( const struct ___tracy_gpu_time_sync_data );
TRACY_API void ___tracy_emit_gpu_zone_begin_serial( const struct ___tracy_gpu_zone_begin_data ); TRACY_API void ___tracy_emit_gpu_zone_begin_serial( const struct ___tracy_gpu_zone_begin_data );
TRACY_API void ___tracy_emit_gpu_zone_begin_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data ); TRACY_API void ___tracy_emit_gpu_zone_begin_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data );
@ -214,6 +220,7 @@ TRACY_API void ___tracy_emit_gpu_time_serial( const struct ___tracy_gpu_time_dat
TRACY_API void ___tracy_emit_gpu_new_context_serial( const struct ___tracy_gpu_new_context_data ); TRACY_API void ___tracy_emit_gpu_new_context_serial( const struct ___tracy_gpu_new_context_data );
TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_context_name_data ); TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_context_name_data );
TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_calibration_data ); TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_calibration_data );
TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_time_sync_data );
TRACY_API int ___tracy_connected(void); TRACY_API int ___tracy_connected(void);

View File

@ -195,6 +195,9 @@ void EventDebug( const QueueItem& ev )
case QueueType::GpuCalibration: case QueueType::GpuCalibration:
fprintf( f, "ev %i (GpuCalibration)\n", ev.hdr.idx ); fprintf( f, "ev %i (GpuCalibration)\n", ev.hdr.idx );
break; break;
case QueueType::GpuTimeSync:
fprintf( f, "ev %i (GpuTimeSync)\n", ev.hdr.idx );
break;
case QueueType::Crash: case QueueType::Crash:
fprintf( f, "ev %i (Crash)\n", ev.hdr.idx ); fprintf( f, "ev %i (Crash)\n", ev.hdr.idx );
break; break;

View File

@ -4589,6 +4589,9 @@ bool Worker::Process( const QueueItem& ev )
case QueueType::GpuCalibration: case QueueType::GpuCalibration:
ProcessGpuCalibration( ev.gpuCalibration ); ProcessGpuCalibration( ev.gpuCalibration );
break; break;
case QueueType::GpuTimeSync:
ProcessGpuTimeSync( ev.gpuTimeSync );
break;
case QueueType::GpuContextName: case QueueType::GpuContextName:
ProcessGpuContextName( ev.gpuContextName ); ProcessGpuContextName( ev.gpuContextName );
break; break;
@ -5921,6 +5924,29 @@ void Worker::ProcessGpuCalibration( const QueueGpuCalibration& ev )
ctx->calibratedGpuTime = gpuTime; ctx->calibratedGpuTime = gpuTime;
ctx->calibratedCpuTime = TscTime( ev.cpuTime ); ctx->calibratedCpuTime = TscTime( ev.cpuTime );
} }
// Handles a GpuTimeSync event for the GPU context identified by ev.context.
//
// Recomputes the context's CPU-to-GPU offset (timeDiff) from the timestamp
// pair carried by the event, and zeroes the context's lastGpuTime, overflow
// and overflowMul fields.
//
// ev - event payload carrying gpuTime, cpuTime and the context id.
void Worker::ProcessGpuTimeSync( const QueueGpuTimeSync& ev )
{
    auto gpuCtx = m_gpuCtxMap[ev.context];
    assert( gpuCtx );

    // When the context period is exactly 1 the GPU timestamp is used as-is;
    // otherwise it is scaled by the period through a double multiplication.
    const int64_t scaledGpuTime = gpuCtx->period == 1.f
        ? ev.gpuTime
        : int64_t( double( gpuCtx->period ) * ev.gpuTime );   // precision loss

    gpuCtx->timeDiff = TscTime( ev.cpuTime ) - scaledGpuTime;

    // Reset the per-context timestamp tracking state.
    gpuCtx->lastGpuTime = 0;
    gpuCtx->overflow = 0;
    gpuCtx->overflowMul = 0;
}
void Worker::ProcessGpuContextName( const QueueGpuContextName& ev ) void Worker::ProcessGpuContextName( const QueueGpuContextName& ev )
{ {

View File

@ -722,6 +722,7 @@ private:
tracy_force_inline void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev, bool serial ); tracy_force_inline void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev, bool serial );
tracy_force_inline void ProcessGpuTime( const QueueGpuTime& ev ); tracy_force_inline void ProcessGpuTime( const QueueGpuTime& ev );
tracy_force_inline void ProcessGpuCalibration( const QueueGpuCalibration& ev ); tracy_force_inline void ProcessGpuCalibration( const QueueGpuCalibration& ev );
tracy_force_inline void ProcessGpuTimeSync( const QueueGpuTimeSync& ev );
tracy_force_inline void ProcessGpuContextName( const QueueGpuContextName& ev ); tracy_force_inline void ProcessGpuContextName( const QueueGpuContextName& ev );
tracy_force_inline MemEvent* ProcessMemAlloc( const QueueMemAlloc& ev ); tracy_force_inline MemEvent* ProcessMemAlloc( const QueueMemAlloc& ev );
tracy_force_inline MemEvent* ProcessMemAllocNamed( const QueueMemAlloc& ev ); tracy_force_inline MemEvent* ProcessMemAllocNamed( const QueueMemAlloc& ev );