1
0
mirror of https://github.com/wolfpld/tracy synced 2025-04-29 12:23:53 +00:00

Send Vulkan GPU calibration messages.

This commit is contained in:
Bartosz Taudul 2020-07-07 20:32:25 +02:00
parent c91c6be763
commit 1b6bc1b69a
8 changed files with 100 additions and 19 deletions

View File

@ -150,7 +150,7 @@ namespace tracy
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread)); memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
MemWrite(&item->gpuNewContext.period, 1E+09f / static_cast<float>(timestampFrequency)); MemWrite(&item->gpuNewContext.period, 1E+09f / static_cast<float>(timestampFrequency));
MemWrite(&item->gpuNewContext.context, m_context); MemWrite(&item->gpuNewContext.context, m_context);
MemWrite(&item->gpuNewContext.accuracyBits, uint8_t{ 0 }); MemWrite(&item->gpuNewContext.flags, uint8_t{ 0 });
MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12); MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12);
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND

View File

@ -72,7 +72,7 @@ namespace tracy {
MemWrite(&item->gpuNewContext.period, 1.0f); MemWrite(&item->gpuNewContext.period, 1.0f);
MemWrite(&item->gpuNewContext.type, GpuContextType::OpenCL); MemWrite(&item->gpuNewContext.type, GpuContextType::OpenCL);
MemWrite(&item->gpuNewContext.context, (uint8_t) m_contextId); MemWrite(&item->gpuNewContext.context, (uint8_t) m_contextId);
MemWrite(&item->gpuNewContext.accuracyBits, (uint8_t)0); MemWrite(&item->gpuNewContext.flags, (uint8_t)0);
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem(*item); GetProfiler().DeferItem(*item);
#endif #endif

View File

@ -110,7 +110,7 @@ public:
MemWrite( &item->gpuNewContext.thread, thread ); MemWrite( &item->gpuNewContext.thread, thread );
MemWrite( &item->gpuNewContext.period, period ); MemWrite( &item->gpuNewContext.period, period );
MemWrite( &item->gpuNewContext.context, m_context ); MemWrite( &item->gpuNewContext.context, m_context );
MemWrite( &item->gpuNewContext.accuracyBits, (uint8_t)bits ); MemWrite( &item->gpuNewContext.flags, uint8_t( 0 ) );
MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl ); MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl );
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND

View File

@ -102,14 +102,16 @@ public:
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ); vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue ); vkQueueWaitIdle( queue );
int64_t tcpu, tgpu;
if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT )
{
vkBeginCommandBuffer( cmdbuf, &beginInfo ); vkBeginCommandBuffer( cmdbuf, &beginInfo );
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 ); vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 );
vkEndCommandBuffer( cmdbuf ); vkEndCommandBuffer( cmdbuf );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ); vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue ); vkQueueWaitIdle( queue );
int64_t tcpu = Profiler::GetTime(); tcpu = Profiler::GetTime();
int64_t tgpu;
vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT ); vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT );
vkBeginCommandBuffer( cmdbuf, &beginInfo ); vkBeginCommandBuffer( cmdbuf, &beginInfo );
@ -117,6 +119,39 @@ public:
vkEndCommandBuffer( cmdbuf ); vkEndCommandBuffer( cmdbuf );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ); vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue ); vkQueueWaitIdle( queue );
}
else
{
enum { NumProbes = 32 };
VkCalibratedTimestampInfoEXT spec[2] = {
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
};
uint64_t ts[2];
uint64_t deviation[NumProbes];
for( int i=0; i<NumProbes; i++ )
{
_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, deviation+i );
}
uint64_t minDeviation = deviation[0];
for( int i=1; i<NumProbes; i++ )
{
if( minDeviation > deviation[i] )
{
minDeviation = deviation[i];
}
}
m_deviation = minDeviation * 3 / 2;
m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() );
Calibrate( device, m_prevCalibration, tgpu );
tcpu = Profiler::GetTime();
}
uint8_t flags = 0;
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration;
auto item = Profiler::QueueSerial(); auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuNewContext ); MemWrite( &item->hdr.type, QueueType::GpuNewContext );
@ -125,7 +160,7 @@ public:
memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) ); memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
MemWrite( &item->gpuNewContext.period, period ); MemWrite( &item->gpuNewContext.period, period );
MemWrite( &item->gpuNewContext.context, m_context ); MemWrite( &item->gpuNewContext.context, m_context );
MemWrite( &item->gpuNewContext.accuracyBits, uint8_t( 0 ) ); MemWrite( &item->gpuNewContext.flags, flags );
MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan ); MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan );
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
@ -153,6 +188,8 @@ public:
{ {
vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ); vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount );
m_head = m_tail = 0; m_head = m_tail = 0;
int64_t tgpu;
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu );
return; return;
} }
#endif #endif
@ -184,6 +221,25 @@ public:
Profiler::QueueSerialFinish(); Profiler::QueueSerialFinish();
} }
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT )
{
int64_t tgpu, tcpu;
Calibrate( m_device, tcpu, tgpu );
const auto refCpu = Profiler::GetTime();
const auto delta = tcpu - m_prevCalibration;
if( delta > 0 )
{
m_prevCalibration = tcpu;
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuCalibration );
MemWrite( &item->gpuCalibration.gpuTime, tgpu );
MemWrite( &item->gpuCalibration.cpuTime, refCpu );
MemWrite( &item->gpuCalibration.cpuDelta, delta );
MemWrite( &item->gpuCalibration.context, m_context );
Profiler::QueueSerialFinish();
}
}
vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt ); vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt );
m_tail += cnt; m_tail += cnt;

View File

@ -9,7 +9,7 @@ namespace tracy
constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; } constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; }
enum : uint32_t { ProtocolVersion = 36 }; enum : uint32_t { ProtocolVersion = 37 };
enum : uint32_t { BroadcastVersion = 1 }; enum : uint32_t { BroadcastVersion = 1 };
using lz4sz_t = uint32_t; using lz4sz_t = uint32_t;

View File

@ -56,6 +56,7 @@ enum class QueueType : uint8_t
Terminate, Terminate,
KeepAlive, KeepAlive,
ThreadContext, ThreadContext,
GpuCalibration,
Crash, Crash,
CrashReport, CrashReport,
ZoneValidation, ZoneValidation,
@ -268,6 +269,11 @@ enum class GpuContextType : uint8_t
Direct3D12 Direct3D12
}; };
enum GpuContextFlags : uint8_t
{
GpuContextCalibration = 1 << 0
};
struct QueueGpuNewContext struct QueueGpuNewContext
{ {
int64_t cpuTime; int64_t cpuTime;
@ -275,7 +281,7 @@ struct QueueGpuNewContext
uint64_t thread; uint64_t thread;
float period; float period;
uint8_t context; uint8_t context;
uint8_t accuracyBits; GpuContextFlags flags;
GpuContextType type; GpuContextType type;
}; };
@ -303,6 +309,14 @@ struct QueueGpuTime
uint8_t context; uint8_t context;
}; };
struct QueueGpuCalibration
{
int64_t gpuTime;
int64_t cpuTime;
int64_t cpuDelta;
uint8_t context;
};
struct QueueMemAlloc struct QueueMemAlloc
{ {
int64_t time; int64_t time;
@ -477,6 +491,7 @@ struct QueueItem
QueueGpuZoneBegin gpuZoneBegin; QueueGpuZoneBegin gpuZoneBegin;
QueueGpuZoneEnd gpuZoneEnd; QueueGpuZoneEnd gpuZoneEnd;
QueueGpuTime gpuTime; QueueGpuTime gpuTime;
QueueGpuCalibration gpuCalibration;
QueueMemAlloc memAlloc; QueueMemAlloc memAlloc;
QueueMemFree memFree; QueueMemFree memFree;
QueueCallstackMemory callstackMemory; QueueCallstackMemory callstackMemory;
@ -553,6 +568,7 @@ static constexpr size_t QueueDataSize[] = {
sizeof( QueueHeader ), // terminate sizeof( QueueHeader ), // terminate
sizeof( QueueHeader ), // keep alive sizeof( QueueHeader ), // keep alive
sizeof( QueueHeader ) + sizeof( QueueThreadContext ), sizeof( QueueHeader ) + sizeof( QueueThreadContext ),
sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ),
sizeof( QueueHeader ), // crash sizeof( QueueHeader ), // crash
sizeof( QueueHeader ) + sizeof( QueueCrashReport ), sizeof( QueueHeader ) + sizeof( QueueCrashReport ),
sizeof( QueueHeader ) + sizeof( QueueZoneValidation ), sizeof( QueueHeader ) + sizeof( QueueZoneValidation ),

View File

@ -3957,6 +3957,9 @@ bool Worker::Process( const QueueItem& ev )
case QueueType::GpuTime: case QueueType::GpuTime:
ProcessGpuTime( ev.gpuTime ); ProcessGpuTime( ev.gpuTime );
break; break;
case QueueType::GpuCalibration:
ProcessGpuCalibration( ev.gpuCalibration );
break;
case QueueType::MemAlloc: case QueueType::MemAlloc:
ProcessMemAlloc( ev.memAlloc ); ProcessMemAlloc( ev.memAlloc );
break; break;
@ -5061,6 +5064,11 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev )
} }
} }
void Worker::ProcessGpuCalibration( const QueueGpuCalibration& ev )
{
}
void Worker::ProcessMemAlloc( const QueueMemAlloc& ev ) void Worker::ProcessMemAlloc( const QueueMemAlloc& ev )
{ {
const auto refTime = m_refTimeSerial + ev.time; const auto refTime = m_refTimeSerial + ev.time;

View File

@ -643,6 +643,7 @@ private:
tracy_force_inline void ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev, bool serial ); tracy_force_inline void ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev, bool serial );
tracy_force_inline void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev, bool serial ); tracy_force_inline void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev, bool serial );
tracy_force_inline void ProcessGpuTime( const QueueGpuTime& ev ); tracy_force_inline void ProcessGpuTime( const QueueGpuTime& ev );
tracy_force_inline void ProcessGpuCalibration( const QueueGpuCalibration& ev );
tracy_force_inline void ProcessMemAlloc( const QueueMemAlloc& ev ); tracy_force_inline void ProcessMemAlloc( const QueueMemAlloc& ev );
tracy_force_inline bool ProcessMemFree( const QueueMemFree& ev ); tracy_force_inline bool ProcessMemFree( const QueueMemFree& ev );
tracy_force_inline void ProcessMemAllocCallstack( const QueueMemAlloc& ev ); tracy_force_inline void ProcessMemAllocCallstack( const QueueMemAlloc& ev );