mirror of
https://github.com/wolfpld/tracy
synced 2025-04-29 12:23:53 +00:00
Send Vulkan GPU calibration messages.
This commit is contained in:
parent
c91c6be763
commit
1b6bc1b69a
@ -150,7 +150,7 @@ namespace tracy
|
|||||||
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
|
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
|
||||||
MemWrite(&item->gpuNewContext.period, 1E+09f / static_cast<float>(timestampFrequency));
|
MemWrite(&item->gpuNewContext.period, 1E+09f / static_cast<float>(timestampFrequency));
|
||||||
MemWrite(&item->gpuNewContext.context, m_context);
|
MemWrite(&item->gpuNewContext.context, m_context);
|
||||||
MemWrite(&item->gpuNewContext.accuracyBits, uint8_t{ 0 });
|
MemWrite(&item->gpuNewContext.flags, uint8_t{ 0 });
|
||||||
MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12);
|
MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12);
|
||||||
|
|
||||||
#ifdef TRACY_ON_DEMAND
|
#ifdef TRACY_ON_DEMAND
|
||||||
|
@ -72,7 +72,7 @@ namespace tracy {
|
|||||||
MemWrite(&item->gpuNewContext.period, 1.0f);
|
MemWrite(&item->gpuNewContext.period, 1.0f);
|
||||||
MemWrite(&item->gpuNewContext.type, GpuContextType::OpenCL);
|
MemWrite(&item->gpuNewContext.type, GpuContextType::OpenCL);
|
||||||
MemWrite(&item->gpuNewContext.context, (uint8_t) m_contextId);
|
MemWrite(&item->gpuNewContext.context, (uint8_t) m_contextId);
|
||||||
MemWrite(&item->gpuNewContext.accuracyBits, (uint8_t)0);
|
MemWrite(&item->gpuNewContext.flags, (uint8_t)0);
|
||||||
#ifdef TRACY_ON_DEMAND
|
#ifdef TRACY_ON_DEMAND
|
||||||
GetProfiler().DeferItem(*item);
|
GetProfiler().DeferItem(*item);
|
||||||
#endif
|
#endif
|
||||||
|
@ -110,7 +110,7 @@ public:
|
|||||||
MemWrite( &item->gpuNewContext.thread, thread );
|
MemWrite( &item->gpuNewContext.thread, thread );
|
||||||
MemWrite( &item->gpuNewContext.period, period );
|
MemWrite( &item->gpuNewContext.period, period );
|
||||||
MemWrite( &item->gpuNewContext.context, m_context );
|
MemWrite( &item->gpuNewContext.context, m_context );
|
||||||
MemWrite( &item->gpuNewContext.accuracyBits, (uint8_t)bits );
|
MemWrite( &item->gpuNewContext.flags, uint8_t( 0 ) );
|
||||||
MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl );
|
MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl );
|
||||||
|
|
||||||
#ifdef TRACY_ON_DEMAND
|
#ifdef TRACY_ON_DEMAND
|
||||||
|
@ -102,21 +102,56 @@ public:
|
|||||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||||
vkQueueWaitIdle( queue );
|
vkQueueWaitIdle( queue );
|
||||||
|
|
||||||
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
int64_t tcpu, tgpu;
|
||||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 );
|
if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT )
|
||||||
vkEndCommandBuffer( cmdbuf );
|
{
|
||||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
||||||
vkQueueWaitIdle( queue );
|
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 );
|
||||||
|
vkEndCommandBuffer( cmdbuf );
|
||||||
|
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||||
|
vkQueueWaitIdle( queue );
|
||||||
|
|
||||||
int64_t tcpu = Profiler::GetTime();
|
tcpu = Profiler::GetTime();
|
||||||
int64_t tgpu;
|
vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT );
|
||||||
vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT );
|
|
||||||
|
|
||||||
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
||||||
vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 );
|
vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 );
|
||||||
vkEndCommandBuffer( cmdbuf );
|
vkEndCommandBuffer( cmdbuf );
|
||||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||||
vkQueueWaitIdle( queue );
|
vkQueueWaitIdle( queue );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
enum { NumProbes = 32 };
|
||||||
|
|
||||||
|
VkCalibratedTimestampInfoEXT spec[2] = {
|
||||||
|
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
|
||||||
|
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
|
||||||
|
};
|
||||||
|
uint64_t ts[2];
|
||||||
|
uint64_t deviation[NumProbes];
|
||||||
|
for( int i=0; i<NumProbes; i++ )
|
||||||
|
{
|
||||||
|
_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, deviation+i );
|
||||||
|
}
|
||||||
|
uint64_t minDeviation = deviation[0];
|
||||||
|
for( int i=1; i<NumProbes; i++ )
|
||||||
|
{
|
||||||
|
if( minDeviation > deviation[i] )
|
||||||
|
{
|
||||||
|
minDeviation = deviation[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m_deviation = minDeviation * 3 / 2;
|
||||||
|
|
||||||
|
m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() );
|
||||||
|
|
||||||
|
Calibrate( device, m_prevCalibration, tgpu );
|
||||||
|
tcpu = Profiler::GetTime();
|
||||||
|
}
|
||||||
|
|
||||||
|
uint8_t flags = 0;
|
||||||
|
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration;
|
||||||
|
|
||||||
auto item = Profiler::QueueSerial();
|
auto item = Profiler::QueueSerial();
|
||||||
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
|
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
|
||||||
@ -125,7 +160,7 @@ public:
|
|||||||
memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
|
memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
|
||||||
MemWrite( &item->gpuNewContext.period, period );
|
MemWrite( &item->gpuNewContext.period, period );
|
||||||
MemWrite( &item->gpuNewContext.context, m_context );
|
MemWrite( &item->gpuNewContext.context, m_context );
|
||||||
MemWrite( &item->gpuNewContext.accuracyBits, uint8_t( 0 ) );
|
MemWrite( &item->gpuNewContext.flags, flags );
|
||||||
MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan );
|
MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan );
|
||||||
|
|
||||||
#ifdef TRACY_ON_DEMAND
|
#ifdef TRACY_ON_DEMAND
|
||||||
@ -153,6 +188,8 @@ public:
|
|||||||
{
|
{
|
||||||
vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount );
|
vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount );
|
||||||
m_head = m_tail = 0;
|
m_head = m_tail = 0;
|
||||||
|
int64_t tgpu;
|
||||||
|
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu );
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -184,6 +221,25 @@ public:
|
|||||||
Profiler::QueueSerialFinish();
|
Profiler::QueueSerialFinish();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT )
|
||||||
|
{
|
||||||
|
int64_t tgpu, tcpu;
|
||||||
|
Calibrate( m_device, tcpu, tgpu );
|
||||||
|
const auto refCpu = Profiler::GetTime();
|
||||||
|
const auto delta = tcpu - m_prevCalibration;
|
||||||
|
if( delta > 0 )
|
||||||
|
{
|
||||||
|
m_prevCalibration = tcpu;
|
||||||
|
auto item = Profiler::QueueSerial();
|
||||||
|
MemWrite( &item->hdr.type, QueueType::GpuCalibration );
|
||||||
|
MemWrite( &item->gpuCalibration.gpuTime, tgpu );
|
||||||
|
MemWrite( &item->gpuCalibration.cpuTime, refCpu );
|
||||||
|
MemWrite( &item->gpuCalibration.cpuDelta, delta );
|
||||||
|
MemWrite( &item->gpuCalibration.context, m_context );
|
||||||
|
Profiler::QueueSerialFinish();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt );
|
vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt );
|
||||||
|
|
||||||
m_tail += cnt;
|
m_tail += cnt;
|
||||||
|
@ -9,7 +9,7 @@ namespace tracy
|
|||||||
|
|
||||||
constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; }
|
constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; }
|
||||||
|
|
||||||
enum : uint32_t { ProtocolVersion = 36 };
|
enum : uint32_t { ProtocolVersion = 37 };
|
||||||
enum : uint32_t { BroadcastVersion = 1 };
|
enum : uint32_t { BroadcastVersion = 1 };
|
||||||
|
|
||||||
using lz4sz_t = uint32_t;
|
using lz4sz_t = uint32_t;
|
||||||
|
@ -56,6 +56,7 @@ enum class QueueType : uint8_t
|
|||||||
Terminate,
|
Terminate,
|
||||||
KeepAlive,
|
KeepAlive,
|
||||||
ThreadContext,
|
ThreadContext,
|
||||||
|
GpuCalibration,
|
||||||
Crash,
|
Crash,
|
||||||
CrashReport,
|
CrashReport,
|
||||||
ZoneValidation,
|
ZoneValidation,
|
||||||
@ -268,6 +269,11 @@ enum class GpuContextType : uint8_t
|
|||||||
Direct3D12
|
Direct3D12
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Bit flags stored in QueueGpuNewContext::flags (the field that replaces the
// former accuracyBits byte in this change).
// In the code visible in this change, only the Vulkan context sets a flag; the
// Direct3D12, OpenCL and OpenGL contexts write 0.
enum GpuContextFlags : uint8_t
|
||||||
|
{
|
||||||
|
// Set by the Vulkan context when its time domain is not
// VK_TIME_DOMAIN_DEVICE_EXT, i.e. when it also emits GpuCalibration items.
GpuContextCalibration = 1 << 0
|
||||||
|
};
|
||||||
|
|
||||||
struct QueueGpuNewContext
|
struct QueueGpuNewContext
|
||||||
{
|
{
|
||||||
int64_t cpuTime;
|
int64_t cpuTime;
|
||||||
@ -275,7 +281,7 @@ struct QueueGpuNewContext
|
|||||||
uint64_t thread;
|
uint64_t thread;
|
||||||
float period;
|
float period;
|
||||||
uint8_t context;
|
uint8_t context;
|
||||||
uint8_t accuracyBits;
|
GpuContextFlags flags;
|
||||||
GpuContextType type;
|
GpuContextType type;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -303,6 +309,14 @@ struct QueueGpuTime
|
|||||||
uint8_t context;
|
uint8_t context;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Payload of a QueueType::GpuCalibration queue item.
// Field descriptions below follow the Vulkan writer visible in this change;
// units are not established by the visible code -- TODO confirm against Calibrate().
struct QueueGpuCalibration
|
||||||
|
{
|
||||||
|
int64_t gpuTime;    // GPU timestamp returned through Calibrate()'s last argument
|
||||||
|
int64_t cpuTime;    // Profiler::GetTime() sampled right after Calibrate() returns
|
||||||
|
int64_t cpuDelta;   // Calibrate()'s CPU timestamp minus the previous calibration's; the writer only sends the item when this is > 0
|
||||||
|
uint8_t context;    // id of the GPU context (m_context) the calibration belongs to
|
||||||
|
};
|
||||||
|
|
||||||
struct QueueMemAlloc
|
struct QueueMemAlloc
|
||||||
{
|
{
|
||||||
int64_t time;
|
int64_t time;
|
||||||
@ -477,6 +491,7 @@ struct QueueItem
|
|||||||
QueueGpuZoneBegin gpuZoneBegin;
|
QueueGpuZoneBegin gpuZoneBegin;
|
||||||
QueueGpuZoneEnd gpuZoneEnd;
|
QueueGpuZoneEnd gpuZoneEnd;
|
||||||
QueueGpuTime gpuTime;
|
QueueGpuTime gpuTime;
|
||||||
|
QueueGpuCalibration gpuCalibration;
|
||||||
QueueMemAlloc memAlloc;
|
QueueMemAlloc memAlloc;
|
||||||
QueueMemFree memFree;
|
QueueMemFree memFree;
|
||||||
QueueCallstackMemory callstackMemory;
|
QueueCallstackMemory callstackMemory;
|
||||||
@ -553,6 +568,7 @@ static constexpr size_t QueueDataSize[] = {
|
|||||||
sizeof( QueueHeader ), // terminate
|
sizeof( QueueHeader ), // terminate
|
||||||
sizeof( QueueHeader ), // keep alive
|
sizeof( QueueHeader ), // keep alive
|
||||||
sizeof( QueueHeader ) + sizeof( QueueThreadContext ),
|
sizeof( QueueHeader ) + sizeof( QueueThreadContext ),
|
||||||
|
sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ),
|
||||||
sizeof( QueueHeader ), // crash
|
sizeof( QueueHeader ), // crash
|
||||||
sizeof( QueueHeader ) + sizeof( QueueCrashReport ),
|
sizeof( QueueHeader ) + sizeof( QueueCrashReport ),
|
||||||
sizeof( QueueHeader ) + sizeof( QueueZoneValidation ),
|
sizeof( QueueHeader ) + sizeof( QueueZoneValidation ),
|
||||||
|
@ -3957,6 +3957,9 @@ bool Worker::Process( const QueueItem& ev )
|
|||||||
case QueueType::GpuTime:
|
case QueueType::GpuTime:
|
||||||
ProcessGpuTime( ev.gpuTime );
|
ProcessGpuTime( ev.gpuTime );
|
||||||
break;
|
break;
|
||||||
|
case QueueType::GpuCalibration:
|
||||||
|
ProcessGpuCalibration( ev.gpuCalibration );
|
||||||
|
break;
|
||||||
case QueueType::MemAlloc:
|
case QueueType::MemAlloc:
|
||||||
ProcessMemAlloc( ev.memAlloc );
|
ProcessMemAlloc( ev.memAlloc );
|
||||||
break;
|
break;
|
||||||
@ -5061,6 +5064,11 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev )
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handler for QueueType::GpuCalibration events; called from Worker::Process.
// NOTE(review): the body is an empty stub -- the event is accepted and discarded,
// and `ev` is unused. Calibration data sent by the client has no effect here yet.
void Worker::ProcessGpuCalibration( const QueueGpuCalibration& ev )
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
void Worker::ProcessMemAlloc( const QueueMemAlloc& ev )
|
void Worker::ProcessMemAlloc( const QueueMemAlloc& ev )
|
||||||
{
|
{
|
||||||
const auto refTime = m_refTimeSerial + ev.time;
|
const auto refTime = m_refTimeSerial + ev.time;
|
||||||
|
@ -643,6 +643,7 @@ private:
|
|||||||
tracy_force_inline void ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev, bool serial );
|
tracy_force_inline void ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev, bool serial );
|
||||||
tracy_force_inline void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev, bool serial );
|
tracy_force_inline void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev, bool serial );
|
||||||
tracy_force_inline void ProcessGpuTime( const QueueGpuTime& ev );
|
tracy_force_inline void ProcessGpuTime( const QueueGpuTime& ev );
|
||||||
|
tracy_force_inline void ProcessGpuCalibration( const QueueGpuCalibration& ev );
|
||||||
tracy_force_inline void ProcessMemAlloc( const QueueMemAlloc& ev );
|
tracy_force_inline void ProcessMemAlloc( const QueueMemAlloc& ev );
|
||||||
tracy_force_inline bool ProcessMemFree( const QueueMemFree& ev );
|
tracy_force_inline bool ProcessMemFree( const QueueMemFree& ev );
|
||||||
tracy_force_inline void ProcessMemAllocCallstack( const QueueMemAlloc& ev );
|
tracy_force_inline void ProcessMemAllocCallstack( const QueueMemAlloc& ev );
|
||||||
|
Loading…
x
Reference in New Issue
Block a user