From 1b6bc1b69adf42d33c160149f28ca38bb3024c0f Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Tue, 7 Jul 2020 20:32:25 +0200 Subject: [PATCH] Send Vulkan GPU calibration messages. --- TracyD3D12.hpp | 2 +- TracyOpenCL.hpp | 2 +- TracyOpenGL.hpp | 2 +- TracyVulkan.hpp | 84 +++++++++++++++++++++++++++++++++------- common/TracyProtocol.hpp | 2 +- common/TracyQueue.hpp | 18 ++++++++- server/TracyWorker.cpp | 8 ++++ server/TracyWorker.hpp | 1 + 8 files changed, 100 insertions(+), 19 deletions(-) diff --git a/TracyD3D12.hpp b/TracyD3D12.hpp index 8f75d2ea..93d4b244 100644 --- a/TracyD3D12.hpp +++ b/TracyD3D12.hpp @@ -150,7 +150,7 @@ namespace tracy memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread)); MemWrite(&item->gpuNewContext.period, 1E+09f / static_cast(timestampFrequency)); MemWrite(&item->gpuNewContext.context, m_context); - MemWrite(&item->gpuNewContext.accuracyBits, uint8_t{ 0 }); + MemWrite(&item->gpuNewContext.flags, uint8_t{ 0 }); MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12); #ifdef TRACY_ON_DEMAND diff --git a/TracyOpenCL.hpp b/TracyOpenCL.hpp index 77c7de98..5dba8488 100644 --- a/TracyOpenCL.hpp +++ b/TracyOpenCL.hpp @@ -72,7 +72,7 @@ namespace tracy { MemWrite(&item->gpuNewContext.period, 1.0f); MemWrite(&item->gpuNewContext.type, GpuContextType::OpenCL); MemWrite(&item->gpuNewContext.context, (uint8_t) m_contextId); - MemWrite(&item->gpuNewContext.accuracyBits, (uint8_t)0); + MemWrite(&item->gpuNewContext.flags, (uint8_t)0); #ifdef TRACY_ON_DEMAND GetProfiler().DeferItem(*item); #endif diff --git a/TracyOpenGL.hpp b/TracyOpenGL.hpp index e4d831d3..90c5dcbc 100644 --- a/TracyOpenGL.hpp +++ b/TracyOpenGL.hpp @@ -110,7 +110,7 @@ public: MemWrite( &item->gpuNewContext.thread, thread ); MemWrite( &item->gpuNewContext.period, period ); MemWrite( &item->gpuNewContext.context, m_context ); - MemWrite( &item->gpuNewContext.accuracyBits, (uint8_t)bits ); + MemWrite( &item->gpuNewContext.flags, uint8_t( 0 ) ); MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl ); #ifdef TRACY_ON_DEMAND diff --git a/TracyVulkan.hpp b/TracyVulkan.hpp index 1cc7e20e..9ba6738b 100644 --- a/TracyVulkan.hpp +++ b/TracyVulkan.hpp @@ -102,21 +102,56 @@ public: vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ); vkQueueWaitIdle( queue ); - vkBeginCommandBuffer( cmdbuf, &beginInfo ); - vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 ); - vkEndCommandBuffer( cmdbuf ); - vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ); - vkQueueWaitIdle( queue ); + int64_t tcpu, tgpu; + if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT ) + { + vkBeginCommandBuffer( cmdbuf, &beginInfo ); + vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 ); + vkEndCommandBuffer( cmdbuf ); + vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ); + vkQueueWaitIdle( queue ); - int64_t tcpu = Profiler::GetTime(); - int64_t tgpu; - vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT ); + tcpu = Profiler::GetTime(); + vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT ); - vkBeginCommandBuffer( cmdbuf, &beginInfo ); - vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 ); - vkEndCommandBuffer( cmdbuf ); - vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ); - vkQueueWaitIdle( queue ); + vkBeginCommandBuffer( cmdbuf, &beginInfo ); + vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 ); + vkEndCommandBuffer( cmdbuf ); + vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ); + vkQueueWaitIdle( queue ); + } + else + { + enum { NumProbes = 32 }; + + VkCalibratedTimestampInfoEXT spec[2] = { + { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT }, + { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain }, + }; + uint64_t ts[2]; + uint64_t deviation[NumProbes]; + for( int i=0; i deviation[i] ) + { + minDeviation = deviation[i]; + } + } + m_deviation = minDeviation * 3 / 2; + + m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() ); + + Calibrate( device, m_prevCalibration, tgpu ); + tcpu = Profiler::GetTime(); + } + + uint8_t flags = 0; + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration; auto item = Profiler::QueueSerial(); MemWrite( &item->hdr.type, QueueType::GpuNewContext ); @@ -125,7 +160,7 @@ public: memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) ); MemWrite( &item->gpuNewContext.period, period ); MemWrite( &item->gpuNewContext.context, m_context ); - MemWrite( &item->gpuNewContext.accuracyBits, uint8_t( 0 ) ); + MemWrite( &item->gpuNewContext.flags, flags ); MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan ); #ifdef TRACY_ON_DEMAND @@ -153,6 +188,8 @@ public: { vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ); m_head = m_tail = 0; + int64_t tgpu; + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu ); return; } #endif @@ -184,6 +221,25 @@ public: Profiler::QueueSerialFinish(); } + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) + { + int64_t tgpu, tcpu; + Calibrate( m_device, tcpu, tgpu ); + const auto refCpu = Profiler::GetTime(); + const auto delta = tcpu - m_prevCalibration; + if( delta > 0 ) + { + m_prevCalibration = tcpu; + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuCalibration ); + MemWrite( &item->gpuCalibration.gpuTime, tgpu ); + MemWrite( &item->gpuCalibration.cpuTime, refCpu ); + MemWrite( &item->gpuCalibration.cpuDelta, delta ); + MemWrite( &item->gpuCalibration.context, m_context ); + Profiler::QueueSerialFinish(); + } + } + vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt ); m_tail += cnt; diff --git a/common/TracyProtocol.hpp b/common/TracyProtocol.hpp index 634b5846..cc47fe41 100644 --- a/common/TracyProtocol.hpp +++ b/common/TracyProtocol.hpp @@ -9,7 +9,7 @@ namespace tracy constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; } -enum : uint32_t { ProtocolVersion = 36 }; +enum : uint32_t { ProtocolVersion = 37 }; enum : uint32_t { BroadcastVersion = 1 }; using lz4sz_t = uint32_t; diff --git a/common/TracyQueue.hpp b/common/TracyQueue.hpp index 4bd48b54..815d27ab 100644 --- a/common/TracyQueue.hpp +++ b/common/TracyQueue.hpp @@ -56,6 +56,7 @@ enum class QueueType : uint8_t Terminate, KeepAlive, ThreadContext, + GpuCalibration, Crash, CrashReport, ZoneValidation, @@ -268,6 +269,11 @@ enum class GpuContextType : uint8_t Direct3D12 }; +enum GpuContextFlags : uint8_t +{ + GpuContextCalibration = 1 << 0 +}; + struct QueueGpuNewContext { int64_t cpuTime; @@ -275,7 +281,7 @@ struct QueueGpuNewContext uint64_t thread; float period; uint8_t context; - uint8_t accuracyBits; + GpuContextFlags flags; GpuContextType type; }; @@ -303,6 +309,14 @@ struct QueueGpuTime uint8_t context; }; +struct QueueGpuCalibration +{ + int64_t gpuTime; + int64_t cpuTime; + int64_t cpuDelta; + uint8_t context; +}; + struct QueueMemAlloc { int64_t time; @@ -477,6 +491,7 @@ struct QueueItem QueueGpuZoneBegin gpuZoneBegin; QueueGpuZoneEnd gpuZoneEnd; QueueGpuTime gpuTime; + QueueGpuCalibration gpuCalibration; QueueMemAlloc memAlloc; QueueMemFree memFree; QueueCallstackMemory callstackMemory; @@ -553,6 +568,7 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ), // terminate sizeof( QueueHeader ), // keep alive sizeof( QueueHeader ) + sizeof( QueueThreadContext ), + sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ), sizeof( QueueHeader ), // crash sizeof( QueueHeader ) + sizeof( QueueCrashReport ), sizeof( QueueHeader ) + sizeof( QueueZoneValidation ), diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index 510f408f..cefb2394 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -3957,6 +3957,9 @@ bool Worker::Process( const QueueItem& ev ) case QueueType::GpuTime: ProcessGpuTime( ev.gpuTime ); break; + case QueueType::GpuCalibration: + ProcessGpuCalibration( ev.gpuCalibration ); + break; case QueueType::MemAlloc: ProcessMemAlloc( ev.memAlloc ); break; @@ -5061,6 +5064,11 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev ) } } +void Worker::ProcessGpuCalibration( const QueueGpuCalibration& ev ) +{ + +} + void Worker::ProcessMemAlloc( const QueueMemAlloc& ev ) { const auto refTime = m_refTimeSerial + ev.time; diff --git a/server/TracyWorker.hpp b/server/TracyWorker.hpp index 74a83c98..7390593d 100644 --- a/server/TracyWorker.hpp +++ b/server/TracyWorker.hpp @@ -643,6 +643,7 @@ private: tracy_force_inline void ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev, bool serial ); tracy_force_inline void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev, bool serial ); tracy_force_inline void ProcessGpuTime( const QueueGpuTime& ev ); + tracy_force_inline void ProcessGpuCalibration( const QueueGpuCalibration& ev ); tracy_force_inline void ProcessMemAlloc( const QueueMemAlloc& ev ); tracy_force_inline bool ProcessMemFree( const QueueMemFree& ev ); tracy_force_inline void ProcessMemAllocCallstack( const QueueMemAlloc& ev );