1
0
mirror of https://github.com/wolfpld/tracy synced 2025-04-29 04:23:51 +00:00

Serialize Vulkan GPU profiling messages.

Since Vulkan can be used from multiple threads, the guarantee that GPU time
data arrives after CPU time data cannot be upheld with asynchronous messages.
Use the serial queue instead.
This commit is contained in:
Bartosz Taudul 2019-09-23 15:38:16 +02:00
parent 9a49f49cfd
commit daf64c703a

View File

@ -97,10 +97,7 @@ public:
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ); vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue ); vkQueueWaitIdle( queue );
Magic magic; auto item = Profiler::QueueSerial();
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::GpuNewContext ); MemWrite( &item->hdr.type, QueueType::GpuNewContext );
MemWrite( &item->gpuNewContext.cpuTime, tcpu ); MemWrite( &item->gpuNewContext.cpuTime, tcpu );
MemWrite( &item->gpuNewContext.gpuTime, tgpu ); MemWrite( &item->gpuNewContext.gpuTime, tgpu );
@ -108,12 +105,10 @@ public:
MemWrite( &item->gpuNewContext.period, period ); MemWrite( &item->gpuNewContext.period, period );
MemWrite( &item->gpuNewContext.context, m_context ); MemWrite( &item->gpuNewContext.context, m_context );
MemWrite( &item->gpuNewContext.accuracyBits, uint8_t( 0 ) ); MemWrite( &item->gpuNewContext.accuracyBits, uint8_t( 0 ) );
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item ); GetProfiler().DeferItem( *item );
#endif #endif
Profiler::QueueSerialFinish();
tail.store( magic + 1, std::memory_order_release );
m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount ); m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount );
} }
@ -156,18 +151,14 @@ public:
return; return;
} }
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
for( unsigned int idx=0; idx<cnt; idx++ ) for( unsigned int idx=0; idx<cnt; idx++ )
{ {
auto item = token->enqueue_begin( magic ); auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuTime ); MemWrite( &item->hdr.type, QueueType::GpuTime );
MemWrite( &item->gpuTime.gpuTime, m_res[idx] ); MemWrite( &item->gpuTime.gpuTime, m_res[idx] );
MemWrite( &item->gpuTime.queryId, uint16_t( m_tail + idx ) ); MemWrite( &item->gpuTime.queryId, uint16_t( m_tail + idx ) );
MemWrite( &item->gpuTime.context, m_context ); MemWrite( &item->gpuTime.context, m_context );
tail.store( magic + 1, std::memory_order_release ); Profiler::QueueSerialFinish();
} }
vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt ); vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt );
@ -218,18 +209,14 @@ public:
const auto queryId = ctx->NextQueryId(); const auto queryId = ctx->NextQueryId();
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, ctx->m_query, queryId ); vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, ctx->m_query, queryId );
Magic magic; auto item = Profiler::QueueSerial();
const auto thread = GetThreadHandle();
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::GpuZoneBegin ); MemWrite( &item->hdr.type, QueueType::GpuZoneBegin );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
MemWrite( &item->gpuZoneBegin.thread, thread ); MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() ); MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
tail.store( magic + 1, std::memory_order_release ); Profiler::QueueSerialFinish();
} }
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int depth ) tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int depth )
@ -246,18 +233,14 @@ public:
const auto queryId = ctx->NextQueryId(); const auto queryId = ctx->NextQueryId();
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, ctx->m_query, queryId ); vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, ctx->m_query, queryId );
Magic magic; auto item = Profiler::QueueSerial();
const auto thread = GetThreadHandle();
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstack ); MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstack );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
MemWrite( &item->gpuZoneBegin.thread, thread ); MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() ); MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
tail.store( magic + 1, std::memory_order_release ); Profiler::QueueSerialFinish();
GetProfiler().SendCallstack( depth ); GetProfiler().SendCallstack( depth );
} }
@ -270,15 +253,12 @@ public:
const auto queryId = m_ctx->NextQueryId(); const auto queryId = m_ctx->NextQueryId();
vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_ctx->m_query, queryId ); vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_ctx->m_query, queryId );
Magic magic; auto item = Profiler::QueueSerial();
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::GpuZoneEnd ); MemWrite( &item->hdr.type, QueueType::GpuZoneEnd );
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() ); MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) ); MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneEnd.context, m_ctx->GetId() ); MemWrite( &item->gpuZoneEnd.context, m_ctx->GetId() );
tail.store( magic + 1, std::memory_order_release ); Profiler::QueueSerialFinish();
} }
private: private: