diff --git a/TracyVulkan.hpp b/TracyVulkan.hpp
index 09c50ab8..e307ee3f 100644
--- a/TracyVulkan.hpp
+++ b/TracyVulkan.hpp
@@ -8,6 +8,8 @@
 #define TracyVkZone(x,y)
 #define TracyVkZoneC(x,y,z)
 #define TracyVkCollect(x)
+#define TracyVkZoneS(x,y,z)
+#define TracyVkZoneCS(x,y,z,w)
 
 #else
 
@@ -15,6 +17,7 @@
 #include <vulkan/vulkan.h>
 #include "Tracy.hpp"
 #include "client/TracyProfiler.hpp"
+#include "client/TracyCallstack.hpp"
 
 #define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::s_vkCtx.ptr = (tracy::VkCtx*)tracy::tracy_malloc( sizeof( tracy::VkCtx ) ); new(tracy::s_vkCtx.ptr) tracy::VkCtx( physdev, device, queue, cmdbuf );
 #define TracyVkDestroy() tracy::s_vkCtx.ptr->~VkCtx(); tracy::tracy_free( tracy::s_vkCtx.ptr ); tracy::s_vkCtx.ptr = nullptr;
@@ -22,6 +25,17 @@
 #define TracyVkZoneC( cmdbuf, name, color ) static const tracy::SourceLocation __tracy_gpu_source_location { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope ___tracy_gpu_zone( &__tracy_gpu_source_location, cmdbuf );
 #define TracyVkCollect( cmdbuf ) tracy::s_vkCtx.ptr->Collect( cmdbuf );
 
+// GPU zone macros that additionally pass `depth` to the VkCtxScope constructor.
+// When TRACY_HAS_CALLSTACK is not defined they expand to the plain
+// TracyVkZone / TracyVkZoneC macros and `depth` is ignored.
+#ifdef TRACY_HAS_CALLSTACK
+#  define TracyVkZoneS( cmdbuf, name, depth ) static const tracy::SourceLocation __tracy_gpu_source_location { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope ___tracy_gpu_zone( &__tracy_gpu_source_location, cmdbuf, depth );
+#  define TracyVkZoneCS( cmdbuf, name, color, depth ) static const tracy::SourceLocation __tracy_gpu_source_location { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope ___tracy_gpu_zone( &__tracy_gpu_source_location, cmdbuf, depth );
+#else
+#  define TracyVkZoneS( cmdbuf, name, depth ) TracyVkZone( cmdbuf, name )
+#  define TracyVkZoneCS( cmdbuf, name, color, depth ) TracyVkZoneC( cmdbuf, name, color )
+#endif
+
 namespace tracy
 {
 
@@ -195,6 +209,32 @@ public:
         tail.store( magic + 1, std::memory_order_release );
     }
 
+    // Callstack variant of the zone constructor: records a top-of-pipe
+    // timestamp query on cmdbuf, queues a GpuZoneBeginCallstack item (it uses
+    // the gpuZoneBegin payload fields) and then calls SendCallstack with depth.
+    tracy_force_inline VkCtxScope( const SourceLocation* srcloc, VkCommandBuffer cmdbuf, int depth )
+        : m_cmdbuf( cmdbuf )
+    {
+        const auto thread = GetThreadHandle();
+
+        auto ctx = s_vkCtx.ptr;
+        vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, ctx->m_query, ctx->NextQueryId() );
+
+        Magic magic;
+        auto& token = s_token.ptr;
+        auto& tail = token->get_tail_index();
+        auto item = token->enqueue_begin( magic );
+        MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstack );
+        MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
+        MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
+        MemWrite( &item->gpuZoneBegin.thread, thread );
+        MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
+        tail.store( magic + 1, std::memory_order_release );
+
+        // The callstack is sent only after the zone-begin item has been queued.
+        s_profiler.SendCallstack( depth, thread );
+    }
+
     tracy_force_inline ~VkCtxScope()
     {
         auto ctx = s_vkCtx.ptr;
diff --git a/common/TracyQueue.hpp b/common/TracyQueue.hpp
index aa7976fb..54889766 100644
--- a/common/TracyQueue.hpp
+++ b/common/TracyQueue.hpp
@@ -31,6 +31,7 @@ enum class QueueType : uint8_t
     MessageLiteral,
     GpuNewContext,
     GpuZoneBegin,
+    GpuZoneBeginCallstack,
     GpuZoneEnd,
     GpuTime,
     GpuResync,
@@ -301,6 +302,7 @@ static const size_t QueueDataSize[] = {
     sizeof( QueueHeader ) + sizeof( QueueMessage ), // literal
     sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ),
     sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ),
+    sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack
     sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ),
     sizeof( QueueHeader ) + sizeof( QueueGpuTime ),
     sizeof( QueueHeader ) + sizeof( QueueGpuResync ),