From a56c47a6a0b36062e3f753325207eba9ad059933 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Mon, 24 Jun 2019 19:19:47 +0200 Subject: [PATCH] Store thread handle in a thread local variable. This saves us a non-inlineable function call. The thread-local block is accessed anyway, since we need to get the token, so we already have the pointer and don't need to get it a second time (which is done inside Windows' GetCurrentThreadId()). We also don't need to store the thread id in ScopedZone anymore, as it was a micro-optimization to save us the second GetThreadHandle() call. This change has a measurable effect of reducing enqueue time from ~10 to ~8 ns. A further optimization would be to completely skip thread handle retrieval during zone capture and do it instead on retrieval of data from the queue. Since each thread has its own producer ("token"), the thread handle should be accessible during the dequeue operation. This is a much more invasive change that would require a) modification of the queue and b) additional processing of dequeued data to inject the thread handle. 
--- client/TracyScoped.hpp | 14 +++++++------- common/TracySystem.hpp | 8 +++++++- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/client/TracyScoped.hpp b/client/TracyScoped.hpp index 9048911f..4578ab18 100644 --- a/client/TracyScoped.hpp +++ b/client/TracyScoped.hpp @@ -24,9 +24,8 @@ public: #endif { if( !m_active ) return; - const auto thread = GetThreadHandle(); - m_thread = thread; Magic magic; + const auto thread = GetThreadHandle(); auto token = GetToken(); auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); @@ -53,7 +52,6 @@ public: { if( !m_active ) return; const auto thread = GetThreadHandle(); - m_thread = thread; Magic magic; auto token = GetToken(); auto& tail = token->get_tail_index(); @@ -80,6 +78,7 @@ public: if( GetProfiler().ConnectionId() != m_connectionId ) return; #endif Magic magic; + const auto thread = GetThreadHandle(); auto token = GetToken(); auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); @@ -91,7 +90,7 @@ public: MemWrite( &item->zoneEnd.time, Profiler::GetTime( cpu ) ); MemWrite( &item->zoneEnd.cpu, cpu ); #endif - MemWrite( &item->zoneEnd.thread, m_thread ); + MemWrite( &item->zoneEnd.thread, thread ); tail.store( magic + 1, std::memory_order_release ); } @@ -102,6 +101,7 @@ public: if( GetProfiler().ConnectionId() != m_connectionId ) return; #endif Magic magic; + const auto thread = GetThreadHandle(); auto token = GetToken(); auto ptr = (char*)tracy_malloc( size+1 ); memcpy( ptr, txt, size ); @@ -109,7 +109,7 @@ public: auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); MemWrite( &item->hdr.type, QueueType::ZoneText ); - MemWrite( &item->zoneText.thread, m_thread ); + MemWrite( &item->zoneText.thread, thread ); MemWrite( &item->zoneText.text, (uint64_t)ptr ); tail.store( magic + 1, std::memory_order_release ); } @@ -121,6 +121,7 @@ public: if( GetProfiler().ConnectionId() != m_connectionId ) return; #endif Magic magic; + const 
auto thread = GetThreadHandle(); auto token = GetToken(); auto ptr = (char*)tracy_malloc( size+1 ); memcpy( ptr, txt, size ); @@ -128,13 +129,12 @@ public: auto& tail = token->get_tail_index(); auto item = token->enqueue_begin( magic ); MemWrite( &item->hdr.type, QueueType::ZoneName ); - MemWrite( &item->zoneText.thread, m_thread ); + MemWrite( &item->zoneText.thread, thread ); MemWrite( &item->zoneText.text, (uint64_t)ptr ); tail.store( magic + 1, std::memory_order_release ); } private: - uint64_t m_thread; const bool m_active; #ifdef TRACY_ON_DEMAND diff --git a/common/TracySystem.hpp b/common/TracySystem.hpp index 26e57950..db8c61a6 100644 --- a/common/TracySystem.hpp +++ b/common/TracySystem.hpp @@ -24,7 +24,7 @@ namespace tracy { #ifdef TRACY_ENABLE -static inline uint64_t GetThreadHandle() +static inline uint64_t GetThreadHandleImpl() { #ifdef _WIN32 static_assert( sizeof( decltype( GetCurrentThreadId() ) ) <= sizeof( uint64_t ), "Thread handle too big to fit in protocol" ); @@ -39,6 +39,12 @@ static inline uint64_t GetThreadHandle() #endif } +const thread_local auto s_threadHandle = GetThreadHandleImpl(); + +static inline uint64_t GetThreadHandle() +{ + return s_threadHandle; +} #endif void SetThreadName( std::thread& thread, const char* name );