From 561d2dc360254b30a7b7a86a2e24ee4436eb01df Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Sat, 14 Jul 2018 00:39:01 +0200 Subject: [PATCH] Use the fastest mutex available. The selection is based on the following test results: MSVC: === Lock test, 6 threads === => NonRecursiveBenaphore No contention: 11.641 ns/iter 2 thread contention: 141.559 ns/iter 3 thread contention: 242.733 ns/iter 4 thread contention: 409.807 ns/iter 5 thread contention: 561.544 ns/iter 6 thread contention: 785.845 ns/iter => std::mutex No contention: 19.190 ns/iter 2 thread contention: 39.305 ns/iter 3 thread contention: 58.999 ns/iter 4 thread contention: 59.532 ns/iter 5 thread contention: 103.539 ns/iter 6 thread contention: 110.314 ns/iter => std::shared_timed_mutex No contention: 45.487 ns/iter 2 thread contention: 96.351 ns/iter 3 thread contention: 142.871 ns/iter 4 thread contention: 184.999 ns/iter 5 thread contention: 336.608 ns/iter 6 thread contention: 542.551 ns/iter => std::shared_mutex No contention: 10.861 ns/iter 2 thread contention: 17.495 ns/iter 3 thread contention: 31.126 ns/iter 4 thread contention: 40.468 ns/iter 5 thread contention: 15.677 ns/iter 6 thread contention: 64.505 ns/iter Cygwin (clang): === Lock test, 6 threads === => NonRecursiveBenaphore No contention: 11.536 ns/iter 2 thread contention: 121.082 ns/iter 3 thread contention: 396.430 ns/iter 4 thread contention: 672.555 ns/iter 5 thread contention: 1327.761 ns/iter 6 thread contention: 14151.955 ns/iter => std::mutex No contention: 62.583 ns/iter 2 thread contention: 3990.464 ns/iter 3 thread contention: 7161.189 ns/iter 4 thread contention: 9870.820 ns/iter 5 thread contention: 12355.178 ns/iter 6 thread contention: 14694.903 ns/iter => std::shared_timed_mutex No contention: 91.687 ns/iter 2 thread contention: 1115.037 ns/iter 3 thread contention: 4183.792 ns/iter 4 thread contention: 15283.491 ns/iter 5 thread contention: 27812.477 ns/iter 6 thread contention: 35028.140 ns/iter => 
std::shared_mutex No contention: 91.764 ns/iter 2 thread contention: 1051.826 ns/iter 3 thread contention: 5574.720 ns/iter 4 thread contention: 15721.416 ns/iter 5 thread contention: 27721.487 ns/iter 6 thread contention: 35420.404 ns/iter Linux (x64): === Lock test, 6 threads === => NonRecursiveBenaphore No contention: 13.487 ns/iter 2 thread contention: 210.317 ns/iter 3 thread contention: 430.855 ns/iter 4 thread contention: 510.533 ns/iter 5 thread contention: 1003.609 ns/iter 6 thread contention: 1787.683 ns/iter => std::mutex No contention: 12.403 ns/iter 2 thread contention: 157.122 ns/iter 3 thread contention: 186.791 ns/iter 4 thread contention: 265.073 ns/iter 5 thread contention: 283.778 ns/iter 6 thread contention: 270.687 ns/iter => std::shared_timed_mutex No contention: 21.509 ns/iter 2 thread contention: 150.179 ns/iter 3 thread contention: 256.574 ns/iter 4 thread contention: 415.351 ns/iter 5 thread contention: 611.532 ns/iter 6 thread contention: 944.695 ns/iter => std::shared_mutex No contention: 20.805 ns/iter 2 thread contention: 157.034 ns/iter 3 thread contention: 244.025 ns/iter 4 thread contention: 406.269 ns/iter 5 thread contention: 387.985 ns/iter 6 thread contention: 468.550 ns/iter Linux (arm64): === Lock test, 6 threads === => NonRecursiveBenaphore No contention: 20.891 ns/iter 2 thread contention: 211.037 ns/iter 3 thread contention: 409.962 ns/iter 4 thread contention: 657.441 ns/iter 5 thread contention: 828.405 ns/iter 6 thread contention: 1131.827 ns/iter => std::mutex No contention: 50.884 ns/iter 2 thread contention: 103.620 ns/iter 3 thread contention: 332.429 ns/iter 4 thread contention: 620.802 ns/iter 5 thread contention: 783.943 ns/iter 6 thread contention: 834.002 ns/iter => std::shared_timed_mutex No contention: 64.948 ns/iter 2 thread contention: 173.191 ns/iter 3 thread contention: 490.352 ns/iter 4 thread contention: 660.668 ns/iter 5 thread contention: 1014.546 ns/iter 6 thread contention: 1451.553 ns/iter => 
std::shared_mutex No contention: 64.521 ns/iter 2 thread contention: 195.222 ns/iter 3 thread contention: 490.819 ns/iter 4 thread contention: 654.786 ns/iter 5 thread contention: 955.759 ns/iter 6 thread contention: 1282.544 ns/iter --- client/TracyProfiler.cpp | 4 +-- client/TracyProfiler.hpp | 6 ++-- common/TracyMutex.hpp | 33 ++++++++++++++++++++ server/TracyView.cpp | 7 +++-- server/TracyView.hpp | 1 - server/TracyWorker.cpp | 10 +++--- server/TracyWorker.hpp | 10 +++--- standalone/build/win32/Tracy.vcxproj | 1 + standalone/build/win32/Tracy.vcxproj.filters | 3 ++ 9 files changed, 56 insertions(+), 19 deletions(-) create mode 100644 common/TracyMutex.hpp diff --git a/client/TracyProfiler.cpp b/client/TracyProfiler.cpp index 08167163..4b0357e6 100644 --- a/client/TracyProfiler.cpp +++ b/client/TracyProfiler.cpp @@ -472,7 +472,7 @@ void Profiler::ClearQueues( moodycamel::ConsumerToken& token ) for( size_t i=0; i lock( m_serialLock ); + std::lock_guard<TracyMutex> lock( m_serialLock ); for( auto& v : m_serialDequeue ) FreeAssociatedMemory( v ); m_serialDequeue.clear(); @@ -536,7 +536,7 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) Profiler::DequeueStatus Profiler::DequeueSerial() { { - std::lock_guard<NonRecursiveBenaphore> lock( m_serialLock ); + std::lock_guard<TracyMutex> lock( m_serialLock ); m_serialQueue.swap( m_serialDequeue ); } diff --git a/client/TracyProfiler.hpp b/client/TracyProfiler.hpp index ab75b856..15d6b93b 100644 --- a/client/TracyProfiler.hpp +++ b/client/TracyProfiler.hpp @@ -10,10 +10,10 @@ #include "TracyCallstack.hpp" #include "TracyFastVector.hpp" #include "../common/tracy_lz4.hpp" -#include "../common/tracy_benaphore.h" #include "../common/TracyQueue.hpp" #include "../common/TracyAlign.hpp" #include "../common/TracyAlloc.hpp" +#include "../common/TracyMutex.hpp" #include "../common/TracySystem.hpp" #if defined _MSC_VER || defined __CYGWIN__ @@ -412,13 +412,13 @@ private: char* m_lz4Buf; FastVector m_serialQueue, m_serialDequeue; - 
NonRecursiveBenaphore m_serialLock; + TracyMutex m_serialLock; #ifdef TRACY_ON_DEMAND std::atomic m_isConnected; std::atomic m_frameCount; - NonRecursiveBenaphore m_deferredLock; + TracyMutex m_deferredLock; FastVector m_deferredQueue; #endif }; diff --git a/common/TracyMutex.hpp b/common/TracyMutex.hpp new file mode 100644 index 00000000..e5899048 --- /dev/null +++ b/common/TracyMutex.hpp @@ -0,0 +1,33 @@ +#ifndef __TRACYMUTEX_HPP__ +#define __TRACYMUTEX_HPP__ + +#if defined _MSC_VER + +# include <shared_mutex> + +namespace tracy +{ +using TracyMutex = std::shared_mutex; +} + +#elif defined __CYGWIN__ + +#include "tracy_benaphore.h" + +namespace tracy +{ +using TracyMutex = NonRecursiveBenaphore; +} + +#else + +#include <mutex> + +namespace tracy +{ +using TracyMutex = std::mutex; +} + +#endif + +#endif diff --git a/server/TracyView.cpp b/server/TracyView.cpp index ea3b086b..8bd5a927 100644 --- a/server/TracyView.cpp +++ b/server/TracyView.cpp @@ -9,6 +9,7 @@ #include #include +#include "../common/TracyMutex.hpp" #include "../common/TracySystem.hpp" #include "tracy_pdqsort.h" #include "TracyBadVersion.hpp" @@ -364,7 +365,7 @@ bool View::DrawImpl() keepOpenPtr = &keepOpen; } - std::lock_guard<NonRecursiveBenaphore> lock( m_worker.GetDataLock() ); + std::lock_guard<TracyMutex> lock( m_worker.GetDataLock() ); char tmp[2048]; sprintf( tmp, "%s###Profiler", m_worker.GetCaptureName().c_str() ); ImGui::SetNextWindowSize( ImVec2( 1550, 800 ), ImGuiCond_FirstUseEver ); @@ -437,7 +438,7 @@ void View::DrawConnection() const auto cs = ty * 0.9f; { - std::lock_guard<NonRecursiveBenaphore> lock( m_worker.GetMbpsDataLock() ); + std::lock_guard<TracyMutex> lock( m_worker.GetMbpsDataLock() ); ImGui::Begin( m_worker.GetAddr().c_str(), nullptr, ImGuiWindowFlags_AlwaysAutoResize ); const auto& mbpsVector = m_worker.GetMbpsData(); const auto mbps = mbpsVector.back(); @@ -461,7 +462,7 @@ void View::DrawConnection() const auto wpos = ImGui::GetWindowPos() + ImGui::GetWindowContentRegionMin(); ImGui::GetWindowDrawList()->AddCircleFilled( wpos + ImVec2( 1 + cs * 0.5, 3 + ty * 0.5 
), cs * 0.5, m_worker.IsConnected() ? 0xFF2222CC : 0xFF444444, 10 ); - std::lock_guard<NonRecursiveBenaphore> lock( m_worker.GetDataLock() ); + std::lock_guard<TracyMutex> lock( m_worker.GetDataLock() ); { const auto sz = m_worker.GetFrameCount(); if( sz > 1 ) diff --git a/server/TracyView.hpp b/server/TracyView.hpp index 9a873145..b08f9e89 100644 --- a/server/TracyView.hpp +++ b/server/TracyView.hpp @@ -8,7 +8,6 @@ #include #include -#include "../common/tracy_benaphore.h" #include "TracyVector.hpp" #include "TracyWorker.hpp" #include "tracy_flat_hash_map.hpp" diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index c0007220..b2414272 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -501,7 +501,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask ) std::sort( std::execution::par_unseq, zones.begin(), zones.end(), []( const auto& lhs, const auto& rhs ) { return lhs.zone->start < rhs.zone->start; } ); #endif } - std::lock_guard<NonRecursiveBenaphore> lock( m_data.lock ); + std::lock_guard<TracyMutex> lock( m_data.lock ); m_data.sourceLocationZonesReady = true; } ); #endif @@ -1031,7 +1031,7 @@ void Worker::Exec() const char* end = buf + sz; { - std::lock_guard<NonRecursiveBenaphore> lock( m_data.lock ); + std::lock_guard<TracyMutex> lock( m_data.lock ); while( ptr < end ) { auto ev = (const QueueItem*)ptr; @@ -1049,7 +1049,7 @@ void Worker::Exec() enum { MbpsUpdateTime = 200 }; if( td > MbpsUpdateTime ) { - std::lock_guard<NonRecursiveBenaphore> lock( m_mbpsData.lock ); + std::lock_guard<TracyMutex> lock( m_mbpsData.lock ); m_mbpsData.mbps.erase( m_mbpsData.mbps.begin() ); m_mbpsData.mbps.emplace_back( bytes / ( td * 125.f ) ); m_mbpsData.compRatio = float( bytes ) / decBytes; @@ -2331,7 +2331,7 @@ void Worker::ReconstructMemAllocPlot() PlotData* plot; { - std::lock_guard<NonRecursiveBenaphore> lock( m_data.lock ); + std::lock_guard<TracyMutex> lock( m_data.lock ); plot = m_slab.AllocInit(); } @@ -2413,7 +2413,7 @@ void Worker::ReconstructMemAllocPlot() plot->min = 0; plot->max = max; - std::lock_guard<NonRecursiveBenaphore> lock( m_data.lock ); + std::lock_guard<TracyMutex> lock( m_data.lock ); m_data.plots.insert( m_data.plots.begin(), 
plot ); m_data.memory.plot = plot; } diff --git a/server/TracyWorker.hpp b/server/TracyWorker.hpp index 6f3bda87..ae245816 100644 --- a/server/TracyWorker.hpp +++ b/server/TracyWorker.hpp @@ -9,9 +9,9 @@ #include #include -#include "../common/tracy_benaphore.h" #include "../common/tracy_lz4.hpp" #include "../common/TracyForceInline.hpp" +#include "../common/TracyMutex.hpp" #include "../common/TracyQueue.hpp" #include "../common/TracySocket.hpp" #include "tracy_flat_hash_map.hpp" @@ -75,7 +75,7 @@ class Worker { DataBlock() : zonesCnt( 0 ), lastTime( 0 ), frameOffset( 0 ), threadLast( std::numeric_limits::max(), 0 ) {} - NonRecursiveBenaphore lock; + TracyMutex lock; Vector frames; Vector gpuData; Vector messages; @@ -115,7 +115,7 @@ class Worker { MbpsBlock() : mbps( 64 ), compRatio( 1.0 ) {} - NonRecursiveBenaphore lock; + TracyMutex lock; std::vector mbps; float compRatio; }; @@ -146,7 +146,7 @@ public: int64_t GetDelay() const { return m_delay; } int64_t GetResolution() const { return m_resolution; } - NonRecursiveBenaphore& GetDataLock() { return m_data.lock; } + TracyMutex& GetDataLock() { return m_data.lock; } size_t GetFrameCount() const { return m_data.frames.size(); } int64_t GetLastTime() const { return m_data.lastTime; } uint64_t GetZoneCount() const { return m_data.zonesCnt; } @@ -202,7 +202,7 @@ public: } tracy_force_inline uint64_t DecompressThread( uint16_t thread ) const { assert( thread < m_data.threadExpand.size() ); return m_data.threadExpand[thread]; } - NonRecursiveBenaphore& GetMbpsDataLock() { return m_mbpsData.lock; } + TracyMutex& GetMbpsDataLock() { return m_mbpsData.lock; } const std::vector& GetMbpsData() const { return m_mbpsData.mbps; } float GetCompRatio() const { return m_mbpsData.compRatio; } diff --git a/standalone/build/win32/Tracy.vcxproj b/standalone/build/win32/Tracy.vcxproj index 51a7f317..457921cc 100644 --- a/standalone/build/win32/Tracy.vcxproj +++ b/standalone/build/win32/Tracy.vcxproj @@ -112,6 +112,7 @@ + diff --git 
a/standalone/build/win32/Tracy.vcxproj.filters b/standalone/build/win32/Tracy.vcxproj.filters index 051bdd51..cac58a1c 100644 --- a/standalone/build/win32/Tracy.vcxproj.filters +++ b/standalone/build/win32/Tracy.vcxproj.filters @@ -179,6 +179,9 @@ server + + common +