mirror of
https://github.com/wolfpld/tracy
synced 2025-04-29 20:33:52 +00:00
Use the fastest mutex available.
The selection is based on the following test results:

MSVC:
=== Lock test, 6 threads ===
=> NonRecursiveBenaphore
No contention: 11.641 ns/iter
2 thread contention: 141.559 ns/iter
3 thread contention: 242.733 ns/iter
4 thread contention: 409.807 ns/iter
5 thread contention: 561.544 ns/iter
6 thread contention: 785.845 ns/iter
=> std::mutex
No contention: 19.190 ns/iter
2 thread contention: 39.305 ns/iter
3 thread contention: 58.999 ns/iter
4 thread contention: 59.532 ns/iter
5 thread contention: 103.539 ns/iter
6 thread contention: 110.314 ns/iter
=> std::shared_timed_mutex
No contention: 45.487 ns/iter
2 thread contention: 96.351 ns/iter
3 thread contention: 142.871 ns/iter
4 thread contention: 184.999 ns/iter
5 thread contention: 336.608 ns/iter
6 thread contention: 542.551 ns/iter
=> std::shared_mutex
No contention: 10.861 ns/iter
2 thread contention: 17.495 ns/iter
3 thread contention: 31.126 ns/iter
4 thread contention: 40.468 ns/iter
5 thread contention: 15.677 ns/iter
6 thread contention: 64.505 ns/iter

Cygwin (clang):
=== Lock test, 6 threads ===
=> NonRecursiveBenaphore
No contention: 11.536 ns/iter
2 thread contention: 121.082 ns/iter
3 thread contention: 396.430 ns/iter
4 thread contention: 672.555 ns/iter
5 thread contention: 1327.761 ns/iter
6 thread contention: 14151.955 ns/iter
=> std::mutex
No contention: 62.583 ns/iter
2 thread contention: 3990.464 ns/iter
3 thread contention: 7161.189 ns/iter
4 thread contention: 9870.820 ns/iter
5 thread contention: 12355.178 ns/iter
6 thread contention: 14694.903 ns/iter
=> std::shared_timed_mutex
No contention: 91.687 ns/iter
2 thread contention: 1115.037 ns/iter
3 thread contention: 4183.792 ns/iter
4 thread contention: 15283.491 ns/iter
5 thread contention: 27812.477 ns/iter
6 thread contention: 35028.140 ns/iter
=> std::shared_mutex
No contention: 91.764 ns/iter
2 thread contention: 1051.826 ns/iter
3 thread contention: 5574.720 ns/iter
4 thread contention: 15721.416 ns/iter
5 thread contention: 27721.487 ns/iter
6 thread contention: 35420.404 ns/iter

Linux (x64):
=== Lock test, 6 threads ===
=> NonRecursiveBenaphore
No contention: 13.487 ns/iter
2 thread contention: 210.317 ns/iter
3 thread contention: 430.855 ns/iter
4 thread contention: 510.533 ns/iter
5 thread contention: 1003.609 ns/iter
6 thread contention: 1787.683 ns/iter
=> std::mutex
No contention: 12.403 ns/iter
2 thread contention: 157.122 ns/iter
3 thread contention: 186.791 ns/iter
4 thread contention: 265.073 ns/iter
5 thread contention: 283.778 ns/iter
6 thread contention: 270.687 ns/iter
=> std::shared_timed_mutex
No contention: 21.509 ns/iter
2 thread contention: 150.179 ns/iter
3 thread contention: 256.574 ns/iter
4 thread contention: 415.351 ns/iter
5 thread contention: 611.532 ns/iter
6 thread contention: 944.695 ns/iter
=> std::shared_mutex
No contention: 20.805 ns/iter
2 thread contention: 157.034 ns/iter
3 thread contention: 244.025 ns/iter
4 thread contention: 406.269 ns/iter
5 thread contention: 387.985 ns/iter
6 thread contention: 468.550 ns/iter

Linux (arm64):
=== Lock test, 6 threads ===
=> NonRecursiveBenaphore
No contention: 20.891 ns/iter
2 thread contention: 211.037 ns/iter
3 thread contention: 409.962 ns/iter
4 thread contention: 657.441 ns/iter
5 thread contention: 828.405 ns/iter
6 thread contention: 1131.827 ns/iter
=> std::mutex
No contention: 50.884 ns/iter
2 thread contention: 103.620 ns/iter
3 thread contention: 332.429 ns/iter
4 thread contention: 620.802 ns/iter
5 thread contention: 783.943 ns/iter
6 thread contention: 834.002 ns/iter
=> std::shared_timed_mutex
No contention: 64.948 ns/iter
2 thread contention: 173.191 ns/iter
3 thread contention: 490.352 ns/iter
4 thread contention: 660.668 ns/iter
5 thread contention: 1014.546 ns/iter
6 thread contention: 1451.553 ns/iter
=> std::shared_mutex
No contention: 64.521 ns/iter
2 thread contention: 195.222 ns/iter
3 thread contention: 490.819 ns/iter
4 thread contention: 654.786 ns/iter
5 thread contention: 955.759 ns/iter
6 thread contention: 1282.544 ns/iter
This commit is contained in:
parent
a26ab263dd
commit
561d2dc360
@ -472,7 +472,7 @@ void Profiler::ClearQueues( moodycamel::ConsumerToken& token )
|
|||||||
for( size_t i=0; i<sz; i++ ) FreeAssociatedMemory( m_itemBuf[i] );
|
for( size_t i=0; i<sz; i++ ) FreeAssociatedMemory( m_itemBuf[i] );
|
||||||
}
|
}
|
||||||
|
|
||||||
std::lock_guard<NonRecursiveBenaphore> lock( m_serialLock );
|
std::lock_guard<TracyMutex> lock( m_serialLock );
|
||||||
|
|
||||||
for( auto& v : m_serialDequeue ) FreeAssociatedMemory( v );
|
for( auto& v : m_serialDequeue ) FreeAssociatedMemory( v );
|
||||||
m_serialDequeue.clear();
|
m_serialDequeue.clear();
|
||||||
@ -536,7 +536,7 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
|
|||||||
Profiler::DequeueStatus Profiler::DequeueSerial()
|
Profiler::DequeueStatus Profiler::DequeueSerial()
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
std::lock_guard<NonRecursiveBenaphore> lock( m_serialLock );
|
std::lock_guard<TracyMutex> lock( m_serialLock );
|
||||||
m_serialQueue.swap( m_serialDequeue );
|
m_serialQueue.swap( m_serialDequeue );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -10,10 +10,10 @@
|
|||||||
#include "TracyCallstack.hpp"
|
#include "TracyCallstack.hpp"
|
||||||
#include "TracyFastVector.hpp"
|
#include "TracyFastVector.hpp"
|
||||||
#include "../common/tracy_lz4.hpp"
|
#include "../common/tracy_lz4.hpp"
|
||||||
#include "../common/tracy_benaphore.h"
|
|
||||||
#include "../common/TracyQueue.hpp"
|
#include "../common/TracyQueue.hpp"
|
||||||
#include "../common/TracyAlign.hpp"
|
#include "../common/TracyAlign.hpp"
|
||||||
#include "../common/TracyAlloc.hpp"
|
#include "../common/TracyAlloc.hpp"
|
||||||
|
#include "../common/TracyMutex.hpp"
|
||||||
#include "../common/TracySystem.hpp"
|
#include "../common/TracySystem.hpp"
|
||||||
|
|
||||||
#if defined _MSC_VER || defined __CYGWIN__
|
#if defined _MSC_VER || defined __CYGWIN__
|
||||||
@ -412,13 +412,13 @@ private:
|
|||||||
char* m_lz4Buf;
|
char* m_lz4Buf;
|
||||||
|
|
||||||
FastVector<QueueItem> m_serialQueue, m_serialDequeue;
|
FastVector<QueueItem> m_serialQueue, m_serialDequeue;
|
||||||
NonRecursiveBenaphore m_serialLock;
|
TracyMutex m_serialLock;
|
||||||
|
|
||||||
#ifdef TRACY_ON_DEMAND
|
#ifdef TRACY_ON_DEMAND
|
||||||
std::atomic<bool> m_isConnected;
|
std::atomic<bool> m_isConnected;
|
||||||
std::atomic<uint64_t> m_frameCount;
|
std::atomic<uint64_t> m_frameCount;
|
||||||
|
|
||||||
NonRecursiveBenaphore m_deferredLock;
|
TracyMutex m_deferredLock;
|
||||||
FastVector<QueueItem> m_deferredQueue;
|
FastVector<QueueItem> m_deferredQueue;
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
33
common/TracyMutex.hpp
Normal file
33
common/TracyMutex.hpp
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
#ifndef __TRACYMUTEX_HPP__
|
||||||
|
#define __TRACYMUTEX_HPP__
|
||||||
|
|
||||||
|
#if defined _MSC_VER
|
||||||
|
|
||||||
|
# include <shared_mutex>
|
||||||
|
|
||||||
|
namespace tracy
|
||||||
|
{
|
||||||
|
using TracyMutex = std::shared_mutex;
|
||||||
|
}
|
||||||
|
|
||||||
|
#elif defined __CYGWIN__
|
||||||
|
|
||||||
|
#include "tracy_benaphore.h"
|
||||||
|
|
||||||
|
namespace tracy
|
||||||
|
{
|
||||||
|
using TracyMutex = NonRecursiveBenaphore;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#include <mutex>
|
||||||
|
|
||||||
|
namespace tracy
|
||||||
|
{
|
||||||
|
using TracyMutex = std::mutex;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
@ -9,6 +9,7 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
|
#include "../common/TracyMutex.hpp"
|
||||||
#include "../common/TracySystem.hpp"
|
#include "../common/TracySystem.hpp"
|
||||||
#include "tracy_pdqsort.h"
|
#include "tracy_pdqsort.h"
|
||||||
#include "TracyBadVersion.hpp"
|
#include "TracyBadVersion.hpp"
|
||||||
@ -364,7 +365,7 @@ bool View::DrawImpl()
|
|||||||
keepOpenPtr = &keepOpen;
|
keepOpenPtr = &keepOpen;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::lock_guard<NonRecursiveBenaphore> lock( m_worker.GetDataLock() );
|
std::lock_guard<TracyMutex> lock( m_worker.GetDataLock() );
|
||||||
char tmp[2048];
|
char tmp[2048];
|
||||||
sprintf( tmp, "%s###Profiler", m_worker.GetCaptureName().c_str() );
|
sprintf( tmp, "%s###Profiler", m_worker.GetCaptureName().c_str() );
|
||||||
ImGui::SetNextWindowSize( ImVec2( 1550, 800 ), ImGuiCond_FirstUseEver );
|
ImGui::SetNextWindowSize( ImVec2( 1550, 800 ), ImGuiCond_FirstUseEver );
|
||||||
@ -437,7 +438,7 @@ void View::DrawConnection()
|
|||||||
const auto cs = ty * 0.9f;
|
const auto cs = ty * 0.9f;
|
||||||
|
|
||||||
{
|
{
|
||||||
std::lock_guard<NonRecursiveBenaphore> lock( m_worker.GetMbpsDataLock() );
|
std::lock_guard<TracyMutex> lock( m_worker.GetMbpsDataLock() );
|
||||||
ImGui::Begin( m_worker.GetAddr().c_str(), nullptr, ImGuiWindowFlags_AlwaysAutoResize );
|
ImGui::Begin( m_worker.GetAddr().c_str(), nullptr, ImGuiWindowFlags_AlwaysAutoResize );
|
||||||
const auto& mbpsVector = m_worker.GetMbpsData();
|
const auto& mbpsVector = m_worker.GetMbpsData();
|
||||||
const auto mbps = mbpsVector.back();
|
const auto mbps = mbpsVector.back();
|
||||||
@ -461,7 +462,7 @@ void View::DrawConnection()
|
|||||||
const auto wpos = ImGui::GetWindowPos() + ImGui::GetWindowContentRegionMin();
|
const auto wpos = ImGui::GetWindowPos() + ImGui::GetWindowContentRegionMin();
|
||||||
ImGui::GetWindowDrawList()->AddCircleFilled( wpos + ImVec2( 1 + cs * 0.5, 3 + ty * 0.5 ), cs * 0.5, m_worker.IsConnected() ? 0xFF2222CC : 0xFF444444, 10 );
|
ImGui::GetWindowDrawList()->AddCircleFilled( wpos + ImVec2( 1 + cs * 0.5, 3 + ty * 0.5 ), cs * 0.5, m_worker.IsConnected() ? 0xFF2222CC : 0xFF444444, 10 );
|
||||||
|
|
||||||
std::lock_guard<NonRecursiveBenaphore> lock( m_worker.GetDataLock() );
|
std::lock_guard<TracyMutex> lock( m_worker.GetDataLock() );
|
||||||
{
|
{
|
||||||
const auto sz = m_worker.GetFrameCount();
|
const auto sz = m_worker.GetFrameCount();
|
||||||
if( sz > 1 )
|
if( sz > 1 )
|
||||||
|
@ -8,7 +8,6 @@
|
|||||||
#include <thread>
|
#include <thread>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "../common/tracy_benaphore.h"
|
|
||||||
#include "TracyVector.hpp"
|
#include "TracyVector.hpp"
|
||||||
#include "TracyWorker.hpp"
|
#include "TracyWorker.hpp"
|
||||||
#include "tracy_flat_hash_map.hpp"
|
#include "tracy_flat_hash_map.hpp"
|
||||||
|
@ -501,7 +501,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask )
|
|||||||
std::sort( std::execution::par_unseq, zones.begin(), zones.end(), []( const auto& lhs, const auto& rhs ) { return lhs.zone->start < rhs.zone->start; } );
|
std::sort( std::execution::par_unseq, zones.begin(), zones.end(), []( const auto& lhs, const auto& rhs ) { return lhs.zone->start < rhs.zone->start; } );
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
std::lock_guard<NonRecursiveBenaphore> lock( m_data.lock );
|
std::lock_guard<TracyMutex> lock( m_data.lock );
|
||||||
m_data.sourceLocationZonesReady = true;
|
m_data.sourceLocationZonesReady = true;
|
||||||
} );
|
} );
|
||||||
#endif
|
#endif
|
||||||
@ -1031,7 +1031,7 @@ void Worker::Exec()
|
|||||||
const char* end = buf + sz;
|
const char* end = buf + sz;
|
||||||
|
|
||||||
{
|
{
|
||||||
std::lock_guard<NonRecursiveBenaphore> lock( m_data.lock );
|
std::lock_guard<TracyMutex> lock( m_data.lock );
|
||||||
while( ptr < end )
|
while( ptr < end )
|
||||||
{
|
{
|
||||||
auto ev = (const QueueItem*)ptr;
|
auto ev = (const QueueItem*)ptr;
|
||||||
@ -1049,7 +1049,7 @@ void Worker::Exec()
|
|||||||
enum { MbpsUpdateTime = 200 };
|
enum { MbpsUpdateTime = 200 };
|
||||||
if( td > MbpsUpdateTime )
|
if( td > MbpsUpdateTime )
|
||||||
{
|
{
|
||||||
std::lock_guard<NonRecursiveBenaphore> lock( m_mbpsData.lock );
|
std::lock_guard<TracyMutex> lock( m_mbpsData.lock );
|
||||||
m_mbpsData.mbps.erase( m_mbpsData.mbps.begin() );
|
m_mbpsData.mbps.erase( m_mbpsData.mbps.begin() );
|
||||||
m_mbpsData.mbps.emplace_back( bytes / ( td * 125.f ) );
|
m_mbpsData.mbps.emplace_back( bytes / ( td * 125.f ) );
|
||||||
m_mbpsData.compRatio = float( bytes ) / decBytes;
|
m_mbpsData.compRatio = float( bytes ) / decBytes;
|
||||||
@ -2331,7 +2331,7 @@ void Worker::ReconstructMemAllocPlot()
|
|||||||
|
|
||||||
PlotData* plot;
|
PlotData* plot;
|
||||||
{
|
{
|
||||||
std::lock_guard<NonRecursiveBenaphore> lock( m_data.lock );
|
std::lock_guard<TracyMutex> lock( m_data.lock );
|
||||||
plot = m_slab.AllocInit<PlotData>();
|
plot = m_slab.AllocInit<PlotData>();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2413,7 +2413,7 @@ void Worker::ReconstructMemAllocPlot()
|
|||||||
plot->min = 0;
|
plot->min = 0;
|
||||||
plot->max = max;
|
plot->max = max;
|
||||||
|
|
||||||
std::lock_guard<NonRecursiveBenaphore> lock( m_data.lock );
|
std::lock_guard<TracyMutex> lock( m_data.lock );
|
||||||
m_data.plots.insert( m_data.plots.begin(), plot );
|
m_data.plots.insert( m_data.plots.begin(), plot );
|
||||||
m_data.memory.plot = plot;
|
m_data.memory.plot = plot;
|
||||||
}
|
}
|
||||||
|
@ -9,9 +9,9 @@
|
|||||||
#include <thread>
|
#include <thread>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "../common/tracy_benaphore.h"
|
|
||||||
#include "../common/tracy_lz4.hpp"
|
#include "../common/tracy_lz4.hpp"
|
||||||
#include "../common/TracyForceInline.hpp"
|
#include "../common/TracyForceInline.hpp"
|
||||||
|
#include "../common/TracyMutex.hpp"
|
||||||
#include "../common/TracyQueue.hpp"
|
#include "../common/TracyQueue.hpp"
|
||||||
#include "../common/TracySocket.hpp"
|
#include "../common/TracySocket.hpp"
|
||||||
#include "tracy_flat_hash_map.hpp"
|
#include "tracy_flat_hash_map.hpp"
|
||||||
@ -75,7 +75,7 @@ class Worker
|
|||||||
{
|
{
|
||||||
DataBlock() : zonesCnt( 0 ), lastTime( 0 ), frameOffset( 0 ), threadLast( std::numeric_limits<uint64_t>::max(), 0 ) {}
|
DataBlock() : zonesCnt( 0 ), lastTime( 0 ), frameOffset( 0 ), threadLast( std::numeric_limits<uint64_t>::max(), 0 ) {}
|
||||||
|
|
||||||
NonRecursiveBenaphore lock;
|
TracyMutex lock;
|
||||||
Vector<int64_t> frames;
|
Vector<int64_t> frames;
|
||||||
Vector<GpuCtxData*> gpuData;
|
Vector<GpuCtxData*> gpuData;
|
||||||
Vector<MessageData*> messages;
|
Vector<MessageData*> messages;
|
||||||
@ -115,7 +115,7 @@ class Worker
|
|||||||
{
|
{
|
||||||
MbpsBlock() : mbps( 64 ), compRatio( 1.0 ) {}
|
MbpsBlock() : mbps( 64 ), compRatio( 1.0 ) {}
|
||||||
|
|
||||||
NonRecursiveBenaphore lock;
|
TracyMutex lock;
|
||||||
std::vector<float> mbps;
|
std::vector<float> mbps;
|
||||||
float compRatio;
|
float compRatio;
|
||||||
};
|
};
|
||||||
@ -146,7 +146,7 @@ public:
|
|||||||
int64_t GetDelay() const { return m_delay; }
|
int64_t GetDelay() const { return m_delay; }
|
||||||
int64_t GetResolution() const { return m_resolution; }
|
int64_t GetResolution() const { return m_resolution; }
|
||||||
|
|
||||||
NonRecursiveBenaphore& GetDataLock() { return m_data.lock; }
|
TracyMutex& GetDataLock() { return m_data.lock; }
|
||||||
size_t GetFrameCount() const { return m_data.frames.size(); }
|
size_t GetFrameCount() const { return m_data.frames.size(); }
|
||||||
int64_t GetLastTime() const { return m_data.lastTime; }
|
int64_t GetLastTime() const { return m_data.lastTime; }
|
||||||
uint64_t GetZoneCount() const { return m_data.zonesCnt; }
|
uint64_t GetZoneCount() const { return m_data.zonesCnt; }
|
||||||
@ -202,7 +202,7 @@ public:
|
|||||||
}
|
}
|
||||||
tracy_force_inline uint64_t DecompressThread( uint16_t thread ) const { assert( thread < m_data.threadExpand.size() ); return m_data.threadExpand[thread]; }
|
tracy_force_inline uint64_t DecompressThread( uint16_t thread ) const { assert( thread < m_data.threadExpand.size() ); return m_data.threadExpand[thread]; }
|
||||||
|
|
||||||
NonRecursiveBenaphore& GetMbpsDataLock() { return m_mbpsData.lock; }
|
TracyMutex& GetMbpsDataLock() { return m_mbpsData.lock; }
|
||||||
const std::vector<float>& GetMbpsData() const { return m_mbpsData.mbps; }
|
const std::vector<float>& GetMbpsData() const { return m_mbpsData.mbps; }
|
||||||
float GetCompRatio() const { return m_mbpsData.compRatio; }
|
float GetCompRatio() const { return m_mbpsData.compRatio; }
|
||||||
|
|
||||||
|
@ -112,6 +112,7 @@
|
|||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClInclude Include="..\..\..\common\TracyAlign.hpp" />
|
<ClInclude Include="..\..\..\common\TracyAlign.hpp" />
|
||||||
<ClInclude Include="..\..\..\common\TracyForceInline.hpp" />
|
<ClInclude Include="..\..\..\common\TracyForceInline.hpp" />
|
||||||
|
<ClInclude Include="..\..\..\common\TracyMutex.hpp" />
|
||||||
<ClInclude Include="..\..\..\common\TracyProtocol.hpp" />
|
<ClInclude Include="..\..\..\common\TracyProtocol.hpp" />
|
||||||
<ClInclude Include="..\..\..\common\TracyQueue.hpp" />
|
<ClInclude Include="..\..\..\common\TracyQueue.hpp" />
|
||||||
<ClInclude Include="..\..\..\common\TracySocket.hpp" />
|
<ClInclude Include="..\..\..\common\TracySocket.hpp" />
|
||||||
|
@ -179,6 +179,9 @@
|
|||||||
<ClInclude Include="..\..\..\server\TracyVarArray.hpp">
|
<ClInclude Include="..\..\..\server\TracyVarArray.hpp">
|
||||||
<Filter>server</Filter>
|
<Filter>server</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
|
<ClInclude Include="..\..\..\common\TracyMutex.hpp">
|
||||||
|
<Filter>common</Filter>
|
||||||
|
</ClInclude>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<Natvis Include="DebugVis.natvis" />
|
<Natvis Include="DebugVis.natvis" />
|
||||||
|
Loading…
x
Reference in New Issue
Block a user