mirror of
https://github.com/wolfpld/tracy
synced 2025-04-29 20:33:52 +00:00
CPU-side GPU event transfer.
This commit is contained in:
parent
b208df8829
commit
6fcdb924e8
@ -7,17 +7,76 @@
|
|||||||
|
|
||||||
#include "client/TracyProfiler.hpp"
|
#include "client/TracyProfiler.hpp"
|
||||||
|
|
||||||
|
// Opens a GPU zone on `ctx`: declares a function-static SourceLocation built from
// __FUNCTION__/__FILE__/__LINE__ (color 0) and a local `___tracy_gpu_zone` object obtained from
// tracy::detail::__GpuHelper. Both identifiers are fixed, so using the macro twice in the same
// scope redeclares them.
#define TracyGpuZone( ctx, name ) static const tracy::SourceLocation __tracy_gpu_source_location { __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; auto ___tracy_gpu_zone = tracy::detail::__GpuHelper( ctx, name, &__tracy_gpu_source_location );
|
||||||
|
// Same as TracyGpuZone, but stores `color` in the SourceLocation instead of 0.
#define TracyGpuZoneC( ctx, name, color ) static const tracy::SourceLocation __tracy_gpu_source_location { __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; auto ___tracy_gpu_zone = tracy::detail::__GpuHelper( ctx, name, &__tracy_gpu_source_location );
|
||||||
|
|
||||||
namespace tracy
|
namespace tracy
|
||||||
{
|
{
|
||||||
|
|
||||||
extern std::atomic<uint16_t> s_gpuCtxCounter;
|
extern std::atomic<uint16_t> s_gpuCtxCounter;
|
||||||
|
|
||||||
|
template<int Num> class GpuCtx;
|
||||||
|
|
||||||
|
template<int Num>
|
||||||
|
class __GpuCtxScope
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
tracy_force_inline __GpuCtxScope( GpuCtx<Num>& ctx, const char* name, const SourceLocation* srcloc )
|
||||||
|
: m_ctx( ctx )
|
||||||
|
{
|
||||||
|
glQueryCounter( m_ctx.NextQueryId(), GL_TIMESTAMP );
|
||||||
|
|
||||||
|
Magic magic;
|
||||||
|
auto& token = s_token.ptr;
|
||||||
|
auto& tail = token->get_tail_index();
|
||||||
|
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||||
|
item->hdr.type = QueueType::GpuZoneBegin;
|
||||||
|
item->gpuZoneBegin.cpuTime = Profiler::GetTime();
|
||||||
|
item->gpuZoneBegin.name = (uint64_t)name;
|
||||||
|
item->gpuZoneBegin.srcloc = (uint64_t)srcloc;
|
||||||
|
item->gpuZoneBegin.context = m_ctx.GetId();
|
||||||
|
tail.store( magic + 1, std::memory_order_release );
|
||||||
|
}
|
||||||
|
|
||||||
|
tracy_force_inline ~__GpuCtxScope()
|
||||||
|
{
|
||||||
|
glQueryCounter( m_ctx.NextQueryId(), GL_TIMESTAMP );
|
||||||
|
|
||||||
|
Magic magic;
|
||||||
|
auto& token = s_token.ptr;
|
||||||
|
auto& tail = token->get_tail_index();
|
||||||
|
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||||
|
item->hdr.type = QueueType::GpuZoneEnd;
|
||||||
|
item->gpuZoneEnd.cpuTime = Profiler::GetTime();
|
||||||
|
item->gpuZoneEnd.thread = GetThreadHandle();
|
||||||
|
item->gpuZoneEnd.context = m_ctx.GetId();
|
||||||
|
tail.store( magic + 1, std::memory_order_release );
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
GpuCtx<Num>& m_ctx;
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace detail
|
||||||
|
{
|
||||||
|
template<int Num>
|
||||||
|
static tracy_force_inline __GpuCtxScope<Num> __GpuHelper( GpuCtx<Num>* ctx, const char* name, const SourceLocation* srcloc )
|
||||||
|
{
|
||||||
|
return ctx->SpawnZone( name, srcloc );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template<int Num>
|
template<int Num>
|
||||||
class GpuCtx
|
class GpuCtx
|
||||||
{
|
{
|
||||||
|
friend class __GpuCtxScope<Num>;
|
||||||
|
friend __GpuCtxScope<Num> detail::__GpuHelper<Num>( GpuCtx<Num>* ctx, const char* name, const SourceLocation* srcloc );
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GpuCtx()
|
GpuCtx()
|
||||||
: m_context( s_gpuCtxCounter.fetch_add( 1, std::memory_order_relaxed ) )
|
: m_context( s_gpuCtxCounter.fetch_add( 1, std::memory_order_relaxed ) )
|
||||||
|
, m_head( 0 )
|
||||||
|
, m_tail( 0 )
|
||||||
{
|
{
|
||||||
glGenQueries( Num, m_query );
|
glGenQueries( Num, m_query );
|
||||||
|
|
||||||
@ -37,8 +96,28 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
// Creates the scope object for a new zone on this context.
tracy_force_inline __GpuCtxScope<Num> SpawnZone( const char* name, const SourceLocation* srcloc )
{
    GpuCtx<Num>& self = *this;
    return __GpuCtxScope<Num>( self, name, srcloc );
}
|
||||||
|
|
||||||
|
// Returns the query object at the current head position and advances the head,
// wrapping around after Num entries.
tracy_force_inline unsigned int NextQueryId()
{
    const unsigned int slot = m_head;
    m_head = ( slot + 1 ) % Num;
    return m_query[slot];
}
|
||||||
|
|
||||||
|
// Identifier of this context, taken from s_gpuCtxCounter in the constructor.
tracy_force_inline uint16_t GetId() const
{
    const uint16_t contextId = m_context;
    return contextId;
}
|
||||||
|
|
||||||
unsigned int m_query[Num];
|
unsigned int m_query[Num];
|
||||||
uint16_t m_context;
|
uint16_t m_context;
|
||||||
|
|
||||||
|
unsigned int m_head;
|
||||||
|
unsigned int m_tail;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -30,6 +30,8 @@ enum class QueueType : uint8_t
|
|||||||
Message,
|
Message,
|
||||||
MessageLiteral,
|
MessageLiteral,
|
||||||
GpuNewContext,
|
GpuNewContext,
|
||||||
|
GpuZoneBegin,
|
||||||
|
GpuZoneEnd,
|
||||||
NUM_TYPES
|
NUM_TYPES
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -146,6 +148,21 @@ struct QueueGpuNewContext
|
|||||||
uint16_t context;
|
uint16_t context;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Payload of a QueueType::GpuZoneBegin item, filled in by the __GpuCtxScope constructor.
struct QueueGpuZoneBegin
|
||||||
|
{
|
||||||
|
// Profiler::GetTime() value taken when the zone was opened.
int64_t cpuTime;
|
||||||
|
// Client-side `const char*` of the zone name, sent as an integer.
uint64_t name;
|
||||||
|
// Client-side `const SourceLocation*`, sent as an integer.
uint64_t srcloc;
|
||||||
|
// Id of the GpuCtx the zone belongs to (GpuCtx::GetId()).
uint16_t context;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Payload of a QueueType::GpuZoneEnd item, filled in by the __GpuCtxScope destructor.
struct QueueGpuZoneEnd
|
||||||
|
{
|
||||||
|
// Profiler::GetTime() value taken when the zone was closed.
int64_t cpuTime;
|
||||||
|
// GetThreadHandle() of the thread that closed the zone.
uint64_t thread;
|
||||||
|
// Id of the GpuCtx the zone belongs to (GpuCtx::GetId()).
uint16_t context;
|
||||||
|
};
|
||||||
|
|
||||||
struct QueueHeader
|
struct QueueHeader
|
||||||
{
|
{
|
||||||
union
|
union
|
||||||
@ -174,6 +191,8 @@ struct QueueItem
|
|||||||
QueuePlotData plotData;
|
QueuePlotData plotData;
|
||||||
QueueMessage message;
|
QueueMessage message;
|
||||||
QueueGpuNewContext gpuNewContext;
|
QueueGpuNewContext gpuNewContext;
|
||||||
|
QueueGpuZoneBegin gpuZoneBegin;
|
||||||
|
QueueGpuZoneEnd gpuZoneEnd;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -204,6 +223,8 @@ static const size_t QueueDataSize[] = {
|
|||||||
sizeof( QueueHeader ) + sizeof( QueueMessage ),
|
sizeof( QueueHeader ) + sizeof( QueueMessage ),
|
||||||
sizeof( QueueHeader ) + sizeof( QueueMessage ), // literal
|
sizeof( QueueHeader ) + sizeof( QueueMessage ), // literal
|
||||||
sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ),
|
sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ),
|
||||||
|
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ),
|
||||||
|
sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ),
|
||||||
};
|
};
|
||||||
|
|
||||||
static_assert( QueueItemSize == 32, "Queue item size not 32 bytes" );
|
static_assert( QueueItemSize == 32, "Queue item size not 32 bytes" );
|
||||||
|
@ -94,6 +94,22 @@ enum { LockEventSize = sizeof( LockEvent ) };
|
|||||||
enum { MaxLockThreads = sizeof( LockEvent::waitList ) * 8 };
|
enum { MaxLockThreads = sizeof( LockEvent::waitList ) * 8 };
|
||||||
static_assert( std::numeric_limits<decltype(LockEvent::lockCount)>::max() >= MaxLockThreads, "Not enough space for lock count." );
|
static_assert( std::numeric_limits<decltype(LockEvent::lockCount)>::max() >= MaxLockThreads, "Not enough space for lock count." );
|
||||||
|
|
||||||
|
|
||||||
|
// Server-side record of one GPU zone. View::ProcessGpuZoneBegin creates it and
// View::ProcessGpuZoneEnd fills in cpuEnd and thread.
struct GpuEvent
|
||||||
|
{
|
||||||
|
// CPU time from the GpuZoneBegin event.
int64_t cpuStart;
|
||||||
|
// CPU time from the GpuZoneEnd event; -1 until that event arrives.
int64_t cpuEnd;
|
||||||
|
// Initialised to INT64_MAX by ProcessGpuZoneBegin; not written elsewhere in the visible code.
int64_t gpuStart;
|
||||||
|
// Initialised to -1 by ProcessGpuZoneBegin; not written elsewhere in the visible code.
int64_t gpuEnd;
|
||||||
|
// NOTE(review): assigned from the 64-bit QueueGpuZoneBegin::srcloc, so the value is narrowed - confirm intent.
int32_t srcloc;
|
||||||
|
// Copied from QueueGpuZoneBegin::name.
uint64_t name;
|
||||||
|
// 0 until the GpuZoneEnd event supplies the thread handle.
uint64_t thread;
|
||||||
|
|
||||||
|
// Zones opened while this zone was on top of the context's stack.
Vector<GpuEvent*> child;
|
||||||
|
};
|
||||||
|
|
||||||
|
enum { GpuEventSize = sizeof( GpuEvent ) };
|
||||||
|
|
||||||
#pragma pack()
|
#pragma pack()
|
||||||
|
|
||||||
|
|
||||||
@ -115,6 +131,9 @@ struct ThreadData
|
|||||||
// Per-GPU-context state kept by View; one instance is created per GpuNewContext event.
struct GpuCtxData
|
struct GpuCtxData
|
||||||
{
|
{
|
||||||
int64_t timeDiff;
|
// Set by ProcessGpuNewContext to cputime * m_timerMul - gputime.
int64_t timeDiff;
|
||||||
|
// Zones that were opened while no other zone of this context was open.
Vector<GpuEvent*> timeline;
|
||||||
|
// Currently open zones; pushed on GpuZoneBegin, popped on GpuZoneEnd.
Vector<GpuEvent*> stack;
|
||||||
|
// Each zone is appended here twice: once on GpuZoneBegin and once on GpuZoneEnd.
Vector<GpuEvent*> queue;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct LockMap
|
struct LockMap
|
||||||
|
@ -591,6 +591,12 @@ void View::Process( const QueueItem& ev )
|
|||||||
case QueueType::GpuNewContext:
|
case QueueType::GpuNewContext:
|
||||||
ProcessGpuNewContext( ev.gpuNewContext );
|
ProcessGpuNewContext( ev.gpuNewContext );
|
||||||
break;
|
break;
|
||||||
|
case QueueType::GpuZoneBegin:
|
||||||
|
ProcessGpuZoneBegin( ev.gpuZoneBegin );
|
||||||
|
break;
|
||||||
|
case QueueType::GpuZoneEnd:
|
||||||
|
ProcessGpuZoneEnd( ev.gpuZoneEnd );
|
||||||
|
break;
|
||||||
case QueueType::Terminate:
|
case QueueType::Terminate:
|
||||||
m_terminate = true;
|
m_terminate = true;
|
||||||
break;
|
break;
|
||||||
@ -850,12 +856,58 @@ void View::ProcessMessageLiteral( const QueueMessage& ev )
|
|||||||
void View::ProcessGpuNewContext( const QueueGpuNewContext& ev )
|
void View::ProcessGpuNewContext( const QueueGpuNewContext& ev )
|
||||||
{
|
{
|
||||||
assert( ev.context == m_gpuData.size() );
|
assert( ev.context == m_gpuData.size() );
|
||||||
auto gpu = m_slab.Alloc<GpuCtxData>();
|
auto gpu = m_slab.AllocInit<GpuCtxData>();
|
||||||
gpu->timeDiff = int64_t( ev.cputime * m_timerMul - ev.gputime );
|
gpu->timeDiff = int64_t( ev.cputime * m_timerMul - ev.gputime );
|
||||||
std::lock_guard<std::mutex> lock( m_lock );
|
std::lock_guard<std::mutex> lock( m_lock );
|
||||||
m_gpuData.push_back( gpu );
|
m_gpuData.push_back( gpu );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void View::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev )
|
||||||
|
{
|
||||||
|
assert( m_gpuData.size() >= ev.context );
|
||||||
|
auto ctx = m_gpuData[ev.context];
|
||||||
|
|
||||||
|
CheckString( ev.name );
|
||||||
|
CheckSourceLocation( ev.srcloc );
|
||||||
|
|
||||||
|
auto zone = m_slab.AllocInit<GpuEvent>();
|
||||||
|
zone->cpuStart = ev.cpuTime;
|
||||||
|
zone->cpuEnd = -1;
|
||||||
|
zone->gpuStart = std::numeric_limits<int64_t>::max();
|
||||||
|
zone->gpuEnd = -1;
|
||||||
|
zone->name = ev.name;
|
||||||
|
zone->srcloc = ev.srcloc;
|
||||||
|
zone->thread = 0;
|
||||||
|
|
||||||
|
auto timeline = &ctx->timeline;
|
||||||
|
if( !ctx->stack.empty() )
|
||||||
|
{
|
||||||
|
timeline = &ctx->stack.back()->child;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_lock.lock();
|
||||||
|
timeline->push_back( zone );
|
||||||
|
m_lock.unlock();
|
||||||
|
|
||||||
|
ctx->stack.push_back( zone );
|
||||||
|
ctx->queue.push_back( zone );
|
||||||
|
}
|
||||||
|
|
||||||
|
void View::ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev )
|
||||||
|
{
|
||||||
|
assert( m_gpuData.size() >= ev.context );
|
||||||
|
auto ctx = m_gpuData[ev.context];
|
||||||
|
|
||||||
|
assert( !ctx->stack.empty() );
|
||||||
|
auto zone = ctx->stack.back();
|
||||||
|
ctx->stack.pop_back();
|
||||||
|
ctx->queue.push_back( zone );
|
||||||
|
|
||||||
|
std::lock_guard<std::mutex> lock( m_lock );
|
||||||
|
zone->cpuEnd = ev.cpuTime;
|
||||||
|
zone->thread = ev.thread;
|
||||||
|
}
|
||||||
|
|
||||||
void View::CheckString( uint64_t ptr )
|
void View::CheckString( uint64_t ptr )
|
||||||
{
|
{
|
||||||
if( m_strings.find( ptr ) != m_strings.end() ) return;
|
if( m_strings.find( ptr ) != m_strings.end() ) return;
|
||||||
|
@ -68,6 +68,8 @@ private:
|
|||||||
void ProcessMessage( const QueueMessage& ev );
|
void ProcessMessage( const QueueMessage& ev );
|
||||||
void ProcessMessageLiteral( const QueueMessage& ev );
|
void ProcessMessageLiteral( const QueueMessage& ev );
|
||||||
void ProcessGpuNewContext( const QueueGpuNewContext& ev );
|
void ProcessGpuNewContext( const QueueGpuNewContext& ev );
|
||||||
|
void ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev );
|
||||||
|
void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev );
|
||||||
|
|
||||||
void CheckString( uint64_t ptr );
|
void CheckString( uint64_t ptr );
|
||||||
void CheckThreadString( uint64_t id );
|
void CheckThreadString( uint64_t id );
|
||||||
|
Loading…
x
Reference in New Issue
Block a user