From 06b23cc2937519a7948e021e81e302bb628cb772 Mon Sep 17 00:00:00 2001 From: Marcos Slomp Date: Thu, 16 May 2024 11:07:56 -0700 Subject: [PATCH] more cleanup --- public/tracy/TracyMetal.hmm | 109 ++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 62 deletions(-) diff --git a/public/tracy/TracyMetal.hmm b/public/tracy/TracyMetal.hmm index 8a573d6c..ec8e210f 100644 --- a/public/tracy/TracyMetal.hmm +++ b/public/tracy/TracyMetal.hmm @@ -55,6 +55,14 @@ using TracyMetalCtx = void*; } while(false); +#define TRACY_METAL_DEBUG_MASK (0) // bitmask of enabled debug categories (each TracyMetalDebug call names one bit); 0 compiles all of them out + +#if TRACY_METAL_DEBUG_MASK +#define TracyMetalDebug(mask, ...) do { if ((mask) & TRACY_METAL_DEBUG_MASK) { __VA_ARGS__; } } while(false) +#else +#define TracyMetalDebug(mask, ...) +#endif + namespace tracy { @@ -66,12 +74,12 @@ class MetalCtx public: MetalCtx(id device) - : m_device(device) + : m_device(device) { ZoneScopedNC("TracyMetalCtx", tracy::Color::Red4); - TracyMetalPanic(, "MTLCounterErrorValue = 0x%llx", MTLCounterErrorValue); - TracyMetalPanic(, "MTLCounterDontSample = 0x%llx", MTLCounterDontSample); + TracyMetalDebug(1<<0, TracyMetalPanic(, "MTLCounterErrorValue = 0x%llx", MTLCounterErrorValue)); + TracyMetalDebug(1<<0, TracyMetalPanic(, "MTLCounterDontSample = 0x%llx", MTLCounterDontSample)); if (m_device == nil) { @@ -106,10 +114,12 @@ public: MTLTimestamp cpuTimestamp = 0; MTLTimestamp gpuTimestamp = 0; [m_device sampleTimestamps:&cpuTimestamp gpuTimestamp:&gpuTimestamp]; - TracyMetalPanic(, "Calibration: CPU timestamp (Metal): %llu", cpuTimestamp); - TracyMetalPanic(, "Calibration: GPU timestamp (Metal): %llu", gpuTimestamp); + TracyMetalDebug(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Metal): %llu", cpuTimestamp)); + TracyMetalDebug(1<<0, TracyMetalPanic(, "Calibration: GPU timestamp (Metal): %llu", gpuTimestamp)); + cpuTimestamp = Profiler::GetTime(); - TracyMetalPanic(, "Calibration: CPU timestamp (Tracy): 
%llu", cpuTimestamp)); + float period = 1.0f; m_contextId = GetGpuCtxCounter().fetch_add(1); @@ -220,7 +230,7 @@ public: TracyMetalPanic(return false, "Collect: FULL! too many pending timestamp queries. [%llu, %llu] (%u)", begin, latestCheckpoint, count); } - //TracyMetalPanic(, "Collect: [%llu, %llu] :: (%u)", begin, latestCheckpoint, count); + TracyMetalDebug(1<<3, TracyMetalPanic(, "Collect: [%llu, %llu] :: (%u)", begin, latestCheckpoint, count)); NSRange range = NSMakeRange(RingIndex(begin), count); NSData* data = [counterSampleBuffer resolveCounterRange:range]; @@ -244,7 +254,7 @@ public: MTLTimestamp& t_start = timestamps[i+0].timestamp; MTLTimestamp& t_end = timestamps[i+1].timestamp; uint32_t k = RingIndex(begin + i); - //fprintf(stdout, "TracyMetal: Collect: timestamp[%u] = %llu | timestamp[%u] = %llu | diff = %llu\n", k, t_start, k+1, t_end, (t_end - t_start)); + TracyMetalDebug(1<<4, TracyMetalPanic(, "Collect: timestamp[%u] = %llu | timestamp[%u] = %llu | diff = %llu\n", k, t_start, k+1, t_end, (t_end - t_start))); if ((t_start == MTLCounterErrorValue) || (t_end == MTLCounterErrorValue)) { TracyMetalPanic(, "Collect: invalid timestamp (MTLCounterErrorValue) at %u.", k); @@ -255,7 +265,7 @@ public: auto checkTime = std::chrono::high_resolution_clock::now(); auto requestTime = m_timestampRequestTime[k]; auto ms_in_flight = std::chrono::duration(checkTime-requestTime).count()*1000.0f; - //TracyMetalPanic(, "Collect: invalid timestamp (zero) at %u [%.0fms in flight].", k, ms_in_flight); + TracyMetalDebug(1<<4, TracyMetalPanic(, "Collect: invalid timestamp (zero) at %u [%.0fms in flight].", k, ms_in_flight)); const float timeout_ms = 200.0f; if (ms_in_flight < timeout_ms) break; @@ -264,8 +274,8 @@ public: t_start = lastValidTimestamp + 5; t_end = t_start + 5; } - TracyFreeN((void*)(uintptr_t)(k+0), "TracyMetalGpuZone"); - TracyFreeN((void*)(uintptr_t)(k+1), "TracyMetalGpuZone"); + TracyMetalDebug(1<<2, TracyFreeN((void*)(uintptr_t)(k+0), 
"TracyMetalGpuZone")); + TracyMetalDebug(1<<2, TracyFreeN((void*)(uintptr_t)(k+1), "TracyMetalGpuZone")); { auto* item = Profiler::QueueSerial(); MemWrite(&item->hdr.type, QueueType::GpuTime); @@ -283,7 +293,7 @@ public: Profiler::QueueSerialFinish(); } lastValidTimestamp = t_end; - TracyFreeN((void*)(uintptr_t)k, "TracyMetalTimestampQueryId"); + TracyMetalDebug(1<<1, TracyFreeN((void*)(uintptr_t)k, "TracyMetalTimestampQueryId")); resolved += 2; } ZoneValue(RingCount(begin, m_previousCheckpoint.load())); @@ -328,9 +338,13 @@ private: auto count = RingCount(m_previousCheckpoint, id); if (count >= MaxQueries) { - TracyMetalPanic(, "NextQueryId: FULL! too many pending timestamp queries. [%llu, %llu] (%u)", m_previousCheckpoint.load(), id, count); - // #TODO: return some sentinel value; ideally a "hidden" query index - //return (MaxQueries - n); + // TODO: return a proper (hidden) "sentinel" query + Query sentinel = Query{ m_counterSampleBuffers[1], MaxQueries-2 }; + TracyMetalPanic( + return sentinel, + "NextQueryId: FULL! too many pending timestamp queries. [%llu, %llu] (%u)", + m_previousCheckpoint.load(), id, count + ); } uint32_t buffer_idx = (id / MaxQueries) % 2; ZoneValue(buffer_idx); @@ -339,33 +353,11 @@ private: TracyMetalPanic(, "NextQueryId: sample buffer is nil! 
(id=%llu)", id); uint32_t idx = RingIndex(id); ZoneValue(idx); - TracyAllocN((void*)(uintptr_t)idx, 2, "TracyMetalTimestampQueryId"); + TracyMetalDebug(1<<1, TracyAllocN((void*)(uintptr_t)idx, 2, "TracyMetalTimestampQueryId")); m_timestampRequestTime[idx] = std::chrono::high_resolution_clock::now(); - //if (id >= MaxQueries) - // TracyMetalPanic(, "NextQueryId: %u (%llu)", idx, id); return Query{ buffer, idx }; } - tracy_force_inline unsigned int NextQueryId(int n=1) - { - ZoneScopedNC("TracyMetal::NextQueryId", tracy::Color::LightCoral); - auto id = m_queryCounter.fetch_add(n); - ZoneValue(id); - auto count = RingCount(m_previousCheckpoint, id); - if (count >= MaxQueries) - { - TracyMetalPanic(, "NextQueryId: FULL! too many pending timestamp queries. [%llu, %llu] (%u)", m_previousCheckpoint.load(), id, count); - // #TODO: return some sentinel value; ideally a "hidden" query index - return (MaxQueries - n); - } - uint32_t idx = RingIndex(id); - TracyAllocN((void*)(uintptr_t)idx, 2, "TracyMetalTimestampQueryId"); - m_timestampRequestTime[idx] = std::chrono::high_resolution_clock::now(); - //if (id >= MaxQueries) - // TracyMetalPanic(, "NextQueryId: %u (%llu)", idx, id); - return idx; - } - tracy_force_inline uint8_t GetContextId() const { return m_contextId; @@ -399,8 +391,7 @@ private: id counterSampleBuffer = [device newCounterSampleBufferWithDescriptor:sampleDescriptor error:&error]; if (error != nil) { - //NSLog(@"%@", error.localizedDescription); - //NSLog(@"%@", error.localizedFailureReason); + //NSLog(@"%@ | %@", error.localizedDescription, error.localizedFailureReason); TracyMetalPanic(return nil, "ERROR: unable to create sample buffer for timestamp counters : %s | %s", [error.localizedDescription cString], [error.localizedFailureReason cString]); @@ -437,17 +428,16 @@ public: #endif { if ( !m_active ) return; - if (desc == nil) TracyMetalPanic(return, "pass descriptor is nil."); + if (desc == nil) TracyMetalPanic(return, "compute pass descriptor is nil."); 
m_ctx = ctx; - auto query = m_query = ctx->NextQuery(); + auto& query = m_query = ctx->NextQuery(); desc.sampleBufferAttachments[0].sampleBuffer = query.buffer; desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = query.idx+0; desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = query.idx+1; - SubmitZoneBeginGpu(ctx, query.idx+0, srcloc); - //SubmitZoneEndGpu(ctx, queryId+1); + SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc); } tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLBlitPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active ) @@ -458,17 +448,16 @@ public: #endif { if ( !m_active ) return; - if (desc == nil) TracyMetalPanic(return, "pass descriptor is nil."); + if (desc == nil) TracyMetalPanic(return, "blit pass descriptor is nil."); m_ctx = ctx; - auto query = m_query = ctx->NextQuery(); + auto& query = m_query = ctx->NextQuery(); desc.sampleBufferAttachments[0].sampleBuffer = query.buffer; desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = query.idx+0; desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = query.idx+1; - SubmitZoneBeginGpu(ctx, query.idx+0, srcloc); - //SubmitZoneEndGpu(ctx, queryId+1); + SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc); } tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLRenderPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active ) @@ -479,10 +468,10 @@ public: #endif { if ( !m_active ) return; - if (desc == nil) TracyMetalPanic(return, "pass descriptor is nil."); + if (desc == nil) TracyMetalPanic(return, "render pass descriptor is nil."); m_ctx = ctx; - auto query = m_query = ctx->NextQuery(); + auto& query = m_query = ctx->NextQuery(); desc.sampleBufferAttachments[0].sampleBuffer = query.buffer; desc.sampleBufferAttachments[0].startOfVertexSampleIndex = query.idx+0; @@ -490,8 +479,7 @@ public: desc.sampleBufferAttachments[0].startOfFragmentSampleIndex = MTLCounterDontSample; desc.sampleBufferAttachments[0].endOfFragmentSampleIndex = query.idx+1; - 
SubmitZoneBeginGpu(ctx, query.idx+0, srcloc); - //SubmitZoneEndGpu(ctx, queryId+1); + SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc); } #if 0 @@ -506,21 +494,19 @@ public: m_ctx = ctx; m_cmdEncoder = cmdEncoder; - auto queryId = m_queryId = ctx->NextQueryId(); + auto& query = m_query = ctx->NextQueryId(); - [m_cmdEncoder sampleCountersInBuffer:m_ctx->m_counterSampleBuffer atSampleIndex:queryId withBarrier:YES]; + [m_cmdEncoder sampleCountersInBuffer:m_ctx->m_counterSampleBuffer atSampleIndex:query.idx withBarrier:YES]; - SubmitZoneBeginGpu(ctx, queryId, srcloc); + SubmitZoneBeginGpu(ctx, query.idx, srcloc); } #endif tracy_force_inline ~MetalZoneScope() { if( !m_active ) return; - - auto queryId = m_query.idx + 1; - - SubmitZoneEndGpu(m_ctx, queryId); + + SubmitZoneEndGpu(m_ctx, m_query.idx + 1); } private: @@ -540,7 +526,7 @@ private: MemWrite( &item->gpuZoneBegin.context, ctx->GetContextId() ); Profiler::QueueSerialFinish(); - TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone"); + TracyMetalDebug(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone")); } static void SubmitZoneEndGpu(MetalCtx* ctx, uint32_t queryId) @@ -553,10 +539,9 @@ private: MemWrite( &item->gpuZoneEnd.context, ctx->GetContextId() ); Profiler::QueueSerialFinish(); - TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone"); + TracyMetalDebug(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone")); } -public: MetalCtx::Query m_query = {}; };