diff --git a/.appveyor.yml b/.appveyor.yml index b68f34b0..f9d57c63 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -15,11 +15,13 @@ build_script: - cmd: msbuild .\update\build\win32\update.vcxproj - cmd: msbuild .\profiler\build\win32\Tracy.vcxproj - cmd: msbuild .\capture\build\win32\capture.vcxproj + - cmd: msbuild .\csvexport\build\win32\csvexport.vcxproj - cmd: msbuild .\library\win32\TracyProfiler.vcxproj /property:Configuration=Release - sh: sudo apt-get update && sudo apt-get -y install libglfw3-dev libgtk2.0-dev libcapstone-dev - sh: make -C update/build/unix debug release - sh: make -C profiler/build/unix debug release - sh: make -C capture/build/unix debug release + - sh: make -C csvexport/build/unix debug release - sh: make -C library/unix debug release - sh: make -C test - sh: make -C test clean diff --git a/.github/workflows/gcc.yml b/.github/workflows/gcc.yml index 1d6a952f..7a71bfef 100644 --- a/.github/workflows/gcc.yml +++ b/.github/workflows/gcc.yml @@ -29,6 +29,8 @@ jobs: run: make -j -C update/build/unix debug release - name: Capture utility run: make -j -C capture/build/unix debug release + - name: Csvexport utility + run: make -j -C csvexport/build/unix debug release - name: Import-chrome utility run: make -j -C import-chrome/build/unix debug release - name: Library diff --git a/.github/workflows/msvc.yml b/.github/workflows/msvc.yml index 60734a80..8df3bc17 100644 --- a/.github/workflows/msvc.yml +++ b/.github/workflows/msvc.yml @@ -30,6 +30,10 @@ jobs: run: msbuild .\capture\build\win32\capture.vcxproj /property:Configuration=Debug /property:Platform=x64 - name: Capture utility Release run: msbuild .\capture\build\win32\capture.vcxproj /property:Configuration=Release /property:Platform=x64 + - name: Csvexport utility Debug + run: msbuild .\csvexport\build\win32\csvexport.vcxproj /property:Configuration=Debug /property:Platform=x64 + - name: Csvexport utility Release + run: msbuild .\csvexport\build\win32\csvexport.vcxproj 
/property:Configuration=Release /property:Platform=x64 - name: Import-chrome utility Debug run: msbuild .\import-chrome\build\win32\import-chrome.vcxproj /property:Configuration=Debug /property:Platform=x64 - name: Import-chrome utility Release diff --git a/AUTHORS b/AUTHORS index 1d06c4b5..52c7654b 100644 --- a/AUTHORS +++ b/AUTHORS @@ -10,4 +10,5 @@ Michał Cichoń (OSX call stack decoding b Thales Sabino (OpenCL support) Andrew Depke (Direct3D 12 support) Simonas Kazlauskas (OSX CI, external bindings) +Jakub Žádník (csvexport utility) Andrey Voroshilov (multi-DLL fixes) diff --git a/NEWS b/NEWS index 924ee631..266ff86b 100644 --- a/NEWS +++ b/NEWS @@ -19,6 +19,10 @@ v0.7.1 (xxxx-xx-xx) - Fixed attachment of postponed frame images. - Source location data can be now copied to clipboard from zone info window. - Zones in find zones menu can be now grouped by zone name. +- Vulkan and D3D12 GPU contexts can be now calibrated. +- Added CSV export utility. +- "Go to frame" popup no longer has a dedicated button. To show it, click on + the frame counter. v0.7 (2020-06-11) ----------------- diff --git a/TracyD3D12.hpp b/TracyD3D12.hpp index 8f75d2ea..831ee9a5 100644 --- a/TracyD3D12.hpp +++ b/TracyD3D12.hpp @@ -50,8 +50,8 @@ namespace tracy bool m_initialized = false; - ID3D12Device* m_device; - ID3D12CommandQueue* m_queue; + ID3D12Device* m_device = nullptr; + ID3D12CommandQueue* m_queue = nullptr; uint8_t m_context; Microsoft::WRL::ComPtr m_queryHeap; Microsoft::WRL::ComPtr m_readbackBuffer; @@ -65,6 +65,9 @@ namespace tracy Microsoft::WRL::ComPtr m_payloadFence; std::queue m_payloadQueue; + int64_t m_prevCalibration = 0; + int64_t m_qpcToNs = int64_t{ 1000000000 / GetFrequencyQpc() }; + public: D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue) : m_device(device) @@ -98,6 +101,9 @@ namespace tracy assert(false && "Failed to get queue clock calibration."); } + // Save the device cpu timestamp, not the profiler's timestamp. 
+ m_prevCalibration = cpuTimestamp * m_qpcToNs; + cpuTimestamp = Profiler::GetTime(); D3D12_QUERY_HEAP_DESC heapDesc{}; @@ -150,7 +156,7 @@ namespace tracy memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread)); MemWrite(&item->gpuNewContext.period, 1E+09f / static_cast(timestampFrequency)); MemWrite(&item->gpuNewContext.context, m_context); - MemWrite(&item->gpuNewContext.accuracyBits, uint8_t{ 0 }); + MemWrite(&item->gpuNewContext.flags, GpuContextCalibration); MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12); #ifdef TRACY_ON_DEMAND @@ -233,6 +239,34 @@ namespace tracy } m_readbackBuffer->Unmap(0, nullptr); + + // Recalibrate to account for drift. + + uint64_t cpuTimestamp; + uint64_t gpuTimestamp; + + if (FAILED(m_queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp))) + { + assert(false && "Failed to get queue clock calibration."); + } + + cpuTimestamp *= m_qpcToNs; + + const auto cpuDelta = cpuTimestamp - m_prevCalibration; + if (cpuDelta > 0) + { + m_prevCalibration = cpuTimestamp; + cpuTimestamp = Profiler::GetTime(); + + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuCalibration); + MemWrite(&item->gpuCalibration.gpuTime, gpuTimestamp); + MemWrite(&item->gpuCalibration.cpuTime, cpuTimestamp); + MemWrite(&item->gpuCalibration.cpuDelta, cpuDelta); + MemWrite(&item->gpuCalibration.context, m_context); + + Profiler::QueueSerialFinish(); + } } private: diff --git a/TracyOpenCL.hpp b/TracyOpenCL.hpp index 77c7de98..5dba8488 100644 --- a/TracyOpenCL.hpp +++ b/TracyOpenCL.hpp @@ -72,7 +72,7 @@ namespace tracy { MemWrite(&item->gpuNewContext.period, 1.0f); MemWrite(&item->gpuNewContext.type, GpuContextType::OpenCL); MemWrite(&item->gpuNewContext.context, (uint8_t) m_contextId); - MemWrite(&item->gpuNewContext.accuracyBits, (uint8_t)0); + MemWrite(&item->gpuNewContext.flags, (uint8_t)0); #ifdef TRACY_ON_DEMAND GetProfiler().DeferItem(*item); #endif diff --git a/TracyOpenGL.hpp b/TracyOpenGL.hpp 
index e4d831d3..90c5dcbc 100644 --- a/TracyOpenGL.hpp +++ b/TracyOpenGL.hpp @@ -110,7 +110,7 @@ public: MemWrite( &item->gpuNewContext.thread, thread ); MemWrite( &item->gpuNewContext.period, period ); MemWrite( &item->gpuNewContext.context, m_context ); - MemWrite( &item->gpuNewContext.accuracyBits, (uint8_t)bits ); + MemWrite( &item->gpuNewContext.flags, uint8_t( 0 ) ); MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl ); #ifdef TRACY_ON_DEMAND diff --git a/TracyVulkan.hpp b/TracyVulkan.hpp index 7f6b9182..9ba6738b 100644 --- a/TracyVulkan.hpp +++ b/TracyVulkan.hpp @@ -4,6 +4,7 @@ #if !defined TRACY_ENABLE #define TracyVkContext(x,y,z,w) nullptr +#define TracyVkContextCalibrated(x,y,z,w,a,b) nullptr #define TracyVkDestroy(x) #define TracyVkNamedZone(c,x,y,z,w) #define TracyVkNamedZoneC(c,x,y,z,w,a) @@ -42,16 +43,36 @@ class VkCtx enum { QueryCount = 64 * 1024 }; public: - VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf ) + VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT _vkGetCalibratedTimestampsEXT ) : m_device( device ) + , m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT ) , m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) ) , m_head( 0 ) , m_tail( 0 ) , m_oldCnt( 0 ) , m_queryCount( QueryCount ) + , m_vkGetCalibratedTimestampsEXT( _vkGetCalibratedTimestampsEXT ) { assert( m_context != 255 ); + if( _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT && _vkGetCalibratedTimestampsEXT ) + { + uint32_t num; + _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, nullptr ); + if( num > 4 ) num = 4; + VkTimeDomainEXT data[4]; + _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, data ); + for( uint32_t i=0; i deviation[i] ) + { + minDeviation = deviation[i]; + } + } + m_deviation = minDeviation * 3 / 2; + + 
m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() ); + + Calibrate( device, m_prevCalibration, tgpu ); + tcpu = Profiler::GetTime(); + } + + uint8_t flags = 0; + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration; auto item = Profiler::QueueSerial(); MemWrite( &item->hdr.type, QueueType::GpuNewContext ); @@ -104,7 +160,7 @@ public: memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) ); MemWrite( &item->gpuNewContext.period, period ); MemWrite( &item->gpuNewContext.context, m_context ); - MemWrite( &item->gpuNewContext.accuracyBits, uint8_t( 0 ) ); + MemWrite( &item->gpuNewContext.flags, flags ); MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan ); #ifdef TRACY_ON_DEMAND @@ -132,6 +188,8 @@ public: { vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ); m_head = m_tail = 0; + int64_t tgpu; + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu ); return; } #endif @@ -163,6 +221,25 @@ public: Profiler::QueueSerialFinish(); } + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) + { + int64_t tgpu, tcpu; + Calibrate( m_device, tcpu, tgpu ); + const auto refCpu = Profiler::GetTime(); + const auto delta = tcpu - m_prevCalibration; + if( delta > 0 ) + { + m_prevCalibration = tcpu; + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuCalibration ); + MemWrite( &item->gpuCalibration.gpuTime, tgpu ); + MemWrite( &item->gpuCalibration.cpuTime, refCpu ); + MemWrite( &item->gpuCalibration.cpuDelta, delta ); + MemWrite( &item->gpuCalibration.context, m_context ); + Profiler::QueueSerialFinish(); + } + } + vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt ); m_tail += cnt; @@ -183,8 +260,35 @@ private: return m_context; } + tracy_force_inline void Calibrate( VkDevice device, int64_t& tCpu, int64_t& tGpu ) + { + assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ); + VkCalibratedTimestampInfoEXT spec[2] = { + { 
VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT }, + { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain }, + }; + uint64_t ts[2]; + uint64_t deviation; + do + { + m_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, &deviation ); + } + while( deviation > m_deviation ); + +#if defined _WIN32 || defined __CYGWIN__ + tGpu = ts[0]; + tCpu = ts[1] * m_qpcToNs; +#else + assert( false ); +#endif + } + VkDevice m_device; VkQueryPool m_query; + VkTimeDomainEXT m_timeDomain; + uint64_t m_deviation; + int64_t m_qpcToNs; + int64_t m_prevCalibration; uint8_t m_context; unsigned int m_head; @@ -193,6 +297,8 @@ private: unsigned int m_queryCount; int64_t* m_res; + + PFN_vkGetCalibratedTimestampsEXT m_vkGetCalibratedTimestampsEXT; }; class VkCtxScope @@ -271,11 +377,11 @@ private: VkCtx* m_ctx; }; -static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf ) +static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct ) { InitRPMallocThread(); auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) ); - new(ctx) VkCtx( physdev, device, queue, cmdbuf ); + new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct ); return ctx; } @@ -289,7 +395,8 @@ static inline void DestroyVkContext( VkCtx* ctx ) using TracyVkCtx = tracy::VkCtx*; -#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf ); +#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, nullptr, nullptr ); +#define TracyVkContextCalibrated( physdev, device, queue, cmdbuf, gpdctd, gct ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, gpdctd, gct ); #define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx ); #if defined TRACY_HAS_CALLSTACK 
&& defined TRACY_CALLSTACK # define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active ); diff --git a/capture/build/win32/capture.vcxproj b/capture/build/win32/capture.vcxproj index ffea25c1..2a9230b3 100644 --- a/capture/build/win32/capture.vcxproj +++ b/capture/build/win32/capture.vcxproj @@ -138,6 +138,7 @@ + @@ -171,7 +172,6 @@ - @@ -184,6 +184,7 @@ + @@ -227,7 +228,6 @@ - diff --git a/capture/build/win32/capture.vcxproj.filters b/capture/build/win32/capture.vcxproj.filters index 30b334b5..8b0cc276 100644 --- a/capture/build/win32/capture.vcxproj.filters +++ b/capture/build/win32/capture.vcxproj.filters @@ -13,6 +13,9 @@ {043ecb94-f240-4986-94b0-bc5bbd415a82} + + {ee9737d2-69c7-44da-b9c7-539d18f9d4b4} + @@ -33,9 +36,6 @@ src - - src - common @@ -129,6 +129,9 @@ server + + getopt + @@ -179,9 +182,6 @@ server - - src - common @@ -293,5 +293,8 @@ server + + getopt + \ No newline at end of file diff --git a/capture/src/capture.cpp b/capture/src/capture.cpp index 341f8c02..6b911b85 100644 --- a/capture/src/capture.cpp +++ b/capture/src/capture.cpp @@ -15,7 +15,7 @@ #include "../../server/TracyMemory.hpp" #include "../../server/TracyPrint.hpp" #include "../../server/TracyWorker.hpp" -#include "getopt.h" +#include "../../getopt/getopt.h" bool disconnect = false; diff --git a/client/TracyProfiler.cpp b/client/TracyProfiler.cpp index f52db989..d61ea061 100644 --- a/client/TracyProfiler.cpp +++ b/client/TracyProfiler.cpp @@ -873,6 +873,17 @@ static Thread* s_sysTraceThread = nullptr; TRACY_API bool ProfilerAvailable() { return s_instance != nullptr; } +TRACY_API int64_t GetFrequencyQpc() +{ +#if defined _WIN32 || defined __CYGWIN__ + LARGE_INTEGER t; + QueryPerformanceFrequency( &t ); + return t.QuadPart; 
+#else + return 0; +#endif +} + #ifdef TRACY_DELAYED_INIT struct ThreadNameData; TRACY_API moodycamel::ConcurrentQueue& GetQueue(); diff --git a/client/TracyProfiler.hpp b/client/TracyProfiler.hpp index 87c4c098..c4eac091 100644 --- a/client/TracyProfiler.hpp +++ b/client/TracyProfiler.hpp @@ -64,6 +64,7 @@ TRACY_API GpuCtxWrapper& GetGpuCtx(); TRACY_API uint64_t GetThreadHandle(); TRACY_API void InitRPMallocThread(); TRACY_API bool ProfilerAvailable(); +TRACY_API int64_t GetFrequencyQpc(); struct SourceLocationData { diff --git a/common/TracyProtocol.hpp b/common/TracyProtocol.hpp index 634b5846..cc47fe41 100644 --- a/common/TracyProtocol.hpp +++ b/common/TracyProtocol.hpp @@ -9,7 +9,7 @@ namespace tracy constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; } -enum : uint32_t { ProtocolVersion = 36 }; +enum : uint32_t { ProtocolVersion = 37 }; enum : uint32_t { BroadcastVersion = 1 }; using lz4sz_t = uint32_t; diff --git a/common/TracyQueue.hpp b/common/TracyQueue.hpp index 4bd48b54..815d27ab 100644 --- a/common/TracyQueue.hpp +++ b/common/TracyQueue.hpp @@ -56,6 +56,7 @@ enum class QueueType : uint8_t Terminate, KeepAlive, ThreadContext, + GpuCalibration, Crash, CrashReport, ZoneValidation, @@ -268,6 +269,11 @@ enum class GpuContextType : uint8_t Direct3D12 }; +enum GpuContextFlags : uint8_t +{ + GpuContextCalibration = 1 << 0 +}; + struct QueueGpuNewContext { int64_t cpuTime; @@ -275,7 +281,7 @@ struct QueueGpuNewContext uint64_t thread; float period; uint8_t context; - uint8_t accuracyBits; + GpuContextFlags flags; GpuContextType type; }; @@ -303,6 +309,14 @@ struct QueueGpuTime uint8_t context; }; +struct QueueGpuCalibration +{ + int64_t gpuTime; + int64_t cpuTime; + int64_t cpuDelta; + uint8_t context; +}; + struct QueueMemAlloc { int64_t time; @@ -477,6 +491,7 @@ struct QueueItem QueueGpuZoneBegin gpuZoneBegin; QueueGpuZoneEnd gpuZoneEnd; QueueGpuTime gpuTime; + QueueGpuCalibration gpuCalibration; QueueMemAlloc 
memAlloc; QueueMemFree memFree; QueueCallstackMemory callstackMemory; @@ -553,6 +568,7 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ), // terminate sizeof( QueueHeader ), // keep alive sizeof( QueueHeader ) + sizeof( QueueThreadContext ), + sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ), sizeof( QueueHeader ), // crash sizeof( QueueHeader ) + sizeof( QueueCrashReport ), sizeof( QueueHeader ) + sizeof( QueueZoneValidation ), diff --git a/csvexport/build/unix/Makefile b/csvexport/build/unix/Makefile new file mode 100644 index 00000000..3b50301c --- /dev/null +++ b/csvexport/build/unix/Makefile @@ -0,0 +1,12 @@ +all: debug + +debug: + @+make -f debug.mk all + +release: + @+make -f release.mk all + +clean: + @+make -f build.mk clean + +.PHONY: all clean debug release diff --git a/csvexport/build/unix/build.mk b/csvexport/build/unix/build.mk new file mode 100644 index 00000000..a7a67460 --- /dev/null +++ b/csvexport/build/unix/build.mk @@ -0,0 +1,60 @@ +CFLAGS += +CXXFLAGS := $(CFLAGS) -std=gnu++17 +# DEFINES += -DTRACY_NO_STATISTICS +INCLUDES := $(shell pkg-config --cflags capstone) +LIBS := $(shell pkg-config --libs capstone) -lpthread +PROJECT := csvexport +IMAGE := $(PROJECT)-$(BUILD) + +FILTER := + +BASE := $(shell egrep 'ClCompile.*cpp"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g') +BASE2 := $(shell egrep 'ClCompile.*c"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g') + +SRC := $(filter-out $(FILTER),$(BASE)) +SRC2 := $(filter-out $(FILTER),$(BASE2)) + +TBB := $(shell ld -ltbb -o /dev/null 2>/dev/null; echo $$?) 
+ifeq ($(TBB),0) + LIBS += -ltbb +endif + +OBJDIRBASE := obj/$(BUILD) +OBJDIR := $(OBJDIRBASE)/o/o/o + +OBJ := $(addprefix $(OBJDIR)/,$(SRC:%.cpp=%.o)) +OBJ2 := $(addprefix $(OBJDIR)/,$(SRC2:%.c=%.o)) + +all: $(IMAGE) + +$(OBJDIR)/%.o: %.cpp + $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< -o $@ + +$(OBJDIR)/%.d : %.cpp + @echo Resolving dependencies of $< + @mkdir -p $(@D) + @$(CXX) -MM $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< > $@.$$$$; \ + sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.cpp=.o) $@ : ,g' < $@.$$$$ > $@; \ + rm -f $@.$$$$ + +$(OBJDIR)/%.o: %.c + $(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ + +$(OBJDIR)/%.d : %.c + @echo Resolving dependencies of $< + @mkdir -p $(@D) + @$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \ + sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.c=.o) $@ : ,g' < $@.$$$$ > $@; \ + rm -f $@.$$$$ + +$(IMAGE): $(OBJ) $(OBJ2) + $(CXX) $(CXXFLAGS) $(DEFINES) $(OBJ) $(OBJ2) $(LIBS) -o $@ + +ifneq "$(MAKECMDGOALS)" "clean" +-include $(addprefix $(OBJDIR)/,$(SRC:.cpp=.d)) $(addprefix $(OBJDIR)/,$(SRC2:.c=.d)) +endif + +clean: + rm -rf $(OBJDIRBASE) $(IMAGE)* + +.PHONY: clean all diff --git a/csvexport/build/unix/debug.mk b/csvexport/build/unix/debug.mk new file mode 100644 index 00000000..04d925a6 --- /dev/null +++ b/csvexport/build/unix/debug.mk @@ -0,0 +1,11 @@ +ARCH := $(shell uname -m) + +CFLAGS := -g3 -Wall +DEFINES := -DDEBUG +BUILD := debug + +ifeq ($(ARCH),x86_64) +CFLAGS += -msse4.1 +endif + +include build.mk diff --git a/csvexport/build/unix/release.mk b/csvexport/build/unix/release.mk new file mode 100644 index 00000000..b59abd5c --- /dev/null +++ b/csvexport/build/unix/release.mk @@ -0,0 +1,7 @@ +ARCH := $(shell uname -m) + +CFLAGS := -O3 -s -march=native +DEFINES := -DNDEBUG +BUILD := release + +include build.mk diff --git a/csvexport/build/win32/csvexport.sln b/csvexport/build/win32/csvexport.sln new file mode 100644 index 00000000..41975ed3 --- /dev/null +++ b/csvexport/build/win32/csvexport.sln @@ -0,0 +1,31 @@ + +Microsoft Visual 
Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.30225.117 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "csvexport", "csvexport.vcxproj", "{447D58BF-94CD-4469-BB90-549C05D03E00}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {447D58BF-94CD-4469-BB90-549C05D03E00}.Debug|x64.ActiveCfg = Debug|x64 + {447D58BF-94CD-4469-BB90-549C05D03E00}.Debug|x64.Build.0 = Debug|x64 + {447D58BF-94CD-4469-BB90-549C05D03E00}.Debug|x86.ActiveCfg = Debug|Win32 + {447D58BF-94CD-4469-BB90-549C05D03E00}.Debug|x86.Build.0 = Debug|Win32 + {447D58BF-94CD-4469-BB90-549C05D03E00}.Release|x64.ActiveCfg = Release|x64 + {447D58BF-94CD-4469-BB90-549C05D03E00}.Release|x64.Build.0 = Release|x64 + {447D58BF-94CD-4469-BB90-549C05D03E00}.Release|x86.ActiveCfg = Release|Win32 + {447D58BF-94CD-4469-BB90-549C05D03E00}.Release|x86.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {3E51386C-43EA-44AC-9F24-AFAFE4D63ADE} + EndGlobalSection +EndGlobal diff --git a/csvexport/build/win32/csvexport.vcxproj b/csvexport/build/win32/csvexport.vcxproj new file mode 100644 index 00000000..b350fe5e --- /dev/null +++ b/csvexport/build/win32/csvexport.vcxproj @@ -0,0 +1,235 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {447D58BF-94CD-4469-BB90-549C05D03E00} + capture + 10.0 + x64-windows-static + + + + Application + true + v142 + MultiByte + + + Application + false + v142 + true + MultiByte + + + Application + true + v142 + MultiByte + + + Application + false + v142 + true + MultiByte + + + + + 
+ + + + + + + + + + + + + + + + + + Level3 + Disabled + true + true + + + + + Level3 + Disabled + true + true + true + _CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;WIN32_LEAN_AND_MEAN;NOMINMAX;_USE_MATH_DEFINES;%(PreprocessorDefinitions) + AdvancedVectorExtensions2 + stdcpplatest + ..\..\..\vcpkg\vcpkg\installed\x64-windows-static\include + + + ws2_32.lib;capstone.lib;%(AdditionalDependencies) + Console + ..\..\..\vcpkg\vcpkg\installed\x64-windows-static\debug\lib + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + Level3 + MaxSpeed + true + true + true + true + true + NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;WIN32_LEAN_AND_MEAN;NOMINMAX;_USE_MATH_DEFINES;%(PreprocessorDefinitions) + AdvancedVectorExtensions2 + stdcpplatest + ..\..\..\vcpkg\vcpkg\installed\x64-windows-static\include + + + true + true + ws2_32.lib;capstone.lib;%(AdditionalDependencies) + Console + ..\..\..\vcpkg\vcpkg\installed\x64-windows-static\lib + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/csvexport/build/win32/csvexport.vcxproj.filters b/csvexport/build/win32/csvexport.vcxproj.filters new file mode 100644 index 00000000..045ffe8b --- /dev/null +++ b/csvexport/build/win32/csvexport.vcxproj.filters @@ -0,0 +1,300 @@ + + + + + {729c80ee-4d26-4a5e-8f1f-6c075783eb56} + + + {cf23ef7b-7694-4154-830b-00cf053350ea} + + + {e39d3623-47cd-4752-8da9-3ea324f964c1} + + + {043ecb94-f240-4986-94b0-bc5bbd415a82} + + + {ee9737d2-69c7-44da-b9c7-539d18f9d4b4} + + + + + common + + + common + + + common + + + server + + + server + + + common + + + server + + + server + + + server + + + zstd + + + zstd + + + zstd + + + zstd + + + zstd + + + zstd + + + zstd + + + zstd + + + zstd + + + zstd + + + zstd + + + zstd + + + zstd + + + zstd + + + zstd + + + zstd + + + zstd + 
// Prints the command-line help to stderr and terminates the process with
// exit code `e`. Never returns.
void print_usage_exit(int e)
{
    fprintf(stderr, "Extract statistics from a trace to a CSV format\n");
    fprintf(stderr, "Usage:\n");
    fprintf(stderr, " extract [OPTION...] <trace file>\n");
    fprintf(stderr, "\n");
    fprintf(stderr, " -h, --help Print usage\n");
    // The quotes are escaped: two adjacent literals would print "(default: )".
    fprintf(stderr, " -f, --filter arg Filter zone names (default: \"\")\n");
    fprintf(stderr, " -s, --sep arg CSV separator (default: ,)\n");
    fprintf(stderr, " -c, --case Case sensitive filtering\n");
    fprintf(stderr, " -e, --self Get self times\n");
    fprintf(stderr, " -u, --unwrap Report each zone event\n");

    exit(e);
}

// Parsed command-line options. The string members point into argv (or at
// string literals for the defaults) and are never owned.
struct Args {
    const char* filter;      // substring a zone name must contain; "" selects every zone
    const char* separator;   // text placed between CSV fields
    const char* trace_file;  // path of the trace to read
    bool case_sensitive;     // match `filter` case-sensitively
    bool self_time;          // report self time instead of inclusive time
    bool unwrap;             // print one row per zone event instead of aggregates
};

// Parses the command line into an Args value.
//
// Exactly one positional argument (the trace file) is required. On --help the
// usage text is printed and the process exits with 0; on any malformed input
// (no arguments, unknown option, wrong number of positionals) it exits with 1.
Args parse_args(int argc, char** argv)
{
    if (argc == 1)
    {
        print_usage_exit(1);
    }

    Args args = { "", ",", "", false, false, false };

    // --filter and --sep take a mandatory value, matching "f:" and "s:" in the
    // short option string. With optional_argument, "--filter foo" would leave
    // optarg NULL and treat "foo" as a positional argument.
    struct option long_opts[] = {
        { "help", no_argument, NULL, 'h' },
        { "filter", required_argument, NULL, 'f' },
        { "sep", required_argument, NULL, 's' },
        { "case", no_argument, NULL, 'c' },
        { "self", no_argument, NULL, 'e' },
        { "unwrap", no_argument, NULL, 'u' },
        { NULL, 0, NULL, 0 }
    };

    int c;
    while ((c = getopt_long(argc, argv, "hf:s:ceu", long_opts, NULL)) != -1)
    {
        switch (c)
        {
        case 'h':
            print_usage_exit(0);
            break;
        case 'f':
            args.filter = optarg;
            break;
        case 's':
            args.separator = optarg;
            break;
        case 'c':
            args.case_sensitive = true;
            break;
        case 'e':
            args.self_time = true;
            break;
        case 'u':
            args.unwrap = true;
            break;
        default:
            print_usage_exit(1);
            break;
        }
    }

    // Exactly one positional argument must remain after the options.
    if (argc != optind + 1)
    {
        print_usage_exit(1);
    }

    args.trace_file = argv[optind];

    return args;
}
); + } + + return new_s.find(new_term) != std::string::npos; +} + +const char* get_name(int32_t id, const tracy::Worker& worker) +{ + auto& srcloc = worker.GetSourceLocation(id); + return worker.GetString(srcloc.name.active ? srcloc.name : srcloc.function); +} + +template +std::string join(const T& v, const char* sep) { + std::ostringstream s; + for (const auto& i : v) { + if (&i != &v[0]) { + s << sep; + } + s << i; + } + return s.str(); +} + +// From TracyView.cpp +int64_t GetZoneChildTimeFast( + const tracy::Worker& worker, + const tracy::ZoneEvent& zone +){ + int64_t time = 0; + if( zone.HasChildren() ) + { + auto& children = worker.GetZoneChildren( zone.Child() ); + if( children.is_magic() ) + { + auto& vec = *(tracy::Vector*)&children; + for( auto& v : vec ) + { + assert( v.IsEndValid() ); + time += v.End() - v.Start(); + } + } + else + { + for( auto& v : children ) + { + assert( v->IsEndValid() ); + time += v->End() - v->Start(); + } + } + } + return time; +} + +int main(int argc, char** argv) +{ +#ifdef _WIN32 + if (!AttachConsole(ATTACH_PARENT_PROCESS)) + { + AllocConsole(); + SetConsoleMode(GetStdHandle(STD_OUTPUT_HANDLE), 0x07); + } +#endif + + Args args = parse_args(argc, argv); + + auto f = std::unique_ptr( + tracy::FileRead::Open(args.trace_file) + ); + if (!f) + { + fprintf(stderr, "Could not open file %s\n", args.trace_file); + return 1; + } + + auto worker = tracy::Worker(*f); + + while (!worker.AreSourceLocationZonesReady()) + { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + + auto& slz = worker.GetSourceLocationZones(); + tracy::Vector slz_selected; + slz_selected.reserve(slz.size()); + + uint32_t total_cnt = 0; + for(auto it = slz.begin(); it != slz.end(); ++it) + { + if(it->second.total != 0) + { + ++total_cnt; + if(args.filter[0] == '\0') + { + slz_selected.push_back_no_space_check(it); + } + else + { + auto name = get_name(it->first, worker); + if(is_substring(args.filter, name, args.case_sensitive)) + { + 
slz_selected.push_back_no_space_check(it); + } + } + } + } + + std::vector columns; + if (args.unwrap) + { + columns = { + "name", "src_file", "src_line", "ns_since_start", "exec_time_ns" + }; + } + else + { + columns = { + "name", "src_file", "src_line", "total_ns", "total_perc", + "counts", "mean_ns", "min_ns", "max_ns", "std_ns" + }; + } + std::string header = join(columns, args.separator); + printf("%s\n", header.data()); + + const auto last_time = worker.GetLastTime(); + for(auto& it : slz_selected) + { + std::vector values(columns.size()); + + values[0] = get_name(it->first, worker); + + const auto& srcloc = worker.GetSourceLocation(it->first); + values[1] = worker.GetString(srcloc.file); + values[2] = std::to_string(srcloc.line); + + const auto& zone_data = it->second; + + if (args.unwrap) + { + int i = 0; + for (const auto& zone_thread_data : zone_data.zones) { + const auto zone_event = zone_thread_data.Zone(); + const auto start = zone_event->Start(); + const auto end = zone_event->End(); + + values[3] = std::to_string(start); + + auto timespan = end - start; + if (args.self_time) { + timespan -= GetZoneChildTimeFast(worker, *zone_event); + } + values[4] = std::to_string(timespan); + + std::string row = join(values, args.separator); + printf("%s\n", row.data()); + } + } + else + { + const auto time = args.self_time ? zone_data.selfTotal : zone_data.total; + values[3] = std::to_string(time); + values[4] = std::to_string(100. * time / last_time); + + values[5] = std::to_string(zone_data.zones.size()); + + const auto avg = (args.self_time ? zone_data.selfTotal : zone_data.total) + / zone_data.zones.size(); + values[6] = std::to_string(avg); + + const auto tmin = args.self_time ? zone_data.selfMin : zone_data.min; + const auto tmax = args.self_time ? zone_data.selfMax : zone_data.max; + values[7] = std::to_string(tmin); + values[8] = std::to_string(tmax); + + const auto sz = zone_data.zones.size(); + const auto ss = zone_data.sumSq + - 2. 
* zone_data.total * avg + + avg * avg * sz; + const auto std = sqrt(ss / (sz - 1)); + values[9] = std::to_string(std); + + std::string row = join(values, args.separator); + printf("%s\n", row.data()); + } + } + + return 0; +} diff --git a/capture/src/getopt.c b/getopt/getopt.c similarity index 100% rename from capture/src/getopt.c rename to getopt/getopt.c diff --git a/capture/src/getopt.h b/getopt/getopt.h similarity index 100% rename from capture/src/getopt.h rename to getopt/getopt.h diff --git a/manual/tracy.tex b/manual/tracy.tex index 00123503..568471c8 100644 --- a/manual/tracy.tex +++ b/manual/tracy.tex @@ -104,6 +104,7 @@ Hello and welcome to the Tracy Profiler user manual! Here you will find all the \item Chapter~\ref{client}, \emph{\nameref{client}}, provides information on how to instrument your application, in order to retrieve useful profiling data. \item Chapter~\ref{capturing}, \emph{\nameref{capturing}}, goes into more detail on how the profiling information can be captured and stored on disk. \item Chapter~\ref{analyzingdata}, \emph{\nameref{analyzingdata}}, guides you through the graphical user interface of the profiler. +\item Chapter~\ref{csvexport}, \emph{\nameref{csvexport}}, explains how to export some zone timing statistics into a CSV format. \item Chapter~\ref{importingdata}, \emph{\nameref{importingdata}}, documents how to import data from other profilers. \item Chapter~\ref{configurationfiles}, \emph{\nameref{configurationfiles}}, gives information on the profiler settings. \end{itemize} @@ -1184,7 +1185,7 @@ This requirement is relaxed in the on-demand mode (section~\ref{ondemand}), beca Tracy provides bindings for profiling OpenGL, Vulkan, Direct3D 12 and OpenCL execution time on GPU. -Note that the CPU and GPU timers may be not synchronized. You can correct the resulting desynchronization in the profiler's options (section~\ref{options}). 
+Note that the CPU and GPU timers may not be synchronized, unless a calibrated context is created. Since availability of calibrated contexts is limited, you can correct the desynchronization of uncalibrated contexts in the profiler's options (section~\ref{options}). \subsubsection{OpenGL} @@ -1217,6 +1218,12 @@ To mark a GPU zone use the \texttt{TracyVkZone(ctx, cmdbuf, name)} macro, where You also need to periodically collect the GPU events using the \texttt{TracyVkCollect(ctx, cmdbuf)} macro\footnote{It is considerably faster than the OpenGL's \texttt{TracyGpuCollect}.}. The provided command buffer must be in the recording state and outside of a render pass instance. +\subparagraph{Calibrated context} + +In order to maintain synchronization between CPU and GPU time domains, you will need to enable the \texttt{VK\_EXT\_calibrated\_timestamps} device extension and retrieve the following function pointers: \texttt{vkGetPhysicalDeviceCalibrateableTimeDomainsEXT} and \texttt{vkGetCalibratedTimestampsEXT}. + +To enable a calibrated context, replace the macro \texttt{TracyVkContext} with \texttt{TracyVkContextCalibrated} and pass the two functions as additional parameters, in the order specified above. + \subsubsection{Direct3D 12} To enable Direct3D 12 support, include the \texttt{tracy/TracyD3D12.hpp} header file. Tracing Direct3D 12 queues is nearly on par with the Vulkan implementation, where a \texttt{TracyD3D12Ctx} is returned from a call to \texttt{TracyD3D12Context(device, queue)}, which should be later cleaned up with the \texttt{TracyD3D12Destroy(ctx)} macro. Multiple contexts can be created, each with any queue type. @@ -1227,6 +1234,10 @@ Using GPU zones is the same as the Vulkan implementation, where the \texttt{Trac The macro \texttt{TracyD3D12NewFrame(ctx)} is used to mark a new frame, and should appear before or after recording command lists, similar to \texttt{FrameMark}. 
This macro is a key component that enables automatic query data synchronization, so the user doesn't have to worry about synchronizing GPU execution before invoking a collection. Event data can then be collected and sent to the profiler using the \texttt{TracyD3D12Collect(ctx)} macro. +Note that due to artifacts from dynamic frequency scaling, GPU profiling may be slightly inaccurate. To counter this, \texttt{ID3D12Device::SetStablePowerState()} can be used to enable accurate profiling, at the expense of some performance. If the machine is not in developer mode, the device will be removed upon calling. Do not use this in shipping code. + +Direct3D 12 contexts are always calibrated. + \subsubsection{OpenCL} OpenCL support is achieved by including the \texttt{tracy/TracyOpenCL.hpp} header file. Tracing OpenCL requires the creation of a Tracy OpenCL context using the macro \texttt{TracyCLContext(context, device)}, which will return an instance of \texttt{TracyCLCtx} object that must be used when creating zones. The specified \texttt{device} must be part of the \texttt{context}. Cleanup is performed using the \texttt{TracyCLDestroy(ctx)} macro. Although not common, it is possible to create multiple OpenCL contexts for the same application. 
@@ -1913,9 +1924,8 @@ The main profiler window is split into three sections, as seen on figure~\ref{ma \draw (0.6, -0.7) node[anchor=north west] {Frames: 364}; \draw[rounded corners=5pt] (2.8, -0.7) rectangle+(0.4, -0.5) node [midway] {\faCaretRight}; \draw[rounded corners=5pt] (3.3, -0.7) rectangle+(0.5, -0.5) node [midway] {\faCaretDown}; -\draw[rounded corners=5pt] (3.9, -0.7) rectangle+(0.5, -0.5) node [midway] {\faCrosshairs}; -\draw (4.5, -0.65) node[anchor=north west] {\faEye~52.7 ms \hspace{5pt} \faDatabase~6.06 s \hspace{5pt} \faMemory~195.2 MB}; -\draw[dashed] (10.6, -0.75) rectangle+(3.2, -0.4) node[midway] {Notification area}; +\draw (4, -0.65) node[anchor=north west] {\faEye~52.7 ms \hspace{5pt} \faDatabase~6.06 s \hspace{5pt} \faMemory~195.2 MB}; +\draw[dashed] (10.1, -0.75) rectangle+(3.2, -0.4) node[midway] {Notification area}; \draw (0.1, -1.3) rectangle+(15.3, -1) node [midway] {Frame time graph}; \draw (0.1, -2.4) rectangle+(15.3, -3) node [midway] {Timeline view}; @@ -1950,9 +1960,9 @@ The control menu (top row of buttons) provides access to various features of the \end{itemize} \end{itemize} -The frame information block consists of four elements: the current frame set name along with the number of captured frames, the two navigational buttons \faCaretLeft{} and \faCaretRight{}, which allow you to focus the timeline view on the previous or next frame, and the frame set selection button \faCaretDown{}, which is used to switch to a another frame set\footnote{See section~\ref{framesets} for another way to change the active frame set.}. The \emph{\faCrosshairs{}~Go to frame} button allows zooming the timeline view on the specified frame. For more information about marking frames, see section~\ref{markingframes}. 
+The frame information block consists of four elements: the current frame set name along with the number of captured frames (click on it with the \LMB{}~left mouse button to go to a specified frame), the two navigational buttons \faCaretLeft{} and \faCaretRight{}, which allow you to focus the timeline view on the previous or next frame, and the frame set selection button \faCaretDown{}, which is used to switch to another frame set\footnote{See section~\ref{framesets} for another way to change the active frame set.}. For more information about marking frames, see section~\ref{markingframes}. -The next three items show the \emph{\faEye{}~view time range}, the \emph{\faDatabase{}~time span} of the whole capture, and the \emph{\faMemory{}~memory usage} of the profiler. +The next three items show the \emph{\faEye{}~view time range}, the \emph{\faDatabase{}~time span} of the whole capture (clicking on it with the \MMB{} middle mouse button will set the view range to the entire capture), and the \emph{\faMemory{}~memory usage} of the profiler. \paragraph{Notification area} @@ -2366,7 +2376,7 @@ In this window you can set various trace-related options. The timeline view migh \begin{itemize} \item \emph{\faSignature{} Draw CPU usage graph} -- You can disable drawing of the CPU usage graph here. \end{itemize} -\item \emph{\faEye{} Draw GPU zones} -- Allows disabling display of OpenGL/Vulkan/Direct3D/OpenCL zones. The \emph{GPU zones} drop-down allows disabling individual GPU contexts and setting CPU/GPU drift offsets (see section~\ref{gpuprofiling} for more information). The \emph{\faRobot~Auto} button automatically measures the GPU drift value\footnote{There is an assumption that drift is linear. Automated measurement calculates and removes change over time in delay-to-execution of GPU zones. Resulting value may still be incorrect.}. +\item \emph{\faEye{} Draw GPU zones} -- Allows disabling display of OpenGL/Vulkan/Direct3D/OpenCL zones.
The \emph{GPU zones} drop-down allows disabling individual GPU contexts and setting CPU/GPU drift offsets of uncalibrated contexts (see section~\ref{gpuprofiling} for more information). The \emph{\faRobot~Auto} button automatically measures the GPU drift value\footnote{There is an assumption that drift is linear. Automated measurement calculates and removes change over time in delay-to-execution of GPU zones. Resulting value may still be incorrect.}. \item \emph{\faMicrochip{} Draw CPU zones} -- Determines whether CPU zones are displayed. \begin{itemize} \item \emph{\faGhost{} Draw ghost zones} -- Controls if ghost zones should be displayed in threads which don't have any instrumented zones available. @@ -3011,6 +3021,37 @@ This window lists all annotations marked on the timeline. Each annotation is pre \label{figannlist} \end{figure} +\section{Exporting zone statistics to CSV} +\label{csvexport} + +You can use a command-line utility in the \texttt{csvexport} directory to export basic zone statistics from a saved trace into a CSV format. +The tool requires a single .tracy file as an argument and prints results into the standard output (stdout) from where you can redirect it into a file or use it as an input into another tool. 
+By default, the utility will list all zones with the following columns: + +\begin{itemize} + \item \texttt{name} -- Zone name + \item \texttt{src\_file} -- Source file where the zone was set + \item \texttt{src\_line} -- Line in the source file where the zone was set + \item \texttt{total\_ns} -- Total zone time in nanoseconds + \item \texttt{total\_perc} -- Total zone time as a percentage of the program's execution time + \item \texttt{counts} -- Zone count + \item \texttt{mean\_ns} -- Mean zone time (equivalent to MPTC in the profiler GUI) in nanoseconds + \item \texttt{min\_ns} -- Minimum zone time in nanoseconds + \item \texttt{max\_ns} -- Maximum zone time in nanoseconds + \item \texttt{std\_ns} -- Standard deviation of the zone time in nanoseconds +\end{itemize} + +You can customize the output with the following command line options: + +\begin{itemize} + \item \texttt{-h, -\hspace{-1.25ex} -help} -- display a help message + \item \texttt{-f, -\hspace{-1.25ex} -filter } -- filter the zone names + \item \texttt{-c, -\hspace{-1.25ex} -case} -- make the name filtering case sensitive + \item \texttt{-s, -\hspace{-1.25ex} -sep } -- customize the CSV separator (default is ``\texttt{,}'') + \item \texttt{-e, -\hspace{-1.25ex} -self} -- use self time (equivalent to the ``Self time'' toggle in the profiler GUI) + \item \texttt{-u, -\hspace{-1.25ex} -unwrap} -- report each zone individually; this will discard the statistics columns and instead report, for each zone entry, its timestamp and duration.
+\end{itemize} + \section{Importing external profiling data} \label{importingdata} diff --git a/server/TracyEvent.hpp b/server/TracyEvent.hpp index c1401fbd..63b74f86 100644 --- a/server/TracyEvent.hpp +++ b/server/TracyEvent.hpp @@ -562,10 +562,13 @@ struct GpuCtxData int64_t timeDiff; uint64_t thread; uint64_t count; - uint8_t accuracyBits; float period; GpuContextType type; bool hasPeriod; + bool hasCalibration; + int64_t calibratedGpuTime; + int64_t calibratedCpuTime; + double calibrationMod; unordered_flat_map threadData; short_ptr query[64*1024]; }; diff --git a/server/TracyVersion.hpp b/server/TracyVersion.hpp index 9f745627..77825c70 100644 --- a/server/TracyVersion.hpp +++ b/server/TracyVersion.hpp @@ -7,7 +7,7 @@ namespace Version { enum { Major = 0 }; enum { Minor = 7 }; -enum { Patch = 0 }; +enum { Patch = 1 }; } } diff --git a/server/TracyView.cpp b/server/TracyView.cpp index d142ac78..08fc7cda 100644 --- a/server/TracyView.cpp +++ b/server/TracyView.cpp @@ -616,6 +616,7 @@ bool View::DrawImpl() { ImGui::PopStyleColor(); } + if( ImGui::IsItemClicked() ) ImGui::OpenPopup( "GoToFramePopup" ); } ImGui::SameLine(); if( ImGui::SmallButton( " " ICON_FA_CARET_RIGHT " " ) ) ZoomToNextFrame(); @@ -639,14 +640,6 @@ bool View::DrawImpl() } ImGui::EndCombo(); } - ImGui::SameLine(); - if( ImGui::Button( ICON_FA_CROSSHAIRS ) ) ImGui::OpenPopup( "GoToFramePopup" ); - if( ImGui::IsItemHovered() ) - { - ImGui::BeginTooltip(); - ImGui::TextUnformatted( "Go to frame" ); - ImGui::EndTooltip(); - } if( ImGui::BeginPopup( "GoToFramePopup" ) ) { static int frameNum = 1; @@ -691,6 +684,10 @@ bool View::DrawImpl() ImGui::BeginTooltip(); ImGui::Text( "Time span" ); ImGui::EndTooltip(); + if( ImGui::IsItemClicked( 2 ) ) + { + ZoomToRange( 0, m_worker.GetLastTime() ); + } } ImGui::SameLine(); dx = ImGui::GetCursorPosX() - cx; @@ -2170,6 +2167,13 @@ bool View::DrawZoneFrames( const FrameData& frames ) auto tx = ImGui::CalcTextSize( buf ).x; uint32_t color = ( frames.name == 0 && i 
== 0 ) ? redColor : activeColor; + if( fsz - 7 <= tx ) + { + static char tmp[256]; + sprintf( tmp, "%s (%s)", RealToString( i ), TimeToString( ftime ) ); + buf = tmp; + tx = ImGui::CalcTextSize( buf ).x; + } if( fsz - 7 <= tx ) { buf = TimeToString( ftime ); @@ -2595,17 +2599,10 @@ void View::DrawZones() } } TextFocused( "Zone count:", RealToString( v->count ) ); - //TextFocused( "Top-level zones:", RealToString( v->timeline.size() ) ); if( isMultithreaded ) { TextFocused( "Timestamp accuracy:", TimeToString( v->period ) ); } - else - { - TextDisabledUnformatted( "Query accuracy bits:" ); - ImGui::SameLine(); - ImGui::Text( "%i", v->accuracyBits ); - } ImGui::EndTooltip(); } } @@ -7913,80 +7910,83 @@ void View::DrawOptions() { ImGui::TextDisabled( "%s threads", RealToString( gpuData[i]->threadData.size() ) ); } - ImGui::TreePush(); - auto& drift = GpuDrift( gpuData[i] ); - ImGui::SetNextItemWidth( 120 ); - ImGui::PushID( i ); - ImGui::InputInt( "Drift (ns/s)", &drift ); - ImGui::PopID(); - if( timeline.size() > 1 ) + if( !gpuData[i]->hasCalibration ) { - ImGui::SameLine(); - if( ImGui::Button( ICON_FA_ROBOT " Auto" ) ) + ImGui::TreePush(); + auto& drift = GpuDrift( gpuData[i] ); + ImGui::SetNextItemWidth( 120 ); + ImGui::PushID( i ); + ImGui::InputInt( "Drift (ns/s)", &drift ); + ImGui::PopID(); + if( timeline.size() > 1 ) { - size_t lastidx = 0; - if( timeline.is_magic() ) + ImGui::SameLine(); + if( ImGui::Button( ICON_FA_ROBOT " Auto" ) ) { - auto& tl = *((Vector*)&timeline); - for( size_t j=tl.size()-1; j > 0; j-- ) + size_t lastidx = 0; + if( timeline.is_magic() ) { - if( tl[j].GpuEnd() >= 0 ) + auto& tl = *((Vector*)&timeline); + for( size_t j=tl.size()-1; j > 0; j-- ) { - lastidx = j; - break; + if( tl[j].GpuEnd() >= 0 ) + { + lastidx = j; + break; + } } } - } - else - { - for( size_t j=timeline.size()-1; j > 0; j-- ) + else { - if( timeline[j]->GpuEnd() >= 0 ) + for( size_t j=timeline.size()-1; j > 0; j-- ) { - lastidx = j; - break; + if( 
timeline[j]->GpuEnd() >= 0 ) + { + lastidx = j; + break; + } } } - } - enum { NumSlopes = 10000 }; - std::random_device rd; - std::default_random_engine gen( rd() ); - std::uniform_int_distribution dist( 0, lastidx - 1 ); - float slopes[NumSlopes]; - size_t idx = 0; - if( timeline.is_magic() ) - { - auto& tl = *((Vector*)&timeline); - do + enum { NumSlopes = 10000 }; + std::random_device rd; + std::default_random_engine gen( rd() ); + std::uniform_int_distribution dist( 0, lastidx - 1 ); + float slopes[NumSlopes]; + size_t idx = 0; + if( timeline.is_magic() ) { - const auto p0 = dist( gen ); - const auto p1 = dist( gen ); - if( p0 != p1 ) + auto& tl = *((Vector*)&timeline); + do { - slopes[idx++] = float( 1.0 - double( tl[p1].GpuStart() - tl[p0].GpuStart() ) / double( tl[p1].CpuStart() - tl[p0].CpuStart() ) ); + const auto p0 = dist( gen ); + const auto p1 = dist( gen ); + if( p0 != p1 ) + { + slopes[idx++] = float( 1.0 - double( tl[p1].GpuStart() - tl[p0].GpuStart() ) / double( tl[p1].CpuStart() - tl[p0].CpuStart() ) ); + } } + while( idx < NumSlopes ); } - while( idx < NumSlopes ); - } - else - { - do + else { - const auto p0 = dist( gen ); - const auto p1 = dist( gen ); - if( p0 != p1 ) + do { - slopes[idx++] = float( 1.0 - double( timeline[p1]->GpuStart() - timeline[p0]->GpuStart() ) / double( timeline[p1]->CpuStart() - timeline[p0]->CpuStart() ) ); + const auto p0 = dist( gen ); + const auto p1 = dist( gen ); + if( p0 != p1 ) + { + slopes[idx++] = float( 1.0 - double( timeline[p1]->GpuStart() - timeline[p0]->GpuStart() ) / double( timeline[p1]->CpuStart() - timeline[p0]->CpuStart() ) ); + } } + while( idx < NumSlopes ); } - while( idx < NumSlopes ); + std::sort( slopes, slopes+NumSlopes ); + drift = int( 1000000000 * -slopes[NumSlopes/2] ); } - std::sort( slopes, slopes+NumSlopes ); - drift = int( 1000000000 * -slopes[NumSlopes/2] ); } + ImGui::TreePop(); } - ImGui::TreePop(); } ImGui::TreePop(); } @@ -8527,11 +8527,12 @@ void View::DrawMessages() size_t tsz = 
0; for( const auto& t : m_threadOrder ) if( !t->messages.empty() ) tsz++; - m_messageFilter.Draw( ICON_FA_FILTER " Filter messages", 200 ); + bool filterChanged = m_messageFilter.Draw( ICON_FA_FILTER " Filter messages", 200 ); ImGui::SameLine(); if( ImGui::Button( ICON_FA_BACKSPACE " Clear" ) ) { m_messageFilter.Clear(); + filterChanged = true; } ImGui::SameLine(); ImGui::Spacing(); @@ -8549,6 +8550,7 @@ void View::DrawMessages() ImGui::Checkbox( ICON_FA_IMAGE " Show frame images", &m_showMessageImages ); } + bool threadsChanged = false; auto expand = ImGui::TreeNode( ICON_FA_RANDOM " Visible threads:" ); ImGui::SameLine(); ImGui::TextDisabled( "(%zu)", tsz ); @@ -8563,6 +8565,7 @@ void View::DrawMessages() { VisibleMsgThread( t->id ) = true; } + threadsChanged = true; } ImGui::SameLine(); if( ImGui::SmallButton( "Unselect all" ) ) @@ -8571,6 +8574,7 @@ void View::DrawMessages() { VisibleMsgThread( t->id ) = false; } + threadsChanged = true; } int idx = 0; @@ -8581,7 +8585,10 @@ void View::DrawMessages() const auto threadColor = GetThreadColor( t->id, 0 ); SmallColorBox( threadColor ); ImGui::SameLine(); - SmallCheckbox( m_worker.GetThreadName( t->id ), &VisibleMsgThread( t->id ) ); + if( SmallCheckbox( m_worker.GetThreadName( t->id ), &VisibleMsgThread( t->id ) ) ) + { + threadsChanged = true; + } ImGui::PopID(); ImGui::SameLine(); ImGui::TextDisabled( "(%s)", RealToString( t->messages.size() ) ); @@ -8594,6 +8601,78 @@ void View::DrawMessages() ImGui::TreePop(); } + const bool msgsChanged = msgs.size() != m_prevMessages; + if( filterChanged || threadsChanged ) + { + m_msgList.reserve( msgs.size() ); + m_msgList.clear(); + if( m_messageFilter.IsActive() ) + { + for( size_t i=0; ithread ); + if( VisibleMsgThread( tid ) ) + { + const auto text = m_worker.GetString( msgs[i]->ref ); + if( m_messageFilter.PassFilter( text ) ) + { + m_msgList.push_back_no_space_check( uint32_t( i ) ); + } + } + } + } + else + { + for( size_t i=0; ithread ); + if( VisibleMsgThread( tid ) 
) + { + m_msgList.push_back_no_space_check( uint32_t( i ) ); + } + } + } + m_visibleMessages = m_msgList.size(); + if( msgsChanged ) m_prevMessages = msgs.size(); + } + else if( msgsChanged ) + { + assert( m_prevMessages < msgs.size() ); + m_msgList.reserve( msgs.size() ); + if( m_messageFilter.IsActive() ) + { + for( size_t i=m_prevMessages; ithread ); + if( VisibleMsgThread( tid ) ) + { + const auto text = m_worker.GetString( msgs[i]->ref ); + if( m_messageFilter.PassFilter( text ) ) + { + m_msgList.push_back_no_space_check( uint32_t( i ) ); + } + } + } + } + else + { + for( size_t i=m_prevMessages; ithread ); + if( VisibleMsgThread( tid ) ) + { + m_msgList.push_back_no_space_check( uint32_t( i ) ); + } + } + } + m_visibleMessages = m_msgList.size(); + m_prevMessages = msgs.size(); + } + bool hasCallstack = m_worker.GetCallstackFrameCount() != 0; ImGui::Separator(); ImGui::BeginChild( "##messages" ); @@ -8627,85 +8706,25 @@ void View::DrawMessages() } ImGui::Separator(); - int msgcnt = 0; - const auto filterActive = m_messageFilter.IsActive(); int idx = 0; - for( const auto& v : msgs ) + if( m_msgToFocus ) { - const auto tid = m_worker.DecompressThread( v->thread ); - if( VisibleMsgThread( tid ) ) + for( const auto& msgIdx : m_msgList ) { - const auto text = m_worker.GetString( v->ref ); - if( !filterActive || m_messageFilter.PassFilter( text ) ) + DrawMessageLine( *msgs[msgIdx], hasCallstack, idx ); + } + } + else + { + ImGuiListClipper clipper( m_msgList.size() ); + while( clipper.Step() ) + { + for( auto i=clipper.DisplayStart; itime ), m_msgHighlight == v, ImGuiSelectableFlags_SpanAllColumns | ImGuiSelectableFlags_AllowItemOverlap ) ) - { - CenterAtTime( v->time ); - } - if( ImGui::IsItemHovered() ) - { - m_msgHighlight = v; - - if( m_showMessageImages ) - { - const auto frameIdx = m_worker.GetFrameRange( *m_frames, v->time, v->time ).first; - auto fi = m_worker.GetFrameImage( *m_frames, frameIdx ); - if( fi ) - { - ImGui::BeginTooltip(); - if( fi != 
m_frameTexturePtr ) - { - if( !m_frameTexture ) m_frameTexture = MakeTexture(); - UpdateTexture( m_frameTexture, m_worker.UnpackFrameImage( *fi ), fi->w, fi->h ); - m_frameTexturePtr = fi; - } - if( fi->flip ) - { - ImGui::Image( m_frameTexture, ImVec2( fi->w, fi->h ), ImVec2( 0, 1 ), ImVec2( 1, 0 ) ); - } - else - { - ImGui::Image( m_frameTexture, ImVec2( fi->w, fi->h ) ); - } - ImGui::EndTooltip(); - } - } - } - if( m_msgToFocus == v ) - { - ImGui::SetScrollHereY(); - m_msgToFocus.Decay( nullptr ); - m_messagesScrollBottom = false; - } - ImGui::PopID(); - ImGui::NextColumn(); - SmallColorBox( GetThreadColor( tid, 0 ) ); - ImGui::SameLine(); - ImGui::TextUnformatted( m_worker.GetThreadName( tid ) ); - ImGui::SameLine(); - ImGui::TextDisabled( "(%s)", RealToString( tid ) ); - ImGui::NextColumn(); - ImGui::PushStyleColor( ImGuiCol_Text, v->color ); - ImGui::TextWrapped( "%s", text ); - ImGui::PopStyleColor(); - ImGui::NextColumn(); - if( hasCallstack ) - { - const auto cs = v->callstack.Val(); - if( cs != 0 ) - { - SmallCallstackButton( ICON_FA_ALIGN_JUSTIFY, cs, idx ); - ImGui::SameLine(); - DrawCallstackCalls( cs, 4 ); - } - ImGui::NextColumn(); - } - msgcnt++; + DrawMessageLine( *msgs[m_msgList[i]], hasCallstack, idx ); } } } - m_visibleMessages = msgcnt; if( m_worker.IsConnected() && ImGui::GetScrollY() >= ImGui::GetScrollMaxY() ) { @@ -8717,6 +8736,84 @@ void View::DrawMessages() ImGui::End(); } +void View::DrawMessageLine( const MessageData& msg, bool hasCallstack, int& idx ) +{ + const auto text = m_worker.GetString( msg.ref ); + const auto tid = m_worker.DecompressThread( msg.thread ); + ImGui::PushID( &msg ); + if( ImGui::Selectable( TimeToStringExact( msg.time ), m_msgHighlight == &msg, ImGuiSelectableFlags_SpanAllColumns | ImGuiSelectableFlags_AllowItemOverlap ) ) + { + CenterAtTime( msg.time ); + } + if( ImGui::IsItemHovered() ) + { + m_msgHighlight = &msg; + + if( m_showMessageImages ) + { + const auto frameIdx = m_worker.GetFrameRange( *m_frames, 
msg.time, msg.time ).first; + auto fi = m_worker.GetFrameImage( *m_frames, frameIdx ); + if( fi ) + { + ImGui::BeginTooltip(); + if( fi != m_frameTexturePtr ) + { + if( !m_frameTexture ) m_frameTexture = MakeTexture(); + UpdateTexture( m_frameTexture, m_worker.UnpackFrameImage( *fi ), fi->w, fi->h ); + m_frameTexturePtr = fi; + } + if( fi->flip ) + { + ImGui::Image( m_frameTexture, ImVec2( fi->w, fi->h ), ImVec2( 0, 1 ), ImVec2( 1, 0 ) ); + } + else + { + ImGui::Image( m_frameTexture, ImVec2( fi->w, fi->h ) ); + } + ImGui::EndTooltip(); + } + } + } + if( m_msgToFocus == &msg ) + { + ImGui::SetScrollHereY(); + m_msgToFocus.Decay( nullptr ); + m_messagesScrollBottom = false; + } + ImGui::PopID(); + ImGui::NextColumn(); + SmallColorBox( GetThreadColor( tid, 0 ) ); + ImGui::SameLine(); + ImGui::TextUnformatted( m_worker.GetThreadName( tid ) ); + ImGui::SameLine(); + ImGui::TextDisabled( "(%s)", RealToString( tid ) ); + ImGui::NextColumn(); + ImGui::PushStyleColor( ImGuiCol_Text, msg.color ); + const auto cw = ImGui::GetContentRegionAvail().x; + const auto tw = ImGui::CalcTextSize( text ).x; + ImGui::TextUnformatted( text ); + if( tw > cw && ImGui::IsItemHovered() ) + { + ImGui::SetNextWindowSize( ImVec2( 1000, 0 ) ); + ImGui::BeginTooltip(); + ImGui::TextWrapped( "%s", text ); + ImGui::EndTooltip(); + } + ImGui::PopStyleColor(); + ImGui::NextColumn(); + if( hasCallstack ) + { + const auto cs = msg.callstack.Val(); + if( cs != 0 ) + { + SmallCallstackButton( ICON_FA_ALIGN_JUSTIFY, cs, idx ); + ImGui::SameLine(); + DrawCallstackCalls( cs, 4 ); + } + ImGui::NextColumn(); + } +} + uint64_t View::GetSelectionTarget( const Worker::ZoneThreadData& ev, FindZone::GroupBy groupBy ) const { switch( groupBy ) diff --git a/server/TracyView.hpp b/server/TracyView.hpp index bfaf5738..983e7ab1 100644 --- a/server/TracyView.hpp +++ b/server/TracyView.hpp @@ -155,6 +155,7 @@ private: int DrawCpuData( int offset, double pxns, const ImVec2& wpos, bool hover, float yMin, float yMax ); void 
DrawOptions(); void DrawMessages(); + void DrawMessageLine( const MessageData& msg, bool hasCallstack, int& idx ); void DrawFindZone(); void DrawStatistics(); void DrawMemory(); @@ -329,6 +330,8 @@ private: bool m_showMessageImages = false; ImGuiTextFilter m_statisticsFilter; int m_visibleMessages = 0; + size_t m_prevMessages = 0; + Vector m_msgList; bool m_disconnectIssued = false; DecayValue m_drawThreadMigrations = 0; DecayValue m_drawThreadHighlight = 0; diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index 5d19a8b6..4fa2ec62 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -923,14 +923,25 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks ) for( uint64_t i=0; i(); - if( fileVer >= FileVersion( 0, 6, 14 ) ) + if( fileVer >= FileVersion( 0, 7, 1 ) ) { - f.Read5( ctx->thread, ctx->accuracyBits, ctx->count, ctx->period, ctx->type ); + uint8_t calibration; + f.Read5( ctx->thread, calibration, ctx->count, ctx->period, ctx->type ); + ctx->hasCalibration = calibration; } else { - f.Read4( ctx->thread, ctx->accuracyBits, ctx->count, ctx->period ); - ctx->type = ctx->thread == 0 ? GpuContextType::Vulkan : GpuContextType::OpenGl; + uint8_t accuracy; + if( fileVer >= FileVersion( 0, 6, 14 ) ) + { + f.Read5( ctx->thread, accuracy, ctx->count, ctx->period, ctx->type ); + } + else + { + f.Read4( ctx->thread, accuracy, ctx->count, ctx->period ); + ctx->type = ctx->thread == 0 ? 
GpuContextType::Vulkan : GpuContextType::OpenGl; + } + ctx->hasCalibration = false; } ctx->hasPeriod = ctx->period != 1.f; m_data.gpuCnt += ctx->count; @@ -3955,6 +3966,9 @@ bool Worker::Process( const QueueItem& ev ) case QueueType::GpuTime: ProcessGpuTime( ev.gpuTime ); break; + case QueueType::GpuCalibration: + ProcessGpuCalibration( ev.gpuCalibration ); + break; case QueueType::MemAlloc: ProcessMemAlloc( ev.memAlloc ); break; @@ -4886,15 +4900,19 @@ void Worker::ProcessGpuNewContext( const QueueGpuNewContext& ev ) gpuTime = int64_t( double( ev.period ) * ev.gpuTime ); // precision loss } + const auto cpuTime = TscTime( ev.cpuTime - m_data.baseTime ); auto gpu = m_slab.AllocInit(); memset( gpu->query, 0, sizeof( gpu->query ) ); - gpu->timeDiff = TscTime( ev.cpuTime - m_data.baseTime ) - gpuTime; + gpu->timeDiff = cpuTime - gpuTime; gpu->thread = ev.thread; - gpu->accuracyBits = ev.accuracyBits; gpu->period = ev.period; gpu->count = 0; gpu->type = ev.type; gpu->hasPeriod = ev.period != 1.f; + gpu->hasCalibration = ev.flags & GpuContextCalibration; + gpu->calibratedGpuTime = gpuTime; + gpu->calibratedCpuTime = cpuTime; + gpu->calibrationMod = 1.; m_data.gpuData.push_back( gpu ); m_gpuCtxMap[ev.context] = gpu; } @@ -5028,11 +5046,25 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev ) int64_t gpuTime; if( !ctx->hasPeriod ) { - gpuTime = t; + if( !ctx->hasCalibration ) + { + gpuTime = t + ctx->timeDiff; + } + else + { + gpuTime = int64_t( ( t - ctx->calibratedGpuTime ) * ctx->calibrationMod + ctx->calibratedCpuTime ); + } } else { - gpuTime = int64_t( double( ctx->period ) * t ); // precision loss + if( !ctx->hasCalibration ) + { + gpuTime = int64_t( double( ctx->period ) * t ) + ctx->timeDiff; // precision loss + } + else + { + gpuTime = int64_t( ( double( ctx->period ) * t - ctx->calibratedGpuTime ) * ctx->calibrationMod + ctx->calibratedCpuTime ); + } } auto zone = ctx->query[ev.queryId]; @@ -5041,25 +5073,46 @@ void Worker::ProcessGpuTime( const 
QueueGpuTime& ev ) if( zone->GpuStart() < 0 ) { - const auto time = ctx->timeDiff + gpuTime; - zone->SetGpuStart( time ); - if( m_data.lastTime < time ) m_data.lastTime = time; + zone->SetGpuStart( gpuTime ); + if( m_data.lastTime < gpuTime ) m_data.lastTime = gpuTime; ctx->count++; } else { - auto time = ctx->timeDiff + gpuTime; - if( time < zone->GpuStart() ) + if( gpuTime < zone->GpuStart() ) { auto tmp = zone->GpuStart(); - std::swap( time, tmp ); + std::swap( gpuTime, tmp ); zone->SetGpuStart( tmp ); } - zone->SetGpuEnd( time ); - if( m_data.lastTime < time ) m_data.lastTime = time; + zone->SetGpuEnd( gpuTime ); + if( m_data.lastTime < gpuTime ) m_data.lastTime = gpuTime; } } +void Worker::ProcessGpuCalibration( const QueueGpuCalibration& ev ) +{ + auto ctx = m_gpuCtxMap[ev.context]; + assert( ctx ); + assert( ctx->hasCalibration ); + + int64_t gpuTime; + if( !ctx->hasPeriod ) + { + gpuTime = ev.gpuTime; + } + else + { + gpuTime = int64_t( double( ctx->period ) * ev.gpuTime ); // precision loss + } + + const auto cpuDelta = ev.cpuDelta; + const auto gpuDelta = gpuTime - ctx->calibratedGpuTime; + ctx->calibrationMod = double( cpuDelta ) / gpuDelta; + ctx->calibratedGpuTime = gpuTime; + ctx->calibratedCpuTime = TscTime( ev.cpuTime - m_data.baseTime ); +} + void Worker::ProcessMemAlloc( const QueueMemAlloc& ev ) { const auto refTime = m_refTimeSerial + ev.time; @@ -6575,7 +6628,8 @@ void Worker::Write( FileWrite& f ) for( auto& ctx : m_data.gpuData ) { f.Write( &ctx->thread, sizeof( ctx->thread ) ); - f.Write( &ctx->accuracyBits, sizeof( ctx->accuracyBits ) ); + uint8_t calibration = ctx->hasCalibration; + f.Write( &calibration, sizeof( calibration ) ); f.Write( &ctx->count, sizeof( ctx->count ) ); f.Write( &ctx->period, sizeof( ctx->period ) ); f.Write( &ctx->type, sizeof( ctx->type ) ); diff --git a/server/TracyWorker.hpp b/server/TracyWorker.hpp index 74a83c98..7390593d 100644 --- a/server/TracyWorker.hpp +++ b/server/TracyWorker.hpp @@ -643,6 +643,7 @@ 
private: tracy_force_inline void ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev, bool serial ); tracy_force_inline void ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev, bool serial ); tracy_force_inline void ProcessGpuTime( const QueueGpuTime& ev ); + tracy_force_inline void ProcessGpuCalibration( const QueueGpuCalibration& ev ); tracy_force_inline void ProcessMemAlloc( const QueueMemAlloc& ev ); tracy_force_inline bool ProcessMemFree( const QueueMemFree& ev ); tracy_force_inline void ProcessMemAllocCallstack( const QueueMemAlloc& ev );