From dae1908b51b18fce53f944ab1ffb876e3bdda39b Mon Sep 17 00:00:00 2001 From: "hans.dembinski@gmail.com" Date: Tue, 14 Feb 2017 00:12:12 +0000 Subject: [PATCH] improving benchmark and benchmark doc --- build/CMakeLists.txt | 1 + doc/Jamfile.v2 | 18 ++--- doc/benchmark.py | 66 +++++++++++++++++++ doc/benchmarks.qbk | 59 ++++------------- test/{speed_vs_numpy.py => speed_numpy.py.in} | 36 +++++----- 5 files changed, 110 insertions(+), 70 deletions(-) create mode 100644 doc/benchmark.py rename test/{speed_vs_numpy.py => speed_numpy.py.in} (85%) diff --git a/build/CMakeLists.txt b/build/CMakeLists.txt index 283658c9..20fa0516 100644 --- a/build/CMakeLists.txt +++ b/build/CMakeLists.txt @@ -115,4 +115,5 @@ add_test(detail_test detail_test) if(HAVE_PYTHON) configure_file(../test/python_suite_test.py.in python_suite_test.py) add_test(python_suite_test python_suite_test.py) + configure_file(../test/speed_numpy.py.in speed_numpy.py) endif() diff --git a/doc/Jamfile.v2 b/doc/Jamfile.v2 index ef795750..36bbcc2b 100644 --- a/doc/Jamfile.v2 +++ b/doc/Jamfile.v2 @@ -13,14 +13,17 @@ doxygen autodoc [ glob ../../../boost/histogram/storage/*.hpp ] : PREDEFINED=BOOST_HISTOGRAM_DOXYGEN - HIDE_UNDOC_MEMBERS=YES + EXTRACT_ALL=NO EXTRACT_PRIVATE=NO + HIDE_UNDOC_MEMBERS=YES + HIDE_UNDOC_CLASSES=YES ENABLE_PREPROCESSING=YES MACRO_EXPANSION=YES EXPAND_ONLY_PREDEF=YES SEARCH_INCLUDES=NO JAVADOC_AUTOBRIEF=YES USE_MATHJAX=YES + SORT_MEMBER_DOCS=NO ; exe speed @@ -30,17 +33,16 @@ exe speed "-std=c++11" ; -boostbook standalone +boostbook histogram : histogram.qbk : autodoc - toc.max.depth=2 - toc.section.depth=2 - chunk.section.depth=1 - boost.root=../../../.. - navig.graphics=1 + html:boost.root=../../../.. + html:boost.libraries=../../../../libs/libraries.htm + generate.section.toc.level=3 + chunk.first.sections=1 boost.mathjax=1 - $(images_location)/.. 
+ ; diff --git a/doc/benchmark.py b/doc/benchmark.py new file mode 100644 index 00000000..900d02a1 --- /dev/null +++ b/doc/benchmark.py @@ -0,0 +1,66 @@ +import numpy as np +import matplotlib.pyplot as plt +import sys +import re +from collections import defaultdict +from matplotlib.patches import Rectangle +from matplotlib.text import Text +from matplotlib.font_manager import FontProperties + +data = defaultdict(lambda:[]) +for line in open("perf.dat"): + r = re.search("([0-9])D\n", line) + if r: + dim = int(r.group(1)) + continue + if line.startswith("uniform"): + dist = "uniform" + continue + if line.startswith("normal"): + dist = "normal" + continue + label, time = line.strip().split(" ") + time = float(time) + data[(dim, dist)].append((label, time)) + +plt.figure(figsize=(10, 8)) +plt.subplots_adjust(left=0.12, right=0.92, top=0.95, bottom=0.1) +x = [] +y = [] +i = 0 +for k in sorted(data): + if k[1] == "uniform": + continue + v = data[k] + j = 0 + for label, t in sorted(v): + x.append(t) + i -= 1 + y.append(i) + z = float(j) / len(v) + col = ((1.0-z) * np.array((1.0, 0.0, 0.0)) + + z * np.array((1.0, 1.0, 0.0))) + if label == "root": + col = "k" + if "numpy" in label: + col = "0.5" + r = Rectangle((0, i), t, 1, facecolor=col) + tx = Text(-0.01, i+0.5, "%s" % label, + fontsize=17, va="center", ha="right", clip_on=False) + plt.gca().add_artist(r) + plt.gca().add_artist(tx) + j += 1 + i -= 1 + font0 = FontProperties() + font0.set_size(20) + font0.set_weight("bold") + tx = Text(-0.01, i+0.6, "%iD" % k[0], + fontproperties=font0, va="center", ha="right", clip_on=False) + plt.gca().add_artist(tx) +plt.ylim(0, i) +plt.xlim(0, 0.701) + +plt.tick_params("y", left="off", labelleft="off") +plt.xlabel("time (smaller is better)") + +plt.savefig("html/benchmark.png") \ No newline at end of file diff --git a/doc/benchmarks.qbk b/doc/benchmarks.qbk index 57c2a622..5f29bf25 100644 --- a/doc/benchmarks.qbk +++ b/doc/benchmarks.qbk @@ -2,58 +2,23 @@ The library is designed to be 
fast. While it is unlikely that the time spend in the histogram is limiting an application, we cannot predict how the library is used. -The following table shows results of a simple benchmark against +The following plot shows results of a benchmark on a 9 GHz Macbook Pro. Random numbers from a uniform and a normal distribution are filled into the histograms of 1, 3, and 6 dimensions. 100 bins are used for 1D, 100x100x100=10^6 for 3D, and 10x10x10x10x10x10=10^6 for 6D. Each test is repeated 50 times and the minimum time is taken. -* `TH1I`, `TH3I` and `THnI` of the [@https://root.cern.ch ROOT framework] +[$benchmark.png [width 5in] [height 5in]] -* `histogram` and `histogramdd` from the Python module `numpy` +[variablelist Plot legend: + [[root] [[@https://root.cern.ch ROOT classes] (`TH1I` for 1D, `TH3I` for 3D and `THnI` for 6D)]] + [[py:numpy] [numpy functions ([python]`numpy.histogram` for 1D, `numpy.histogramdd` for 3D and 6D)]] + [[py:hd_sd] [[classref boost::histogram::dynamic_histogram dynamic_histogram] with [classref boost::histogram::adaptive_storage adaptive_storage] called from Python]] + [[hs_ss] [[classref boost::histogram::static_histogram static_histogram] with [classref boost::histogram::container_storage container_storage]]] + [[hs_sd] [[classref boost::histogram::static_histogram static_histogram] with [classref boost::histogram::adaptive_storage adaptive_storage]]] + [[hd_ss] [[classref boost::histogram::dynamic_histogram dynamic_histogram] with [classref boost::histogram::container_storage container_storage]]] + [[hd_sd] [[classref boost::histogram::dynamic_histogram dynamic_histogram] with [classref boost::histogram::adaptive_storage adaptive_storage]]] -The benchmark against ROOT is implemented in C++, the benchmark against numpy in Python. - -[table:benchmark_res Test system: Intel Core i7-4500U CPU clocked at 1.8 GHz, 8 GB of DDR3 RAM - - [[distribution] [uniform] [normal]] - [[ - [table distribution - [[dimension]] - [[No. 
of fills ]] - [[C++: ROOT \[t/s\]]] - [[C++: boost \[t/s\]]] - [[Py: numpy \[t/s\] ]] - [[Py: boost \[t/s\] ]] - ] - ] - [[table uniform - [[1D ] [3D ] [6D ]] - [[12M ] [4M ] [2M ]] - [[0.127] [0.199] [0.185]] - [[0.172] [0.177] [0.155]] - [[0.825] [0.727] [0.436]] - [[0.209] [0.229] [0.192]] - ]] - [[table normal - [[1D ] [3D ] [6D ]] - [[12M ] [4M ] [2M ]] - [[0.168] [0.143] [0.179]] - [[0.172] [0.171] [0.150]] - [[0.824] [0.426] [0.401]] - [[0.207] [0.194] [0.168]] - ]]] ] +[classref boost::histogram::static_histogram static_histogram] is always faster than [classref boost::histogram::dynamic_histogram dynamic_histogram] and safer to use, as more checks are done at compile time. It is recommended when working in C++ only. [classref boost::histogram::adaptive_storage adaptive_storage] is faster than [classref boost::histogram::container_storage container_storage] for larger histograms, because it uses the cache more effectively due to its smaller memory consumption per bin. - -Remarks: - -* The comparison with ROOT puts ROOT at the advantage, since `TH1I` and `TH3I` are specialized classes for 1 dimension and 3 dimensions, not a general class for N dimensions. ROOT histograms also lack a comparably flexible system to define different binning schemes for each axis. - -* Large vectors are pre-allocated and with random numbers drawn from a uniform or normal distribution for all tests. In the timed part, these numbers are read from the vector and put into the histograms. This reduces the overhead merely to memory access. - -* The test with uniform random numbers never fills the overflow and underflow bins, while the test with random numbers from a normal distribution does. This explains some of the differences between the two distributions. - -* All tests are repeated 10 times, the minimum is shown. - - -TODO +The histograms in this library are mostly faster than the competition, in some cases by a factor of 2. 
Simultaneously they are more flexible, since binning strategies can be exchanged. The Python-wrapped histogram is slower than numpy's own specialized function for 1D, but beats numpy's general multi-dimensional function by a factor of 2 to 3. [endsect] \ No newline at end of file diff --git a/test/speed_vs_numpy.py b/test/speed_numpy.py.in similarity index 85% rename from test/speed_vs_numpy.py rename to test/speed_numpy.py.in index 169d34d3..5f510252 100755 --- a/test/speed_vs_numpy.py +++ b/test/speed_numpy.py.in @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!@PYTHON_EXECUTABLE@ ## ## Copyright 2015-2016 Hans Dembinski ## @@ -18,7 +18,7 @@ def compare_1d(n, distrib): best_numpy = float("infinity") best_boost = float("infinity") - for k in xrange(10): + for k in xrange(50): t = timer() w, xe = np.histogram(r, bins=100, range=(0.0, 1.0)) t = timer() - t @@ -31,9 +31,8 @@ def compare_1d(n, distrib): best_boost = min(t, best_boost) assert(np.all(w == np.array(h)[:-2])) - print "1D" - print "t[numpy] = %.3f" % best_numpy - print "t[boost] = %.3f" % best_boost + print "python:numpy %.3f" % best_numpy + print "python:hd_sd %.3f" % best_boost def compare_3d(n, distrib): if distrib == 0: @@ -44,7 +43,7 @@ def compare_3d(n, distrib): best_numpy = float("infinity") best_boost = float("infinity") - for k in xrange(10): + for k in xrange(50): t = timer() w, xe = np.histogramdd(r, bins=(100, 100, 100), range=((0.0, 1.0), @@ -62,9 +61,8 @@ def compare_3d(n, distrib): best_boost = min(t, best_boost) assert(np.all(w == np.array(h)[:-2,:-2,:-2])) - print "3D" - print "t[numpy] = %.3f" % best_numpy - print "t[boost] = %.3f" % best_boost + print "python:numpy %.3f" % best_numpy + print "python:hd_sd %.3f" % best_boost def compare_6d(n, distrib): if distrib == 0: @@ -75,7 +73,7 @@ def compare_6d(n, distrib): best_numpy = float("infinity") best_boost = float("infinity") - for k in xrange(10): + for k in xrange(50): t = timer() w, xe = np.histogramdd(r, bins=(10, 10, 10, 10, 10, 10), @@ 
-100,15 +98,23 @@ def compare_6d(n, distrib): best_boost = min(t, best_boost) assert(np.all(w == np.array(h)[:-2,:-2,:-2,:-2,:-2,:-2])) - print "6D" - print "t[numpy] = %.3f" % best_numpy - print "t[boost] = %.3f" % best_boost + print "python:numpy %.3f" % best_numpy + print "python:hd_sd %.3f" % best_boost +print "1D" print "uniform distribution" compare_1d(12000000, 0) -compare_3d(4000000, 0) -compare_6d(2000000, 0) print "normal distribution" compare_1d(12000000, 1) + +print "3D" +print "uniform distribution" +compare_3d(4000000, 0) +print "normal distribution" compare_3d(4000000, 1) + +print "6D" +print "uniform distribution" +compare_6d(2000000, 0) +print "normal distribution" compare_6d(2000000, 1)