improving benchmark and benchmark doc

This commit is contained in:
hans.dembinski@gmail.com 2017-02-14 00:12:12 +00:00
parent a7f0d790b8
commit dae1908b51
5 changed files with 110 additions and 70 deletions

View File

@ -115,4 +115,5 @@ add_test(detail_test detail_test)
if(HAVE_PYTHON)
configure_file(../test/python_suite_test.py.in python_suite_test.py)
add_test(python_suite_test python_suite_test.py)
configure_file(../test/speed_numpy.py.in speed_numpy.py)
endif()

View File

@ -13,14 +13,17 @@ doxygen autodoc
[ glob ../../../boost/histogram/storage/*.hpp ]
:
<doxygen:param>PREDEFINED=BOOST_HISTOGRAM_DOXYGEN
<doxygen:param>HIDE_UNDOC_MEMBERS=YES
<doxygen:param>EXTRACT_ALL=NO
<doxygen:param>EXTRACT_PRIVATE=NO
<doxygen:param>HIDE_UNDOC_MEMBERS=YES
<doxygen:param>HIDE_UNDOC_CLASSES=YES
<doxygen:param>ENABLE_PREPROCESSING=YES
<doxygen:param>MACRO_EXPANSION=YES
<doxygen:param>EXPAND_ONLY_PREDEF=YES
<doxygen:param>SEARCH_INCLUDES=NO
<doxygen:param>JAVADOC_AUTOBRIEF=YES
<doxygen:param>USE_MATHJAX=YES
<doxygen:param>SORT_MEMBER_DOCS=NO
;
exe speed
@ -30,17 +33,16 @@ exe speed
<cxxflags>"-std=c++11"
;
boostbook standalone
boostbook histogram
:
histogram.qbk
:
<dependency>autodoc
<xsl:param>toc.max.depth=2
<xsl:param>toc.section.depth=2
<xsl:param>chunk.section.depth=1
<xsl:param>boost.root=../../../..
<xsl:param>navig.graphics=1
<format>html:<xsl:param>boost.root=../../../..
<format>html:<xsl:param>boost.libraries=../../../../libs/libraries.htm
<xsl:param>generate.section.toc.level=3
<xsl:param>chunk.first.sections=1
<xsl:param>boost.mathjax=1
<xsl:path>$(images_location)/..
;

66
doc/benchmark.py Normal file
View File

@ -0,0 +1,66 @@
# Plot the benchmark results from perf.dat as a horizontal bar chart
# and save the figure to html/benchmark.png.
#
# perf.dat layout (written by the speed tests): header lines of the form
# "<N>D" set the dimensionality, lines starting with "uniform"/"normal"
# set the input distribution, and every other line is "<label> <time>".
import numpy as np
import matplotlib.pyplot as plt
import sys  # NOTE(review): unused here; kept since the file may be used elsewhere
import re
from collections import defaultdict
from matplotlib.patches import Rectangle
from matplotlib.text import Text
from matplotlib.font_manager import FontProperties

data = defaultdict(list)  # (dim, dist) -> [(label, time), ...]
dim_re = re.compile(r"([0-9])D\n")  # compiled once, matched per line
with open("perf.dat") as f:
    for line in f:
        match = dim_re.search(line)
        if match:
            dim = int(match.group(1))
            continue
        if line.startswith("uniform"):
            dist = "uniform"
            continue
        if line.startswith("normal"):
            dist = "normal"
            continue
        label, time = line.strip().split(" ")
        data[(dim, dist)].append((label, float(time)))

plt.figure(figsize=(10, 8))
plt.subplots_adjust(left=0.12, right=0.92, top=0.95, bottom=0.1)
i = 0  # running y-position; bars grow downward from 0
for key in sorted(data):
    # Only the normal-distribution results are plotted; the uniform runs
    # are parsed above but deliberately skipped here.
    if key[1] == "uniform":
        continue
    entries = data[key]
    for rank, (label, t) in enumerate(sorted(entries)):
        i -= 1
        # Color ramp red -> yellow across the alphabetically sorted
        # entries; fixed colors override it for the reference libraries.
        z = float(rank) / len(entries)
        col = ((1.0 - z) * np.array((1.0, 0.0, 0.0))
               + z * np.array((1.0, 1.0, 0.0)))
        if label == "root":
            col = "k"
        if "numpy" in label:
            col = "0.5"
        bar = Rectangle((0, i), t, 1, facecolor=col)
        txt = Text(-0.01, i + 0.5, label,
                   fontsize=17, va="center", ha="right", clip_on=False)
        plt.gca().add_artist(bar)
        plt.gca().add_artist(txt)
    i -= 1  # one-row gap between dimension groups
    font0 = FontProperties()
    font0.set_size(20)
    font0.set_weight("bold")
    txt = Text(-0.01, i + 0.6, "%iD" % key[0],
               fontproperties=font0, va="center", ha="right", clip_on=False)
    plt.gca().add_artist(txt)

plt.ylim(0, i)
plt.xlim(0, 0.701)
# Booleans, not the legacy "off" strings (deprecated and later removed
# from matplotlib's tick_params).
plt.tick_params("y", left=False, labelleft=False)
plt.xlabel("time (smaller is better)")
plt.savefig("html/benchmark.png")

View File

@ -2,58 +2,23 @@
The library is designed to be fast. While it is unlikely that the time spent in the histogram is limiting an application, we cannot predict how the library is used.
The following table shows results of a simple benchmark against
The following plot shows results of a benchmark on a 2.9 GHz Macbook Pro. Random numbers from a uniform and a normal distribution are filled into the histograms of 1, 3, and 6 dimensions. 100 bins are used for 1D, 100x100x100=10^6 for 3D, and 10x10x10x10x10x10=10^6 for 6D. Each test is repeated 50 times and the minimum time is taken.
* `TH1I`, `TH3I` and `THnI` of the [@https://root.cern.ch ROOT framework]
[$benchmark.png [width 5in] [height 5in]]
* `histogram` and `histogramdd` from the Python module `numpy`
[variablelist Plot legend:
[[root] [[@https://root.cern.ch ROOT classes] (`TH1I` for 1D, `TH3I` for 3D and `THnI` for 6D)]]
[[py:numpy] [numpy functions ([python]`numpy.histogram` for 1D, `numpy.histogramdd` for 3D and 6D)]]
[[py:hd_sd] [[classref boost::histogram::dynamic_histogram dynamic_histogram] with [classref boost::histogram::adaptive_storage adaptive_storage] called from Python]]
[[hs_ss] [[classref boost::histogram::static_histogram static_histogram] with [classref boost::histogram::container_storage<std::vector<int>> container_storage<std::vector<int>>]]]
[[hs_sd] [[classref boost::histogram::static_histogram static_histogram] with [classref boost::histogram::adaptive_storage adaptive_storage]]]
[[hd_ss] [[classref boost::histogram::dynamic_histogram dynamic_histogram] with [classref boost::histogram::container_storage<std::vector<int>> container_storage<std::vector<int>>]]]
[[hd_sd] [[classref boost::histogram::dynamic_histogram dynamic_histogram] with [classref boost::histogram::adaptive_storage adaptive_storage]]]
The benchmark against ROOT is implemented in C++; the benchmark against numpy is implemented in Python.
[table:benchmark_res Test system: Intel Core i7-4500U CPU clocked at 1.8 GHz, 8 GB of DDR3 RAM
[[distribution] [uniform] [normal]]
[[
[table distribution
[[dimension]]
[[No. of fills ]]
[[C++: ROOT \[t/s\]]]
[[C++: boost \[t/s\]]]
[[Py: numpy \[t/s\] ]]
[[Py: boost \[t/s\] ]]
]
]
[[table uniform
[[1D ] [3D ] [6D ]]
[[12M ] [4M ] [2M ]]
[[0.127] [0.199] [0.185]]
[[0.172] [0.177] [0.155]]
[[0.825] [0.727] [0.436]]
[[0.209] [0.229] [0.192]]
]]
[[table normal
[[1D ] [3D ] [6D ]]
[[12M ] [4M ] [2M ]]
[[0.168] [0.143] [0.179]]
[[0.172] [0.171] [0.150]]
[[0.824] [0.426] [0.401]]
[[0.207] [0.194] [0.168]]
]]]
]
[classref boost::histogram::static_histogram static_histogram] is always faster than [classref boost::histogram::dynamic_histogram dynamic_histogram] and safer to use, as more checks are done at compile time. It is recommended when working in C++ only. [classref boost::histogram::adaptive_storage adaptive_storage] is faster than [classref boost::histogram::container_storage container_storage] for larger histograms, because it uses the cache more effectively due to its smaller memory consumption per bin.
Remarks:
* The comparison with ROOT puts ROOT at the advantage, since `TH1I` and `TH3I` are specialized classes for 1 dimension and 3 dimensions, not a general class for N dimensions. ROOT histograms also lack a comparably flexible system to define different binning schemes for each axis.
* Large vectors are pre-allocated and filled with random numbers drawn from a uniform or normal distribution for all tests. In the timed part, these numbers are read from the vector and put into the histograms. This reduces the overhead merely to memory access.
* The test with uniform random numbers never fills the overflow and underflow bins, while the test with random numbers from a normal distribution does. This explains some of the differences between the two distributions.
* All tests are repeated 10 times, the minimum is shown.
TODO
The histograms in this library are mostly faster than the competition, in some cases by a factor of 2. Simultaneously they are more flexible, since binning strategies can be exchanged. The Python-wrapped histogram is slower than numpy's own specialized function for 1D, but beats numpy's general multi-dimensional function by a factor 2 to 3.
[endsect]

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!@PYTHON_EXECUTABLE@
##
## Copyright 2015-2016 Hans Dembinski
##
@ -18,7 +18,7 @@ def compare_1d(n, distrib):
best_numpy = float("infinity")
best_boost = float("infinity")
for k in xrange(10):
for k in xrange(50):
t = timer()
w, xe = np.histogram(r, bins=100, range=(0.0, 1.0))
t = timer() - t
@ -31,9 +31,8 @@ def compare_1d(n, distrib):
best_boost = min(t, best_boost)
assert(np.all(w == np.array(h)[:-2]))
print "1D"
print "t[numpy] = %.3f" % best_numpy
print "t[boost] = %.3f" % best_boost
print "python:numpy %.3f" % best_numpy
print "python:hd_sd %.3f" % best_boost
def compare_3d(n, distrib):
if distrib == 0:
@ -44,7 +43,7 @@ def compare_3d(n, distrib):
best_numpy = float("infinity")
best_boost = float("infinity")
for k in xrange(10):
for k in xrange(50):
t = timer()
w, xe = np.histogramdd(r, bins=(100, 100, 100),
range=((0.0, 1.0),
@ -62,9 +61,8 @@ def compare_3d(n, distrib):
best_boost = min(t, best_boost)
assert(np.all(w == np.array(h)[:-2,:-2,:-2]))
print "3D"
print "t[numpy] = %.3f" % best_numpy
print "t[boost] = %.3f" % best_boost
print "python:numpy %.3f" % best_numpy
print "python:hd_sd %.3f" % best_boost
def compare_6d(n, distrib):
if distrib == 0:
@ -75,7 +73,7 @@ def compare_6d(n, distrib):
best_numpy = float("infinity")
best_boost = float("infinity")
for k in xrange(10):
for k in xrange(50):
t = timer()
w, xe = np.histogramdd(r, bins=(10, 10, 10,
10, 10, 10),
@ -100,15 +98,23 @@ def compare_6d(n, distrib):
best_boost = min(t, best_boost)
assert(np.all(w == np.array(h)[:-2,:-2,:-2,:-2,:-2,:-2]))
print "6D"
print "t[numpy] = %.3f" % best_numpy
print "t[boost] = %.3f" % best_boost
print "python:numpy %.3f" % best_numpy
print "python:hd_sd %.3f" % best_boost
print "1D"
print "uniform distribution"
compare_1d(12000000, 0)
compare_3d(4000000, 0)
compare_6d(2000000, 0)
print "normal distribution"
compare_1d(12000000, 1)
print "3D"
print "uniform distribution"
compare_3d(4000000, 0)
print "normal distribution"
compare_3d(4000000, 1)
print "6D"
print "uniform distribution"
compare_6d(2000000, 0)
print "normal distribution"
compare_6d(2000000, 1)