mirror of
https://github.com/boostorg/histogram.git
synced 2025-05-10 15:24:05 +00:00
adding install instructions
This commit is contained in:
parent
492d73f18f
commit
4d8f88ebd2
@ -1,5 +1,7 @@
|
|||||||
|
cmake_minimum_required (VERSION 2.8)
|
||||||
|
|
||||||
project(histogram)
|
project(histogram)
|
||||||
list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/CMake)
|
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR})
|
||||||
|
|
||||||
if(${CMAKE_BUILD_TYPE})
|
if(${CMAKE_BUILD_TYPE})
|
||||||
STRING(TOLOWER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE)
|
STRING(TOLOWER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE)
|
||||||
@ -7,81 +9,104 @@ endif()
|
|||||||
|
|
||||||
find_package(Boost 1.55 REQUIRED
|
find_package(Boost 1.55 REQUIRED
|
||||||
COMPONENTS python iostreams serialization unit_test_framework)
|
COMPONENTS python iostreams serialization unit_test_framework)
|
||||||
find_package(PythonLibs)
|
find_package(PythonLibs) # optional
|
||||||
find_package(Numpy) # optional
|
find_package(Numpy) # optional
|
||||||
|
|
||||||
include_directories(include ${Boost_INCLUDE_DIRS})
|
|
||||||
add_definitions(-DBOOST_TEST_DYN_LINK) # for unit_test_framework
|
add_definitions(-DBOOST_TEST_DYN_LINK) # for unit_test_framework
|
||||||
|
add_definitions(-Wall)
|
||||||
|
include_directories(../include ${Boost_INCLUDE_DIRS})
|
||||||
set(LIBRARIES stdc++ m ${Boost_LIBRARIES})
|
set(LIBRARIES stdc++ m ${Boost_LIBRARIES})
|
||||||
|
|
||||||
if(Boost_PYTHON_FOUND AND PYTHONLIBS_FOUND)
|
if(Boost_PYTHON_FOUND AND PYTHONLIBS_FOUND)
|
||||||
set(USE_PYTHON True)
|
set(USE_PYTHON True)
|
||||||
include_directories(${PYTHON_INCLUDE_DIRS})
|
|
||||||
LIST(APPEND LIBRARIES ${PYTHON_LIBRARIES})
|
if(USE_PYTHON)
|
||||||
add_definitions(-DUSE_PYTHON)
|
include_directories(${PYTHON_INCLUDE_DIRS})
|
||||||
|
LIST(APPEND LIBRARIES ${PYTHON_LIBRARIES})
|
||||||
|
add_definitions(-DUSE_PYTHON)
|
||||||
|
endif()
|
||||||
|
|
||||||
if(NUMPY_FOUND)
|
if(NUMPY_FOUND)
|
||||||
|
set(USE_NUMPY True)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(USE_NUMPY)
|
||||||
include_directories(${NUMPY_INCLUDE_DIR})
|
include_directories(${NUMPY_INCLUDE_DIR})
|
||||||
add_definitions(-DUSE_NUMPY)
|
add_definitions(-DUSE_NUMPY)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# core library
|
# core library
|
||||||
add_library(histogram SHARED
|
add_library(boost_histogram SHARED
|
||||||
src/axis.cpp
|
../src/axis.cpp
|
||||||
src/histogram_base.cpp
|
../src/histogram_base.cpp
|
||||||
src/histogram.cpp
|
../src/histogram.cpp
|
||||||
src/nstore.cpp
|
../src/nstore.cpp
|
||||||
src/zero_suppression.cpp
|
../src/zero_suppression.cpp
|
||||||
)
|
)
|
||||||
target_link_libraries(histogram ${LIBRARIES})
|
target_link_libraries(boost_histogram ${LIBRARIES})
|
||||||
|
|
||||||
if(CMAKE_BUILD_TYPE STREQUAL "debug")
|
if(CMAKE_BUILD_TYPE STREQUAL "debug")
|
||||||
message(STATUS "debug mode: optimizations off")
|
message(STATUS "debug mode: optimizations off")
|
||||||
else()
|
else()
|
||||||
message(STATUS "release mode: optimizations on")
|
message(STATUS "release mode: optimizations on")
|
||||||
target_compile_options(histogram PUBLIC "-O3 -fomit-frame-pointer -mtune=generic")
|
target_compile_options(boost_histogram PUBLIC "-O3 -fomit-frame-pointer -mtune=generic")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# python bindings
|
# python bindings
|
||||||
if(USE_PYTHON)
|
if(USE_PYTHON)
|
||||||
add_library(pyhistogram MODULE
|
add_library(pyhistogram MODULE
|
||||||
src/python/module.cpp
|
../src/python/module.cpp
|
||||||
src/python/histogram_base.cpp
|
../src/python/histogram_base.cpp
|
||||||
src/python/histogram.cpp
|
../src/python/histogram.cpp
|
||||||
)
|
)
|
||||||
target_link_libraries(pyhistogram histogram ${LIBRARIES})
|
target_link_libraries(pyhistogram boost_histogram ${LIBRARIES})
|
||||||
set_target_properties(pyhistogram PROPERTIES OUTPUT_NAME "histogram" PREFIX "" SUFFIX ".so")
|
set_target_properties(pyhistogram PROPERTIES OUTPUT_NAME "histogram" PREFIX "" SUFFIX ".so")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# examples
|
# examples
|
||||||
add_executable(sizeof
|
add_executable(sizeof
|
||||||
examples/sizeof.cpp
|
../examples/sizeof.cpp
|
||||||
)
|
)
|
||||||
target_link_libraries(sizeof ${LIBRARIES})
|
target_link_libraries(sizeof ${LIBRARIES})
|
||||||
|
|
||||||
find_package(ROOT) # only used in speed comparison
|
find_package(ROOT) # only used in speed comparison
|
||||||
if(ROOT_FOUND)
|
if(ROOT_FOUND)
|
||||||
add_executable(nhistogram_speed
|
add_executable(nhistogram_speed
|
||||||
examples/speed_vs_root.cpp)
|
../examples/speed_vs_root.cpp)
|
||||||
target_include_directories(nhistogram_speed PUBLIC ${ROOT_INCLUDE_DIR})
|
target_include_directories(nhistogram_speed PUBLIC ${ROOT_INCLUDE_DIR})
|
||||||
target_link_libraries(nhistogram_speed histogram ${ROOT_LIBRARIES} ${LIBRARIES})
|
target_link_libraries(nhistogram_speed boost_histogram ${ROOT_LIBRARIES} ${LIBRARIES})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# tests
|
# tests
|
||||||
enable_testing()
|
enable_testing()
|
||||||
add_executable(zero_suppression_test
|
add_executable(zero_suppression_test
|
||||||
test/zero_suppression_test.cpp)
|
../test/zero_suppression_test.cpp)
|
||||||
target_link_libraries(zero_suppression_test histogram ${LIBRARIES})
|
target_link_libraries(zero_suppression_test boost_histogram ${LIBRARIES})
|
||||||
add_test(zero_suppression_test zero_suppression_test)
|
add_test(zero_suppression_test zero_suppression_test)
|
||||||
|
|
||||||
add_executable(histogram_test
|
add_executable(histogram_test
|
||||||
test/histogram_test.cpp)
|
../test/histogram_test.cpp)
|
||||||
target_link_libraries(histogram_test histogram ${LIBRARIES})
|
target_link_libraries(histogram_test boost_histogram ${LIBRARIES})
|
||||||
add_test(histogram_test histogram_test)
|
add_test(histogram_test histogram_test)
|
||||||
|
|
||||||
if(Boost_PYTHON_FOUND)
|
if(USE_PYTHON)
|
||||||
add_custom_target(python_suite_test ALL
|
add_custom_target(python_suite_test ALL
|
||||||
cp ${CMAKE_SOURCE_DIR}/test/python_suite_test.py .)
|
ln -sf ${PROJECT_SOURCE_DIR}/../test/python_suite_test.py .)
|
||||||
add_test(python_suite_test python_suite_test.py)
|
add_test(python_suite_test python_suite_test.py)
|
||||||
|
set_tests_properties(python_suite_test
|
||||||
|
PROPERTIES ENVIRONMENT "PYTHONPATH=.:${PYTHONPATH}")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# install
|
||||||
|
install(DIRECTORY ../include/boost DESTINATION include)
|
||||||
|
install(TARGETS boost_histogram DESTINATION lib)
|
||||||
|
if (USE_PYTHON)
|
||||||
|
execute_process(COMMAND python -c "from distutils.sysconfig import get_python_lib; import sys; sys.stdout.write(get_python_lib())"
|
||||||
|
OUTPUT_VARIABLE PYTHON_MODULE_DIRS)
|
||||||
|
set(PYTHON_MODULE_DIRS "${PYTHON_MODULE_DIRS}:$ENV{PYTHONPATH}")
|
||||||
|
string(REPLACE ":" "\n " PYTHON_MODULE_DIRS ${PYTHON_MODULE_DIRS})
|
||||||
|
get_target_property(PYTHON_MODULE pyhistogram LOCATION)
|
||||||
|
|
||||||
|
install(CODE "message(\"= How-to install Python module =\\nCopy\\n ${PYTHON_MODULE}\\ninto one of these paths:\\n ${PYTHON_MODULE_DIRS}\")")
|
||||||
endif()
|
endif()
|
||||||
|
@ -1,2 +0,0 @@
|
|||||||
cmake_minimum_required (VERSION 2.8)
|
|
||||||
include(CMake/CMakeLists.txt)
|
|
61
README.md
61
README.md
@ -1,33 +1,64 @@
|
|||||||
# histogram
|
# Histogram
|
||||||
|
|
||||||
Fast n-dimensional histogram with convenient interface for C++ and Python
|
Fast n-dimensional histogram with convenient interface for C++ and Python
|
||||||
|
|
||||||
This project contains an easy-to-use powerful n-dimensional histogram class implemented in `C++0x`, optimized for convenience and excellent performance under heavy duty. The histogram has a complete C++ and a [Python](http://www.python.org) interface, and can be moved over the language boundary with ease. [Numpy](http://www.numpy.org) is fully supported; histograms can be filled with Numpy arrays at C speeds and are convertible into Numpy arrays without copying data. Histograms can be streamed from/to files and pickled in Python.
|
This project contains an easy-to-use powerful n-dimensional histogram class implemented in `C++0x`, optimized for convenience and excellent performance under heavy duty. The histogram has a complete C++ and a [Python](http://www.python.org) interface, and can be moved over the language boundary with ease. [Numpy](http://www.numpy.org) is fully supported; histograms can be filled with Numpy arrays at C speeds and are convertible into Numpy arrays without copying data. Histograms can be streamed from/to files and pickled in Python.
|
||||||
|
|
||||||
This project only depends on [Boost](http://www.boost.org). Optional dependencies are Python and Numpy.
|
My goal is to submit this project to the [Boost](http://www.boost.org) libraries, that's why it uses the boost directory structure and namespace. The code is released under the MIT License, making it free to use in open- and closed-source projects.
|
||||||
|
|
||||||
My goal is to submit this project to the Boost libraries, that's why it uses the boost directory structure and namespace. The code is released under the MIT License, making it free to use in open- and closed-source projects.
|
### Dependencies
|
||||||
|
|
||||||
|
* [Boost](http://www.boost.org)
|
||||||
|
* [CMake](https://cmake.org)
|
||||||
|
* Optional:
|
||||||
|
[Python](http://www.python.org)
|
||||||
|
[Numpy](http://www.numpy.org)
|
||||||
|
|
||||||
|
### Features
|
||||||
|
|
||||||
|
* N-dimensional histogram
|
||||||
|
* Intuitive and convenient interface, everything *just works*
|
||||||
|
* Support for different binning scenarios, including binning of angles
|
||||||
|
* High-performance, cache-friendly design
|
||||||
|
* Space-efficient memory storage
|
||||||
|
* Serialization support with zero-suppression
|
||||||
|
* Multi-language support: C++ and Python
|
||||||
|
* Numpy support
|
||||||
|
|
||||||
|
### Build instructions
|
||||||
|
|
||||||
|
`git clone git@github.com:HDembinski/histogram.git`
|
||||||
|
|
||||||
|
`mkdir build; cd build`
|
||||||
|
|
||||||
|
`cmake ../histogram.git/CMake`
|
||||||
|
|
||||||
|
`make install` (or just `make` if you don't want to install yet)
|
||||||
|
|
||||||
|
To run the tests, do `ctest`.
|
||||||
|
|
||||||
## Rationale
|
## Rationale
|
||||||
|
|
||||||
There is a lack of a widely-used free histogram class. While it is easy to write a 1-dimensional histogram, writing an n-dimensional histogram poses more of a challenge. If you add serialization and Python/Numpy support onto the wish-list, the air becomes thin. The main competitor is the [ROOT framework](https://root.cern.ch). This histogram class is designed to be more convenient to use, and as fast or faster than the equivalent ROOT histograms. It comes without heavy baggage; instead it has a clean and modern C++ design which follows the advice given in popular C++ books, like those of [Meyers](http://www.aristeia.com/books.html) and [Sutter and Alexandrescu](http://www.gotw.ca/publications/c++cs.htm).
|
There is a lack of a widely-used free histogram class. While it is easy to write a 1-dimensional histogram, writing an n-dimensional histogram poses more of a challenge. If you add serialization and Python/Numpy support onto the wish-list, the air becomes thin. The main competitor is the [ROOT framework](https://root.cern.ch). This histogram class is designed to be more convenient to use, and as fast or faster than the equivalent ROOT histograms. It comes without heavy baggage; instead it has a clean and modern C++ design which follows the advice given in popular C++ books, like those of [Meyers](http://www.aristeia.com/books.html) and [Sutter and Alexandrescu](http://www.gotw.ca/publications/c++cs.htm).
|
||||||
|
|
||||||
## Features
|
## Design choices
|
||||||
|
|
||||||
### Interface convenience, language transparency
|
I designed the histogram based on a decade of experience collected in working with Big Data, more precisely in the field of particle physics and astroparticle physics. I follow these principles:
|
||||||
A histogram should have the same consistent interface whatever the dimension. Like `std::vector` it should *just work*; users shouldn't be forced to make *a priori* choices among several histogram classes and options every time they encounter a new data set. Python is a great language for data analysis, so the histogram should have Python bindings. Data analysis in Python is Numpy-based, so Numpy support is a must. The histogram should be usable as an interface between a complex simulation or data-storage system written in C++ and data-analysis/plotting in Python: define the histogram in Python, let it be filled on the C++ side, and then get it back for further data analysis or plotting.
|
|
||||||
|
|
||||||
### Powerful binning strategies
|
|
||||||
The histogram supports half a dozen different binning strategies, conveniently encapsulated in axis objects. There is the standard sorting of real-valued data into bins of equal or varying width, but also binning of angles or integer values. Extra bins that count over- and underflow values are added by default. This feature can be turned off individually for each dimension. The extra bins do not disturb normal counting. On an axis with n-bins, the first bin has the index `0`, the last bin `n-1`, while the under- and overflow bins are accessible at `-1` and `n`, respectively.
|
|
||||||
|
|
||||||
### Performance, cache-friendliness and memory-efficiency
|
|
||||||
Dense storage in memory is a must for high performance. Unfortunately, the [curse of dimensionality](https://en.wikipedia.org/wiki/Curse_of_dimensionality) quickly becomes a problem as the number of dimensions grows, leading to histograms which consume large amounts (up to GBs) of memory. Fortunately, having many dimensions typically reduces the number of counts per bin, since tuples get spread over many dimensions. The histogram uses an adaptive count size per bin to exploit this, which starts with the smallest size per bin of 1 byte and increases transparently as needed up to 8 bytes per bin. A `std::vector` grows in *length* as new elements are added, while the count storage grows in *depth*.
|
|
||||||
|
|
||||||
## Design principles
|
|
||||||
|
|
||||||
* "Do one thing and do it well", Doug McIlroy
|
* "Do one thing and do it well", Doug McIlroy
|
||||||
* The [Zen of Python](https://www.python.org/dev/peps/pep-0020) (also applies to other languages).
|
* The [Zen of Python](https://www.python.org/dev/peps/pep-0020) (also applies to other languages).
|
||||||
|
|
||||||
I designed the histogram based on a decade of experience collected in working with Big Data, more precisely in the field of particle physics and astroparticle physics.
|
### Interface convenience, language transparency
|
||||||
|
|
||||||
|
A histogram should have the same consistent interface whatever the dimension. Like `std::vector` it should *just work*; users shouldn't be forced to make *a priori* choices among several histogram classes and options every time they encounter a new data set. Python is a great language for data analysis, so the histogram should have Python bindings. Data analysis in Python is Numpy-based, so Numpy support is a must. The histogram should be usable as an interface between a complex simulation or data-storage system written in C++ and data-analysis/plotting in Python: define the histogram in Python, let it be filled on the C++ side, and then get it back for further data analysis or plotting.
|
||||||
|
|
||||||
|
### Powerful binning strategies
|
||||||
|
|
||||||
|
The histogram supports half a dozen different binning strategies, conveniently encapsulated in axis objects. There is the standard sorting of real-valued data into bins of equal or varying width, but also binning of angles or integer values. Extra bins that count over- and underflow values are added by default. This feature can be turned off individually for each dimension. The extra bins do not disturb normal counting. On an axis with n-bins, the first bin has the index `0`, the last bin `n-1`, while the under- and overflow bins are accessible at `-1` and `n`, respectively.
|
||||||
|
|
||||||
|
### Performance, cache-friendliness and memory-efficiency
|
||||||
|
|
||||||
|
Dense storage in memory is a must for high performance. Unfortunately, the [curse of dimensionality](https://en.wikipedia.org/wiki/Curse_of_dimensionality) quickly becomes a problem as the number of dimensions grows, leading to histograms which consume large amounts (up to GBs) of memory. Fortunately, having many dimensions typically reduces the number of counts per bin, since tuples get spread over many dimensions. The histogram uses an adaptive count size per bin to exploit this, which starts with the smallest size per bin of 1 byte and increases transparently as needed up to 8 bytes per bin. A `std::vector` grows in *length* as new elements are added, while the count storage grows in *depth*.
|
||||||
|
|
||||||
## State of project
|
## State of project
|
||||||
|
|
||||||
|
@ -1,52 +0,0 @@
|
|||||||
#ifndef _BOOST_HISTOGRAM_VECTOR_OPERATORS_HPP_
|
|
||||||
#define _BOOST_HISTOGRAM_VECTOR_OPERATORS_HPP_
|
|
||||||
|
|
||||||
#include <stdexcept>
|
|
||||||
|
|
||||||
namespace boost {
|
|
||||||
namespace histogram {
|
|
||||||
|
|
||||||
// vectors: generic ==
|
|
||||||
template<typename T>
|
|
||||||
bool operator==(const std::vector<T>& a, const std::vector<T>& b) {
|
|
||||||
if (a.size() != b.size())
|
|
||||||
return false;
|
|
||||||
// pointer arithmetric is faster
|
|
||||||
const T* da = &a.front();
|
|
||||||
const T* db = &b.front();
|
|
||||||
for (unsigned i = 0, n = a.size(); i < n; ++i)
|
|
||||||
if (da[i] != db[i])
|
|
||||||
return false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// vectors: generic +=
|
|
||||||
template<typename T>
|
|
||||||
std::vector<T>& operator+=(std::vector<T>& a, const std::vector<T>& b) {
|
|
||||||
if (a.size() != b.size())
|
|
||||||
throw std::invalid_argument("sizes do not match");
|
|
||||||
// pointer arithmetric is faster
|
|
||||||
T* da = &a.front();
|
|
||||||
const T* db = &b.front();
|
|
||||||
for (unsigned i = 0, n = a.size(); i < n; ++i)
|
|
||||||
da[i] += db[i];
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
// vectors: generic -=
|
|
||||||
template<typename T>
|
|
||||||
std::vector<T>& operator-=(std::vector<T>& a, const std::vector<T>& b) {
|
|
||||||
if (a.size() != b.size())
|
|
||||||
throw std::invalid_argument("sizes do not match");
|
|
||||||
// pointer arithmetric is faster
|
|
||||||
T* da = &a.front();
|
|
||||||
const T* db = &b.front();
|
|
||||||
for (unsigned i = 0, n = a.size(); i < n; ++i)
|
|
||||||
da[i] -= db[i];
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
Loading…
x
Reference in New Issue
Block a user