speed improvements for 1d and 2d histograms

* rename get_size to axes_rank and move axes buffer to axes.hpp
* added fill experiments
* faster and simpler fill_impl
* faster specializations for linearize_value
This commit is contained in:
Hans Dembinski 2019-05-26 23:51:15 +02:00 committed by GitHub
parent 5f937d3665
commit ecd142080d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 533 additions and 222 deletions

View File

@ -29,6 +29,7 @@ endmacro()
add_benchmark(axis_index)
add_benchmark(histogram_filling)
add_benchmark(histogram_filling_experiments)
add_benchmark(histogram_iteration)
if (Threads_FOUND)
add_benchmark(histogram_parallel_filling)

View File

@ -0,0 +1,2 @@
#/bin/bash
for cpu in /sys/devices/system/cpu/cpu? ; do echo performance > $cpu/cpufreq/scaling_governor; done

View File

@ -0,0 +1,2 @@
#/bin/bash
for cpu in /sys/devices/system/cpu/cpu? ; do echo powersave > $cpu/cpufreq/scaling_governor; done

View File

@ -68,35 +68,35 @@ using SStore = std::vector<int>;
using DStore = unlimited_storage<>;
BENCHMARK_TEMPLATE(fill_1d, static_tag, SStore, uniform);
BENCHMARK_TEMPLATE(fill_1d, static_tag, DStore, uniform);
BENCHMARK_TEMPLATE(fill_1d, dynamic_tag, SStore, uniform);
BENCHMARK_TEMPLATE(fill_1d, static_tag, DStore, uniform);
BENCHMARK_TEMPLATE(fill_1d, dynamic_tag, DStore, uniform);
BENCHMARK_TEMPLATE(fill_2d, static_tag, SStore, uniform);
BENCHMARK_TEMPLATE(fill_2d, static_tag, DStore, uniform);
BENCHMARK_TEMPLATE(fill_2d, dynamic_tag, SStore, uniform);
BENCHMARK_TEMPLATE(fill_2d, static_tag, DStore, uniform);
BENCHMARK_TEMPLATE(fill_2d, dynamic_tag, DStore, uniform);
BENCHMARK_TEMPLATE(fill_3d, static_tag, SStore, uniform);
BENCHMARK_TEMPLATE(fill_3d, static_tag, DStore, uniform);
BENCHMARK_TEMPLATE(fill_3d, dynamic_tag, SStore, uniform);
BENCHMARK_TEMPLATE(fill_3d, static_tag, DStore, uniform);
BENCHMARK_TEMPLATE(fill_3d, dynamic_tag, DStore, uniform);
BENCHMARK_TEMPLATE(fill_6d, static_tag, SStore, uniform);
BENCHMARK_TEMPLATE(fill_6d, static_tag, DStore, uniform);
BENCHMARK_TEMPLATE(fill_6d, dynamic_tag, SStore, uniform);
BENCHMARK_TEMPLATE(fill_6d, static_tag, DStore, uniform);
BENCHMARK_TEMPLATE(fill_6d, dynamic_tag, DStore, uniform);
BENCHMARK_TEMPLATE(fill_1d, static_tag, SStore, normal);
BENCHMARK_TEMPLATE(fill_1d, static_tag, DStore, normal);
BENCHMARK_TEMPLATE(fill_1d, dynamic_tag, SStore, normal);
BENCHMARK_TEMPLATE(fill_1d, static_tag, DStore, normal);
BENCHMARK_TEMPLATE(fill_1d, dynamic_tag, DStore, normal);
BENCHMARK_TEMPLATE(fill_2d, static_tag, SStore, normal);
BENCHMARK_TEMPLATE(fill_2d, static_tag, DStore, normal);
BENCHMARK_TEMPLATE(fill_2d, dynamic_tag, SStore, normal);
BENCHMARK_TEMPLATE(fill_2d, static_tag, DStore, normal);
BENCHMARK_TEMPLATE(fill_2d, dynamic_tag, DStore, normal);
BENCHMARK_TEMPLATE(fill_3d, static_tag, SStore, normal);
BENCHMARK_TEMPLATE(fill_3d, static_tag, DStore, normal);
BENCHMARK_TEMPLATE(fill_3d, dynamic_tag, SStore, normal);
BENCHMARK_TEMPLATE(fill_3d, static_tag, DStore, normal);
BENCHMARK_TEMPLATE(fill_3d, dynamic_tag, DStore, normal);
BENCHMARK_TEMPLATE(fill_6d, static_tag, SStore, normal);
BENCHMARK_TEMPLATE(fill_6d, static_tag, DStore, normal);
BENCHMARK_TEMPLATE(fill_6d, dynamic_tag, SStore, normal);
BENCHMARK_TEMPLATE(fill_6d, static_tag, DStore, normal);
BENCHMARK_TEMPLATE(fill_6d, dynamic_tag, DStore, normal);

View File

@ -0,0 +1,195 @@
// Copyright 2015-2019 Hans Dembinski
//
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt
// or copy at http://www.boost.org/LICENSE_1_0.txt)
#include <benchmark/benchmark.h>
#include <boost/histogram/axis.hpp>
#include <boost/histogram/axis/traits.hpp>
#include <boost/histogram/detail/axes.hpp>
#include <boost/histogram/detail/throw_exception.hpp>
#include <boost/mp11/algorithm.hpp>
#include <random>
#include <tuple>
#include <type_traits>
#include <vector>
using namespace boost::histogram;
using reg = axis::regular<>;
using integ = axis::integer<>;
using var = axis::variable<>;
using vector_of_variant = std::vector<axis::variant<reg, integ, var>>;
using uniform = std::uniform_real_distribution<>;
using normal = std::normal_distribution<>;
template <class T, class U>
auto make_storage(const U& axes) {
return std::vector<T>(detail::bincount(axes), 0);
}
template <class T>
auto make_strides(const T& axes) {
std::vector<std::size_t> strides(detail::axes_rank(axes) + 1, 1);
auto sit = strides.begin();
detail::for_each_axis(axes, [&](const auto& a) { *++sit *= axis::traits::extent(a); });
return strides;
}
template <class Axes, class Storage, class Tuple>
void fill_b(const Axes& axes, Storage& storage, const Tuple& t) {
using namespace boost::mp11;
std::size_t stride = 1, index = 0;
mp_for_each<mp_iota<mp_size<Tuple>>>([&](auto i) {
const auto& a = boost::histogram::detail::axis_get<i>(axes);
const auto& v = std::get<i>(t);
index += (a.index(v) + 1) * stride;
stride *= axis::traits::extent(a);
});
++storage[index];
}
template <class Axes, class Storage, class Tuple>
void fill_c(const Axes& axes, const std::size_t* strides, Storage& storage,
const Tuple& t) {
using namespace boost::mp11;
std::size_t index = 0;
assert(boost::histogram::detail::axes_rank(axes) ==
boost::histogram::detail::axes_rank(t));
mp_for_each<mp_iota<mp_size<Tuple>>>([&](auto i) {
const auto& a = boost::histogram::detail::axis_get<i>(axes);
const auto& v = std::get<i>(t);
index += (a.index(v) + 1) * *strides++;
});
++storage[index];
}
template <class Distribution>
Distribution init();
template <>
uniform init<uniform>() {
return uniform{0.0, 1.0};
}
template <>
normal init<normal>() {
return normal{0.5, 0.3};
}
template <class T, class Distribution>
static void fill_1d_a(benchmark::State& state) {
auto axes = std::make_tuple(reg(100, 0, 1));
std::default_random_engine gen(1);
Distribution dis = init<Distribution>();
auto storage = make_storage<T>(axes);
for (auto _ : state) {
const auto i = std::get<0>(axes).index(dis(gen));
++storage[i + 1];
}
}
template <class T, class Distribution>
static void fill_1d_b(benchmark::State& state) {
auto axes = std::make_tuple(reg(100, 0, 1));
std::default_random_engine gen(1);
Distribution dis = init<Distribution>();
auto storage = make_storage<T>(axes);
for (auto _ : state) { fill_b(axes, storage, std::forward_as_tuple(dis(gen))); }
}
template <class T, class Distribution>
static void fill_1d_c(benchmark::State& state) {
auto axes = std::make_tuple(reg(100, 0, 1));
std::default_random_engine gen(1);
Distribution dis = init<Distribution>();
auto storage = make_storage<T>(axes);
auto strides = make_strides(axes);
for (auto _ : state) {
fill_c(axes, strides.data(), storage, std::forward_as_tuple(dis(gen)));
}
}
template <class T, class Distribution>
static void fill_1d_c_dyn(benchmark::State& state) {
auto axes = vector_of_variant({reg(100, 0, 1)});
std::default_random_engine gen(1);
Distribution dis = init<Distribution>();
auto storage = make_storage<T>(axes);
auto strides = make_strides(axes);
for (auto _ : state) {
fill_c(axes, strides.data(), storage, std::forward_as_tuple(dis(gen)));
}
}
template <class T, class Distribution>
static void fill_2d_a(benchmark::State& state) {
auto axes = std::make_tuple(reg(100, 0, 1), reg(100, 0, 1));
std::default_random_engine gen(1);
Distribution dis = init<Distribution>();
auto storage = make_storage<T>(axes);
for (auto _ : state) {
const auto i0 = std::get<0>(axes).index(dis(gen));
const auto i1 = std::get<1>(axes).index(dis(gen));
const auto stride = axis::traits::extent(std::get<0>(axes));
++storage[(i0 + 1) * stride + (i1 + 1)];
}
}
template <class T, class Distribution>
static void fill_2d_b(benchmark::State& state) {
auto axes = std::make_tuple(reg(100, 0, 1), reg(100, 0, 1));
std::default_random_engine gen(1);
Distribution dis = init<Distribution>();
auto storage = make_storage<T>(axes);
for (auto _ : state) {
fill_b(axes, storage, std::forward_as_tuple(dis(gen), dis(gen)));
}
}
template <class T, class Distribution>
static void fill_2d_c(benchmark::State& state) {
auto axes = std::make_tuple(reg(100, 0, 1), reg(100, 0, 1));
std::default_random_engine gen(1);
Distribution dis = init<Distribution>();
auto storage = make_storage<T>(axes);
auto strides = make_strides(axes);
assert(strides.size() == 3);
assert(strides[0] == 1);
assert(strides[1] == 102);
for (auto _ : state) {
fill_c(axes, strides.data(), storage, std::forward_as_tuple(dis(gen), dis(gen)));
}
}
template <class T, class Distribution>
static void fill_2d_c_dyn(benchmark::State& state) {
auto axes = vector_of_variant({reg(100, 0, 1), reg(100, 0, 1)});
std::default_random_engine gen(1);
Distribution dis = init<Distribution>();
auto storage = make_storage<T>(axes);
auto strides = make_strides(axes);
assert(strides.size() == 3);
assert(strides[0] == 1);
assert(strides[1] == 102);
for (auto _ : state) {
fill_c(axes, strides.data(), storage, std::forward_as_tuple(dis(gen), dis(gen)));
}
}
BENCHMARK_TEMPLATE(fill_1d_a, int, uniform);
BENCHMARK_TEMPLATE(fill_1d_a, double, uniform);
BENCHMARK_TEMPLATE(fill_1d_b, double, uniform);
BENCHMARK_TEMPLATE(fill_1d_c, double, uniform);
BENCHMARK_TEMPLATE(fill_1d_c_dyn, double, uniform);
BENCHMARK_TEMPLATE(fill_2d_a, double, uniform);
BENCHMARK_TEMPLATE(fill_2d_b, double, uniform);
BENCHMARK_TEMPLATE(fill_2d_c, double, uniform);
BENCHMARK_TEMPLATE(fill_2d_c_dyn, double, uniform);
BENCHMARK_TEMPLATE(fill_1d_a, double, normal);
BENCHMARK_TEMPLATE(fill_1d_b, double, normal);
BENCHMARK_TEMPLATE(fill_1d_c, double, normal);
BENCHMARK_TEMPLATE(fill_2d_a, double, normal);
BENCHMARK_TEMPLATE(fill_2d_b, double, normal);
BENCHMARK_TEMPLATE(fill_2d_c, double, normal);

View File

@ -169,14 +169,14 @@ Result value_as(const Axis& axis, real_index_type index) {
@param value argument to be passed to `index` method
*/
template <class Axis, class U>
auto index(const Axis& axis, const U& value) {
axis::index_type index(const Axis& axis, const U& value) {
using V = detail::remove_cvref_t<detail::arg_type<decltype(&Axis::index)>>;
return axis.index(detail::try_cast<V, std::invalid_argument>(value));
}
// specialization for variant
template <class... Ts, class U>
auto index(const variant<Ts...>& axis, const U& value) {
axis::index_type index(const variant<Ts...>& axis, const U& value) {
return axis.index(value);
}

View File

@ -7,6 +7,7 @@
#ifndef BOOST_HISTOGRAM_DETAIL_AXES_HPP
#define BOOST_HISTOGRAM_DETAIL_AXES_HPP
#include <array>
#include <boost/assert.hpp>
#include <boost/histogram/axis/traits.hpp>
#include <boost/histogram/axis/variant.hpp>
@ -16,15 +17,40 @@
#include <boost/mp11/algorithm.hpp>
#include <boost/mp11/list.hpp>
#include <boost/mp11/tuple.hpp>
#include <boost/mp11/utility.hpp>
#include <boost/throw_exception.hpp>
#include <stdexcept>
#include <tuple>
#include <type_traits>
/* Most of the histogram code is generic and works for any number of axes. Buffers with a
* fixed maximum capacity are used in some places, which have a size equal to the rank of
* a histogram. The buffers are statically allocated to improve performance, which means
* that they need a preset maximum capacity. 32 seems like a safe upper limit for the rank
* (you can nevertheless increase it here if necessary): the simplest non-trivial axis has
* 2 bins; even if counters are used which need only a byte of storage per bin, this still
* corresponds to 4 GB of storage.
*/
#ifndef BOOST_HISTOGRAM_DETAIL_AXES_LIMIT
#define BOOST_HISTOGRAM_DETAIL_AXES_LIMIT 32
#endif
namespace boost {
namespace histogram {
namespace detail {
template <class T>
unsigned axes_rank(const T& axes) {
using std::begin;
using std::end;
return static_cast<unsigned>(std::distance(begin(axes), end(axes)));
}
template <class... Ts>
constexpr unsigned axes_rank(const std::tuple<Ts...>&) {
return static_cast<unsigned>(sizeof...(Ts));
}
template <unsigned N, class... Ts>
decltype(auto) axis_get(std::tuple<Ts...>& axes) {
return std::get<N>(axes);
@ -152,7 +178,7 @@ auto make_empty_dynamic_axes(const std::tuple<Ts...>&) {
template <typename T>
void axis_index_is_valid(const T& axes, const unsigned N) {
BOOST_ASSERT_MSG(N < get_size(axes), "index out of range");
BOOST_ASSERT_MSG(N < axes_rank(axes), "index out of range");
}
template <typename F, typename T>
@ -188,6 +214,44 @@ std::size_t bincount(const T& axes) {
return n;
}
template <class T>
using tuple_size_t = typename std::tuple_size<T>::type;
template <class T>
using buffer_size = mp11::mp_eval_or<
std::integral_constant<std::size_t, BOOST_HISTOGRAM_DETAIL_AXES_LIMIT>, tuple_size_t,
T>;
template <class T, std::size_t N>
class sub_array : public std::array<T, N> {
public:
explicit sub_array(std::size_t s) : size_(s) {
BOOST_ASSERT_MSG(size_ <= N, "requested size exceeds size of static buffer");
}
sub_array(std::size_t s, const T& value) : size_(s) {
BOOST_ASSERT_MSG(size_ <= N, "requested size exceeds size of static buffer");
std::array<T, N>::fill(value);
}
// need to override both versions of std::array
auto end() noexcept { return std::array<T, N>::begin() + size_; }
auto end() const noexcept { return std::array<T, N>::begin() + size_; }
auto size() const noexcept { return size_; }
private:
std::size_t size_;
};
template <class U, class T>
using stack_buffer = sub_array<U, buffer_size<T>::value>;
template <class U, class T, class... Ts>
auto make_stack_buffer(const T& t, const Ts&... ts) {
return stack_buffer<U, T>(axes_rank(t), ts...);
}
} // namespace detail
} // namespace histogram
} // namespace boost

View File

@ -35,18 +35,29 @@ using has_underflow =
decltype(axis::traits::static_options<T>::test(axis::option::underflow));
template <class T>
struct is_growing
: decltype(axis::traits::static_options<T>::test(axis::option::growth)) {};
template <class... Ts>
struct is_growing<std::tuple<Ts...>> : mp11::mp_or<is_growing<Ts>...> {};
template <class... Ts>
struct is_growing<axis::variant<Ts...>> : mp11::mp_or<is_growing<Ts>...> {};
using is_growing = decltype(axis::traits::static_options<T>::test(axis::option::growth));
template <class T>
using has_growing_axis =
mp11::mp_if<is_vector_like<T>, is_growing<mp11::mp_first<T>>, is_growing<T>>;
using is_multidim = is_tuple<std::decay_t<arg_type<decltype(&T::index)>>>;
template <template <class> class Trait, class T>
struct has_special_axis_impl : Trait<T> {};
template <template <class> class Trait, class... Ts>
struct has_special_axis_impl<Trait, std::tuple<Ts...>> : mp11::mp_or<Trait<Ts>...> {};
template <template <class> class Trait, class... Ts>
struct has_special_axis_impl<Trait, axis::variant<Ts...>> : mp11::mp_or<Trait<Ts>...> {};
template <template <class> class Trait, class T>
using has_special_axis =
has_special_axis_impl<Trait, mp11::mp_if<is_vector_like<T>, mp11::mp_first<T>, T>>;
template <class T>
using has_multidim_axis = has_special_axis<is_multidim, T>;
template <class T>
using has_growing_axis = has_special_axis<is_growing, T>;
/// Index with an invalid state
struct optional_index {
@ -56,53 +67,83 @@ struct optional_index {
std::size_t operator*() const { return idx; }
};
inline void linearize(optional_index& out, const axis::index_type extent,
const axis::index_type j) noexcept {
// j is internal index shifted by +1 if axis has underflow bin
out.idx += j * out.stride;
// set stride to 0, if j is invalid
out.stride *= (0 <= j && j < extent) * extent;
inline void linearize(std::false_type, std::false_type, optional_index& out,
const axis::index_type size, const axis::index_type i) noexcept {
out.idx += i * out.stride;
out.stride *= i >= 0 && i < size ? size : 0;
}
inline void linearize(std::false_type, std::true_type, optional_index& out,
const axis::index_type size, const axis::index_type i) noexcept {
out.idx += i * out.stride;
out.stride *= i >= 0 ? size + 1 : 0;
}
inline void linearize(std::true_type, std::false_type, optional_index& out,
const axis::index_type size, const axis::index_type i) noexcept {
// internal index must be shifted by +1 since axis has underflow bin
out.idx += (i + 1) * out.stride;
out.stride *= i < size ? size + 1 : 0;
}
inline void linearize(std::true_type, std::true_type, optional_index& out,
const axis::index_type size, const axis::index_type i) noexcept {
// internal index must be shifted by +1 since axis has underflow bin
out.idx += (i + 1) * out.stride;
out.stride *= size + 2;
}
// for non-growing axis
template <class Axis, class Value>
void linearize_value(optional_index& o, const Axis& a, const Value& v) {
using B = decltype(axis::traits::static_options<Axis>::test(axis::option::underflow));
const auto j = axis::traits::index(a, v) + B::value;
linearize(o, axis::traits::extent(a), j);
using O = axis::traits::static_options<Axis>;
using V = detail::remove_cvref_t<detail::arg_type<decltype(&Axis::index)>>;
linearize(O::test(axis::option::underflow), O::test(axis::option::overflow), o,
a.size(), a.index(static_cast<V>(v)));
}
// for variant that does not contain any growing axis
template <class... Ts, class Value>
void linearize_value(optional_index& o, const axis::variant<Ts...>& a, const Value& v) {
axis::visit([&o, &v](const auto& a) { linearize_value(o, a, v); }, a);
axis::visit(
[&o, &v](const auto& a) {
using A = std::decay_t<decltype(a)>;
using O = axis::traits::static_options<A>;
// axis::traits::index uses try_cast which is needed here
linearize(O::test(axis::option::underflow), O::test(axis::option::overflow), o,
a.size(), axis::traits::index(a, v));
},
a);
}
// for growing axis
template <class Axis, class Value>
bool linearize_value(optional_index& o, axis::index_type& s, Axis& a, const Value& v) {
axis::index_type j;
std::tie(j, s) = axis::traits::update(a, v);
j += has_underflow<Axis>::value;
linearize(o, axis::traits::extent(a), j);
bool linearize_with_growth_value(optional_index& o, axis::index_type& s, Axis& a,
const Value& v) {
using O = axis::traits::static_options<Axis>;
axis::index_type i;
std::tie(i, s) = axis::traits::update(a, v);
linearize(O::test(axis::option::underflow), O::test(axis::option::overflow), o,
a.size(), i);
return s != 0;
}
// for variant which contains at least one growing axis
template <class... Ts, class Value>
bool linearize_value(optional_index& o, axis::index_type& s, axis::variant<Ts...>& a,
const Value& v) {
axis::visit([&o, &s, &v](auto&& a) { linearize_value(o, s, a, v); }, a);
bool linearize_with_growth_value(optional_index& o, axis::index_type& s,
axis::variant<Ts...>& a, const Value& v) {
axis::visit([&o, &s, &v](auto&& a) { linearize_with_growth_value(o, s, a, v); }, a);
return s != 0;
}
template <class A>
void linearize_index(optional_index& out, const A& axis, const axis::index_type j) {
void linearize_index(optional_index& out, const A& axis, const axis::index_type i) {
// A may be axis or variant, cannot use static option detection here
const auto opt = axis::traits::options(axis);
const auto shift = opt & axis::option::underflow ? 1 : 0;
const auto n = axis.size() + (opt & axis::option::overflow ? 1 : 0);
linearize(out, n + shift, j + shift);
const auto extent = axis.size() + (opt & axis::option::overflow ? 1 : 0) + shift;
// i may be arbitrarily out of range
linearize(std::false_type{}, std::false_type{}, out, extent, i + shift);
}
template <class S, class A>
@ -120,8 +161,8 @@ void grow_storage(const A& axes, S& storage, const axis::index_type* shifts) {
s *= n;
});
auto new_storage = make_default(storage);
new_storage.reset(detail::bincount(axes));
const auto dlast = data + get_size(axes) - 1;
new_storage.reset(bincount(axes));
const auto dlast = data + axes_rank(axes) - 1;
for (const auto& x : storage) {
auto ns = new_storage.begin();
sit = shifts;
@ -166,6 +207,30 @@ void grow_storage(const A& axes, S& storage, const axis::index_type* shifts) {
storage = std::move(new_storage);
}
// histogram has no growing and no multidim axis, axis rank known at compile-time
template <class S, class... As, class... Us>
optional_index index(std::false_type, std::false_type, const std::tuple<As...>& axes, S&,
const std::tuple<Us...>& args) {
optional_index idx;
static_assert(sizeof...(As) == sizeof...(Us), "number of arguments != histogram rank");
mp11::mp_for_each<mp11::mp_iota_c<sizeof...(As)>>(
[&](auto i) { linearize_value(idx, axis_get<i>(axes), std::get<i>(args)); });
return idx;
}
// histogram has no growing and no multidim axis, axis rank known at run-time
template <class A, class S, class U>
optional_index index(std::false_type, std::false_type, const A& axes, S&, const U& args) {
constexpr auto nargs = static_cast<unsigned>(std::tuple_size<U>::value);
optional_index idx;
if (axes_rank(axes) != nargs)
BOOST_THROW_EXCEPTION(std::invalid_argument("number of arguments != histogram rank"));
constexpr auto nbuf = buffer_size<A>::value;
mp11::mp_for_each<mp11::mp_iota_c<(nargs < nbuf ? nargs : nbuf)>>(
[&](auto i) { linearize_value(idx, axis_get<i>(axes), std::get<i>(args)); });
return idx;
}
// special case: if histogram::operator()(tuple(1, 2)) is called on 1d histogram
// with axis that accepts 2d tuple, this should not fail
// - solution is to forward tuples of size > 1 directly to axis for 1d
@ -175,43 +240,60 @@ void grow_storage(const A& axes, S& storage, const axis::index_type* shifts) {
// (axis::variant provides generic call interface and hides concrete
// interface), so we throw at runtime if incompatible argument is passed (e.g.
// 3d tuple)
// histogram has only non-growing axes
template <unsigned I, unsigned N, class T, class S, class U>
optional_index to_index(std::false_type, const T& axes, S&, const U& args) {
// histogram has no growing multidim axis, axis rank == 1 known at compile-time
template <class S, class A, class U>
optional_index index(std::false_type, std::true_type, const std::tuple<A>& axes, S&,
const U& args) {
optional_index idx;
const auto rank = get_size(axes);
if (rank == 1 && N > 1)
linearize_value(idx, axis_get<0>(axes), tuple_slice<I, N>(args));
linearize_value(idx, axis_get<0>(axes), args);
return idx;
}
// histogram has no growing multidim axis, axis rank > 1 known at compile-time
template <class S, class U, class A0, class A1, class... As>
optional_index index(std::false_type, std::true_type,
const std::tuple<A0, A1, As...>& axes, S& s, const U& args) {
return index(std::false_type{}, std::false_type{}, axes, s, args);
}
// histogram has no growing multidim axis, axis rank known at run-time
template <class A, class S, class U>
optional_index index(std::false_type, std::true_type, const A& axes, S&, const U& args) {
optional_index idx;
const auto rank = axes_rank(axes);
constexpr auto nargs = static_cast<unsigned>(std::tuple_size<U>::value);
if (rank == 1 && nargs > 1)
linearize_value(idx, axis_get<0>(axes), args);
else {
if (rank != N)
if (rank != nargs)
BOOST_THROW_EXCEPTION(
std::invalid_argument("number of arguments != histogram rank"));
constexpr unsigned M = buffer_size<remove_cvref_t<decltype(axes)>>::value;
mp11::mp_for_each<mp11::mp_iota_c<(N < M ? N : M)>>([&](auto J) {
linearize_value(idx, axis_get<J>(axes), std::get<(J + I)>(args));
});
constexpr auto nbuf = buffer_size<A>::value;
mp11::mp_for_each<mp11::mp_iota_c<(nargs < nbuf ? nargs : nbuf)>>(
[&](auto i) { linearize_value(idx, axis_get<i>(axes), std::get<i>(args)); });
}
return idx;
}
// histogram has growing axes
template <unsigned I, unsigned N, class T, class S, class U>
optional_index to_index(std::true_type, T& axes, S& storage, const U& args) {
// histogram has growing axis
template <class B, class T, class S, class U>
optional_index index(std::true_type, B, T& axes, S& storage, const U& args) {
optional_index idx;
constexpr unsigned M = buffer_size<T>::value;
axis::index_type shifts[M];
const auto rank = get_size(axes);
constexpr unsigned nbuf = buffer_size<T>::value;
axis::index_type shifts[nbuf];
const auto rank = axes_rank(axes);
constexpr auto nargs = static_cast<unsigned>(std::tuple_size<U>::value);
bool update_needed = false;
if (rank == 1 && N > 1)
update_needed =
linearize_value(idx, shifts[0], axis_get<0>(axes), tuple_slice<I, N>(args));
if (rank == 1 && nargs > 1)
update_needed = linearize_with_growth_value(idx, shifts[0], axis_get<0>(axes), args);
else {
if (rank != N)
if (rank != nargs)
BOOST_THROW_EXCEPTION(
std::invalid_argument("number of arguments != histogram rank"));
mp11::mp_for_each<mp11::mp_iota_c<(N < M ? N : M)>>([&](auto J) {
update_needed |=
linearize_value(idx, shifts[J], axis_get<J>(axes), std::get<(J + I)>(args));
mp11::mp_for_each<mp11::mp_iota_c<(nargs < nbuf ? nargs : nbuf)>>([&](auto i) {
update_needed |= linearize_with_growth_value(idx, shifts[i], axis_get<i>(axes),
std::get<i>(args));
});
}
@ -219,15 +301,15 @@ optional_index to_index(std::true_type, T& axes, S& storage, const U& args) {
return idx;
}
template <typename U>
constexpr auto weight_sample_indices() {
template <class U>
constexpr auto weight_sample_indices() noexcept {
if (is_weight<U>::value) return std::make_pair(0, -1);
if (is_sample<U>::value) return std::make_pair(-1, 0);
return std::make_pair(-1, -1);
}
template <typename U0, typename U1, typename... Us>
constexpr auto weight_sample_indices() {
template <class U0, class U1, class... Us>
constexpr auto weight_sample_indices() noexcept {
using L = mp11::mp_list<U0, U1, Us...>;
const int n = sizeof...(Us) + 1;
if (is_weight<mp11::mp_at_c<L, 0>>::value) {
@ -253,77 +335,72 @@ constexpr auto weight_sample_indices() {
return std::make_pair(-1, -1);
}
template <class T>
void fill_impl2(std::true_type, T&& t) {
template <class T, class U>
void fill_impl(mp11::mp_int<-1>, mp11::mp_int<-1>, std::false_type, T&& t,
const U&) noexcept {
t();
}
template <class T, class U>
void fill_impl(mp11::mp_int<-1>, mp11::mp_int<-1>, std::true_type, T&& t,
const U&) noexcept {
++t;
}
template <class T, class U>
void fill_impl2(std::true_type, T&& t, const U& u) {
t += u;
}
template <class T, class... Us>
void fill_impl2(std::false_type, T&& t, const Us&... us) {
t(us...);
}
template <class T, class U>
void fill_impl1(mp11::mp_int<-1>, mp11::mp_int<-1>, T&& t, const U&) {
fill_impl2(has_operator_preincrement<remove_cvref_t<T>>{}, t);
template <class IW, class T, class U>
void fill_impl(IW, mp11::mp_int<-1>, std::false_type, T&& t, const U& u) noexcept {
t(std::get<IW::value>(u).value);
}
template <class IW, class T, class U>
void fill_impl1(IW, mp11::mp_int<-1>, T&& t, const U& u) {
fill_impl2(has_operator_preincrement<remove_cvref_t<T>>{}, t,
std::get<IW::value>(u).value);
void fill_impl(IW, mp11::mp_int<-1>, std::true_type, T&& t, const U& u) noexcept {
t += std::get<IW::value>(u).value;
}
template <class IS, class T, class U>
void fill_impl1(mp11::mp_int<-1>, IS, T&& t, const U& u) {
void fill_impl(mp11::mp_int<-1>, IS, std::false_type, T&& t, const U& u) noexcept {
mp11::tuple_apply([&t](auto&&... args) { t(args...); }, std::get<IS::value>(u).value);
}
template <class IW, class IS, class T, class U>
void fill_impl1(IW, IS, T&& t, const U& u) {
void fill_impl(IW, IS, std::false_type, T&& t, const U& u) noexcept {
mp11::tuple_apply([&](auto&&... args2) { t(std::get<IW::value>(u).value, args2...); },
std::get<IS::value>(u).value);
}
template <class A, class SM, class... Us>
typename SM::first_type::iterator fill(A& axes, SM& sm, const std::tuple<Us...>& tus) {
template <class A, class S, class... Us>
typename S::iterator fill(A& axes, S& storage, const std::tuple<Us...>& tus) {
constexpr auto iws = weight_sample_indices<Us...>();
constexpr unsigned n = sizeof...(Us) - (iws.first > -1) - (iws.second > -1);
constexpr unsigned i = (iws.first == 0 || iws.second == 0)
? (iws.first == 1 || iws.second == 1 ? 2 : 1)
: 0;
std::lock_guard<typename SM::second_type> lk{sm.second()};
auto& storage = sm.first();
optional_index idx = to_index<i, n>(has_growing_axis<A>(), axes, storage, tus);
const auto idx = index(has_growing_axis<A>{}, has_multidim_axis<A>{}, axes, storage,
tuple_slice<i, n>(tus));
if (idx) {
using mp11::mp_int;
fill_impl1(mp_int<iws.first>{}, mp_int<iws.second>{}, storage[*idx], tus);
fill_impl(mp11::mp_int<iws.first>{}, mp11::mp_int<iws.second>{},
has_operator_preincrement<typename S::value_type>{}, storage[*idx], tus);
return storage.begin() + *idx;
}
return storage.end();
}
template <typename A, typename... Us>
template <class A, class... Us>
optional_index at(const A& axes, const std::tuple<Us...>& args) {
if (get_size(axes) != sizeof...(Us))
if (axes_rank(axes) != sizeof...(Us))
BOOST_THROW_EXCEPTION(std::invalid_argument("number of arguments != histogram rank"));
optional_index idx;
mp11::mp_for_each<mp11::mp_iota_c<sizeof...(Us)>>([&](auto I) {
mp11::mp_for_each<mp11::mp_iota_c<sizeof...(Us)>>([&](auto i) {
// axes_get works with static and dynamic axes
linearize_index(idx, axis_get<I>(axes),
static_cast<axis::index_type>(std::get<I>(args)));
linearize_index(idx, axis_get<i>(axes),
static_cast<axis::index_type>(std::get<i>(args)));
});
return idx;
}
template <typename A, typename U>
template <class A, class U>
optional_index at(const A& axes, const U& args) {
if (get_size(axes) != get_size(args))
if (axes_rank(axes) != axes_rank(args))
BOOST_THROW_EXCEPTION(std::invalid_argument("number of arguments != histogram rank"));
optional_index idx;
using std::begin;

View File

@ -7,18 +7,6 @@
#ifndef BOOST_HISTOGRAM_DETAIL_META_HPP
#define BOOST_HISTOGRAM_DETAIL_META_HPP
/* Most of the histogram code is generic and works for any number of axes. Buffers with a
* fixed maximum capacity are used in some places, which have a size equal to the rank of
* a histogram. The buffers are statically allocated to improve performance, which means
* that they need a preset maximum capacity. 32 seems like a safe upper limit for the rank
* (you can nevertheless increase it here if necessary): the simplest non-trivial axis has
* 2 bins; even if counters are used which need only a byte of storage per bin, this still
* corresponds to 4 GB of storage.
*/
#ifndef BOOST_HISTOGRAM_DETAIL_AXES_LIMIT
#define BOOST_HISTOGRAM_DETAIL_AXES_LIMIT 32
#endif
#include <boost/config/workaround.hpp>
#if BOOST_WORKAROUND(BOOST_GCC, >= 60000)
#pragma GCC diagnostic push
@ -113,16 +101,16 @@ constexpr float highest() {
return std::numeric_limits<float>::infinity();
}
template <std::size_t I, class T, std::size_t... N>
decltype(auto) tuple_slice_impl(T&& t, mp11::index_sequence<N...>) {
return std::forward_as_tuple(std::get<(N + I)>(std::forward<T>(t))...);
template <std::size_t I, class T, std::size_t... K>
decltype(auto) tuple_slice_impl(T&& t, mp11::index_sequence<K...>) {
return std::forward_as_tuple(std::get<(I + K)>(std::forward<T>(t))...);
}
template <std::size_t I, std::size_t N, class T>
decltype(auto) tuple_slice(T&& t) {
static_assert(I + N <= mp11::mp_size<remove_cvref_t<T>>::value,
"I and N must describe a slice");
return tuple_slice_impl<I>(std::forward<T>(t), mp11::make_index_sequence<N>{});
template <std::size_t I, std::size_t N, class Tuple>
decltype(auto) tuple_slice(Tuple&& t) {
constexpr auto S = std::tuple_size<std::decay_t<Tuple>>::value;
static_assert(I + N <= S, "I and N must describe a slice");
return tuple_slice_impl<I>(std::forward<Tuple>(t), mp11::make_index_sequence<N>{});
}
#define BOOST_HISTOGRAM_DETECT(name, cond) \
@ -316,55 +304,6 @@ auto make_default(const T& t) {
[](const auto&) { return T{}; }, t);
}
template <class T>
using tuple_size_t = typename std::tuple_size<T>::type;
template <class T>
constexpr std::size_t get_size_impl(std::true_type, const T&) noexcept {
return std::tuple_size<T>::value;
}
template <class T>
std::size_t get_size_impl(std::false_type, const T& t) noexcept {
using std::begin;
using std::end;
return static_cast<std::size_t>(std::distance(begin(t), end(t)));
}
template <class T>
std::size_t get_size(const T& t) noexcept {
return get_size_impl(mp11::mp_valid<tuple_size_t, T>(), t);
}
template <class T>
using buffer_size = mp11::mp_eval_or<
std::integral_constant<std::size_t, BOOST_HISTOGRAM_DETAIL_AXES_LIMIT>, tuple_size_t,
T>;
template <class T, std::size_t N>
class sub_array : public std::array<T, N> {
public:
explicit sub_array(std::size_t s) : size_(s) {}
sub_array(std::size_t s, T value) : size_(s) { std::array<T, N>::fill(value); }
// need to override both versions of std::array
auto end() noexcept { return std::array<T, N>::begin() + size_; }
auto end() const noexcept { return std::array<T, N>::begin() + size_; }
auto size() const noexcept { return size_; }
private:
std::size_t size_ = N;
};
template <class U, class T>
using stack_buffer = sub_array<U, buffer_size<T>::value>;
template <class U, class T, class... Ts>
auto make_stack_buffer(const T& t, Ts&&... ts) {
return stack_buffer<U, T>(get_size(t), std::forward<Ts>(ts)...);
}
template <class T>
constexpr bool relaxed_equal(const T& a, const T& b) noexcept {
return static_if<has_operator_equal<T>>(

View File

@ -119,9 +119,7 @@ public:
explicit histogram(A&& a) : histogram(std::forward<A>(a), storage_type()) {}
/// Number of axes (dimensions).
constexpr unsigned rank() const noexcept {
return static_cast<unsigned>(detail::get_size(axes_));
}
constexpr unsigned rank() const noexcept { return detail::axes_rank(axes_); }
/// Total number of bins (including underflow/overflow).
std::size_t size() const noexcept { return storage_and_mutex_.first().size(); }
@ -179,13 +177,14 @@ public:
*/
template <class... Ts>
iterator operator()(const Ts&... ts) {
return operator()(std::make_tuple(ts...));
return operator()(std::forward_as_tuple(ts...));
}
/// Fill histogram with values, an optional weight, and/or a sample from a `std::tuple`.
template <class... Ts>
iterator operator()(const std::tuple<Ts...>& t) {
return detail::fill(axes_, storage_and_mutex_, t);
std::lock_guard<mutex_type> guard{storage_and_mutex_.second()};
return detail::fill(axes_, storage_and_mutex_.first(), t);
}
/** Access cell value at integral indices.
@ -201,18 +200,18 @@ public:
*/
template <class... Indices>
decltype(auto) at(axis::index_type i, Indices... is) {
return at(std::make_tuple(i, is...));
return at(std::forward_as_tuple(i, is...));
}
/// Access cell value at integral indices (read-only).
template <class... Indices>
decltype(auto) at(axis::index_type i, Indices... is) const {
return at(std::make_tuple(i, is...));
return at(std::forward_as_tuple(i, is...));
}
/// Access cell value at integral indices stored in `std::tuple`.
template <typename... Indices>
decltype(auto) at(const std::tuple<axis::index_type, Indices...>& is) {
decltype(auto) at(const std::tuple<Indices...>& is) {
const auto idx = detail::at(axes_, is);
if (!idx)
BOOST_THROW_EXCEPTION(std::out_of_range("at least one index out of bounds"));
@ -221,7 +220,7 @@ public:
/// Access cell value at integral indices stored in `std::tuple` (read-only).
template <typename... Indices>
decltype(auto) at(const std::tuple<axis::index_type, Indices...>& is) const {
decltype(auto) at(const std::tuple<Indices...>& is) const {
const auto idx = detail::at(axes_, is);
if (!idx)
BOOST_THROW_EXCEPTION(std::out_of_range("at least one index out of bounds"));

View File

@ -21,12 +21,12 @@ int main() {
struct growing {
auto update(int) { return std::make_pair(0, 0); }
};
using G = growing;
using T = growing;
using I = axis::integer<>;
using A = std::tuple<I, G>;
using B = std::vector<G>;
using C = std::vector<axis::variant<I, G>>;
using A = std::tuple<I, T>;
using B = std::vector<T>;
using C = std::vector<axis::variant<I, T>>;
using D = std::tuple<I>;
using E = std::vector<I>;
using F = std::vector<axis::variant<I>>;
@ -38,5 +38,27 @@ int main() {
BOOST_TEST_TRAIT_FALSE((detail::has_growing_axis<E>));
BOOST_TEST_TRAIT_FALSE((detail::has_growing_axis<F>));
}
{
struct multidim {
auto index(const std::tuple<int>&) { return 0; }
};
using T = multidim;
using I = axis::integer<>;
using A = std::tuple<I, T>;
using B = std::vector<T>;
using C = std::vector<axis::variant<I, T>>;
using D = std::tuple<I>;
using E = std::vector<I>;
using F = std::vector<axis::variant<I>>;
BOOST_TEST_TRAIT_TRUE((detail::has_multidim_axis<A>));
BOOST_TEST_TRAIT_TRUE((detail::has_multidim_axis<B>));
BOOST_TEST_TRAIT_TRUE((detail::has_multidim_axis<C>));
BOOST_TEST_TRAIT_FALSE((detail::has_multidim_axis<D>));
BOOST_TEST_TRAIT_FALSE((detail::has_multidim_axis<E>));
BOOST_TEST_TRAIT_FALSE((detail::has_multidim_axis<F>));
}
return boost::report_errors();
}

View File

@ -397,17 +397,5 @@ int main() {
BOOST_TEST_TRAIT_TRUE((has_operator_radd<B&, const B&>));
}
// get_size
{
std::tuple<int, int> a;
std::vector<int> b(3);
std::array<int, 4> c;
const std::tuple<int> d;
BOOST_TEST_EQ(get_size(a), 2);
BOOST_TEST_EQ(get_size(b), 3);
BOOST_TEST_EQ(get_size(c), 4);
BOOST_TEST_EQ(get_size(d), 1);
}
return boost::report_errors();
}

View File

@ -131,6 +131,18 @@ int main() {
BOOST_TEST(detail::axes_equal(t2, t3));
}
// axes_rank
{
std::tuple<int, int> a;
std::vector<int> b(3);
std::array<int, 4> c;
const std::tuple<int> d;
BOOST_TEST_EQ(detail::axes_rank(a), 2);
BOOST_TEST_EQ(detail::axes_rank(b), 3);
BOOST_TEST_EQ(detail::axes_rank(c), 4);
BOOST_TEST_EQ(detail::axes_rank(d), 1);
}
// bincount overflow
{
auto v = std::vector<axis::integer<>>(

View File

@ -63,15 +63,20 @@ int main() {
BOOST_TEST_THROWS(c(0), std::invalid_argument);
auto d = make(dynamic_tag(), a);
BOOST_TEST_THROWS(d(std::string()), std::invalid_argument);
struct axis2d {
auto index(const std::tuple<double, double>& x) const {
return axis::index_type{std::get<0>(x) == 1 && std::get<1>(x) == 2};
}
auto size() const { return axis::index_type{2}; }
} e;
auto f = make(dynamic_tag(), a, e);
BOOST_TEST_THROWS(f(0, 0, 0), std::invalid_argument);
BOOST_TEST_THROWS(f(0, std::make_tuple(0, 0), 1), std::invalid_argument);
}
// axis methods
{
auto c = make(dynamic_tag(), axis::category<std::string>({"A", "B"}));
BOOST_TEST_THROWS(c.axis().value(0), std::runtime_error);
}
// wrong dimension
// bad at
{
auto h1 = make(dynamic_tag(), axis::integer<>(0, 2));
h1(1);
@ -79,6 +84,12 @@ int main() {
BOOST_TEST_THROWS(h1.at(std::make_tuple(0, 0)), std::invalid_argument);
}
// incompatible axis variant methods
{
auto c = make(dynamic_tag(), axis::category<std::string>({"A", "B"}));
BOOST_TEST_THROWS(c.axis().value(0), std::runtime_error);
}
{
auto h = make_histogram(std::vector<axis::integer<>>(1, axis::integer<>(0, 3)));
h(0);

View File

@ -476,7 +476,7 @@ void run_tests() {
};
struct axis2d {
auto index(std::tuple<double, double> x) const {
auto index(const std::tuple<double, double>& x) const {
return axis::index_type{std::get<0>(x) == 1 && std::get<1>(x) == 2};
}
auto size() const { return axis::index_type{2}; }
@ -533,8 +533,6 @@ void run_tests() {
auto h1 = make(Tag(), axis::integer<>(0, 2));
h1(std::make_tuple(0)); // as if one had passed 0 directly
BOOST_TEST_EQ(h1.at(std::make_tuple(0)), 1); // as if one had passed 0 directly
// passing 2d tuple is an invalid argument
BOOST_TEST_THROWS(h1(std::make_tuple(0, 0)), std::invalid_argument);
struct axis2d {
auto index(std::tuple<int, int> x) const {

View File

@ -7,10 +7,11 @@
#ifndef BOOST_HISTOGRAM_TEST_UTILITY_HISTOGRAM_HPP
#define BOOST_HISTOGRAM_TEST_UTILITY_HISTOGRAM_HPP
#include <boost/histogram/axis.hpp>
#include <boost/histogram/axis/ostream.hpp>
#include <boost/histogram/axis/variant.hpp>
#include <boost/histogram/make_histogram.hpp>
#include <boost/histogram/ostream.hpp>
#include <boost/mp11/algorithm.hpp>
#include <type_traits>
#include <vector>
@ -18,32 +19,32 @@ namespace boost {
namespace histogram {
template <typename... Ts>
auto make_axis_vector(Ts&&... ts) {
using T = axis::variant<detail::remove_cvref_t<Ts>...>;
return std::vector<T>({T(std::forward<Ts>(ts))...});
auto make_axis_vector(const Ts&... ts) {
using Var = boost::mp11::mp_unique<axis::variant<Ts...>>;
return std::vector<Var>({Var(ts)...});
}
using static_tag = std::false_type;
using dynamic_tag = std::true_type;
template <typename... Axes>
auto make(static_tag, Axes&&... axes) {
return make_histogram(std::forward<Axes>(axes)...);
auto make(static_tag, const Axes&... axes) {
return make_histogram(axes...);
}
template <typename S, typename... Axes>
auto make_s(static_tag, S&& s, Axes&&... axes) {
return make_histogram_with(s, std::forward<Axes>(axes)...);
auto make_s(static_tag, S&& s, const Axes&... axes) {
return make_histogram_with(s, axes...);
}
template <typename... Axes>
auto make(dynamic_tag, Axes&&... axes) {
return make_histogram(make_axis_vector(std::forward<Axes>(axes)...));
auto make(dynamic_tag, const Axes&... axes) {
return make_histogram(make_axis_vector(axes...));
}
template <typename S, typename... Axes>
auto make_s(dynamic_tag, S&& s, Axes&&... axes) {
return make_histogram_with(s, make_axis_vector(std::forward<Axes>(axes)...));
auto make_s(dynamic_tag, S&& s, const Axes&... axes) {
return make_histogram_with(s, make_axis_vector(axes...));
}
} // namespace histogram