Feature/bulk visit (#217)

This commit is contained in:
joaquintides 2023-10-11 12:50:28 +02:00 committed by GitHub
parent ef0b3a0cd8
commit 8ee48fe909
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
54 changed files with 507 additions and 3 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 28 KiB

After

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 30 KiB

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 30 KiB

After

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 22 KiB

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 27 KiB

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 27 KiB

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

After

Width:  |  Height:  |  Size: 25 KiB

View File

@ -447,6 +447,10 @@ operations follow a https://en.wikipedia.org/wiki/Zipf%27s_law#Formal_definition
with different _skew_ parameters: the higher the skew, the more concentrated are the keys in the lower values
of the covered range.
`boost::concurrent_flat_map` is exercised using both regular and xref:#concurrent_bulk_visitation[bulk visitation]:
in the latter case, lookup keys are buffered in a local array and then processed at
once each time the buffer reaches xref:#concurrent_flat_map_constants[`bulk_visit_size`].
=== GCC 12, x64

View File

@ -13,6 +13,7 @@
with serial and parallel variants.
* Added efficient move construction of `boost::unordered_flat_(map|set)` from
`boost::concurrent_flat_(map|set)` and vice versa.
* Added bulk visitation to concurrent containers for increased lookup performance.
* Added debug-mode mechanisms for detecting illegal reentrancies into
a concurrent container from user code.
* Added Boost.Serialization support to all containers and their (non-local) iterator types.

View File

@ -194,6 +194,55 @@ may be inserted, modified or erased by other threads during visitation. It is
advisable not to assume too much about the exact global state of a concurrent container
at any point in your program.
== Bulk visitation
Suppose you have an `std::array` of keys you want to look up in a concurrent map:
[source,c++]
----
std::array<int, N> keys;
...
for(const auto& key: keys) {
m.visit(key, [](auto& x) { ++x.second; });
}
----
_Bulk visitation_ allows us to pass all the keys in one operation:
[source,c++]
----
m.visit(keys.begin(), keys.end(), [](auto& x) { ++x.second; });
----
This functionality is not provided for mere syntactic convenience, though: by processing all the
keys at once, some internal optimizations can be applied that increase
performance over the regular, one-at-a-time case (consult the
xref:#benchmarks_boostconcurrent_flat_map[benchmarks]). In fact, it may be beneficial
to buffer incoming keys so that they can be bulk visited in chunks:
[source,c++]
----
static constexpr auto bulk_visit_size = boost::concurrent_flat_map<int,int>::bulk_visit_size;
std::array<int, bulk_visit_size> buffer;
std::size_t i=0;
while(...) { // processing loop
...
buffer[i++] = k;
if(i == bulk_visit_size) {
map.visit(buffer.begin(), buffer.end(), [](auto& x) { ++x.second; });
i = 0;
}
...
}
// flush remaining keys
map.visit(buffer.begin(), buffer.begin() + i, [](auto& x) { ++x.second; });
----
There's a latency/throughput tradeoff here: it will take longer for incoming keys to
be processed (since they are buffered), but the number of processed keys per second
is higher. `bulk_visit_size` is the recommended chunk size — smaller buffers
may yield worse performance.
== Blocking Operations
``boost::concurrent_flat_set``s and ``boost::concurrent_flat_map``s can be copied, assigned, cleared and merged just like any

View File

@ -50,6 +50,9 @@ namespace boost {
using size_type = std::size_t;
using difference_type = std::ptrdiff_t;
// constants
static constexpr size_type xref:#concurrent_flat_map_constants[bulk_visit_size] = _implementation-defined_;
// construct/copy/destroy
xref:#concurrent_flat_map_default_constructor[concurrent_flat_map]();
explicit xref:#concurrent_flat_map_bucket_count_constructor[concurrent_flat_map](size_type n,
@ -106,6 +109,13 @@ namespace boost {
template<class K, class F> size_t xref:#concurrent_flat_map_cvisit[visit](const K& k, F f) const;
template<class K, class F> size_t xref:#concurrent_flat_map_cvisit[cvisit](const K& k, F f) const;
template<class FwdIterator, class F>
size_t xref:concurrent_flat_map_bulk_visit[visit](FwdIterator first, FwdIterator last, F f);
template<class FwdIterator, class F>
size_t xref:concurrent_flat_map_bulk_visit[visit](FwdIterator first, FwdIterator last, F f) const;
template<class FwdIterator, class F>
size_t xref:concurrent_flat_map_bulk_visit[cvisit](FwdIterator first, FwdIterator last, F f) const;
template<class F> size_t xref:#concurrent_flat_map_cvisit_all[visit_all](F f);
template<class F> size_t xref:#concurrent_flat_map_cvisit_all[visit_all](F f) const;
template<class F> size_t xref:#concurrent_flat_map_cvisit_all[cvisit_all](F f) const;
@ -386,6 +396,13 @@ a function visiting elements of `m`) are detected and signalled through `BOOST_A
When run-time speed is a concern, the feature can be disabled by globally defining
this macro.
=== Constants
```cpp
static constexpr size_type bulk_visit_size;
```
Chunk size internally used in xref:concurrent_flat_map_bulk_visit[bulk visit] operations.
=== Constructors
@ -722,6 +739,42 @@ Notes:;; The `template<class K, class F>` overloads only participate in overload
---
==== Bulk visit
```c++
template<class FwdIterator, class F>
size_t visit(FwdIterator first, FwdIterator last, F f);
template<class FwdIterator, class F>
size_t visit(FwdIterator first, FwdIterator last, F f) const;
template<class FwdIterator, class F>
size_t cvisit(FwdIterator first, FwdIterator last, F f) const;
```
For each element `k` in the range [`first`, `last`),
if there is an element `x` in the container with key equivalent to `k`,
invokes `f` with a reference to `x`.
Such reference is const iff `*this` is const.
Although functionally equivalent to individually invoking
xref:concurrent_flat_map_cvisit[`[c\]visit`] for each key, bulk visitation
performs generally faster due to internal streamlining optimizations.
It is advisable that `std::distance(first,last)` be at least
xref:#concurrent_flat_map_constants[`bulk_visit_size`] to enjoy
a performance gain: beyond this size, performance is not expected
to increase further.
[horizontal]
Requires:;; `FwdIterator` is a https://en.cppreference.com/w/cpp/named_req/ForwardIterator[LegacyForwardIterator^]
({cpp}11 to {cpp}17),
or satisfies https://en.cppreference.com/w/cpp/iterator/forward_iterator[std::forward_iterator^] ({cpp}20 and later).
For `K` = `std::iterator_traits<FwdIterator>::value_type`, either `K` is `key_type` or
else `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs.
In the latter case, the library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent.
This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
Returns:;; The number of elements visited.
---
==== [c]visit_all
```c++

View File

@ -45,6 +45,9 @@ namespace boost {
using size_type = std::size_t;
using difference_type = std::ptrdiff_t;
// constants
static constexpr size_type xref:#concurrent_flat_set_constants[bulk_visit_size] = _implementation-defined_;
// construct/copy/destroy
xref:#concurrent_flat_set_default_constructor[concurrent_flat_set]();
explicit xref:#concurrent_flat_set_bucket_count_constructor[concurrent_flat_set](size_type n,
@ -98,6 +101,11 @@ namespace boost {
template<class K, class F> size_t xref:#concurrent_flat_set_cvisit[visit](const K& k, F f) const;
template<class K, class F> size_t xref:#concurrent_flat_set_cvisit[cvisit](const K& k, F f) const;
template<class FwdIterator, class F>
size_t xref:concurrent_flat_set_bulk_visit[visit](FwdIterator first, FwdIterator last, F f) const;
template<class FwdIterator, class F>
size_t xref:concurrent_flat_set_bulk_visit[cvisit](FwdIterator first, FwdIterator last, F f) const;
template<class F> size_t xref:#concurrent_flat_set_cvisit_all[visit_all](F f) const;
template<class F> size_t xref:#concurrent_flat_set_cvisit_all[cvisit_all](F f) const;
template<class ExecutionPolicy, class F>
@ -340,6 +348,13 @@ a function visiting elements of `m`) are detected and signalled through `BOOST_A
When run-time speed is a concern, the feature can be disabled by globally defining
this macro.
=== Constants
```cpp
static constexpr size_type bulk_visit_size;
```
Chunk size internally used in xref:concurrent_flat_set_bulk_visit[bulk visit] operations.
=== Constructors
@ -672,6 +687,39 @@ Notes:;; The `template<class K, class F>` overloads only participate in overload
---
==== Bulk visit
```c++
template<class FwdIterator, class F>
size_t visit(FwdIterator first, FwdIterator last, F f) const;
template<class FwdIterator, class F>
size_t cvisit(FwdIterator first, FwdIterator last, F f) const;
```
For each element `k` in the range [`first`, `last`),
if there is an element `x` in the container with key equivalent to `k`,
invokes `f` with a const reference to `x`.
Although functionally equivalent to individually invoking
xref:concurrent_flat_set_cvisit[`[c\]visit`] for each key, bulk visitation
performs generally faster due to internal streamlining optimizations.
It is advisable that `std::distance(first,last)` be at least
xref:#concurrent_flat_set_constants[`bulk_visit_size`] to enjoy
a performance gain: beyond this size, performance is not expected
to increase further.
[horizontal]
Requires:;; `FwdIterator` is a https://en.cppreference.com/w/cpp/named_req/ForwardIterator[LegacyForwardIterator^]
({cpp}11 to {cpp}17),
or satisfies https://en.cppreference.com/w/cpp/iterator/forward_iterator[std::forward_iterator^] ({cpp}20 and later).
For `K` = `std::iterator_traits<FwdIterator>::value_type`, either `K` is `key_type` or
else `Hash::is_transparent` and `Pred::is_transparent` are valid member typedefs.
In the latter case, the library assumes that `Hash` is callable with both `K` and `Key` and that `Pred` is transparent.
This enables heterogeneous lookup which avoids the cost of instantiating an instance of the `Key` type.
Returns:;; The number of elements visited.
---
==== [c]visit_all
```c++

View File

@ -1,6 +1,7 @@
/* Fast open-addressing concurrent hashmap.
*
* Copyright 2023 Christian Mazakas.
* Copyright 2023 Joaquin M Lopez Munoz.
* Distributed under the Boost Software License, Version 1.0.
* (See accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
@ -72,6 +73,7 @@ namespace boost {
using pointer = typename boost::allocator_pointer<allocator_type>::type;
using const_pointer =
typename boost::allocator_const_pointer<allocator_type>::type;
static constexpr size_type bulk_visit_size = table_type::bulk_visit_size;
concurrent_flat_map()
: concurrent_flat_map(detail::foa::default_bucket_count)
@ -270,6 +272,33 @@ namespace boost {
return table_.visit(std::forward<K>(k), f);
}
// Bulk visitation: for each key in [first, last), visits the element with
// equivalent key, if present, with non-const access. Returns the number of
// elements visited. Delegates to the underlying concurrent table.
template<class FwdIterator, class F>
BOOST_FORCEINLINE
size_t visit(FwdIterator first, FwdIterator last, F f)
{
// Compile-time checks: FwdIterator must be (at least) a forward iterator
// dereferencing to a key-compatible type, and f invocable on value_type.
BOOST_UNORDERED_STATIC_ASSERT_BULK_VISIT_ITERATOR(FwdIterator)
BOOST_UNORDERED_STATIC_ASSERT_INVOCABLE(F)
return table_.visit(first, last, f);
}
// Const overload: same as above but f receives const access to elements.
template<class FwdIterator, class F>
BOOST_FORCEINLINE
size_t visit(FwdIterator first, FwdIterator last, F f) const
{
BOOST_UNORDERED_STATIC_ASSERT_BULK_VISIT_ITERATOR(FwdIterator)
BOOST_UNORDERED_STATIC_ASSERT_CONST_INVOCABLE(F)
return table_.visit(first, last, f);
}
// cvisit: always const access, even on a non-const container.
template<class FwdIterator, class F>
BOOST_FORCEINLINE
size_t cvisit(FwdIterator first, FwdIterator last, F f) const
{
BOOST_UNORDERED_STATIC_ASSERT_BULK_VISIT_ITERATOR(FwdIterator)
BOOST_UNORDERED_STATIC_ASSERT_CONST_INVOCABLE(F)
return table_.visit(first, last, f);
}
template <class F> size_type visit_all(F f)
{
BOOST_UNORDERED_STATIC_ASSERT_INVOCABLE(F)

View File

@ -38,7 +38,10 @@ namespace boost {
using type_policy = detail::foa::flat_set_types<Key>;
detail::foa::concurrent_table<type_policy, Hash, Pred, Allocator> table_;
using table_type =
detail::foa::concurrent_table<type_policy, Hash, Pred, Allocator>;
table_type table_;
template <class K, class H, class KE, class A>
bool friend operator==(concurrent_flat_set<K, H, KE, A> const& lhs,
@ -67,6 +70,7 @@ namespace boost {
using pointer = typename boost::allocator_pointer<allocator_type>::type;
using const_pointer =
typename boost::allocator_const_pointer<allocator_type>::type;
static constexpr size_type bulk_visit_size = table_type::bulk_visit_size;
concurrent_flat_set()
: concurrent_flat_set(detail::foa::default_bucket_count)
@ -251,6 +255,24 @@ namespace boost {
return table_.visit(std::forward<K>(k), f);
}
// Bulk visitation: for each key in [first, last), visits the element with
// equivalent key, if present. Returns the number of elements visited.
// Set elements are immutable through visitation, so only const overloads
// are provided (visit and cvisit are equivalent here).
template<class FwdIterator, class F>
BOOST_FORCEINLINE
size_t visit(FwdIterator first, FwdIterator last, F f) const
{
// Compile-time checks: FwdIterator must be (at least) a forward iterator
// dereferencing to a key-compatible type, and f const-invocable on value_type.
BOOST_UNORDERED_STATIC_ASSERT_BULK_VISIT_ITERATOR(FwdIterator)
BOOST_UNORDERED_STATIC_ASSERT_CONST_INVOCABLE(F)
return table_.visit(first, last, f);
}
template<class FwdIterator, class F>
BOOST_FORCEINLINE
size_t cvisit(FwdIterator first, FwdIterator last, F f) const
{
BOOST_UNORDERED_STATIC_ASSERT_BULK_VISIT_ITERATOR(FwdIterator)
BOOST_UNORDERED_STATIC_ASSERT_CONST_INVOCABLE(F)
return table_.visit(first, last, f);
}
template <class F> size_type visit_all(F f) const
{
BOOST_UNORDERED_STATIC_ASSERT_CONST_INVOCABLE(F)

View File

@ -10,10 +10,12 @@
#ifndef BOOST_UNORDERED_DETAIL_CONCURRENT_STATIC_ASSERTS_HPP
#define BOOST_UNORDERED_DETAIL_CONCURRENT_STATIC_ASSERTS_HPP
#include <boost/config.hpp>
#include <boost/mp11/algorithm.hpp>
#include <boost/mp11/list.hpp>
#include <functional>
#include <iterator>
#include <type_traits>
#define BOOST_UNORDERED_STATIC_ASSERT_INVOCABLE(F) \
@ -72,4 +74,32 @@ namespace boost {
} // namespace boost
#if defined(BOOST_NO_CXX20_HDR_CONCEPTS)
// Pre-C++20 fallback: approximate std::forward_iterator via the iterator
// category tag from std::iterator_traits.
#define BOOST_UNORDERED_STATIC_ASSERT_FWD_ITERATOR(Iterator) \
static_assert( \
std::is_base_of< \
std::forward_iterator_tag, \
typename std::iterator_traits<Iterator>::iterator_category>::value, \
"The provided iterator must be at least forward");
#else
// C++20: use the std::forward_iterator concept directly.
#define BOOST_UNORDERED_STATIC_ASSERT_FWD_ITERATOR(Iterator) \
static_assert(std::forward_iterator<Iterator>, \
"The provided iterator must be at least forward");
#endif
// The iterator's value_type must either be key_type itself or be usable
// for heterogeneous lookup (transparent hasher and key_equal).
#define BOOST_UNORDERED_STATIC_ASSERT_KEY_COMPATIBLE_ITERATOR(Iterator) \
static_assert( \
std::is_same< \
typename std::iterator_traits<Iterator>::value_type, \
key_type>::value || \
detail::are_transparent< \
typename std::iterator_traits<Iterator>::value_type, \
hasher, key_equal>::value, \
"The provided iterator must dereference to a compatible key value");
// Combined check applied by the bulk visit(first, last, f) overloads.
#define BOOST_UNORDERED_STATIC_ASSERT_BULK_VISIT_ITERATOR(Iterator) \
BOOST_UNORDERED_STATIC_ASSERT_FWD_ITERATOR(Iterator) \
BOOST_UNORDERED_STATIC_ASSERT_KEY_COMPATIBLE_ITERATOR(Iterator)
#endif // BOOST_UNORDERED_DETAIL_CONCURRENT_STATIC_ASSERTS_HPP

View File

@ -31,6 +31,7 @@
#include <cstddef>
#include <functional>
#include <initializer_list>
#include <iterator>
#include <memory>
#include <new>
#include <type_traits>
@ -465,6 +466,7 @@ public:
using key_equal=typename super::key_equal;
using allocator_type=typename super::allocator_type;
using size_type=typename super::size_type;
static constexpr std::size_t bulk_visit_size=16;
private:
template<typename Value,typename T>
@ -564,6 +566,27 @@ public:
return visit(x,std::forward<F>(f));
}
/* Bulk visitation entry points. Non-const visit grants exclusive
 * (mutable) access to each visited element; the const overload grants
 * shared access. cvisit simply forwards to the const overload.
 */
template<typename FwdIterator,typename F>
BOOST_FORCEINLINE
std::size_t visit(FwdIterator first,FwdIterator last,F&& f)
{
return bulk_visit_impl(group_exclusive{},first,last,std::forward<F>(f));
}
template<typename FwdIterator,typename F>
BOOST_FORCEINLINE
std::size_t visit(FwdIterator first,FwdIterator last,F&& f)const
{
return bulk_visit_impl(group_shared{},first,last,std::forward<F>(f));
}
template<typename FwdIterator,typename F>
BOOST_FORCEINLINE
std::size_t cvisit(FwdIterator first,FwdIterator last,F&& f)const
{
return visit(first,last,std::forward<F>(f));
}
template<typename F> std::size_t visit_all(F&& f)
{
return visit_all_impl(group_exclusive{},std::forward<F>(f));
@ -1051,6 +1074,26 @@ private:
access_mode,x,this->position_for(hash),hash,std::forward<F>(f));
}
/* Splits [first, last) into chunks and hands each chunk to
 * unprotected_bulk_visit. Chunks are bulk_visit_size keys long, except
 * that a remainder shorter than 2*bulk_visit_size is processed as one
 * final chunk (so chunk length is always < 2*bulk_visit_size and a tiny
 * trailing chunk is avoided). Returns the total number of elements
 * visited. shared_access() is taken once for the whole range
 * (presumably excluding container-level exclusive operations such as
 * rehashing — confirm against the rest of concurrent_table).
 */
template<typename GroupAccessMode,typename FwdIterator,typename F>
BOOST_FORCEINLINE
std::size_t bulk_visit_impl(
GroupAccessMode access_mode,FwdIterator first,FwdIterator last,F&& f)const
{
auto lck=shared_access();
std::size_t res=0;
auto n=static_cast<std::size_t>(std::distance(first,last));
while(n){
/* last chunk absorbs up to 2*bulk_visit_size-1 keys */
auto m=n<2*bulk_visit_size?n:bulk_visit_size;
res+=unprotected_bulk_visit(access_mode,first,m,std::forward<F>(f));
n-=m;
std::advance(
first,
static_cast<
typename std::iterator_traits<FwdIterator>::difference_type>(m));
}
return res;
}
template<typename GroupAccessMode,typename F>
std::size_t visit_all_impl(GroupAccessMode access_mode,F&& f)const
{
@ -1149,6 +1192,76 @@ private:
return 0;
}
/* Visits up to m (< 2*bulk_visit_size) keys in a pipelined fashion.
 * The lookup is split into three passes over the chunk so that memory
 * is prefetched ahead of use:
 *   1) compute hash and group position for every key, prefetching each
 *      group's metadata;
 *   2) compute the match mask of every key against its initial group,
 *      prefetching group access state and candidate elements for keys
 *      with a non-empty mask;
 *   3) for each key, probe candidates (and, on overflow, subsequent
 *      groups via the prober) under the per-group lock, invoking f on
 *      the first element whose key compares equal.
 * Returns the number of elements visited. Caller must already hold
 * shared access (see bulk_visit_impl).
 */
template<typename GroupAccessMode,typename FwdIterator,typename F>
BOOST_FORCEINLINE std::size_t unprotected_bulk_visit(
GroupAccessMode access_mode,FwdIterator first,std::size_t m,F&& f)const
{
BOOST_ASSERT(m<2*bulk_visit_size);
/* arrays sized for the largest possible chunk, 2*bulk_visit_size-1 */
std::size_t res=0,
hashes[2*bulk_visit_size-1],
positions[2*bulk_visit_size-1];
int masks[2*bulk_visit_size-1];
auto it=first;
/* pass 1: hashes, positions, group prefetch */
for(auto i=m;i--;++it){
auto hash=hashes[i]=this->hash_for(*it);
auto pos=positions[i]=this->position_for(hash);
BOOST_UNORDERED_PREFETCH(this->arrays.groups()+pos);
}
/* pass 2: match masks, element prefetch for probable hits */
for(auto i=m;i--;){
auto hash=hashes[i];
auto pos=positions[i];
auto mask=masks[i]=(this->arrays.groups()+pos)->match(hash);
if(mask){
BOOST_UNORDERED_PREFETCH(this->arrays.group_accesses()+pos);
BOOST_UNORDERED_PREFETCH_ELEMENTS(this->arrays.elements()+pos*N,N);
}
}
it=first;
/* pass 3: probe and visit */
for(auto i=m;i--;++it){
auto pos=positions[i];
prober pb(pos);
auto pg=this->arrays.groups()+pos;
auto mask=masks[i];
element_type *p;
if(!mask)goto post_mask;
p=this->arrays.elements()+pos*N;
for(;;){
{
/* lock only the group currently being inspected */
auto lck=access(access_mode,pos);
do{
auto n=unchecked_countr_zero(mask);
if(BOOST_LIKELY(
pg->is_occupied(n)&&
bool(this->pred()(*it,this->key_from(p[n]))))){
f(cast_for(access_mode,type_policy::value_from(p[n])));
++res;
goto next_key;
}
/* clear lowest set bit, try next candidate slot */
mask&=mask-1;
}while(mask);
}
post_mask:
/* mask exhausted: stop unless the group overflowed, else advance
 * the probe sequence until a group matches or probing ends */
do{
if(BOOST_LIKELY(pg->is_not_overflowed(hashes[i]))||
BOOST_UNLIKELY(!pb.next(this->arrays.groups_size_mask))){
goto next_key;
}
pos=pb.get();
pg=this->arrays.groups()+pos;
mask=pg->match(hashes[i]);
}while(!mask);
p=this->arrays.elements()+pos*N;
BOOST_UNORDERED_PREFETCH_ELEMENTS(p,N);
}
next_key:;
}
return res;
}
#if defined(BOOST_MSVC)
#pragma warning(pop) /* C4800 */
#endif

View File

@ -92,7 +92,7 @@
#elif defined(BOOST_UNORDERED_SSE2)
#define BOOST_UNORDERED_PREFETCH(p) _mm_prefetch((const char*)(p),_MM_HINT_T0)
#else
#define BOOST_UNORDERED_PREFETCH(p) ((void)0)
#define BOOST_UNORDERED_PREFETCH(p) ((void)(p))
#endif
/* We have experimentally confirmed that ARM architectures get a higher

View File

@ -20,7 +20,7 @@ function(foa_tests)
endfunction()
function(cfoa_tests)
boost_test(PREFIX boost_unordered_cfoa LINK_LIBRARIES Boost::compat Threads::Threads ${ARGN})
boost_test(PREFIX boost_unordered_cfoa LINK_LIBRARIES Boost::compat Boost::iterator Threads::Threads ${ARGN})
endfunction()
# FCA tests

View File

@ -3,13 +3,29 @@
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#include <boost/config.hpp>
#include <boost/config/workaround.hpp>
#if BOOST_WORKAROUND(BOOST_GCC_VERSION, < 40900)
// warning triggered in transform_iterator.hpp transitive includes
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#pragma GCC diagnostic ignored "-Wsign-conversion"
#endif
#include "helpers.hpp"
#include <boost/unordered/concurrent_flat_map.hpp>
#include <boost/unordered/concurrent_flat_set.hpp>
#include <boost/core/ignore_unused.hpp>
#include <boost/iterator/transform_iterator.hpp>
#if BOOST_WORKAROUND(BOOST_GCC_VERSION, < 40900)
#pragma GCC diagnostic pop
#endif
#include <algorithm>
#include <array>
#include <functional>
#include <vector>
@ -825,6 +841,129 @@ namespace {
check_raii_counts();
}
// Function objects used with boost::make_transform_iterator to turn an
// iterator over values into an iterator over lookup keys.
// regular_key_extractor yields the key itself (homogeneous lookup).
struct regular_key_extractor
{
template<typename T>
auto operator()(const T& x) const -> decltype(get_key(x))
{
return get_key(x);
}
} regular_key_extract;
// transp_key_extractor yields the key's underlying x_ member, exercising
// heterogeneous (transparent) lookup. Note decltype((...)): the double
// parentheses make the return type an lvalue reference to x_.
struct transp_key_extractor
{
template<typename T>
auto operator()(const T& x) const -> decltype((get_key(x).x_))
{
return get_key(x).x_;
}
} transp_key_extract;
// Test for bulk visit(first, last, f)/cvisit on container X. Inserts a
// subset of randomly generated values (those whose key has x_ % 3 != 0),
// then concurrently bulk-visits key ranges of varying lengths, rotating
// among the visit/const visit/cvisit overloads. Checks that the total
// number of (distinct) visits equals the container size and that no
// element is constructed or assigned during visitation.
template <class X, class KeyExtractor, class GF>
void bulk_visit(
X*, KeyExtractor key_extract, GF gen_factory, test::random_generator rg)
{
using key_type = typename X::key_type;
using value_type = typename X::value_type;
// concurrent_flat_set visit is always const access
using arg_type = typename std::conditional<
std::is_same<key_type, value_type>::value,
value_type const,
value_type
>::type;
auto gen = gen_factory.template get<X>();
auto values = make_random_values(16384 * 16, [&] { return gen(rg); });
using values_type = decltype(values);
using span_value_type = typename values_type::value_type;
raii::reset_counts();
{
X x;
// every third key (by x_ % 3 == 0) is deliberately left out so some
// bulk lookups miss
for (auto const& v: values) {
if (get_key(v).x_ % 3 != 0) x.insert(v);
}
X const& cx = x;
// snapshot raii counters: visitation must not construct/assign elements
std::uint64_t old_default_constructor = raii::default_constructor;
std::uint64_t old_copy_constructor = raii::copy_constructor;
std::uint64_t old_move_constructor = raii::move_constructor;
std::uint64_t old_copy_assignment = raii::copy_assignment;
std::uint64_t old_move_assignment = raii::move_assignment;
std::atomic<std::size_t> num_visits{0};
thread_runner(values, [&x, &cx, &num_visits, key_extract]
(boost::span<span_value_type> s) {
// iterate the thread's span as a sequence of lookup keys
auto it = boost::make_transform_iterator(s.begin(), key_extract);
// n: keys left; m: current chunk length; q: selects overload
std::size_t n = s.size(), m = 0, q = 0;
// sanity predicate: visited element's key lies in the current chunk
auto found = [&it, &m](value_type const& v) {
return std::find(
it, it + (std::ptrdiff_t)m, get_key(v)) != it + (std::ptrdiff_t)m;
};
while (n) {
if (m > n) m = n;
switch (q % 3) {
case 0:
x.visit(
it, it + (std::ptrdiff_t)m,
[&num_visits, &found](arg_type& v) {
if ( found(v) ) ++num_visits;
});
break;
case 1:
cx.visit(
it, it + (std::ptrdiff_t)m,
[&num_visits, &found](value_type const& v) {
if ( found(v) ) ++num_visits;
});
break;
case 2:
cx.cvisit(
it, it + (std::ptrdiff_t)m,
[&num_visits, &found](value_type const& v) {
if ( found(v) ) ++num_visits;
});
break;
default:
break;
}
it += (std::ptrdiff_t)m;
n -= m;
// grow the chunk each round; past 5*bulk_visit_size, restart at 0
// and rotate to the next overload
++m;
if (m > 5*X::bulk_visit_size){
m = 0;
++ q;
}
}
});
// every inserted element visited exactly once across all chunks
BOOST_TEST_EQ(num_visits, x.size());
BOOST_TEST_EQ(old_default_constructor, raii::default_constructor);
BOOST_TEST_EQ(old_copy_constructor, raii::copy_constructor);
BOOST_TEST_EQ(old_move_constructor, raii::move_constructor);
BOOST_TEST_EQ(old_copy_assignment, raii::copy_assignment);
BOOST_TEST_EQ(old_move_assignment, raii::move_assignment);
}
// container destroyed: constructions must balance destructions
BOOST_TEST_GE(raii::default_constructor, 0u);
BOOST_TEST_GE(raii::copy_constructor, 0u);
BOOST_TEST_GE(raii::move_constructor, 0u);
BOOST_TEST_GT(raii::destructor, 0u);
BOOST_TEST_EQ(raii::default_constructor + raii::copy_constructor +
raii::move_constructor,
raii::destructor);
}
boost::unordered::concurrent_flat_map<raii, raii>* map;
boost::unordered::concurrent_flat_map<raii, raii, transp_hash,
transp_key_equal>* transp_map;
@ -869,6 +1008,22 @@ UNORDERED_TEST(
((sequential))
)
UNORDERED_TEST(
bulk_visit,
((map)(set))
((regular_key_extract))
((value_type_generator_factory))
((sequential))
)
UNORDERED_TEST(
bulk_visit,
((transp_map)(transp_set))
((transp_key_extract))
((value_type_generator_factory))
((sequential))
)
// clang-format on
RUN_TESTS()