diff --git a/server/tracy_robin_hood.h b/server/tracy_robin_hood.h index 81afbea4..fb279dab 100644 --- a/server/tracy_robin_hood.h +++ b/server/tracy_robin_hood.h @@ -6,12 +6,11 @@ // _/_____/ // // Fast & memory efficient hashtable based on robin hood hashing for C++11/14/17/20 -// version 3.7.0 // https://github.com/martinus/robin-hood-hashing // // Licensed under the MIT License . // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2020 Martin Ankerl +// Copyright (c) 2018-2021 Martin Ankerl // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -35,23 +34,28 @@ #define ROBIN_HOOD_H_INCLUDED // see https://semver.org/ -#define ROBIN_HOOD_VERSION_MAJOR 3 // for incompatible API changes -#define ROBIN_HOOD_VERSION_MINOR 7 // for adding functionality in a backwards-compatible manner -#define ROBIN_HOOD_VERSION_PATCH 0 // for backwards-compatible bug fixes +#define ROBIN_HOOD_VERSION_MAJOR 3 // for incompatible API changes +#define ROBIN_HOOD_VERSION_MINOR 11 // for adding functionality in a backwards-compatible manner +#define ROBIN_HOOD_VERSION_PATCH 1 // for backwards-compatible bug fixes #include #include #include #include +#include // only to support hash of smart pointers #include #include #include #include +#if __cplusplus >= 201703L +# include +#endif // #define ROBIN_HOOD_LOG_ENABLED #ifdef ROBIN_HOOD_LOG_ENABLED # include -# define ROBIN_HOOD_LOG(x) std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << x << std::endl +# define ROBIN_HOOD_LOG(...) \ + std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; #else # define ROBIN_HOOD_LOG(x) #endif @@ -59,8 +63,8 @@ // #define ROBIN_HOOD_TRACE_ENABLED #ifdef ROBIN_HOOD_TRACE_ENABLED # include -# define ROBIN_HOOD_TRACE(x) \ - std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << x << std::endl +# define ROBIN_HOOD_TRACE(...) 
\ + std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; #else # define ROBIN_HOOD_TRACE(x) #endif @@ -128,46 +132,32 @@ static Counts& counts() { #endif // count leading/trailing bits -#if ((defined __i386 || defined __x86_64__) && defined __BMI__) || defined _M_IX86 || defined _M_X64 +#if !defined(ROBIN_HOOD_DISABLE_INTRINSICS) # ifdef _MSC_VER +# if ROBIN_HOOD(BITNESS) == 32 +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward64 +# endif # include +# pragma intrinsic(ROBIN_HOOD(BITSCANFORWARD)) +# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) \ + [](size_t mask) noexcept -> int { \ + unsigned long index; \ + return ROBIN_HOOD(BITSCANFORWARD)(&index, mask) ? static_cast(index) \ + : ROBIN_HOOD(BITNESS); \ + }(x) # else -# include +# if ROBIN_HOOD(BITNESS) == 32 +# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzl +# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzl +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzll +# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzll +# endif +# define ROBIN_HOOD_COUNT_LEADING_ZEROES(x) ((x) ? ROBIN_HOOD(CLZ)(x) : ROBIN_HOOD(BITNESS)) +# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ((x) ? 
ROBIN_HOOD(CTZ)(x) : ROBIN_HOOD(BITNESS)) # endif -# if ROBIN_HOOD(BITNESS) == 32 -# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() _tzcnt_u32 -# else -# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() _tzcnt_u64 -# endif -# if defined __AVX2__ || defined __BMI__ -# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ROBIN_HOOD(CTZ)(x) -# else -# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ROBIN_HOOD(CTZ)(x) -# endif -#elif defined _MSC_VER -# if ROBIN_HOOD(BITNESS) == 32 -# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward -# else -# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward64 -# endif -# include -# pragma intrinsic(ROBIN_HOOD(BITSCANFORWARD)) -# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) \ - [](size_t mask) noexcept -> int { \ - unsigned long index; \ - return ROBIN_HOOD(BITSCANFORWARD)(&index, mask) ? static_cast(index) \ - : ROBIN_HOOD(BITNESS); \ - }(x) -#else -# if ROBIN_HOOD(BITNESS) == 32 -# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzl -# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzl -# else -# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzll -# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzll -# endif -# define ROBIN_HOOD_COUNT_LEADING_ZEROES(x) ((x) ? ROBIN_HOOD(CLZ)(x) : ROBIN_HOOD(BITNESS)) -# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ((x) ? ROBIN_HOOD(CTZ)(x) : ROBIN_HOOD(BITNESS)) #endif // fallthrough @@ -191,6 +181,28 @@ static Counts& counts() { # define ROBIN_HOOD_UNLIKELY(condition) __builtin_expect(condition, 0) #endif +// detect if native wchar_t type is availiable in MSVC +#ifdef _MSC_VER +# ifdef _NATIVE_WCHAR_T_DEFINED +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 0 +# endif +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 +#endif + +// detect if MSVC supports the pair(std::piecewise_construct_t,...) 
consructor being constexpr +#ifdef _MSC_VER +# if _MSC_VER <= 1900 +# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 1 +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0 +# endif +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0 +#endif + // workaround missing "is_trivially_copyable" in g++ < 5.0 // See https://stackoverflow.com/a/31798726/48181 #if defined(__GNUC__) && __GNUC__ < 5 @@ -290,6 +302,13 @@ using index_sequence_for = make_index_sequence; namespace detail { +// make sure we static_cast to the correct type for hash_int +#if ROBIN_HOOD(BITNESS) == 64 +using SizeT = uint64_t; +#else +using SizeT = uint32_t; +#endif + template T rotr(T x, unsigned k) { return (x >> k) | (x << (8U * sizeof(T) - k)); @@ -311,14 +330,14 @@ inline T reinterpret_cast_no_cast_align_warning(void const* ptr) noexcept { // make sure this is not inlined as it is slow and dramatically enlarges code, thus making other // inlinings more difficult. Throws are also generally the slow path. template -ROBIN_HOOD(NOINLINE) +[[noreturn]] ROBIN_HOOD(NOINLINE) #if ROBIN_HOOD(HAS_EXCEPTIONS) -void doThrow(Args&&... args) { + void doThrow(Args&&... args) { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay) throw E(std::forward(args)...); } #else -void doThrow(Args&&... ROBIN_HOOD_UNUSED(args) /*unused*/) { + void doThrow(Args&&... ROBIN_HOOD_UNUSED(args) /*unused*/) { abort(); } #endif @@ -384,7 +403,8 @@ public: void reset() noexcept { while (mListForFree) { T* tmp = *mListForFree; - free(mListForFree); + ROBIN_HOOD_LOG("std::free") + std::free(mListForFree); mListForFree = reinterpret_cast_no_cast_align_warning(tmp); } mHead = nullptr; @@ -419,8 +439,10 @@ public: // calculate number of available elements in ptr if (numBytes < ALIGNMENT + ALIGNED_SIZE) { // not enough data for at least one element. Free and return. 
- free(ptr); + ROBIN_HOOD_LOG("std::free") + std::free(ptr); } else { + ROBIN_HOOD_LOG("add to buffer") add(ptr, numBytes); } } @@ -484,9 +506,10 @@ private: size_t const numElementsToAlloc = calcNumElementsToAlloc(); // alloc new memory: [prev |T, T, ... T] - // std::cout << (sizeof(T*) + ALIGNED_SIZE * numElementsToAlloc) << " bytes" << std::endl; size_t const bytes = ALIGNMENT + ALIGNED_SIZE * numElementsToAlloc; - add(assertNotNull(malloc(bytes)), bytes); + ROBIN_HOOD_LOG("std::malloc " << bytes << " = " << ALIGNMENT << " + " << ALIGNED_SIZE + << " * " << numElementsToAlloc) + add(assertNotNull(std::malloc(bytes)), bytes); return mHead; } @@ -522,21 +545,14 @@ struct NodeAllocator { // we are not using the data, so just free it. void addOrFree(void* ptr, size_t ROBIN_HOOD_UNUSED(numBytes) /*unused*/) noexcept { - free(ptr); + ROBIN_HOOD_LOG("std::free") + std::free(ptr); } }; template struct NodeAllocator : public BulkPoolAllocator {}; -// dummy hash, unsed as mixer when robin_hood::hash is already used -template -struct identity_hash { - constexpr size_t operator()(T const& obj) const noexcept { - return static_cast(obj); - } -}; - // c++14 doesn't have is_nothrow_swappable, and clang++ 6.0.1 doesn't like it either, so I'm making // my own here. 
namespace swappable { @@ -597,14 +613,20 @@ struct pair { , second(std::forward(b)) {} template - constexpr pair( - std::piecewise_construct_t /*unused*/, std::tuple a, - std::tuple b) noexcept(noexcept(pair(std::declval&>(), - std::declval&>(), - ROBIN_HOOD_STD::index_sequence_for(), - ROBIN_HOOD_STD::index_sequence_for()))) + // MSVC 2015 produces error "C2476: ‘constexpr’ constructor does not initialize all members" + // if this constructor is constexpr +#if !ROBIN_HOOD(BROKEN_CONSTEXPR) + constexpr +#endif + pair(std::piecewise_construct_t /*unused*/, std::tuple a, + std::tuple + b) noexcept(noexcept(pair(std::declval&>(), + std::declval&>(), + ROBIN_HOOD_STD::index_sequence_for(), + ROBIN_HOOD_STD::index_sequence_for()))) : pair(a, b, ROBIN_HOOD_STD::index_sequence_for(), - ROBIN_HOOD_STD::index_sequence_for()) {} + ROBIN_HOOD_STD::index_sequence_for()) { + } // constructor called from the std::piecewise_construct_t ctor template @@ -666,7 +688,7 @@ inline constexpr bool operator>=(pair const& x, pair const& y) { return !(x < y); } -static size_t hash_bytes(void const* ptr, size_t const len) noexcept { +inline size_t hash_bytes(void const* ptr, size_t len) noexcept { static constexpr uint64_t m = UINT64_C(0xc6a4a7935bd1e995); static constexpr uint64_t seed = UINT64_C(0xe17a1465); static constexpr unsigned int r = 47; @@ -715,55 +737,87 @@ static size_t hash_bytes(void const* ptr, size_t const len) noexcept { } h ^= h >> r; - h *= m; - h ^= h >> r; + + // not doing the final step here, because this will be done by keyToIdx anyways + // h *= m; + // h ^= h >> r; return static_cast(h); } inline size_t hash_int(uint64_t x) noexcept { - // inspired by lemire's strongly universal hashing - // https://lemire.me/blog/2018/08/15/fast-strongly-universal-64-bit-hashing-everywhere/ - // - // Instead of shifts, we use rotations so we don't lose any bits. - // - // Added a final multiplcation with a constant for more mixing. 
It is most important that the - // lower bits are well mixed. - auto h1 = x * UINT64_C(0xA24BAED4963EE407); - auto h2 = detail::rotr(x, 32U) * UINT64_C(0x9FB21C651E98DF25); - auto h = detail::rotr(h1 + h2, 32U); - return static_cast(h); + // tried lots of different hashes, let's stick with murmurhash3. It's simple, fast, well tested, + // and doesn't need any special 128bit operations. + x ^= x >> 33U; + x *= UINT64_C(0xff51afd7ed558ccd); + x ^= x >> 33U; + + // not doing the final step here, because this will be done by keyToIdx anyways + // x *= UINT64_C(0xc4ceb9fe1a85ec53); + // x ^= x >> 33U; + return static_cast(x); } // A thin wrapper around std::hash, performing an additional simple mixing step of the result. -template +template struct hash : public std::hash { size_t operator()(T const& obj) const noexcept(noexcept(std::declval>().operator()(std::declval()))) { // call base hash auto result = std::hash::operator()(obj); // return mixed of that, to be save against identity has - return hash_int(static_cast(result)); + return hash_int(static_cast(result)); } }; -template <> -struct hash { - size_t operator()(std::string const& str) const noexcept { - return hash_bytes(str.data(), str.size()); +template +struct hash> { + size_t operator()(std::basic_string const& str) const noexcept { + return hash_bytes(str.data(), sizeof(CharT) * str.size()); } }; +#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17) +template +struct hash> { + size_t operator()(std::basic_string_view const& sv) const noexcept { + return hash_bytes(sv.data(), sizeof(CharT) * sv.size()); + } +}; +#endif + template struct hash { size_t operator()(T* ptr) const noexcept { - return hash_int(reinterpret_cast(ptr)); + return hash_int(reinterpret_cast(ptr)); + } +}; + +template +struct hash> { + size_t operator()(std::unique_ptr const& ptr) const noexcept { + return hash_int(reinterpret_cast(ptr.get())); + } +}; + +template +struct hash> { + size_t operator()(std::shared_ptr const& ptr) const noexcept { + 
return hash_int(reinterpret_cast(ptr.get())); + } +}; + +template +struct hash::value>::type> { + size_t operator()(Enum e) const noexcept { + using Underlying = typename std::underlying_type::type; + return hash{}(static_cast(e)); } }; #define ROBIN_HOOD_HASH_INT(T) \ template <> \ struct hash { \ - size_t operator()(T obj) const noexcept { \ + size_t operator()(T const& obj) const noexcept { \ return hash_int(static_cast(obj)); \ } \ } @@ -779,7 +833,9 @@ ROBIN_HOOD_HASH_INT(signed char); ROBIN_HOOD_HASH_INT(unsigned char); ROBIN_HOOD_HASH_INT(char16_t); ROBIN_HOOD_HASH_INT(char32_t); +#if ROBIN_HOOD(HAS_NATIVE_WCHART) ROBIN_HOOD_HASH_INT(wchar_t); +#endif ROBIN_HOOD_HASH_INT(short); ROBIN_HOOD_HASH_INT(unsigned short); ROBIN_HOOD_HASH_INT(int); @@ -793,11 +849,17 @@ ROBIN_HOOD_HASH_INT(unsigned long long); #endif namespace detail { +template +struct void_type { + using type = void; +}; + template struct has_is_transparent : public std::false_type {}; template -struct has_is_transparent : public std::true_type {}; +struct has_is_transparent::type> + : public std::true_type {}; // using wrapper classes for hash and key_equal prevents the diamond problem when the same type // is used. see https://stackoverflow.com/a/28771920/48181 @@ -881,7 +943,8 @@ private: static constexpr size_t InitialNumElements = sizeof(uint64_t); static constexpr uint32_t InitialInfoNumBits = 5; static constexpr uint8_t InitialInfoInc = 1U << InitialInfoNumBits; - static constexpr uint8_t InitialInfoHashShift = sizeof(size_t) * 8 - InitialInfoNumBits; + static constexpr size_t InfoMask = InitialInfoInc - 1U; + static constexpr uint8_t InitialInfoHashShift = 0; using DataPool = detail::NodeAllocator; // type needs to be wider than uint8_t. 
@@ -1056,7 +1119,7 @@ private: using Node = DataNode; - // helpers for doInsert: extract first entry (only const required) + // helpers for insertKeyPrepareEmptySpot: extract first entry (only const required) ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(Node const& n) const noexcept { return n.getFirst(); } @@ -1214,7 +1277,7 @@ private: Iter operator++(int) noexcept { Iter tmp = *this; ++(*this); - return std::move(tmp); + return tmp; } reference operator*() const { @@ -1237,19 +1300,37 @@ private: private: // fast forward to the next non-free info byte + // I've tried a few variants that don't depend on intrinsics, but unfortunately they are + // quite a bit slower than this one. So I've reverted that change again. See map_benchmark. void fastForward() noexcept { size_t n = 0; while (0U == (n = detail::unaligned_load(mInfo))) { mInfo += sizeof(size_t); mKeyVals += sizeof(size_t); } -#if ROBIN_HOOD(LITTLE_ENDIAN) - auto inc = ROBIN_HOOD_COUNT_TRAILING_ZEROES(n) / 8; +#if defined(ROBIN_HOOD_DISABLE_INTRINSICS) + // we know for certain that within the next 8 bytes we'll find a non-zero one. + if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { + mInfo += 4; + mKeyVals += 4; + } + if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { + mInfo += 2; + mKeyVals += 2; + } + if (ROBIN_HOOD_UNLIKELY(0U == *mInfo)) { + mInfo += 1; + mKeyVals += 1; + } #else +# if ROBIN_HOOD(LITTLE_ENDIAN) + auto inc = ROBIN_HOOD_COUNT_TRAILING_ZEROES(n) / 8; +# else auto inc = ROBIN_HOOD_COUNT_LEADING_ZEROES(n) / 8; -#endif +# endif mInfo += inc; mKeyVals += inc; +#endif } friend class Table; @@ -1264,17 +1345,17 @@ private: // The upper 1-5 bits need to be a reasonable good hash, to save comparisons. template void keyToIdx(HashKey&& key, size_t* idx, InfoType* info) const { - // for a user-specified hash that is *not* robin_hood::hash, apply robin_hood::hash as - // an additional mixing step. 
This serves as a bad hash prevention, if the given data is + // In addition to whatever hash is used, add another mul & shift so we get better hashing. + // This serves as a bad hash prevention, if the given data is // badly mixed. - using Mix = - typename std::conditional, hasher>::value, - ::tracy::detail::identity_hash, - ::tracy::hash>::type; - *idx = Mix{}(WHash::operator()(key)); + auto h = static_cast(WHash::operator()(key)); - *info = mInfoInc + static_cast(*idx >> mInfoHashShift); - *idx &= mMask; + h *= mHashMultiplier; + h ^= h >> 33U; + + // the lower InitialInfoNumBits are reserved for info. + *info = mInfoInc + static_cast((h & InfoMask) >> mInfoHashShift); + *idx = (static_cast(h) >> InitialInfoNumBits) & mMask; } // forwards the index by one, wrapping around at the end @@ -1364,12 +1445,12 @@ private: } // inserts a keyval that is guaranteed to be new, e.g. when the hashmap is resized. - // @return index where the element was created - size_t insert_move(Node&& keyval) { + // @return True on success, false if something went wrong + void insert_move(Node&& keyval) { // we don't retry, fail if overflowing // don't need to check max num elements if (0 == mMaxNumElementsAllowed && !try_increase_info()) { - throwOverflowError(); // impossible to reach LCOV_EXCL_LINE + throwOverflowError(); } size_t idx{}; @@ -1406,20 +1487,25 @@ private: mInfo[insertion_idx] = insertion_info; ++mNumElements; - return insertion_idx; } public: using iterator = Iter; using const_iterator = Iter; + Table() noexcept(noexcept(Hash()) && noexcept(KeyEqual())) + : WHash() + , WKeyEqual() { + ROBIN_HOOD_TRACE(this) + } + // Creates an empty hash map. Nothing is allocated yet, this happens at the first insert. // This tremendously speeds up ctor & dtor of a map that never receives an element. The // penalty is payed at the first insert, and not before. Lookup of this empty map works // because everybody points to DummyInfoByte::b. 
parameter bucket_count is dictated by the // standard, but we can ignore it. explicit Table( - size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, const Hash& h = Hash{}, + size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/, const Hash& h = Hash{}, const KeyEqual& equal = KeyEqual{}) noexcept(noexcept(Hash(h)) && noexcept(KeyEqual(equal))) : WHash(h) , WKeyEqual(equal) { @@ -1450,6 +1536,7 @@ public: , DataPool(std::move(static_cast(o))) { ROBIN_HOOD_TRACE(this) if (o.mMask) { + mHashMultiplier = std::move(o.mHashMultiplier); mKeyVals = std::move(o.mKeyVals); mInfo = std::move(o.mInfo); mNumElements = std::move(o.mNumElements); @@ -1468,6 +1555,7 @@ public: if (o.mMask) { // only move stuff if the other map actually has some data destroy(); + mHashMultiplier = std::move(o.mHashMultiplier); mKeyVals = std::move(o.mKeyVals); mInfo = std::move(o.mInfo); mNumElements = std::move(o.mNumElements); @@ -1499,8 +1587,13 @@ public: // elements and insert them, but copying is probably faster. auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); - mKeyVals = static_cast(detail::assertNotNull( - malloc(calcNumBytesTotal(numElementsWithBuffer)))); + auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); + + ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" + << numElementsWithBuffer << ")") + mHashMultiplier = o.mHashMultiplier; + mKeyVals = static_cast( + detail::assertNotNull(std::malloc(numBytesTotal))); // no need for calloc because clonData does memcpy mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); mNumElements = o.mNumElements; @@ -1548,12 +1641,16 @@ public: // no luck: we don't have the same array size allocated, so we need to realloc. if (0 != mMask) { // only deallocate if we actually have data! 
- free(mKeyVals); + ROBIN_HOOD_LOG("std::free") + std::free(mKeyVals); } auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); - mKeyVals = static_cast(detail::assertNotNull( - malloc(calcNumBytesTotal(numElementsWithBuffer)))); + auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); + ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" + << numElementsWithBuffer << ")") + mKeyVals = static_cast( + detail::assertNotNull(std::malloc(numBytesTotal))); // no need for calloc here because cloneData performs a memcpy. mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); @@ -1562,6 +1659,7 @@ public: WHash::operator=(static_cast(o)); WKeyEqual::operator=(static_cast(o)); DataPool::operator=(static_cast(o)); + mHashMultiplier = o.mHashMultiplier; mNumElements = o.mNumElements; mMask = o.mMask; mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; @@ -1629,13 +1727,54 @@ public: template typename std::enable_if::value, Q&>::type operator[](const key_type& key) { ROBIN_HOOD_TRACE(this) - return doCreateByKey(key); + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) + Node(*this, std::piecewise_construct, std::forward_as_tuple(key), + std::forward_as_tuple()); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, + std::forward_as_tuple(key), std::forward_as_tuple()); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + } + + return mKeyVals[idxAndState.first].getSecond(); } template typename std::enable_if::value, Q&>::type operator[](key_type&& key) { ROBIN_HOOD_TRACE(this) - return doCreateByKey(std::move(key)); + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + break; + + case 
InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) + Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), + std::forward_as_tuple()); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = + Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), + std::forward_as_tuple()); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + } + + return mKeyVals[idxAndState.first].getSecond(); } template @@ -1646,26 +1785,93 @@ public: } } + void insert(std::initializer_list ilist) { + for (auto&& vt : ilist) { + insert(std::move(vt)); + } + } + template std::pair emplace(Args&&... args) { ROBIN_HOOD_TRACE(this) Node n{*this, std::forward(args)...}; - auto r = doInsert(std::move(n)); - if (!r.second) { - // insertion not possible: destroy node - // NOLINTNEXTLINE(bugprone-use-after-move) + auto idxAndState = insertKeyPrepareEmptySpot(getFirstConst(n)); + switch (idxAndState.second) { + case InsertionState::key_found: n.destroy(*this); + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) Node(*this, std::move(n)); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = std::move(n); + break; + + case InsertionState::overflow_error: + n.destroy(*this); + throwOverflowError(); + break; } - return r; + + return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), + InsertionState::key_found != idxAndState.second); + } + + template + std::pair try_emplace(const key_type& key, Args&&... args) { + return try_emplace_impl(key, std::forward(args)...); + } + + template + std::pair try_emplace(key_type&& key, Args&&... args) { + return try_emplace_impl(std::move(key), std::forward(args)...); + } + + template + std::pair try_emplace(const_iterator hint, const key_type& key, + Args&&... 
args) { + (void)hint; + return try_emplace_impl(key, std::forward(args)...); + } + + template + std::pair try_emplace(const_iterator hint, key_type&& key, Args&&... args) { + (void)hint; + return try_emplace_impl(std::move(key), std::forward(args)...); + } + + template + std::pair insert_or_assign(const key_type& key, Mapped&& obj) { + return insertOrAssignImpl(key, std::forward(obj)); + } + + template + std::pair insert_or_assign(key_type&& key, Mapped&& obj) { + return insertOrAssignImpl(std::move(key), std::forward(obj)); + } + + template + std::pair insert_or_assign(const_iterator hint, const key_type& key, + Mapped&& obj) { + (void)hint; + return insertOrAssignImpl(key, std::forward(obj)); + } + + template + std::pair insert_or_assign(const_iterator hint, key_type&& key, Mapped&& obj) { + (void)hint; + return insertOrAssignImpl(std::move(key), std::forward(obj)); } std::pair insert(const value_type& keyval) { ROBIN_HOOD_TRACE(this) - return doInsert(keyval); + return emplace(keyval); } std::pair insert(value_type&& keyval) { - return doInsert(std::move(keyval)); + return emplace(std::move(keyval)); } // Returns 1 if key is found, 0 otherwise. @@ -1849,23 +2055,36 @@ public: // reserves space for the specified number of elements. Makes sure the old data fits. // exactly the same as reserve(c). void rehash(size_t c) { - reserve(c); + // forces a reserve + reserve(c, true); } // reserves space for the specified number of elements. Makes sure the old data fits. - // Exactly the same as resize(c). Use resize(0) to shrink to fit. + // Exactly the same as rehash(c). Use rehash(0) to shrink to fit. void reserve(size_t c) { + // reserve, but don't force rehash + reserve(c, false); + } + + // If possible reallocates the map to a smaller one. This frees the underlying table. + // Does not do anything if load_factor is too large for decreasing the table's size. 
+ void compact() { ROBIN_HOOD_TRACE(this) - auto const minElementsAllowed = (std::max)(c, mNumElements); auto newSize = InitialNumElements; - while (calcMaxNumElementsAllowed(newSize) < minElementsAllowed && newSize != 0) { + while (calcMaxNumElementsAllowed(newSize) < mNumElements && newSize != 0) { newSize *= 2; } if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { throwOverflowError(); } - rehashPowerOfTwo(newSize); + ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") + + // only actually do anything when the new size is bigger than the old one. This prevents to + // continuously allocate for each reserve() call. + if (newSize < mMask + 1) { + rehashPowerOfTwo(newSize, true); + } } size_type size() const noexcept { // NOLINT(modernize-use-nodiscard) @@ -1956,9 +2175,30 @@ private: return find(e) != end(); } + void reserve(size_t c, bool forceRehash) { + ROBIN_HOOD_TRACE(this) + auto const minElementsAllowed = (std::max)(c, mNumElements); + auto newSize = InitialNumElements; + while (calcMaxNumElementsAllowed(newSize) < minElementsAllowed && newSize != 0) { + newSize *= 2; + } + if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { + throwOverflowError(); + } + + ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") + + // only actually do anything when the new size is bigger than the old one. This prevents to + // continuously allocate for each reserve() call. + if (forceRehash || newSize > mMask + 1) { + rehashPowerOfTwo(newSize, false); + } + } + // reserves space for at least the specified number of elements. 
// only works if numBuckets if power of two - void rehashPowerOfTwo(size_t numBuckets) { + // True on success, false otherwise + void rehashPowerOfTwo(size_t numBuckets, bool forceFree) { ROBIN_HOOD_TRACE(this) Node* const oldKeyVals = mKeyVals; @@ -1967,18 +2207,29 @@ private: const size_t oldMaxElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); // resize operation: move stuff - init_data(numBuckets); + initData(numBuckets); if (oldMaxElementsWithBuffer > 1) { for (size_t i = 0; i < oldMaxElementsWithBuffer; ++i) { if (oldInfo[i] != 0) { + // might throw an exception, which is really bad since we are in the middle of + // moving stuff. insert_move(std::move(oldKeyVals[i])); // destroy the node but DON'T destroy the data. oldKeyVals[i].~Node(); } } - // don't destroy old data: put it into the pool instead - DataPool::addOrFree(oldKeyVals, calcNumBytesTotal(oldMaxElementsWithBuffer)); + // this check is not necessary as it's guarded by the previous if, but it helps + // silence g++'s overeager "attempt to free a non-heap object 'map' + // [-Werror=free-nonheap-object]" warning. + if (oldKeyVals != reinterpret_cast_no_cast_align_warning(&mMask)) { + // don't destroy old data: put it into the pool instead + if (forceFree) { + std::free(oldKeyVals); + } else { + DataPool::addOrFree(oldKeyVals, calcNumBytesTotal(oldMaxElementsWithBuffer)); + } + } } } @@ -1990,7 +2241,66 @@ private: #endif } - void init_data(size_t max_elements) { + template + std::pair try_emplace_impl(OtherKey&& key, Args&&... 
args) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) Node( + *this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(args)...)); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, + std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(args)...)); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + break; + } + + return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), + InsertionState::key_found != idxAndState.second); + } + + template + std::pair insertOrAssignImpl(OtherKey&& key, Mapped&& obj) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + mKeyVals[idxAndState.first].getSecond() = std::forward(obj); + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) Node( + *this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(obj))); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, + std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(obj))); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + break; + } + + return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), + InsertionState::key_found != idxAndState.second); + } + + void initData(size_t max_elements) { mNumElements = 0; mMask = max_elements - 1; mMaxNumElementsAllowed = calcMaxNumElementsAllowed(max_elements); @@ -1998,8 +2308,11 @@ private: auto const numElementsWithBuffer = 
calcNumElementsWithBuffer(max_elements); // calloc also zeroes everything - mKeyVals = reinterpret_cast(detail::assertNotNull( - calloc(1, calcNumBytesTotal(numElementsWithBuffer)))); + auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); + ROBIN_HOOD_LOG("std::calloc " << numBytesTotal << " = calcNumBytesTotal(" + << numElementsWithBuffer << ")") + mKeyVals = reinterpret_cast( + detail::assertNotNull(std::calloc(1, numBytesTotal))); mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); // set sentinel @@ -2009,86 +2322,34 @@ private: mInfoHashShift = InitialInfoHashShift; } - template - typename std::enable_if::value, Q&>::type doCreateByKey(Arg&& key) { - while (true) { + enum class InsertionState { overflow_error, key_found, new_node, overwrite_node }; + + // Finds key, and if not already present prepares a spot where to pot the key & value. + // This potentially shifts nodes out of the way, updates mInfo and number of inserted + // elements, so the only operation left to do is create/assign a new node at that spot. + template + std::pair insertKeyPrepareEmptySpot(OtherKey&& key) { + for (int i = 0; i < 256; ++i) { size_t idx{}; InfoType info{}; keyToIdx(key, &idx, &info); nextWhileLess(&info, &idx); - // while we potentially have a match. Can't do a do-while here because when mInfo is - // 0 we don't want to skip forward - while (info == mInfo[idx]) { - if (WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { - // key already exists, do not insert. - return mKeyVals[idx].getSecond(); - } - next(&info, &idx); - } - - // unlikely that this evaluates to true - if (ROBIN_HOOD_UNLIKELY(mNumElements >= mMaxNumElementsAllowed)) { - increase_size(); - continue; - } - - // key not found, so we are now exactly where we want to insert it. 
- auto const insertion_idx = idx; - auto const insertion_info = info; - if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { - mMaxNumElementsAllowed = 0; - } - - // find an empty spot - while (0 != mInfo[idx]) { - next(&info, &idx); - } - - auto& l = mKeyVals[insertion_idx]; - if (idx == insertion_idx) { - // put at empty spot. This forwards all arguments into the node where the object - // is constructed exactly where it is needed. - ::new (static_cast(&l)) - Node(*this, std::piecewise_construct, - std::forward_as_tuple(std::forward(key)), std::forward_as_tuple()); - } else { - shiftUp(idx, insertion_idx); - l = Node(*this, std::piecewise_construct, - std::forward_as_tuple(std::forward(key)), std::forward_as_tuple()); - } - - // mKeyVals[idx].getFirst() = std::move(key); - mInfo[insertion_idx] = static_cast(insertion_info); - - ++mNumElements; - return mKeyVals[insertion_idx].getSecond(); - } - } - - // This is exactly the same code as operator[], except for the return values - template - std::pair doInsert(Arg&& keyval) { - while (true) { - size_t idx{}; - InfoType info{}; - keyToIdx(getFirstConst(keyval), &idx, &info); - nextWhileLess(&info, &idx); - // while we potentially have a match while (info == mInfo[idx]) { - if (WKeyEqual::operator()(getFirstConst(keyval), mKeyVals[idx].getFirst())) { + if (WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { // key already exists, do NOT insert. 
// see http://en.cppreference.com/w/cpp/container/unordered_map/insert - return std::make_pair(iterator(mKeyVals + idx, mInfo + idx), - false); + return std::make_pair(idx, InsertionState::key_found); } next(&info, &idx); } // unlikely that this evaluates to true if (ROBIN_HOOD_UNLIKELY(mNumElements >= mMaxNumElementsAllowed)) { - increase_size(); + if (!increase_size()) { + return std::make_pair(size_t(0), InsertionState::overflow_error); + } continue; } @@ -2104,20 +2365,19 @@ private: next(&info, &idx); } - auto& l = mKeyVals[insertion_idx]; - if (idx == insertion_idx) { - ::new (static_cast(&l)) Node(*this, std::forward(keyval)); - } else { + if (idx != insertion_idx) { shiftUp(idx, insertion_idx); - l = Node(*this, std::forward(keyval)); } - // put at empty spot mInfo[insertion_idx] = static_cast(insertion_info); - ++mNumElements; - return std::make_pair(iterator(mKeyVals + insertion_idx, mInfo + insertion_idx), true); + return std::make_pair(insertion_idx, idx == insertion_idx + ? InsertionState::new_node + : InsertionState::overwrite_node); } + + // enough attempts failed, so finally give up. + return std::make_pair(size_t(0), InsertionState::overflow_error); } bool try_increase_info() { @@ -2148,28 +2408,42 @@ private: return true; } - void increase_size() { + // True if resize was possible, false otherwise + bool increase_size() { // nothing allocated yet? just allocate InitialNumElements if (0 == mMask) { - init_data(InitialNumElements); - return; + initData(InitialNumElements); + return true; } auto const maxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); if (mNumElements < maxNumElementsAllowed && try_increase_info()) { - return; + return true; } ROBIN_HOOD_LOG("mNumElements=" << mNumElements << ", maxNumElementsAllowed=" << maxNumElementsAllowed << ", load=" << (static_cast(mNumElements) * 100.0 / (static_cast(mMask) + 1))) - // it seems we have a really bad hash function! 
don't try to resize again - if (mNumElements * 2 < calcMaxNumElementsAllowed(mMask + 1)) { - throwOverflowError(); - } - rehashPowerOfTwo((mMask + 1) * 2); + nextHashMultiplier(); + if (mNumElements * 2 < calcMaxNumElementsAllowed(mMask + 1)) { + // we have to resize, even though there would still be plenty of space left! + // Try to rehash instead. Delete freed memory so we don't steadily increase mem in case + // we have to rehash a few times + rehashPowerOfTwo(mMask + 1, true); + } else { + // Each resize uses a different hash so we don't so easily overflow. + // Make sure we only have odd numbers, so that the multiplication is reversible! + rehashPowerOfTwo((mMask + 1) * 2, false); + } + return true; + } + + void nextHashMultiplier() { + // adding an *even* number, so that the multiplier will always stay odd. This is necessary + // so that the hash stays a mixing function (and thus doesn't have any information loss). + mHashMultiplier += UINT64_C(0xc4ceb9fe1a85ec54); } void destroy() { @@ -2183,10 +2457,11 @@ private: // This protection against not deleting mMask shouldn't be needed as it's sufficiently // protected with the 0==mMask check, but I have this anyways because g++ 7 otherwise - // reports a compile error: attempt to free a non-heap object ‘fm’ + // reports a compile error: attempt to free a non-heap object 'fm' // [-Werror=free-nonheap-object] if (mKeyVals != reinterpret_cast_no_cast_align_warning(&mMask)) { - free(mKeyVals); + ROBIN_HOOD_LOG("std::free") + std::free(mKeyVals); } } @@ -2201,13 +2476,14 @@ private: } // members are sorted so no padding occurs - Node* mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); // 8 byte 8 - uint8_t* mInfo = reinterpret_cast(&mMask); // 8 byte 16 - size_t mNumElements = 0; // 8 byte 24 - size_t mMask = 0; // 8 byte 32 - size_t mMaxNumElementsAllowed = 0; // 8 byte 40 - InfoType mInfoInc = InitialInfoInc; // 4 byte 44 - InfoType mInfoHashShift = InitialInfoHashShift; // 4 byte 48 + uint64_t 
mHashMultiplier = UINT64_C(0xc4ceb9fe1a85ec53); // 8 byte 8 + Node* mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); // 8 byte 16 + uint8_t* mInfo = reinterpret_cast(&mMask); // 8 byte 24 + size_t mNumElements = 0; // 8 byte 32 + size_t mMask = 0; // 8 byte 40 + size_t mMaxNumElementsAllowed = 0; // 8 byte 48 + InfoType mInfoInc = InitialInfoInc; // 4 byte 52 + InfoType mInfoHashShift = InitialInfoHashShift; // 4 byte 56 // 16 byte 56 if NodeAllocator };