| 1 | // Copyright (C) 2022 Joaquin M Lopez Munoz. |
| 2 | // Copyright (C) 2022 Christian Mazakas |
| 3 | // |
| 4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying |
| 5 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
| 6 | |
| 7 | #ifndef BOOST_UNORDERED_DETAIL_PRIME_FMOD_HPP |
| 8 | #define BOOST_UNORDERED_DETAIL_PRIME_FMOD_HPP |
| 9 | |
| 10 | #include <boost/cstdint.hpp> |
| 11 | #include <boost/preprocessor/seq/enum.hpp> |
| 12 | #include <boost/preprocessor/seq/for_each.hpp> |
| 13 | #include <boost/preprocessor/seq/size.hpp> |
| 14 | #include <boost/unordered/detail/narrow_cast.hpp> |
| 15 | |
| 16 | #include <boost/config.hpp> |
| 17 | |
| 18 | #include <climits> |
| 19 | #include <cstddef> |
| 20 | |
| 21 | #if defined(SIZE_MAX) |
| 22 | #if ((((SIZE_MAX >> 16) >> 16) >> 16) >> 15) != 0 |
| 23 | #define BOOST_UNORDERED_FCA_HAS_64B_SIZE_T |
| 24 | #endif |
| 25 | #elif defined(UINTPTR_MAX) /* used as proxy for std::size_t */ |
| 26 | #if ((((UINTPTR_MAX >> 16) >> 16) >> 16) >> 15) != 0 |
| 27 | #define BOOST_UNORDERED_FCA_HAS_64B_SIZE_T |
| 28 | #endif |
| 29 | #endif |
| 30 | |
| 31 | #if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) && defined(_MSC_VER) |
| 32 | #include <intrin.h> |
| 33 | #endif |
| 34 | |
| 35 | namespace boost { |
| 36 | namespace unordered { |
| 37 | namespace detail { |
| 38 | template <class = void> struct prime_fmod_size |
| 39 | { |
| 40 | // Because we compile for C++03, we don't have access to any inline |
| 41 | // initialization for array data members so the definitions must exist |
| 42 | // out-of-line. To keep the library header-only, we introduce a dummy |
| 43 | // template parameter which permits the definition to be included in |
| 44 | // multiple TUs without conflict. |
| 45 | // |
| 46 | static std::size_t sizes[]; |
| 47 | static std::size_t const sizes_len; |
| 48 | static std::size_t (*positions[])(std::size_t); |
| 49 | |
| 50 | #if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) |
| 51 | static boost::uint64_t inv_sizes32[]; |
| 52 | static std::size_t const inv_sizes32_len; |
| 53 | #endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */ |
| 54 | |
| 55 | static inline std::size_t size_index(std::size_t n) |
| 56 | { |
| 57 | std::size_t i = 0; |
| 58 | for (; i < (sizes_len - 1); ++i) { |
| 59 | if (sizes[i] >= n) { |
| 60 | break; |
| 61 | } |
| 62 | } |
| 63 | return i; |
| 64 | } |
| 65 | |
| 66 | static inline std::size_t size(std::size_t size_index) |
| 67 | { |
| 68 | return sizes[size_index]; |
| 69 | } |
| 70 | |
| 71 | template <std::size_t Size> static std::size_t modulo(std::size_t hash) |
| 72 | { |
| 73 | return hash % Size; |
| 74 | } |
| 75 | |
| 76 | #if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) |
| 77 | // We emulate the techniques taken from: |
| 78 | // Faster Remainder by Direct Computation: Applications to Compilers and |
| 79 | // Software Libraries |
| 80 | // https://arxiv.org/abs/1902.01961 |
| 81 | // |
| 82 | // In essence, use fancy math to directly calculate the remainder (aka |
| 83 | // modulo) exploiting how compilers transform division |
| 84 | // |
| 85 | |
| 86 | static inline boost::uint64_t get_remainder( |
| 87 | boost::uint64_t fractional, boost::uint32_t d) |
| 88 | { |
| 89 | #if defined(_MSC_VER) |
| 90 | // use MSVC intrinsics when available to avoid promotion to 128 bits |
| 91 | |
| 92 | return __umulh(fractional, d); |
| 93 | #elif defined(BOOST_HAS_INT128) |
| 94 | return static_cast<boost::uint64_t>( |
| 95 | ((boost::uint128_type)fractional * d) >> 64); |
| 96 | #else |
| 97 | // portable implementation in the absence of boost::uint128_type on 64 |
| 98 | // bits, which happens at least in GCC 4.5 and prior |
| 99 | |
| 100 | boost::uint64_t r1 = (fractional & UINT32_MAX) * d; |
| 101 | boost::uint64_t r2 = (fractional >> 32) * d; |
| 102 | r2 += r1 >> 32; |
| 103 | return r2 >> 32; |
| 104 | #endif /* defined(_MSC_VER) */ |
| 105 | } |
| 106 | |
| 107 | static inline boost::uint32_t fast_modulo( |
| 108 | boost::uint32_t a, boost::uint64_t M, boost::uint32_t d) |
| 109 | { |
| 110 | boost::uint64_t fractional = M * a; |
| 111 | return (boost::uint32_t)(get_remainder(fractional, d)); |
| 112 | } |
| 113 | #endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */ |
| 114 | |
| 115 | static inline std::size_t position( |
| 116 | std::size_t hash, std::size_t size_index) |
| 117 | { |
| 118 | #if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) |
| 119 | std::size_t sizes_under_32bit = inv_sizes32_len; |
| 120 | if (BOOST_LIKELY(size_index < sizes_under_32bit)) { |
| 121 | return fast_modulo( |
| 122 | a: narrow_cast<boost::uint32_t>(x: hash) + narrow_cast<boost::uint32_t>(x: hash >> 32), |
| 123 | M: inv_sizes32[size_index], d: boost::uint32_t(sizes[size_index])); |
| 124 | } else { |
| 125 | return positions[size_index - sizes_under_32bit](hash); |
| 126 | } |
| 127 | #else |
| 128 | return positions[size_index](hash); |
| 129 | #endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */ |
| 130 | } |
| 131 | }; // prime_fmod_size |
| 132 | |
| 133 | #define BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT_INCOMPLETE \ |
| 134 | (13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \ |
| 135 | 12289ul)(24593ul)(49157ul)(98317ul)(196613ul)(393241ul)(786433ul)( \ |
| 136 | 1572869ul)(3145739ul)(6291469ul)(12582917ul)(25165843ul)(50331653ul)( \ |
| 137 | 100663319ul)(201326611ul)(402653189ul)(805306457ul)(1610612741ul)( \ |
| 138 | 3221225473ul) |
| 139 | |
| 140 | #if !defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) |
| 141 | |
| 142 | #define BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT \ |
| 143 | BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT_INCOMPLETE(4294967291ul) |
| 144 | |
| 145 | #define BOOST_UNORDERED_PRIME_FMOD_SIZES_64BIT |
| 146 | |
| 147 | #else |
| 148 | |
| 149 | #define BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT \ |
| 150 | BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT_INCOMPLETE |
| 151 | |
| 152 | // The original sequence here is this: |
| 153 | // (6442450939ul) |
| 154 | // (12884901893ul) |
| 155 | // (25769803751ul) |
| 156 | // (51539607551ul) |
| 157 | // (103079215111ul) |
| 158 | // (206158430209ul) |
| 159 | // (412316860441ul) |
| 160 | // (824633720831ul) |
| 161 | // (1649267441651ul) |
| 162 | // |
| 163 | // but this causes problems on versions of mingw where the `long` type is 32 |
| 164 | // bits, even for 64-bit targets. We work around this by replacing the literals |
| 165 | // with compile-time arithmetic, using bitshifts to reconstruct the number. |
| 166 | // |
| 167 | |
| 168 | // clang-format off |
| 169 | #define BOOST_UNORDERED_PRIME_FMOD_SIZES_64BIT \ |
| 170 | ((boost::ulong_long_type(1ul) << 32) + boost::ulong_long_type(2147483643ul)) \ |
| 171 | ((boost::ulong_long_type(3ul) << 32) + boost::ulong_long_type(5ul)) \ |
| 172 | ((boost::ulong_long_type(5ul) << 32) + boost::ulong_long_type(4294967271ul)) \ |
| 173 | ((boost::ulong_long_type(11ul) << 32) + boost::ulong_long_type(4294967295ul)) \ |
| 174 | ((boost::ulong_long_type(24ul) << 32) + boost::ulong_long_type(7ul)) \ |
| 175 | ((boost::ulong_long_type(48ul) << 32) + boost::ulong_long_type(1ul)) \ |
| 176 | ((boost::ulong_long_type(96ul) << 32) + boost::ulong_long_type(25ul)) \ |
| 177 | ((boost::ulong_long_type(191ul) << 32) + boost::ulong_long_type(4294967295ul)) \ |
| 178 | ((boost::ulong_long_type(383ul) << 32) + boost::ulong_long_type(4294967283ul)) |
| 179 | // clang-format on |
| 180 | |
| 181 | #endif /* BOOST_UNORDERED_FCA_HAS_64B_SIZE_T */ |
| 182 | |
| 183 | #define BOOST_UNORDERED_PRIME_FMOD_SIZES \ |
| 184 | BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT BOOST_UNORDERED_PRIME_FMOD_SIZES_64BIT |
| 185 | |
| 186 | template <class T> |
| 187 | std::size_t prime_fmod_size<T>::sizes[] = { |
| 188 | BOOST_PP_SEQ_ENUM(BOOST_UNORDERED_PRIME_FMOD_SIZES)}; |
| 189 | |
| 190 | template <class T> |
| 191 | std::size_t const prime_fmod_size<T>::sizes_len = BOOST_PP_SEQ_SIZE( |
| 192 | BOOST_UNORDERED_PRIME_FMOD_SIZES); |
| 193 | |
| 194 | // Similarly here, we have to re-express the integer initialization using |
| 195 | // arithmetic such that each literal can fit in a 32-bit value. |
| 196 | // |
| 197 | #if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) |
| 198 | // clang-format off |
| 199 | template <class T> |
| 200 | boost::uint64_t prime_fmod_size<T>::inv_sizes32[] = { |
| 201 | (boost::ulong_long_type(330382099ul) << 32) + boost::ulong_long_type(2973438898ul) /* = 1418980313362273202 */, |
| 202 | (boost::ulong_long_type(148102320ul) << 32) + boost::ulong_long_type(2369637129ul) /* = 636094623231363849 */, |
| 203 | (boost::ulong_long_type(81037118ul) << 32) + boost::ulong_long_type(3403558990ul) /* = 348051774975651918 */, |
| 204 | (boost::ulong_long_type(44278013ul) << 32) + boost::ulong_long_type(1549730468ul) /* = 190172619316593316 */, |
| 205 | (boost::ulong_long_type(22253716ul) << 32) + boost::ulong_long_type(2403401389ul) /* = 95578984837873325 */, |
| 206 | (boost::ulong_long_type(11041047ul) << 32) + boost::ulong_long_type(143533612ul) /* = 47420935922132524 */, |
| 207 | (boost::ulong_long_type(5585133ul) << 32) + boost::ulong_long_type(106117528ul) /* = 23987963684927896 */, |
| 208 | (boost::ulong_long_type(2783517ul) << 32) + boost::ulong_long_type(1572687312ul) /* = 11955116055547344 */, |
| 209 | (boost::ulong_long_type(1394922ul) << 32) + boost::ulong_long_type(3428720239ul) /* = 5991147799191151 */, |
| 210 | (boost::ulong_long_type(698255ul) << 32) + boost::ulong_long_type(552319807ul) /* = 2998982941588287 */, |
| 211 | (boost::ulong_long_type(349496ul) << 32) + boost::ulong_long_type(3827689953ul) /* = 1501077717772769 */, |
| 212 | (boost::ulong_long_type(174641ul) << 32) + boost::ulong_long_type(3699438549ul) /* = 750081082979285 */, |
| 213 | (boost::ulong_long_type(87372ul) << 32) + boost::ulong_long_type(1912757574ul) /* = 375261795343686 */, |
| 214 | (boost::ulong_long_type(43684ul) << 32) + boost::ulong_long_type(3821029929ul) /* = 187625172388393 */, |
| 215 | (boost::ulong_long_type(21844ul) << 32) + boost::ulong_long_type(3340590800ul) /* = 93822606204624 */, |
| 216 | (boost::ulong_long_type(10921ul) << 32) + boost::ulong_long_type(4175852267ul) /* = 46909513691883 */, |
| 217 | (boost::ulong_long_type(5461ul) << 32) + boost::ulong_long_type(1401829642ul) /* = 23456218233098 */, |
| 218 | (boost::ulong_long_type(2730ul) << 32) + boost::ulong_long_type(2826028947ul) /* = 11728086747027 */, |
| 219 | (boost::ulong_long_type(1365ul) << 32) + boost::ulong_long_type(1411150351ul) /* = 5864041509391 */, |
| 220 | (boost::ulong_long_type(682ul) << 32) + boost::ulong_long_type(2857253105ul) /* = 2932024948977 */, |
| 221 | (boost::ulong_long_type(341ul) << 32) + boost::ulong_long_type(1431073224ul) /* = 1466014921160 */, |
| 222 | (boost::ulong_long_type(170ul) << 32) + boost::ulong_long_type(2862758116ul) /* = 733007198436 */, |
| 223 | (boost::ulong_long_type(85ul) << 32) + boost::ulong_long_type(1431619357ul) /* = 366503839517 */, |
| 224 | (boost::ulong_long_type(42ul) << 32) + boost::ulong_long_type(2863269661ul) /* = 183251896093 */, |
| 225 | (boost::ulong_long_type(21ul) << 32) + boost::ulong_long_type(1431647119ul) /* = 91625960335 */, |
| 226 | (boost::ulong_long_type(10ul) << 32) + boost::ulong_long_type(2863310962ul) /* = 45812983922 */, |
| 227 | (boost::ulong_long_type(5ul) << 32) + boost::ulong_long_type(1431653234ul) /* = 22906489714 */, |
| 228 | (boost::ulong_long_type(2ul) << 32) + boost::ulong_long_type(2863311496ul) /* = 11453246088 */, |
| 229 | (boost::ulong_long_type(1ul) << 32) + boost::ulong_long_type(1431655764ul) /* = 5726623060 */, |
| 230 | }; |
| 231 | // clang-format on |
| 232 | |
| 233 | template <class T> |
| 234 | std::size_t const |
| 235 | prime_fmod_size<T>::inv_sizes32_len = sizeof(inv_sizes32) / |
| 236 | sizeof(inv_sizes32[0]); |
| 237 | |
| 238 | #endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */ |
| 239 | |
| 240 | #define BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT(z, _, n) \ |
| 241 | prime_fmod_size<T>::template modulo<n>, |
| 242 | |
| 243 | template <class T> |
| 244 | std::size_t (*prime_fmod_size<T>::positions[])(std::size_t) = { |
| 245 | #if !defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) |
| 246 | BOOST_PP_SEQ_FOR_EACH(BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT, ~, |
| 247 | BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT) |
| 248 | #else |
| 249 | BOOST_PP_SEQ_FOR_EACH(BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT, ~, |
| 250 | BOOST_UNORDERED_PRIME_FMOD_SIZES_64BIT) |
| 251 | #endif |
| 252 | }; |
| 253 | |
| 254 | #undef BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT |
| 255 | #undef BOOST_UNORDERED_PRIME_FMOD_SIZES |
| 256 | #undef BOOST_UNORDERED_PRIME_FMOD_SIZES_64BIT |
| 257 | #undef BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT |
| 258 | #undef BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT_INCOMPLETE |
| 259 | } // namespace detail |
| 260 | } // namespace unordered |
| 261 | } // namespace boost |
| 262 | |
| 263 | #endif // BOOST_UNORDERED_DETAIL_PRIME_FMOD_HPP |
| 264 | |