1// Copyright (C) 2022 Joaquin M Lopez Munoz.
2// Copyright (C) 2022 Christian Mazakas
3//
4// Distributed under the Boost Software License, Version 1.0. (See accompanying
5// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
7#ifndef BOOST_UNORDERED_DETAIL_PRIME_FMOD_HPP
8#define BOOST_UNORDERED_DETAIL_PRIME_FMOD_HPP
9
10#include <boost/cstdint.hpp>
11#include <boost/preprocessor/seq/enum.hpp>
12#include <boost/preprocessor/seq/for_each.hpp>
13#include <boost/preprocessor/seq/size.hpp>
14#include <boost/unordered/detail/narrow_cast.hpp>
15
16#include <boost/config.hpp>
17
18#include <climits>
19#include <cstddef>
20
21#if defined(SIZE_MAX)
22#if ((((SIZE_MAX >> 16) >> 16) >> 16) >> 15) != 0
23#define BOOST_UNORDERED_FCA_HAS_64B_SIZE_T
24#endif
25#elif defined(UINTPTR_MAX) /* used as proxy for std::size_t */
26#if ((((UINTPTR_MAX >> 16) >> 16) >> 16) >> 15) != 0
27#define BOOST_UNORDERED_FCA_HAS_64B_SIZE_T
28#endif
29#endif
30
31#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) && defined(_MSC_VER)
32#include <intrin.h>
33#endif
34
35namespace boost {
36 namespace unordered {
37 namespace detail {
38 template <class = void> struct prime_fmod_size
39 {
40 // Because we compile for C++03, we don't have access to any inline
41 // initialization for array data members so the definitions must exist
42 // out-of-line. To keep the library header-only, we introduce a dummy
43 // template parameter which permits the definition to be included in
44 // multiple TUs without conflict.
45 //
46 static std::size_t sizes[];
47 static std::size_t const sizes_len;
48 static std::size_t (*positions[])(std::size_t);
49
50#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
51 static boost::uint64_t inv_sizes32[];
52 static std::size_t const inv_sizes32_len;
53#endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */
54
55 static inline std::size_t size_index(std::size_t n)
56 {
57 std::size_t i = 0;
58 for (; i < (sizes_len - 1); ++i) {
59 if (sizes[i] >= n) {
60 break;
61 }
62 }
63 return i;
64 }
65
66 static inline std::size_t size(std::size_t size_index)
67 {
68 return sizes[size_index];
69 }
70
71 template <std::size_t Size> static std::size_t modulo(std::size_t hash)
72 {
73 return hash % Size;
74 }
75
76#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
77 // We emulate the techniques taken from:
78 // Faster Remainder by Direct Computation: Applications to Compilers and
79 // Software Libraries
80 // https://arxiv.org/abs/1902.01961
81 //
82 // In essence, use fancy math to directly calculate the remainder (aka
83 // modulo) exploiting how compilers transform division
84 //
85
86 static inline boost::uint64_t get_remainder(
87 boost::uint64_t fractional, boost::uint32_t d)
88 {
89#if defined(_MSC_VER)
90 // use MSVC intrinsics when available to avoid promotion to 128 bits
91
92 return __umulh(fractional, d);
93#elif defined(BOOST_HAS_INT128)
94 return static_cast<boost::uint64_t>(
95 ((boost::uint128_type)fractional * d) >> 64);
96#else
97 // portable implementation in the absence of boost::uint128_type on 64
98 // bits, which happens at least in GCC 4.5 and prior
99
100 boost::uint64_t r1 = (fractional & UINT32_MAX) * d;
101 boost::uint64_t r2 = (fractional >> 32) * d;
102 r2 += r1 >> 32;
103 return r2 >> 32;
104#endif /* defined(_MSC_VER) */
105 }
106
107 static inline boost::uint32_t fast_modulo(
108 boost::uint32_t a, boost::uint64_t M, boost::uint32_t d)
109 {
110 boost::uint64_t fractional = M * a;
111 return (boost::uint32_t)(get_remainder(fractional, d));
112 }
113#endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */
114
115 static inline std::size_t position(
116 std::size_t hash, std::size_t size_index)
117 {
118#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
119 std::size_t sizes_under_32bit = inv_sizes32_len;
120 if (BOOST_LIKELY(size_index < sizes_under_32bit)) {
121 return fast_modulo(
122 a: narrow_cast<boost::uint32_t>(x: hash) + narrow_cast<boost::uint32_t>(x: hash >> 32),
123 M: inv_sizes32[size_index], d: boost::uint32_t(sizes[size_index]));
124 } else {
125 return positions[size_index - sizes_under_32bit](hash);
126 }
127#else
128 return positions[size_index](hash);
129#endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */
130 }
131 }; // prime_fmod_size
132
133#define BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT_INCOMPLETE \
134 (13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \
135 12289ul)(24593ul)(49157ul)(98317ul)(196613ul)(393241ul)(786433ul)( \
136 1572869ul)(3145739ul)(6291469ul)(12582917ul)(25165843ul)(50331653ul)( \
137 100663319ul)(201326611ul)(402653189ul)(805306457ul)(1610612741ul)( \
138 3221225473ul)
139
140#if !defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
141
142#define BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT \
143 BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT_INCOMPLETE(4294967291ul)
144
145#define BOOST_UNORDERED_PRIME_FMOD_SIZES_64BIT
146
147#else
148
149#define BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT \
150 BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT_INCOMPLETE
151
152// The original sequence here is this:
153// (6442450939ul)
154// (12884901893ul)
155// (25769803751ul)
156// (51539607551ul)
157// (103079215111ul)
158// (206158430209ul)
159// (412316860441ul)
160// (824633720831ul)
161// (1649267441651ul)
162//
163// but this causes problems on versions of mingw where the `long` type is 32
164// bits, even for 64-bit targets. We work around this by replacing the literals
165// with compile-time arithmetic, using bitshifts to reconstruct the number.
166//
167
168// clang-format off
169#define BOOST_UNORDERED_PRIME_FMOD_SIZES_64BIT \
170 ((boost::ulong_long_type(1ul) << 32) + boost::ulong_long_type(2147483643ul)) \
171 ((boost::ulong_long_type(3ul) << 32) + boost::ulong_long_type(5ul)) \
172 ((boost::ulong_long_type(5ul) << 32) + boost::ulong_long_type(4294967271ul)) \
173 ((boost::ulong_long_type(11ul) << 32) + boost::ulong_long_type(4294967295ul)) \
174 ((boost::ulong_long_type(24ul) << 32) + boost::ulong_long_type(7ul)) \
175 ((boost::ulong_long_type(48ul) << 32) + boost::ulong_long_type(1ul)) \
176 ((boost::ulong_long_type(96ul) << 32) + boost::ulong_long_type(25ul)) \
177 ((boost::ulong_long_type(191ul) << 32) + boost::ulong_long_type(4294967295ul)) \
178 ((boost::ulong_long_type(383ul) << 32) + boost::ulong_long_type(4294967283ul))
179 // clang-format on
180
181#endif /* BOOST_UNORDERED_FCA_HAS_64B_SIZE_T */
182
183#define BOOST_UNORDERED_PRIME_FMOD_SIZES \
184 BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT BOOST_UNORDERED_PRIME_FMOD_SIZES_64BIT
185
186 template <class T>
187 std::size_t prime_fmod_size<T>::sizes[] = {
188 BOOST_PP_SEQ_ENUM(BOOST_UNORDERED_PRIME_FMOD_SIZES)};
189
190 template <class T>
191 std::size_t const prime_fmod_size<T>::sizes_len = BOOST_PP_SEQ_SIZE(
192 BOOST_UNORDERED_PRIME_FMOD_SIZES);
193
194// Similarly here, we have to re-express the integer initialization using
195// arithmetic such that each literal can fit in a 32-bit value.
196//
197#if defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
198 // clang-format off
199 template <class T>
200 boost::uint64_t prime_fmod_size<T>::inv_sizes32[] = {
201 (boost::ulong_long_type(330382099ul) << 32) + boost::ulong_long_type(2973438898ul) /* = 1418980313362273202 */,
202 (boost::ulong_long_type(148102320ul) << 32) + boost::ulong_long_type(2369637129ul) /* = 636094623231363849 */,
203 (boost::ulong_long_type(81037118ul) << 32) + boost::ulong_long_type(3403558990ul) /* = 348051774975651918 */,
204 (boost::ulong_long_type(44278013ul) << 32) + boost::ulong_long_type(1549730468ul) /* = 190172619316593316 */,
205 (boost::ulong_long_type(22253716ul) << 32) + boost::ulong_long_type(2403401389ul) /* = 95578984837873325 */,
206 (boost::ulong_long_type(11041047ul) << 32) + boost::ulong_long_type(143533612ul) /* = 47420935922132524 */,
207 (boost::ulong_long_type(5585133ul) << 32) + boost::ulong_long_type(106117528ul) /* = 23987963684927896 */,
208 (boost::ulong_long_type(2783517ul) << 32) + boost::ulong_long_type(1572687312ul) /* = 11955116055547344 */,
209 (boost::ulong_long_type(1394922ul) << 32) + boost::ulong_long_type(3428720239ul) /* = 5991147799191151 */,
210 (boost::ulong_long_type(698255ul) << 32) + boost::ulong_long_type(552319807ul) /* = 2998982941588287 */,
211 (boost::ulong_long_type(349496ul) << 32) + boost::ulong_long_type(3827689953ul) /* = 1501077717772769 */,
212 (boost::ulong_long_type(174641ul) << 32) + boost::ulong_long_type(3699438549ul) /* = 750081082979285 */,
213 (boost::ulong_long_type(87372ul) << 32) + boost::ulong_long_type(1912757574ul) /* = 375261795343686 */,
214 (boost::ulong_long_type(43684ul) << 32) + boost::ulong_long_type(3821029929ul) /* = 187625172388393 */,
215 (boost::ulong_long_type(21844ul) << 32) + boost::ulong_long_type(3340590800ul) /* = 93822606204624 */,
216 (boost::ulong_long_type(10921ul) << 32) + boost::ulong_long_type(4175852267ul) /* = 46909513691883 */,
217 (boost::ulong_long_type(5461ul) << 32) + boost::ulong_long_type(1401829642ul) /* = 23456218233098 */,
218 (boost::ulong_long_type(2730ul) << 32) + boost::ulong_long_type(2826028947ul) /* = 11728086747027 */,
219 (boost::ulong_long_type(1365ul) << 32) + boost::ulong_long_type(1411150351ul) /* = 5864041509391 */,
220 (boost::ulong_long_type(682ul) << 32) + boost::ulong_long_type(2857253105ul) /* = 2932024948977 */,
221 (boost::ulong_long_type(341ul) << 32) + boost::ulong_long_type(1431073224ul) /* = 1466014921160 */,
222 (boost::ulong_long_type(170ul) << 32) + boost::ulong_long_type(2862758116ul) /* = 733007198436 */,
223 (boost::ulong_long_type(85ul) << 32) + boost::ulong_long_type(1431619357ul) /* = 366503839517 */,
224 (boost::ulong_long_type(42ul) << 32) + boost::ulong_long_type(2863269661ul) /* = 183251896093 */,
225 (boost::ulong_long_type(21ul) << 32) + boost::ulong_long_type(1431647119ul) /* = 91625960335 */,
226 (boost::ulong_long_type(10ul) << 32) + boost::ulong_long_type(2863310962ul) /* = 45812983922 */,
227 (boost::ulong_long_type(5ul) << 32) + boost::ulong_long_type(1431653234ul) /* = 22906489714 */,
228 (boost::ulong_long_type(2ul) << 32) + boost::ulong_long_type(2863311496ul) /* = 11453246088 */,
229 (boost::ulong_long_type(1ul) << 32) + boost::ulong_long_type(1431655764ul) /* = 5726623060 */,
230 };
231 // clang-format on
232
233 template <class T>
234 std::size_t const
235 prime_fmod_size<T>::inv_sizes32_len = sizeof(inv_sizes32) /
236 sizeof(inv_sizes32[0]);
237
238#endif /* defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T) */
239
240#define BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT(z, _, n) \
241 prime_fmod_size<T>::template modulo<n>,
242
243 template <class T>
244 std::size_t (*prime_fmod_size<T>::positions[])(std::size_t) = {
245#if !defined(BOOST_UNORDERED_FCA_HAS_64B_SIZE_T)
246 BOOST_PP_SEQ_FOR_EACH(BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT, ~,
247 BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT)
248#else
249 BOOST_PP_SEQ_FOR_EACH(BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT, ~,
250 BOOST_UNORDERED_PRIME_FMOD_SIZES_64BIT)
251#endif
252 };
253
254#undef BOOST_UNORDERED_PRIME_FMOD_POSITIONS_ELEMENT
255#undef BOOST_UNORDERED_PRIME_FMOD_SIZES
256#undef BOOST_UNORDERED_PRIME_FMOD_SIZES_64BIT
257#undef BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT
258#undef BOOST_UNORDERED_PRIME_FMOD_SIZES_32BIT_INCOMPLETE
259 } // namespace detail
260 } // namespace unordered
261} // namespace boost
262
263#endif // BOOST_UNORDERED_DETAIL_PRIME_FMOD_HPP
264