1 | // -*- C++ -*-
|
---|
2 |
|
---|
3 | // Copyright (C) 2007-2021 Free Software Foundation, Inc.
|
---|
4 | //
|
---|
5 | // This file is part of the GNU ISO C++ Library. This library is free
|
---|
6 | // software; you can redistribute it and/or modify it under the terms
|
---|
7 | // of the GNU General Public License as published by the Free Software
|
---|
8 | // Foundation; either version 3, or (at your option) any later
|
---|
9 | // version.
|
---|
10 |
|
---|
11 | // This library is distributed in the hope that it will be useful, but
|
---|
12 | // WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
14 | // General Public License for more details.
|
---|
15 |
|
---|
16 | // Under Section 7 of GPL version 3, you are granted additional
|
---|
17 | // permissions described in the GCC Runtime Library Exception, version
|
---|
18 | // 3.1, as published by the Free Software Foundation.
|
---|
19 |
|
---|
20 | // You should have received a copy of the GNU General Public License and
|
---|
21 | // a copy of the GCC Runtime Library Exception along with this program;
|
---|
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
---|
23 | // <http://www.gnu.org/licenses/>.
|
---|
24 |
|
---|
25 | /** @file parallel/random_shuffle.h
|
---|
26 | * @brief Parallel implementation of std::random_shuffle().
|
---|
27 | * This file is a GNU parallel extension to the Standard C++ Library.
|
---|
28 | */
|
---|
29 |
|
---|
30 | // Written by Johannes Singler.
|
---|
31 |
|
---|
32 | #ifndef _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H
|
---|
33 | #define _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H 1
|
---|
34 |
|
---|
35 | #include <limits>
|
---|
36 | #include <bits/stl_numeric.h>
|
---|
37 | #include <parallel/parallel.h>
|
---|
38 | #include <parallel/random_number.h>
|
---|
39 |
|
---|
40 | namespace __gnu_parallel
|
---|
41 | {
|
---|
42 | /** @brief Type to hold the index of a bin.
|
---|
43 | *
|
---|
44 | * Since many variables of this type are allocated, it should be
|
---|
45 | * chosen as small as possible.
|
---|
46 | */
|
---|
47 | typedef unsigned short _BinIndex;
|
---|
48 |
|
---|
49 | /** @brief Data known to every thread participating in
|
---|
50 | __gnu_parallel::__parallel_random_shuffle(). */
|
---|
51 | template<typename _RAIter>
|
---|
52 | struct _DRandomShufflingGlobalData
|
---|
53 | {
|
---|
54 | typedef std::iterator_traits<_RAIter> _TraitsType;
|
---|
55 | typedef typename _TraitsType::value_type _ValueType;
|
---|
56 | typedef typename _TraitsType::difference_type _DifferenceType;
|
---|
57 |
|
---|
58 | /** @brief Begin iterator of the __source. */
|
---|
59 | _RAIter& _M_source;
|
---|
60 |
|
---|
61 | /** @brief Temporary arrays for each thread. */
|
---|
62 | _ValueType** _M_temporaries;
|
---|
63 |
|
---|
64 | /** @brief Two-dimensional array to hold the thread-bin distribution.
|
---|
65 | *
|
---|
66 | * Dimensions (_M_num_threads + 1) __x (_M_num_bins + 1). */
|
---|
67 | _DifferenceType** _M_dist;
|
---|
68 |
|
---|
69 | /** @brief Start indexes of the threads' __chunks. */
|
---|
70 | _DifferenceType* _M_starts;
|
---|
71 |
|
---|
72 | /** @brief Number of the thread that will further process the
|
---|
73 | corresponding bin. */
|
---|
74 | _ThreadIndex* _M_bin_proc;
|
---|
75 |
|
---|
76 | /** @brief Number of bins to distribute to. */
|
---|
77 | int _M_num_bins;
|
---|
78 |
|
---|
79 | /** @brief Number of bits needed to address the bins. */
|
---|
80 | int _M_num_bits;
|
---|
81 |
|
---|
82 | /** @brief Constructor. */
|
---|
83 | _DRandomShufflingGlobalData(_RAIter& __source)
|
---|
84 | : _M_source(__source) { }
|
---|
85 | };
|
---|
86 |
|
---|
87 | /** @brief Local data for a thread participating in
|
---|
88 | __gnu_parallel::__parallel_random_shuffle().
|
---|
89 | */
|
---|
90 | template<typename _RAIter, typename _RandomNumberGenerator>
|
---|
91 | struct _DRSSorterPU
|
---|
92 | {
|
---|
93 | /** @brief Number of threads participating in total. */
|
---|
94 | int _M_num_threads;
|
---|
95 |
|
---|
96 | /** @brief Begin index for bins taken care of by this thread. */
|
---|
97 | _BinIndex _M_bins_begin;
|
---|
98 |
|
---|
99 | /** @brief End index for bins taken care of by this thread. */
|
---|
100 | _BinIndex __bins_end;
|
---|
101 |
|
---|
102 | /** @brief Random _M_seed for this thread. */
|
---|
103 | uint32_t _M_seed;
|
---|
104 |
|
---|
105 | /** @brief Pointer to global data. */
|
---|
106 | _DRandomShufflingGlobalData<_RAIter>* _M_sd;
|
---|
107 | };
|
---|
108 |
|
---|
109 | /** @brief Generate a random number in @c [0,2^__logp).
|
---|
110 | * @param __logp Logarithm (basis 2) of the upper range __bound.
|
---|
111 | * @param __rng Random number generator to use.
|
---|
112 | */
|
---|
113 | template<typename _RandomNumberGenerator>
|
---|
114 | inline int
|
---|
115 | __random_number_pow2(int __logp, _RandomNumberGenerator& __rng)
|
---|
116 | { return __rng.__genrand_bits(__logp); }
|
---|
117 |
|
---|
118 | /** @brief Random shuffle code executed by each thread.
|
---|
119 | * @param __pus Array of thread-local data records. */
|
---|
120 | template<typename _RAIter, typename _RandomNumberGenerator>
|
---|
121 | void
|
---|
122 | __parallel_random_shuffle_drs_pu(_DRSSorterPU<_RAIter,
|
---|
123 | _RandomNumberGenerator>* __pus)
|
---|
124 | {
|
---|
125 | typedef std::iterator_traits<_RAIter> _TraitsType;
|
---|
126 | typedef typename _TraitsType::value_type _ValueType;
|
---|
127 | typedef typename _TraitsType::difference_type _DifferenceType;
|
---|
128 |
|
---|
129 | _ThreadIndex __iam = omp_get_thread_num();
|
---|
130 | _DRSSorterPU<_RAIter, _RandomNumberGenerator>* __d = &__pus[__iam];
|
---|
131 | _DRandomShufflingGlobalData<_RAIter>* __sd = __d->_M_sd;
|
---|
132 |
|
---|
133 | // Indexing: _M_dist[bin][processor]
|
---|
134 | _DifferenceType __length = (__sd->_M_starts[__iam + 1]
|
---|
135 | - __sd->_M_starts[__iam]);
|
---|
136 | _BinIndex* __oracles = new _BinIndex[__length];
|
---|
137 | _DifferenceType* __dist = new _DifferenceType[__sd->_M_num_bins + 1];
|
---|
138 | _BinIndex* __bin_proc = new _BinIndex[__sd->_M_num_bins];
|
---|
139 | _ValueType** __temporaries = new _ValueType*[__d->_M_num_threads];
|
---|
140 |
|
---|
141 | // Compute oracles and count appearances.
|
---|
142 | for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b)
|
---|
143 | __dist[__b] = 0;
|
---|
144 | int __num_bits = __sd->_M_num_bits;
|
---|
145 |
|
---|
146 | _RandomNumber __rng(__d->_M_seed);
|
---|
147 |
|
---|
148 | // First main loop.
|
---|
149 | for (_DifferenceType __i = 0; __i < __length; ++__i)
|
---|
150 | {
|
---|
151 | _BinIndex __oracle = __random_number_pow2(__num_bits, __rng);
|
---|
152 | __oracles[__i] = __oracle;
|
---|
153 |
|
---|
154 | // To allow prefix (partial) sum.
|
---|
155 | ++(__dist[__oracle + 1]);
|
---|
156 | }
|
---|
157 |
|
---|
158 | for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b)
|
---|
159 | __sd->_M_dist[__b][__iam + 1] = __dist[__b];
|
---|
160 |
|
---|
161 | # pragma omp barrier
|
---|
162 |
|
---|
163 | # pragma omp single
|
---|
164 | {
|
---|
165 | // Sum up bins, __sd->_M_dist[__s + 1][__d->_M_num_threads] now
|
---|
166 | // contains the total number of items in bin __s
|
---|
167 | for (_BinIndex __s = 0; __s < __sd->_M_num_bins; ++__s)
|
---|
168 | __gnu_sequential::partial_sum(__sd->_M_dist[__s + 1],
|
---|
169 | __sd->_M_dist[__s + 1]
|
---|
170 | + __d->_M_num_threads + 1,
|
---|
171 | __sd->_M_dist[__s + 1]);
|
---|
172 | }
|
---|
173 |
|
---|
174 | # pragma omp barrier
|
---|
175 |
|
---|
176 | _SequenceIndex __offset = 0, __global_offset = 0;
|
---|
177 | for (_BinIndex __s = 0; __s < __d->_M_bins_begin; ++__s)
|
---|
178 | __global_offset += __sd->_M_dist[__s + 1][__d->_M_num_threads];
|
---|
179 |
|
---|
180 | # pragma omp barrier
|
---|
181 |
|
---|
182 | for (_BinIndex __s = __d->_M_bins_begin; __s < __d->__bins_end; ++__s)
|
---|
183 | {
|
---|
184 | for (int __t = 0; __t < __d->_M_num_threads + 1; ++__t)
|
---|
185 | __sd->_M_dist[__s + 1][__t] += __offset;
|
---|
186 | __offset = __sd->_M_dist[__s + 1][__d->_M_num_threads];
|
---|
187 | }
|
---|
188 |
|
---|
189 | __sd->_M_temporaries[__iam] = static_cast<_ValueType*>
|
---|
190 | (::operator new(sizeof(_ValueType) * __offset));
|
---|
191 |
|
---|
192 | # pragma omp barrier
|
---|
193 |
|
---|
194 | // Draw local copies to avoid false sharing.
|
---|
195 | for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b)
|
---|
196 | __dist[__b] = __sd->_M_dist[__b][__iam];
|
---|
197 | for (_BinIndex __b = 0; __b < __sd->_M_num_bins; ++__b)
|
---|
198 | __bin_proc[__b] = __sd->_M_bin_proc[__b];
|
---|
199 | for (_ThreadIndex __t = 0; __t < __d->_M_num_threads; ++__t)
|
---|
200 | __temporaries[__t] = __sd->_M_temporaries[__t];
|
---|
201 |
|
---|
202 | _RAIter __source = __sd->_M_source;
|
---|
203 | _DifferenceType __start = __sd->_M_starts[__iam];
|
---|
204 |
|
---|
205 | // Distribute according to oracles, second main loop.
|
---|
206 | for (_DifferenceType __i = 0; __i < __length; ++__i)
|
---|
207 | {
|
---|
208 | _BinIndex __target_bin = __oracles[__i];
|
---|
209 | _ThreadIndex __target_p = __bin_proc[__target_bin];
|
---|
210 |
|
---|
211 | // Last column [__d->_M_num_threads] stays unchanged.
|
---|
212 | ::new(&(__temporaries[__target_p][__dist[__target_bin + 1]++]))
|
---|
213 | _ValueType(*(__source + __i + __start));
|
---|
214 | }
|
---|
215 |
|
---|
216 | delete[] __oracles;
|
---|
217 | delete[] __dist;
|
---|
218 | delete[] __bin_proc;
|
---|
219 | delete[] __temporaries;
|
---|
220 |
|
---|
221 | # pragma omp barrier
|
---|
222 |
|
---|
223 | // Shuffle bins internally.
|
---|
224 | for (_BinIndex __b = __d->_M_bins_begin; __b < __d->__bins_end; ++__b)
|
---|
225 | {
|
---|
226 | _ValueType* __begin =
|
---|
227 | (__sd->_M_temporaries[__iam]
|
---|
228 | + (__b == __d->_M_bins_begin
|
---|
229 | ? 0 : __sd->_M_dist[__b][__d->_M_num_threads])),
|
---|
230 | *__end = (__sd->_M_temporaries[__iam]
|
---|
231 | + __sd->_M_dist[__b + 1][__d->_M_num_threads]);
|
---|
232 |
|
---|
233 | __sequential_random_shuffle(__begin, __end, __rng);
|
---|
234 | std::copy(__begin, __end, __sd->_M_source + __global_offset
|
---|
235 | + (__b == __d->_M_bins_begin
|
---|
236 | ? 0 : __sd->_M_dist[__b][__d->_M_num_threads]));
|
---|
237 | }
|
---|
238 |
|
---|
239 | for (_SequenceIndex __i = 0; __i < __offset; ++__i)
|
---|
240 | __sd->_M_temporaries[__iam][__i].~_ValueType();
|
---|
241 | ::operator delete(__sd->_M_temporaries[__iam]);
|
---|
242 | }
|
---|
243 |
|
---|
244 | /** @brief Round up to the next greater power of 2.
|
---|
245 | * @param __x _Integer to round up */
|
---|
246 | template<typename _Tp>
|
---|
247 | _Tp
|
---|
248 | __round_up_to_pow2(_Tp __x)
|
---|
249 | {
|
---|
250 | if (__x <= 1)
|
---|
251 | return 1;
|
---|
252 | else
|
---|
253 | return (_Tp)1 << (__rd_log2(__x - 1) + 1);
|
---|
254 | }
|
---|
255 |
|
---|
256 | /** @brief Main parallel random shuffle step.
|
---|
257 | * @param __begin Begin iterator of sequence.
|
---|
258 | * @param __end End iterator of sequence.
|
---|
259 | * @param __n Length of sequence.
|
---|
260 | * @param __num_threads Number of threads to use.
|
---|
261 | * @param __rng Random number generator to use.
|
---|
262 | */
|
---|
263 | template<typename _RAIter, typename _RandomNumberGenerator>
|
---|
264 | void
|
---|
265 | __parallel_random_shuffle_drs(_RAIter __begin, _RAIter __end,
|
---|
266 | typename std::iterator_traits
|
---|
267 | <_RAIter>::difference_type __n,
|
---|
268 | _ThreadIndex __num_threads,
|
---|
269 | _RandomNumberGenerator& __rng)
|
---|
270 | {
|
---|
271 | typedef std::iterator_traits<_RAIter> _TraitsType;
|
---|
272 | typedef typename _TraitsType::value_type _ValueType;
|
---|
273 | typedef typename _TraitsType::difference_type _DifferenceType;
|
---|
274 |
|
---|
275 | _GLIBCXX_CALL(__n)
|
---|
276 |
|
---|
277 | const _Settings& __s = _Settings::get();
|
---|
278 |
|
---|
279 | if (__num_threads > __n)
|
---|
280 | __num_threads = static_cast<_ThreadIndex>(__n);
|
---|
281 |
|
---|
282 | _BinIndex __num_bins, __num_bins_cache;
|
---|
283 |
|
---|
284 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
|
---|
285 | // Try the L1 cache first.
|
---|
286 |
|
---|
287 | // Must fit into L1.
|
---|
288 | __num_bins_cache =
|
---|
289 | std::max<_DifferenceType>(1, __n / (__s.L1_cache_size_lb
|
---|
290 | / sizeof(_ValueType)));
|
---|
291 | __num_bins_cache = __round_up_to_pow2(__num_bins_cache);
|
---|
292 |
|
---|
293 | // No more buckets than TLB entries, power of 2
|
---|
294 | // Power of 2 and at least one element per bin, at most the TLB size.
|
---|
295 | __num_bins = std::min<_DifferenceType>(__n, __num_bins_cache);
|
---|
296 |
|
---|
297 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
|
---|
298 | // 2 TLB entries needed per bin.
|
---|
299 | __num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, __num_bins);
|
---|
300 | #endif
|
---|
301 | __num_bins = __round_up_to_pow2(__num_bins);
|
---|
302 |
|
---|
303 | if (__num_bins < __num_bins_cache)
|
---|
304 | {
|
---|
305 | #endif
|
---|
306 | // Now try the L2 cache
|
---|
307 | // Must fit into L2
|
---|
308 | __num_bins_cache = static_cast<_BinIndex>
|
---|
309 | (std::max<_DifferenceType>(1, __n / (__s.L2_cache_size
|
---|
310 | / sizeof(_ValueType))));
|
---|
311 | __num_bins_cache = __round_up_to_pow2(__num_bins_cache);
|
---|
312 |
|
---|
313 | // No more buckets than TLB entries, power of 2.
|
---|
314 | __num_bins = static_cast<_BinIndex>
|
---|
315 | (std::min(__n, static_cast<_DifferenceType>(__num_bins_cache)));
|
---|
316 | // Power of 2 and at least one element per bin, at most the TLB size.
|
---|
317 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
|
---|
318 | // 2 TLB entries needed per bin.
|
---|
319 | __num_bins = std::min(static_cast<_DifferenceType>(__s.TLB_size / 2),
|
---|
320 | __num_bins);
|
---|
321 | #endif
|
---|
322 | __num_bins = __round_up_to_pow2(__num_bins);
|
---|
323 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
|
---|
324 | }
|
---|
325 | #endif
|
---|
326 |
|
---|
327 | __num_bins = __round_up_to_pow2(
|
---|
328 | std::max<_BinIndex>(__num_threads, __num_bins));
|
---|
329 |
|
---|
330 | if (__num_threads <= 1)
|
---|
331 | {
|
---|
332 | _RandomNumber __derived_rng(
|
---|
333 | __rng(std::numeric_limits<uint32_t>::max()));
|
---|
334 | __sequential_random_shuffle(__begin, __end, __derived_rng);
|
---|
335 | return;
|
---|
336 | }
|
---|
337 |
|
---|
338 | _DRandomShufflingGlobalData<_RAIter> __sd(__begin);
|
---|
339 | _DRSSorterPU<_RAIter, _RandomNumber >* __pus;
|
---|
340 | _DifferenceType* __starts;
|
---|
341 |
|
---|
342 | # pragma omp parallel num_threads(__num_threads)
|
---|
343 | {
|
---|
344 | _ThreadIndex __num_threads = omp_get_num_threads();
|
---|
345 | # pragma omp single
|
---|
346 | {
|
---|
347 | __pus = new _DRSSorterPU<_RAIter, _RandomNumber>[__num_threads];
|
---|
348 |
|
---|
349 | __sd._M_temporaries = new _ValueType*[__num_threads];
|
---|
350 | __sd._M_dist = new _DifferenceType*[__num_bins + 1];
|
---|
351 | __sd._M_bin_proc = new _ThreadIndex[__num_bins];
|
---|
352 | for (_BinIndex __b = 0; __b < __num_bins + 1; ++__b)
|
---|
353 | __sd._M_dist[__b] = new _DifferenceType[__num_threads + 1];
|
---|
354 | for (_BinIndex __b = 0; __b < (__num_bins + 1); ++__b)
|
---|
355 | {
|
---|
356 | __sd._M_dist[0][0] = 0;
|
---|
357 | __sd._M_dist[__b][0] = 0;
|
---|
358 | }
|
---|
359 | __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
|
---|
360 | int __bin_cursor = 0;
|
---|
361 | __sd._M_num_bins = __num_bins;
|
---|
362 | __sd._M_num_bits = __rd_log2(__num_bins);
|
---|
363 |
|
---|
364 | _DifferenceType __chunk_length = __n / __num_threads,
|
---|
365 | __split = __n % __num_threads,
|
---|
366 | __start = 0;
|
---|
367 | _DifferenceType __bin_chunk_length = __num_bins / __num_threads,
|
---|
368 | __bin_split = __num_bins % __num_threads;
|
---|
369 | for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
|
---|
370 | {
|
---|
371 | __starts[__i] = __start;
|
---|
372 | __start += (__i < __split
|
---|
373 | ? (__chunk_length + 1) : __chunk_length);
|
---|
374 | int __j = __pus[__i]._M_bins_begin = __bin_cursor;
|
---|
375 |
|
---|
376 | // Range of bins for this processor.
|
---|
377 | __bin_cursor += (__i < __bin_split
|
---|
378 | ? (__bin_chunk_length + 1)
|
---|
379 | : __bin_chunk_length);
|
---|
380 | __pus[__i].__bins_end = __bin_cursor;
|
---|
381 | for (; __j < __bin_cursor; ++__j)
|
---|
382 | __sd._M_bin_proc[__j] = __i;
|
---|
383 | __pus[__i]._M_num_threads = __num_threads;
|
---|
384 | __pus[__i]._M_seed = __rng(std::numeric_limits<uint32_t>::max());
|
---|
385 | __pus[__i]._M_sd = &__sd;
|
---|
386 | }
|
---|
387 | __starts[__num_threads] = __start;
|
---|
388 | } //single
|
---|
389 | // Now shuffle in parallel.
|
---|
390 | __parallel_random_shuffle_drs_pu(__pus);
|
---|
391 | } // parallel
|
---|
392 |
|
---|
393 | delete[] __starts;
|
---|
394 | delete[] __sd._M_bin_proc;
|
---|
395 | for (int __s = 0; __s < (__num_bins + 1); ++__s)
|
---|
396 | delete[] __sd._M_dist[__s];
|
---|
397 | delete[] __sd._M_dist;
|
---|
398 | delete[] __sd._M_temporaries;
|
---|
399 |
|
---|
400 | delete[] __pus;
|
---|
401 | }
|
---|
402 |
|
---|
403 | /** @brief Sequential cache-efficient random shuffle.
|
---|
404 | * @param __begin Begin iterator of sequence.
|
---|
405 | * @param __end End iterator of sequence.
|
---|
406 | * @param __rng Random number generator to use.
|
---|
407 | */
|
---|
408 | template<typename _RAIter, typename _RandomNumberGenerator>
|
---|
409 | void
|
---|
410 | __sequential_random_shuffle(_RAIter __begin, _RAIter __end,
|
---|
411 | _RandomNumberGenerator& __rng)
|
---|
412 | {
|
---|
413 | typedef std::iterator_traits<_RAIter> _TraitsType;
|
---|
414 | typedef typename _TraitsType::value_type _ValueType;
|
---|
415 | typedef typename _TraitsType::difference_type _DifferenceType;
|
---|
416 |
|
---|
417 | _DifferenceType __n = __end - __begin;
|
---|
418 | const _Settings& __s = _Settings::get();
|
---|
419 |
|
---|
420 | _BinIndex __num_bins, __num_bins_cache;
|
---|
421 |
|
---|
422 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
|
---|
423 | // Try the L1 cache first, must fit into L1.
|
---|
424 | __num_bins_cache = std::max<_DifferenceType>
|
---|
425 | (1, __n / (__s.L1_cache_size_lb / sizeof(_ValueType)));
|
---|
426 | __num_bins_cache = __round_up_to_pow2(__num_bins_cache);
|
---|
427 |
|
---|
428 | // No more buckets than TLB entries, power of 2
|
---|
429 | // Power of 2 and at least one element per bin, at most the TLB size
|
---|
430 | __num_bins = std::min(__n, (_DifferenceType)__num_bins_cache);
|
---|
431 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
|
---|
432 | // 2 TLB entries needed per bin
|
---|
433 | __num_bins = std::min((_DifferenceType)__s.TLB_size / 2, __num_bins);
|
---|
434 | #endif
|
---|
435 | __num_bins = __round_up_to_pow2(__num_bins);
|
---|
436 |
|
---|
437 | if (__num_bins < __num_bins_cache)
|
---|
438 | {
|
---|
439 | #endif
|
---|
440 | // Now try the L2 cache, must fit into L2.
|
---|
441 | __num_bins_cache = static_cast<_BinIndex>
|
---|
442 | (std::max<_DifferenceType>(1, __n / (__s.L2_cache_size
|
---|
443 | / sizeof(_ValueType))));
|
---|
444 | __num_bins_cache = __round_up_to_pow2(__num_bins_cache);
|
---|
445 |
|
---|
446 | // No more buckets than TLB entries, power of 2
|
---|
447 | // Power of 2 and at least one element per bin, at most the TLB size.
|
---|
448 | __num_bins = static_cast<_BinIndex>
|
---|
449 | (std::min(__n, static_cast<_DifferenceType>(__num_bins_cache)));
|
---|
450 |
|
---|
451 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
|
---|
452 | // 2 TLB entries needed per bin
|
---|
453 | __num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, __num_bins);
|
---|
454 | #endif
|
---|
455 | __num_bins = __round_up_to_pow2(__num_bins);
|
---|
456 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
|
---|
457 | }
|
---|
458 | #endif
|
---|
459 |
|
---|
460 | int __num_bits = __rd_log2(__num_bins);
|
---|
461 |
|
---|
462 | if (__num_bins > 1)
|
---|
463 | {
|
---|
464 | _ValueType* __target =
|
---|
465 | static_cast<_ValueType*>(::operator new(sizeof(_ValueType) * __n));
|
---|
466 | _BinIndex* __oracles = new _BinIndex[__n];
|
---|
467 | _DifferenceType* __dist0 = new _DifferenceType[__num_bins + 1],
|
---|
468 | * __dist1 = new _DifferenceType[__num_bins + 1];
|
---|
469 |
|
---|
470 | for (int __b = 0; __b < __num_bins + 1; ++__b)
|
---|
471 | __dist0[__b] = 0;
|
---|
472 |
|
---|
473 | _RandomNumber __bitrng(__rng(0xFFFFFFFF));
|
---|
474 |
|
---|
475 | for (_DifferenceType __i = 0; __i < __n; ++__i)
|
---|
476 | {
|
---|
477 | _BinIndex __oracle = __random_number_pow2(__num_bits, __bitrng);
|
---|
478 | __oracles[__i] = __oracle;
|
---|
479 |
|
---|
480 | // To allow prefix (partial) sum.
|
---|
481 | ++(__dist0[__oracle + 1]);
|
---|
482 | }
|
---|
483 |
|
---|
484 | // Sum up bins.
|
---|
485 | __gnu_sequential::partial_sum(__dist0, __dist0 + __num_bins + 1,
|
---|
486 | __dist0);
|
---|
487 |
|
---|
488 | for (int __b = 0; __b < __num_bins + 1; ++__b)
|
---|
489 | __dist1[__b] = __dist0[__b];
|
---|
490 |
|
---|
491 | // Distribute according to oracles.
|
---|
492 | for (_DifferenceType __i = 0; __i < __n; ++__i)
|
---|
493 | ::new(&(__target[(__dist0[__oracles[__i]])++]))
|
---|
494 | _ValueType(*(__begin + __i));
|
---|
495 |
|
---|
496 | for (int __b = 0; __b < __num_bins; ++__b)
|
---|
497 | __sequential_random_shuffle(__target + __dist1[__b],
|
---|
498 | __target + __dist1[__b + 1], __rng);
|
---|
499 |
|
---|
500 | // Copy elements back.
|
---|
501 | std::copy(__target, __target + __n, __begin);
|
---|
502 |
|
---|
503 | delete[] __dist0;
|
---|
504 | delete[] __dist1;
|
---|
505 | delete[] __oracles;
|
---|
506 |
|
---|
507 | for (_DifferenceType __i = 0; __i < __n; ++__i)
|
---|
508 | __target[__i].~_ValueType();
|
---|
509 | ::operator delete(__target);
|
---|
510 | }
|
---|
511 | else
|
---|
512 | __gnu_sequential::random_shuffle(__begin, __end, __rng);
|
---|
513 | }
|
---|
514 |
|
---|
515 | /** @brief Parallel random public call.
|
---|
516 | * @param __begin Begin iterator of sequence.
|
---|
517 | * @param __end End iterator of sequence.
|
---|
518 | * @param __rng Random number generator to use.
|
---|
519 | */
|
---|
520 | template<typename _RAIter, typename _RandomNumberGenerator>
|
---|
521 | inline void
|
---|
522 | __parallel_random_shuffle(_RAIter __begin, _RAIter __end,
|
---|
523 | _RandomNumberGenerator __rng = _RandomNumber())
|
---|
524 | {
|
---|
525 | typedef std::iterator_traits<_RAIter> _TraitsType;
|
---|
526 | typedef typename _TraitsType::difference_type _DifferenceType;
|
---|
527 | _DifferenceType __n = __end - __begin;
|
---|
528 | __parallel_random_shuffle_drs(__begin, __end, __n,
|
---|
529 | __get_max_threads(), __rng);
|
---|
530 | }
|
---|
531 | }
|
---|
532 |
|
---|
533 | #endif /* _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H */
|
---|