1 | // Optimizations for random number functions, x86 version -*- C++ -*-
|
---|
2 |
|
---|
3 | // Copyright (C) 2012-2021 Free Software Foundation, Inc.
|
---|
4 | //
|
---|
5 | // This file is part of the GNU ISO C++ Library. This library is free
|
---|
6 | // software; you can redistribute it and/or modify it under the
|
---|
7 | // terms of the GNU General Public License as published by the
|
---|
8 | // Free Software Foundation; either version 3, or (at your option)
|
---|
9 | // any later version.
|
---|
10 |
|
---|
11 | // This library is distributed in the hope that it will be useful,
|
---|
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
14 | // GNU General Public License for more details.
|
---|
15 |
|
---|
16 | // Under Section 7 of GPL version 3, you are granted additional
|
---|
17 | // permissions described in the GCC Runtime Library Exception, version
|
---|
18 | // 3.1, as published by the Free Software Foundation.
|
---|
19 |
|
---|
20 | // You should have received a copy of the GNU General Public License and
|
---|
21 | // a copy of the GCC Runtime Library Exception along with this program;
|
---|
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
---|
23 | // <http://www.gnu.org/licenses/>.
|
---|
24 |
|
---|
25 | /** @file bits/opt_random.h
|
---|
26 | * This is an internal header file, included by other library headers.
|
---|
27 | * Do not attempt to use it directly. @headername{random}
|
---|
28 | */
|
---|
29 |
|
---|
30 | #ifndef _BITS_OPT_RANDOM_H
|
---|
31 | #define _BITS_OPT_RANDOM_H 1
|
---|
32 |
|
---|
33 | #ifdef __SSE3__
|
---|
34 | #include <pmmintrin.h>
|
---|
35 | #endif
|
---|
36 |
|
---|
37 |
|
---|
38 | #pragma GCC system_header
|
---|
39 |
|
---|
40 |
|
---|
41 | namespace std _GLIBCXX_VISIBILITY(default)
|
---|
42 | {
|
---|
43 | _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
---|
44 |
|
---|
45 | #ifdef __SSE3__
|
---|
46 | template<>
|
---|
47 | template<typename _UniformRandomNumberGenerator>
|
---|
48 | void
|
---|
49 | normal_distribution<double>::
|
---|
50 | __generate(typename normal_distribution<double>::result_type* __f,
|
---|
51 | typename normal_distribution<double>::result_type* __t,
|
---|
52 | _UniformRandomNumberGenerator& __urng,
|
---|
53 | const param_type& __param)
|
---|
54 | {
|
---|
55 | typedef uint64_t __uctype;
|
---|
56 |
|
---|
57 | if (__f == __t)
|
---|
58 | return;
|
---|
59 |
|
---|
60 | if (_M_saved_available)
|
---|
61 | {
|
---|
62 | _M_saved_available = false;
|
---|
63 | *__f++ = _M_saved * __param.stddev() + __param.mean();
|
---|
64 |
|
---|
65 | if (__f == __t)
|
---|
66 | return;
|
---|
67 | }
|
---|
68 |
|
---|
69 | constexpr uint64_t __maskval = 0xfffffffffffffull;
|
---|
70 | static const __m128i __mask = _mm_set1_epi64x(__maskval);
|
---|
71 | static const __m128i __two = _mm_set1_epi64x(0x4000000000000000ull);
|
---|
72 | static const __m128d __three = _mm_set1_pd(3.0);
|
---|
73 | const __m128d __av = _mm_set1_pd(__param.mean());
|
---|
74 |
|
---|
75 | const __uctype __urngmin = __urng.min();
|
---|
76 | const __uctype __urngmax = __urng.max();
|
---|
77 | const __uctype __urngrange = __urngmax - __urngmin;
|
---|
78 | const __uctype __uerngrange = __urngrange + 1;
|
---|
79 |
|
---|
80 | while (__f + 1 < __t)
|
---|
81 | {
|
---|
82 | double __le;
|
---|
83 | __m128d __x;
|
---|
84 | do
|
---|
85 | {
|
---|
86 | union
|
---|
87 | {
|
---|
88 | __m128i __i;
|
---|
89 | __m128d __d;
|
---|
90 | } __v;
|
---|
91 |
|
---|
92 | if (__urngrange > __maskval)
|
---|
93 | {
|
---|
94 | if (__detail::_Power_of_2(__uerngrange))
|
---|
95 | __v.__i = _mm_and_si128(_mm_set_epi64x(__urng(),
|
---|
96 | __urng()),
|
---|
97 | __mask);
|
---|
98 | else
|
---|
99 | {
|
---|
100 | const __uctype __uerange = __maskval + 1;
|
---|
101 | const __uctype __scaling = __urngrange / __uerange;
|
---|
102 | const __uctype __past = __uerange * __scaling;
|
---|
103 | uint64_t __v1;
|
---|
104 | do
|
---|
105 | __v1 = __uctype(__urng()) - __urngmin;
|
---|
106 | while (__v1 >= __past);
|
---|
107 | __v1 /= __scaling;
|
---|
108 | uint64_t __v2;
|
---|
109 | do
|
---|
110 | __v2 = __uctype(__urng()) - __urngmin;
|
---|
111 | while (__v2 >= __past);
|
---|
112 | __v2 /= __scaling;
|
---|
113 |
|
---|
114 | __v.__i = _mm_set_epi64x(__v1, __v2);
|
---|
115 | }
|
---|
116 | }
|
---|
117 | else if (__urngrange == __maskval)
|
---|
118 | __v.__i = _mm_set_epi64x(__urng(), __urng());
|
---|
119 | else if ((__urngrange + 2) * __urngrange >= __maskval
|
---|
120 | && __detail::_Power_of_2(__uerngrange))
|
---|
121 | {
|
---|
122 | uint64_t __v1 = __urng() * __uerngrange + __urng();
|
---|
123 | uint64_t __v2 = __urng() * __uerngrange + __urng();
|
---|
124 |
|
---|
125 | __v.__i = _mm_and_si128(_mm_set_epi64x(__v1, __v2),
|
---|
126 | __mask);
|
---|
127 | }
|
---|
128 | else
|
---|
129 | {
|
---|
130 | size_t __nrng = 2;
|
---|
131 | __uctype __high = __maskval / __uerngrange / __uerngrange;
|
---|
132 | while (__high > __uerngrange)
|
---|
133 | {
|
---|
134 | ++__nrng;
|
---|
135 | __high /= __uerngrange;
|
---|
136 | }
|
---|
137 | const __uctype __highrange = __high + 1;
|
---|
138 | const __uctype __scaling = __urngrange / __highrange;
|
---|
139 | const __uctype __past = __highrange * __scaling;
|
---|
140 | __uctype __tmp;
|
---|
141 |
|
---|
142 | uint64_t __v1;
|
---|
143 | do
|
---|
144 | {
|
---|
145 | do
|
---|
146 | __tmp = __uctype(__urng()) - __urngmin;
|
---|
147 | while (__tmp >= __past);
|
---|
148 | __v1 = __tmp / __scaling;
|
---|
149 | for (size_t __cnt = 0; __cnt < __nrng; ++__cnt)
|
---|
150 | {
|
---|
151 | __tmp = __v1;
|
---|
152 | __v1 *= __uerngrange;
|
---|
153 | __v1 += __uctype(__urng()) - __urngmin;
|
---|
154 | }
|
---|
155 | }
|
---|
156 | while (__v1 > __maskval || __v1 < __tmp);
|
---|
157 |
|
---|
158 | uint64_t __v2;
|
---|
159 | do
|
---|
160 | {
|
---|
161 | do
|
---|
162 | __tmp = __uctype(__urng()) - __urngmin;
|
---|
163 | while (__tmp >= __past);
|
---|
164 | __v2 = __tmp / __scaling;
|
---|
165 | for (size_t __cnt = 0; __cnt < __nrng; ++__cnt)
|
---|
166 | {
|
---|
167 | __tmp = __v2;
|
---|
168 | __v2 *= __uerngrange;
|
---|
169 | __v2 += __uctype(__urng()) - __urngmin;
|
---|
170 | }
|
---|
171 | }
|
---|
172 | while (__v2 > __maskval || __v2 < __tmp);
|
---|
173 |
|
---|
174 | __v.__i = _mm_set_epi64x(__v1, __v2);
|
---|
175 | }
|
---|
176 |
|
---|
177 | __v.__i = _mm_or_si128(__v.__i, __two);
|
---|
178 | __x = _mm_sub_pd(__v.__d, __three);
|
---|
179 | __m128d __m = _mm_mul_pd(__x, __x);
|
---|
180 | __le = _mm_cvtsd_f64(_mm_hadd_pd (__m, __m));
|
---|
181 | }
|
---|
182 | while (__le == 0.0 || __le >= 1.0);
|
---|
183 |
|
---|
184 | double __mult = (std::sqrt(-2.0 * std::log(__le) / __le)
|
---|
185 | * __param.stddev());
|
---|
186 |
|
---|
187 | __x = _mm_add_pd(_mm_mul_pd(__x, _mm_set1_pd(__mult)), __av);
|
---|
188 |
|
---|
189 | _mm_storeu_pd(__f, __x);
|
---|
190 | __f += 2;
|
---|
191 | }
|
---|
192 |
|
---|
193 | if (__f != __t)
|
---|
194 | {
|
---|
195 | result_type __x, __y, __r2;
|
---|
196 |
|
---|
197 | __detail::_Adaptor<_UniformRandomNumberGenerator, result_type>
|
---|
198 | __aurng(__urng);
|
---|
199 |
|
---|
200 | do
|
---|
201 | {
|
---|
202 | __x = result_type(2.0) * __aurng() - 1.0;
|
---|
203 | __y = result_type(2.0) * __aurng() - 1.0;
|
---|
204 | __r2 = __x * __x + __y * __y;
|
---|
205 | }
|
---|
206 | while (__r2 > 1.0 || __r2 == 0.0);
|
---|
207 |
|
---|
208 | const result_type __mult = std::sqrt(-2 * std::log(__r2) / __r2);
|
---|
209 | _M_saved = __x * __mult;
|
---|
210 | _M_saved_available = true;
|
---|
211 | *__f = __y * __mult * __param.stddev() + __param.mean();
|
---|
212 | }
|
---|
213 | }
|
---|
214 | #endif
|
---|
215 |
|
---|
216 |
|
---|
217 | _GLIBCXX_END_NAMESPACE_VERSION
|
---|
218 | } // namespace
|
---|
219 |
|
---|
220 |
|
---|
221 | #endif // _BITS_OPT_RANDOM_H
|
---|