// Math overloads for simd -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
#define _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_

#if __cplusplus >= 201703L

#include <utility>
#include <iomanip>

_GLIBCXX_SIMD_BEGIN_NAMESPACE
template <typename _Tp, typename _V>
  using _Samesize = fixed_size_simd<_Tp, _V::size()>;

// _Math_return_type {{{
template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type;

template <typename _DoubleR, typename _Tp, typename _Abi>
  using _Math_return_type_t =
    typename _Math_return_type<_DoubleR, _Tp, _Abi>::type;

template <typename _Tp, typename _Abi>
  struct _Math_return_type<double, _Tp, _Abi>
  { using type = simd<_Tp, _Abi>; };

template <typename _Tp, typename _Abi>
  struct _Math_return_type<bool, _Tp, _Abi>
  { using type = simd_mask<_Tp, _Abi>; };

template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type
  { using type = fixed_size_simd<_DoubleR, simd_size_v<_Tp, _Abi>>; };
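// For illustration: a scalar math function returning double maps to
// simd<_Tp, _Abi>, one returning bool maps to simd_mask<_Tp, _Abi>, and any
// other return type _DoubleR (e.g. the int returned by ilogb) maps to
// fixed_size_simd<_DoubleR, simd_size_v<_Tp, _Abi>>.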

//}}}
// _GLIBCXX_SIMD_MATH_CALL_ {{{
#define _GLIBCXX_SIMD_MATH_CALL_(__name) \
  template <typename _Tp, typename _Abi, typename..., \
            typename _R = _Math_return_type_t< \
              decltype(std::__name(declval<double>())), _Tp, _Abi>> \
    enable_if_t<is_floating_point_v<_Tp>, _R> \
    __name(simd<_Tp, _Abi> __x) \
    { return {__private_init, _Abi::_SimdImpl::_S_##__name(__data(__x))}; }
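// Illustrative expansion (assuming std::acos(double) returns double, so _R
// deduces to simd<_Tp, _Abi>): _GLIBCXX_SIMD_MATH_CALL_(acos) declares
//   acos(simd<_Tp, _Abi> __x)
// which returns {__private_init, _Abi::_SimdImpl::_S_acos(__data(__x))}.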

// }}}
//_Extra_argument_type{{{
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type;

template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp*, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>*;
    static constexpr double* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Up*, _Tp, _Abi>
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>*;
    static constexpr _Up* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>;
    static constexpr double declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };

template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>;
    static constexpr _Up declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };

//}}}
// _GLIBCXX_SIMD_MATH_CALL2_ {{{
#define _GLIBCXX_SIMD_MATH_CALL2_(__name, arg2_) \
  template < \
    typename _Tp, typename _Abi, typename..., \
    typename _Arg2 = _Extra_argument_type<arg2_, _Tp, _Abi>, \
    typename _R = _Math_return_type_t< \
      decltype(std::__name(declval<double>(), _Arg2::declval())), _Tp, _Abi>> \
    enable_if_t<is_floating_point_v<_Tp>, _R> \
    __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y) \
    { \
      return {__private_init, \
              _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y))}; \
    } \
  template <typename _Up, typename _Tp, typename _Abi> \
    _GLIBCXX_SIMD_INTRINSIC _Math_return_type_t< \
      decltype(std::__name( \
        declval<double>(), \
        declval<enable_if_t< \
          conjunction_v< \
            is_same<arg2_, _Tp>, \
            negation<is_same<__remove_cvref_t<_Up>, simd<_Tp, _Abi>>>, \
            is_convertible<_Up, simd<_Tp, _Abi>>, is_floating_point<_Tp>>, \
          double>>())), \
      _Tp, _Abi> \
    __name(_Up&& __xx, const simd<_Tp, _Abi>& __yy) \
    { return __name(simd<_Tp, _Abi>(static_cast<_Up&&>(__xx)), __yy); }

// }}}
// _GLIBCXX_SIMD_MATH_CALL3_ {{{
#define _GLIBCXX_SIMD_MATH_CALL3_(__name, arg2_, arg3_) \
  template <typename _Tp, typename _Abi, typename..., \
            typename _Arg2 = _Extra_argument_type<arg2_, _Tp, _Abi>, \
            typename _Arg3 = _Extra_argument_type<arg3_, _Tp, _Abi>, \
            typename _R = _Math_return_type_t< \
              decltype(std::__name(declval<double>(), _Arg2::declval(), \
                                   _Arg3::declval())), \
              _Tp, _Abi>> \
    enable_if_t<is_floating_point_v<_Tp>, _R> \
    __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y, \
           const typename _Arg3::type& __z) \
    { \
      return {__private_init, \
              _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y), \
                                           _Arg3::_S_data(__z))}; \
    } \
  template < \
    typename _T0, typename _T1, typename _T2, typename..., \
    typename _U0 = __remove_cvref_t<_T0>, \
    typename _U1 = __remove_cvref_t<_T1>, \
    typename _U2 = __remove_cvref_t<_T2>, \
    typename _Simd = conditional_t<is_simd_v<_U1>, _U1, _U2>, \
    typename = enable_if_t<conjunction_v< \
      is_simd<_Simd>, is_convertible<_T0&&, _Simd>, \
      is_convertible<_T1&&, _Simd>, is_convertible<_T2&&, _Simd>, \
      negation<conjunction< \
        is_simd<_U0>, is_floating_point<__value_type_or_identity_t<_U0>>>>>>> \
    _GLIBCXX_SIMD_INTRINSIC decltype(__name(declval<const _Simd&>(), \
                                            declval<const _Simd&>(), \
                                            declval<const _Simd&>())) \
    __name(_T0&& __xx, _T1&& __yy, _T2&& __zz) \
    { \
      return __name(_Simd(static_cast<_T0&&>(__xx)), \
                    _Simd(static_cast<_T1&&>(__yy)), \
                    _Simd(static_cast<_T2&&>(__zz))); \
    }

// }}}
// __cosSeries {{{
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
  __cosSeries(const simd<float, _Abi>& __x)
  {
    const simd<float, _Abi> __x2 = __x * __x;
    simd<float, _Abi> __y;
    __y = 0x1.ap-16f;                  //  1/8!
    __y = __y * __x2 - 0x1.6c1p-10f;   // -1/6!
    __y = __y * __x2 + 0x1.555556p-5f; //  1/4!
    return __y * (__x2 * __x2) - .5f * __x2 + 1.f;
  }

template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
  __cosSeries(const simd<double, _Abi>& __x)
  {
    const simd<double, _Abi> __x2 = __x * __x;
    simd<double, _Abi> __y;
    __y = 0x1.AC00000000000p-45;              //  1/16!
    __y = __y * __x2 - 0x1.9394000000000p-37; // -1/14!
    __y = __y * __x2 + 0x1.1EED8C0000000p-29; //  1/12!
    __y = __y * __x2 - 0x1.27E4FB7400000p-22; // -1/10!
    __y = __y * __x2 + 0x1.A01A01A018000p-16; //  1/8!
    __y = __y * __x2 - 0x1.6C16C16C16C00p-10; // -1/6!
    __y = __y * __x2 + 0x1.5555555555554p-5;  //  1/4!
    return (__y * __x2 - .5f) * __x2 + 1.f;
  }

// }}}
// __sinSeries {{{
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
  __sinSeries(const simd<float, _Abi>& __x)
  {
    const simd<float, _Abi> __x2 = __x * __x;
    simd<float, _Abi> __y;
    __y = -0x1.9CC000p-13f;            // -1/7!
    __y = __y * __x2 + 0x1.111100p-7f; //  1/5!
    __y = __y * __x2 - 0x1.555556p-3f; // -1/3!
    return __y * (__x2 * __x) + __x;
  }

template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
  __sinSeries(const simd<double, _Abi>& __x)
  {
    // __x = [0, 0.7854 = pi/4]
    // __x² = [0, 0.6169 = pi²/8]
    const simd<double, _Abi> __x2 = __x * __x;
    simd<double, _Abi> __y;
    __y = -0x1.ACF0000000000p-41;             // -1/15!
    __y = __y * __x2 + 0x1.6124400000000p-33; //  1/13!
    __y = __y * __x2 - 0x1.AE64567000000p-26; // -1/11!
    __y = __y * __x2 + 0x1.71DE3A5540000p-19; //  1/9!
    __y = __y * __x2 - 0x1.A01A01A01A000p-13; // -1/7!
    __y = __y * __x2 + 0x1.1111111111110p-7;  //  1/5!
    __y = __y * __x2 - 0x1.5555555555555p-3;  // -1/3!
    return __y * (__x2 * __x) + __x;
  }

// }}}
// __zero_low_bits {{{
template <int _Bits, typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
  __zero_low_bits(simd<_Tp, _Abi> __x)
  {
    const simd<_Tp, _Abi> __bitmask
      = __bit_cast<_Tp>(~make_unsigned_t<__int_for_sizeof_t<_Tp>>() << _Bits);
    return {__private_init,
            _Abi::_SimdImpl::_S_bit_and(__data(__x), __data(__bitmask))};
  }
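// Example (illustrative): with _Tp = float and _Bits = 3, the input
// 1.f + 7 * 0x1p-23f (bit pattern 0x3F800007) comes back as exactly 1.f
// (0x3F800000); only the 3 least significant mantissa bits are cleared.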

// }}}
// __fold_input {{{

/**@internal
 * Fold @p x into [-¼π, ¼π] and remember the quadrant it came from:
 * quadrant 0: [-¼π,  ¼π]
 * quadrant 1: [ ¼π,  ¾π]
 * quadrant 2: [ ¾π, 1¼π]
 * quadrant 3: [1¼π, 1¾π]
 *
 * The algorithm determines `y` as the multiplier such that `x - y * ¼π` lies
 * in [-¼π, ¼π]. Using a bitmask, `y` is reduced to `quadrant`. `y` can be
 * calculated as
 * ```
 * y = trunc(x / ¼π);
 * y += fmod(y, 2);
 * ```
 * This can be simplified by moving the (implicit) division by 2 into the
 * truncation expression. The `+= fmod` effect can then be achieved by using
 * rounding instead of truncation: `y = round(x / ½π) * 2`. If precision allows,
 * `2/π * x` is better (faster).
 */
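// Worked example (illustrative, one float element): for x = 2.f,
// y = nearbyint(2 * 2/π) = 1, so the quadrant is 1 and the folded value is
// 2 - 1 * π/2 ≈ 0.4292, which lies inside [-¼π, ¼π].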
template <typename _Tp, typename _Abi>
  struct _Folded
  {
    simd<_Tp, _Abi> _M_x;
    rebind_simd_t<int, simd<_Tp, _Abi>> _M_quadrant;
  };

namespace __math_float {
  inline constexpr float __pi_over_4 = 0x1.921FB6p-1f; // π/4
  inline constexpr float __2_over_pi = 0x1.45F306p-1f; // 2/π
  inline constexpr float __pi_2_5bits0
    = 0x1.921fc0p0f; // π/2, 5 0-bits (least significant)
  inline constexpr float __pi_2_5bits0_rem
    = -0x1.5777a6p-21f; // π/2 - __pi_2_5bits0
} // namespace __math_float
namespace __math_double {
  inline constexpr double __pi_over_4 = 0x1.921fb54442d18p-1; // π/4
  inline constexpr double __2_over_pi = 0x1.45F306DC9C883p-1; // 2/π
  inline constexpr double __pi_2 = 0x1.921fb54442d18p0;       // π/2
} // namespace __math_double

template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<float, _Abi>
  __fold_input(const simd<float, _Abi>& __x)
  {
    using _V = simd<float, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_float;
    _Folded<float, _Abi> __r;
    __r._M_x = abs(__x);
#if 0
    // zero most mantissa bits:
    constexpr float __1_over_pi = 0x1.45F306p-2f; // 1/π
    const auto __y = (__r._M_x * __1_over_pi + 0x1.8p23f) - 0x1.8p23f;
    // split π into 4 parts, the first three with 13 trailing zeros (to make the
    // following multiplications precise):
    constexpr float __pi0 = 0x1.920000p1f;
    constexpr float __pi1 = 0x1.fb4000p-11f;
    constexpr float __pi2 = 0x1.444000p-23f;
    constexpr float __pi3 = 0x1.68c234p-38f;
    __r._M_x - __y*__pi0 - __y*__pi1 - __y*__pi2 - __y*__pi3
#else
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      __r._M_quadrant = 0;
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 6 * __pi_over_4)))
      {
        const _V __y = nearbyint(__r._M_x * __2_over_pi);
        __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // __y mod 4
        __r._M_x -= __y * __pi_2_5bits0;
        __r._M_x -= __y * __pi_2_5bits0_rem;
      }
    else
      {
        using __math_double::__2_over_pi;
        using __math_double::__pi_2;
        using _VD = rebind_simd_t<double, _V>;
        _VD __xd = static_simd_cast<_VD>(__r._M_x);
        _VD __y = nearbyint(__xd * __2_over_pi);
        __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // = __y mod 4
        __r._M_x = static_simd_cast<_V>(__xd - __y * __pi_2);
      }
#endif
    return __r;
  }

template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<double, _Abi>
  __fold_input(const simd<double, _Abi>& __x)
  {
    using _V = simd<double, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_double;

    _Folded<double, _Abi> __r;
    __r._M_x = abs(__x);
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      {
        __r._M_quadrant = 0;
        return __r;
      }
    const _V __y = nearbyint(__r._M_x / (2 * __pi_over_4));
    __r._M_quadrant = static_simd_cast<_IV>(__y) & 3;

    if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 1025 * __pi_over_4)))
      {
        // x - y * pi/2, y uses no more than 11 mantissa bits
        __r._M_x -= __y * 0x1.921FB54443000p0;
        __r._M_x -= __y * -0x1.73DCB3B39A000p-43;
        __r._M_x -= __y * 0x1.45C06E0E68948p-86;
      }
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__y <= 0x1.0p30)))
      {
        // x - y * pi/2, y uses no more than 29 mantissa bits
        __r._M_x -= __y * 0x1.921FB40000000p0;
        __r._M_x -= __y * 0x1.4442D00000000p-24;
        __r._M_x -= __y * 0x1.8469898CC5170p-48;
      }
    else
      {
        // x - y * pi/2, y may require all mantissa bits
        const _V __y_hi = __zero_low_bits<26>(__y);
        const _V __y_lo = __y - __y_hi;
        const auto __pi_2_1 = 0x1.921FB50000000p0;
        const auto __pi_2_2 = 0x1.110B460000000p-26;
        const auto __pi_2_3 = 0x1.1A62630000000p-54;
        const auto __pi_2_4 = 0x1.8A2E03707344Ap-81;
        __r._M_x = __r._M_x - __y_hi * __pi_2_1
                     - max(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
                     - min(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
                     - max(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
                     - min(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
                     - max(__y * __pi_2_4, __y_lo * __pi_2_3)
                     - min(__y * __pi_2_4, __y_lo * __pi_2_3);
      }
    return __r;
  }

// }}}
// __extract_exponent_as_int {{{
template <typename _Tp, typename _Abi>
  rebind_simd_t<int, simd<_Tp, _Abi>>
  __extract_exponent_as_int(const simd<_Tp, _Abi>& __v)
  {
    using _Vp = simd<_Tp, _Abi>;
    using _Up = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
    using namespace std::experimental::__float_bitwise_operators;
    const _Vp __exponent_mask
      = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
    return static_simd_cast<rebind_simd_t<int, _Vp>>(
      __bit_cast<rebind_simd_t<_Up, _Vp>>(__v & __exponent_mask)
      >> (__digits_v<_Tp> - 1));
  }

// }}}
// __impl_or_fallback {{{
template <typename ImplFun, typename FallbackFun, typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback_dispatch(int, ImplFun&& __impl_fun, FallbackFun&&,
                              _Args&&... __args)
    -> decltype(__impl_fun(static_cast<_Args&&>(__args)...))
  { return __impl_fun(static_cast<_Args&&>(__args)...); }

template <typename ImplFun, typename FallbackFun, typename... _Args>
  inline auto
  __impl_or_fallback_dispatch(float, ImplFun&&, FallbackFun&& __fallback_fun,
                              _Args&&... __args)
    -> decltype(__fallback_fun(static_cast<_Args&&>(__args)...))
  { return __fallback_fun(static_cast<_Args&&>(__args)...); }

template <typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback(_Args&&... __args)
  {
    return __impl_or_fallback_dispatch(int(), static_cast<_Args&&>(__args)...);
  }
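// The int/float first parameters only rank the two dispatch overloads: the
// int() argument prefers the __impl_fun overload, and if that overload is
// SFINAEd away (its trailing return type is ill-formed for the given
// arguments), the int -> float conversion selects the fallback instead.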
//}}}

// trigonometric functions {{{
_GLIBCXX_SIMD_MATH_CALL_(acos)
_GLIBCXX_SIMD_MATH_CALL_(asin)
_GLIBCXX_SIMD_MATH_CALL_(atan)
_GLIBCXX_SIMD_MATH_CALL2_(atan2, _Tp)

/*
 * algorithm for sine and cosine:
 *
 * The result can be calculated with sine or cosine depending on the π/4 section
 * the input is in.
 * sine   ≈ __x + __x³
 * cosine ≈ 1 - __x²
 *
 * sine:
 * Map -__x to __x and invert the output
 * Extend precision of __x - n * π/4 by calculating
 * ((__x - n * p1) - n * p2) - n * p3 (p1 + p2 + p3 = π/4)
 *
 * Calculate Taylor series with tuned coefficients.
 * Fix sign.
 */
// cos{{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  cos(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_cos(__data(__x))};
    else
      {
        if constexpr (is_same_v<_Tp, float>)
          if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 393382)))
            return static_simd_cast<_V>(
              cos(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

        const auto __f = __fold_input(__x);
        // quadrant | effect
        //        0 | cosSeries, +
        //        1 | sinSeries, -
        //        2 | cosSeries, -
        //        3 | sinSeries, +
        using namespace std::experimental::__float_bitwise_operators;
        const _V __sign_flip
          = _V(-0.f) & static_simd_cast<_V>((1 + __f._M_quadrant) << 30);

        const auto __need_cos = (__f._M_quadrant & 1) == 0;
        if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_cos)))
          return __sign_flip ^ __cosSeries(__f._M_x);
        else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_cos)))
          return __sign_flip ^ __sinSeries(__f._M_x);
        else // some_of(__need_cos)
          {
            _V __r = __sinSeries(__f._M_x);
            where(__need_cos.__cvt(), __r) = __cosSeries(__f._M_x);
            return __r ^ __sign_flip;
          }
      }
  }

template <typename _Tp>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
  cos(simd<_Tp, simd_abi::scalar> __x)
  { return std::cos(__data(__x)); }

//}}}
// sin{{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sin(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_sin(__data(__x))};
    else
      {
        if constexpr (is_same_v<_Tp, float>)
          if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 527449)))
            return static_simd_cast<_V>(
              sin(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

        const auto __f = __fold_input(__x);
        // quadrant | effect
        //        0 | sinSeries
        //        1 | cosSeries
        //        2 | sinSeries, sign flip
        //        3 | cosSeries, sign flip
        using namespace std::experimental::__float_bitwise_operators;
        const auto __sign_flip
          = (__x ^ static_simd_cast<_V>(1 - __f._M_quadrant)) & _V(_Tp(-0.));

        const auto __need_sin = (__f._M_quadrant & 1) == 0;
        if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_sin)))
          return __sign_flip ^ __sinSeries(__f._M_x);
        else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_sin)))
          return __sign_flip ^ __cosSeries(__f._M_x);
        else // some_of(__need_sin)
          {
            _V __r = __cosSeries(__f._M_x);
            where(__need_sin.__cvt(), __r) = __sinSeries(__f._M_x);
            return __sign_flip ^ __r;
          }
      }
  }

template <typename _Tp>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
  sin(simd<_Tp, simd_abi::scalar> __x)
  { return std::sin(__data(__x)); }

//}}}
_GLIBCXX_SIMD_MATH_CALL_(tan)
_GLIBCXX_SIMD_MATH_CALL_(acosh)
_GLIBCXX_SIMD_MATH_CALL_(asinh)
_GLIBCXX_SIMD_MATH_CALL_(atanh)
_GLIBCXX_SIMD_MATH_CALL_(cosh)
_GLIBCXX_SIMD_MATH_CALL_(sinh)
_GLIBCXX_SIMD_MATH_CALL_(tanh)
// }}}
// exponential functions {{{
_GLIBCXX_SIMD_MATH_CALL_(exp)
_GLIBCXX_SIMD_MATH_CALL_(exp2)
_GLIBCXX_SIMD_MATH_CALL_(expm1)

// }}}
// frexp {{{
#if _GLIBCXX_SIMD_X86INTRIN
template <typename _Tp, size_t _Np>
  _SimdWrapper<_Tp, _Np>
  __getexp(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getexp_ps(__to_intrin(__x)));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getexp_ps(__auto_bitcast(__to_intrin(__x))));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getexp_ps(__x);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getexp_ps(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getexp_ps(__x);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getexp_pd(__x);
    else
      __assert_unreachable<_Tp>();
  }

template <typename _Tp, size_t _Np>
  _SimdWrapper<_Tp, _Np>
  __getmant_avx512(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getmant_ps(__to_intrin(__x), _MM_MANT_NORM_p5_1,
                                           _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getmant_ps(__auto_bitcast(__to_intrin(__x)),
                                              _MM_MANT_NORM_p5_1,
                                              _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
                                       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getmant_ps(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
                                       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
                                       _MM_MANT_SIGN_src));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else
      __assert_unreachable<_Tp>();
  }
#endif // _GLIBCXX_SIMD_X86INTRIN

/**
 * Splits @p __v into exponent and mantissa; the sign is kept with the mantissa.
 *
 * The return value will be in the range [0.5, 1.0).
 * The @p __e value will be an integer defining the power-of-two exponent.
 */
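// Example (illustrative): for a simd __v with all elements equal to 8,
// frexp(__v, &__exp) returns 0.5 in every element and stores 4 in every
// element of *__exp (8 == 0.5 * 2^4); for elements equal to 0.375 the results
// are 0.75 and -1 (0.375 == 0.75 * 2^-1).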
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  frexp(const simd<_Tp, _Abi>& __x, _Samesize<int, simd<_Tp, _Abi>>* __exp)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      {
        int __tmp;
        const auto __r = std::frexp(__x[0], &__tmp);
        (*__exp)[0] = __tmp;
        return __r;
      }
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      {
        return {__private_init,
                _Abi::_SimdImpl::_S_frexp(__data(__x), __data(*__exp))};
#if _GLIBCXX_SIMD_X86INTRIN
      }
    else if constexpr (__have_avx512f)
      {
        constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
        constexpr size_t _NI = _Np < 4 ? 4 : _Np;
        const auto __v = __data(__x);
        const auto __isnonzero
          = _Abi::_SimdImpl::_S_isnonzerovalue_mask(__v._M_data);
        const _SimdWrapper<int, _NI> __exp_plus1
          = 1 + __convert<_SimdWrapper<int, _NI>>(__getexp(__v))._M_data;
        const _SimdWrapper<int, _Np> __e = __wrapper_bitcast<int, _Np>(
          _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _NI>(__isnonzero),
                                      _SimdWrapper<int, _NI>(), __exp_plus1));
        simd_abi::deduce_t<int, _Np>::_CommonImpl::_S_store(__e, __exp);
        return {__private_init,
                _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _Np>(
                                              __isnonzero),
                                            __v, __getmant_avx512(__v))};
#endif // _GLIBCXX_SIMD_X86INTRIN
      }
    else
      {
        // fallback implementation
        static_assert(sizeof(_Tp) == 4 || sizeof(_Tp) == 8);
        using _V = simd<_Tp, _Abi>;
        using _IV = rebind_simd_t<int, _V>;
        using namespace std::experimental::__proposed;
        using namespace std::experimental::__float_bitwise_operators;

        constexpr int __exp_adjust = sizeof(_Tp) == 4 ? 0x7e : 0x3fe;
        constexpr int __exp_offset = sizeof(_Tp) == 4 ? 0x70 : 0x200;
        constexpr _Tp __subnorm_scale = sizeof(_Tp) == 4 ? 0x1p112 : 0x1p512;
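        // Note (illustrative): __exp_adjust is the exponent bias minus 1
        // (126 resp. 1022), which maps the mantissa into [0.5, 1) instead of
        // [1, 2); __subnorm_scale (2^112 resp. 2^512) normalizes subnormal
        // inputs, and __exp_offset (112 resp. 512) undoes that scaling in the
        // reported exponent.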
        _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __exponent_mask
          = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
        _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __p5_1_exponent
          = -(2 - __epsilon_v<_Tp>) / 2; // 0xbf7fffff or 0xbfefffffffffffff

        _V __mant = __p5_1_exponent & (__exponent_mask | __x); // +/-[.5, 1)
        const _IV __exponent_bits = __extract_exponent_as_int(__x);
        if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))))
          {
            *__exp
              = simd_cast<_Samesize<int, _V>>(__exponent_bits - __exp_adjust);
            return __mant;
          }

#if __FINITE_MATH_ONLY__
        // at least one element of __x is 0 or subnormal, the rest is normal
        // (inf and NaN are excluded by -ffinite-math-only)
        const auto __iszero_inf_nan = __x == 0;
#else
        const auto __as_int
          = __bit_cast<rebind_simd_t<__int_for_sizeof_t<_Tp>, _V>>(abs(__x));
        const auto __inf
          = __bit_cast<rebind_simd_t<__int_for_sizeof_t<_Tp>, _V>>(
              _V(__infinity_v<_Tp>));
        const auto __iszero_inf_nan = static_simd_cast<typename _V::mask_type>(
          __as_int == 0 || __as_int >= __inf);
#endif

        const _V __scaled_subnormal = __x * __subnorm_scale;
        const _V __mant_subnormal
          = __p5_1_exponent & (__exponent_mask | __scaled_subnormal);
        where(!isnormal(__x), __mant) = __mant_subnormal;
        where(__iszero_inf_nan, __mant) = __x;
        _IV __e = __extract_exponent_as_int(__scaled_subnormal);
        using _MaskType =
          typename conditional_t<sizeof(typename _V::value_type) == sizeof(int),
                                 _V, _IV>::mask_type;
        const _MaskType __value_isnormal = isnormal(__x).__cvt();
        where(__value_isnormal.__cvt(), __e) = __exponent_bits;
        static_assert(sizeof(_IV) == sizeof(__value_isnormal));
        const _IV __offset
          = (__bit_cast<_IV>(__value_isnormal) & _IV(__exp_adjust))
            | (__bit_cast<_IV>(static_simd_cast<_MaskType>(__exponent_bits == 0)
                               & static_simd_cast<_MaskType>(__x != 0))
               & _IV(__exp_adjust + __exp_offset));
        *__exp = simd_cast<_Samesize<int, _V>>(__e - __offset);
        return __mant;
      }
  }

// }}}
_GLIBCXX_SIMD_MATH_CALL2_(ldexp, int)
_GLIBCXX_SIMD_MATH_CALL_(ilogb)

// logarithms {{{
_GLIBCXX_SIMD_MATH_CALL_(log)
_GLIBCXX_SIMD_MATH_CALL_(log10)
_GLIBCXX_SIMD_MATH_CALL_(log1p)
_GLIBCXX_SIMD_MATH_CALL_(log2)

//}}}
// logb{{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, _Abi>>
  logb(const simd<_Tp, _Abi>& __x)
  {
    constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
    if constexpr (_Np == 1)
      return std::logb(__x[0]);
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      {
        return {__private_init,
                __data(__x)._M_apply_per_chunk([](auto __impl, auto __xx) {
                  using _V = typename decltype(__impl)::simd_type;
                  return __data(
                    std::experimental::logb(_V(__private_init, __xx)));
                })};
      }
#if _GLIBCXX_SIMD_X86INTRIN // {{{
    else if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return {__private_init,
              __auto_bitcast(_mm_getexp_ps(__to_intrin(__as_vector(__x))))};
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return {__private_init, _mm_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return {__private_init, _mm256_getexp_ps(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return {__private_init, _mm256_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return {__private_init,
              __lo256(_mm512_getexp_ps(__auto_bitcast(__data(__x))))};
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return {__private_init,
              __lo256(_mm512_getexp_pd(__auto_bitcast(__data(__x))))};
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return {__private_init, _mm512_getexp_ps(__data(__x))};
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return {__private_init, _mm512_getexp_pd(__data(__x))};
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
    else
      {
        using _V = simd<_Tp, _Abi>;
        using namespace std::experimental::__proposed;
        auto __is_normal = isnormal(__x);

        // work on abs(__x) to reflect the return value on Linux for negative
        // inputs (domain-error => implementation-defined value is returned)
        const _V abs_x = abs(__x);

        // __exponent(__x) returns the exponent value (bias removed) as
        // simd<_Up> with integral _Up
        auto&& __exponent = [](const _V& __v) {
          using namespace std::experimental::__proposed;
          using _IV = rebind_simd_t<
            conditional_t<sizeof(_Tp) == sizeof(_LLong), _LLong, int>, _V>;
          return (__bit_cast<_IV>(__v) >> (__digits_v<_Tp> - 1))
                 - (__max_exponent_v<_Tp> - 1);
        };
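        // e.g. (illustrative) __exponent returns 3 for elements equal to 8 and
        // -2 for elements equal to 0.25, as long as the input is normal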
        _V __r = static_simd_cast<_V>(__exponent(abs_x));
        if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__is_normal)))
          // without corner cases (nan, inf, subnormal, zero) we have our
          // answer:
          return __r;
        const auto __is_zero = __x == 0;
        const auto __is_nan = isnan(__x);
        const auto __is_inf = isinf(__x);
        where(__is_zero, __r) = -__infinity_v<_Tp>;
        where(__is_nan, __r) = __x;
        where(__is_inf, __r) = __infinity_v<_Tp>;
        __is_normal |= __is_zero || __is_nan || __is_inf;
        if (all_of(__is_normal))
          // at this point everything but subnormals is handled
          return __r;
        // subnormals repeat the exponent extraction after multiplication of
        // the input with a floating point value that has 112 (0x70) in its
        // exponent (not too big for sp and large enough for dp)
        const _V __scaled = abs_x * _Tp(0x1p112);
        _V __scaled_exp = static_simd_cast<_V>(__exponent(__scaled) - 112);
        where(__is_normal, __scaled_exp) = __r;
        return __scaled_exp;
      }
  }

//}}}
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  modf(const simd<_Tp, _Abi>& __x, simd<_Tp, _Abi>* __iptr)
  {
    if constexpr (__is_scalar_abi<_Abi>()
                  || (__is_fixed_size_abi_v<_Abi>
                      && simd_size_v<_Tp, _Abi> == 1))
      {
        _Tp __tmp;
        _Tp __r = std::modf(__x[0], &__tmp);
        __iptr[0] = __tmp;
        return __r;
      }
    else
      {
        const auto __integral = trunc(__x);
        *__iptr = __integral;
        auto __r = __x - __integral;
#if !__FINITE_MATH_ONLY__
        where(isinf(__x), __r) = _Tp();
#endif
        return copysign(__r, __x);
      }
  }

_GLIBCXX_SIMD_MATH_CALL2_(scalbn, int)
_GLIBCXX_SIMD_MATH_CALL2_(scalbln, long)

_GLIBCXX_SIMD_MATH_CALL_(cbrt)

_GLIBCXX_SIMD_MATH_CALL_(abs)
_GLIBCXX_SIMD_MATH_CALL_(fabs)

// [parallel.simd.math] only asks for is_floating_point_v<_Tp> and forgot to
// allow signed integral _Tp
template <typename _Tp, typename _Abi>
  enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
  abs(const simd<_Tp, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }

template <typename _Tp, typename _Abi>
  enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
  fabs(const simd<_Tp, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }

// the following are overloads for functions in <cstdlib> and not covered by
// [parallel.simd.math]. I don't see much value in making them work, though
/*
template <typename _Abi> simd<long, _Abi> labs(const simd<long, _Abi> &__x)
{ return {__private_init, _Abi::_SimdImpl::abs(__data(__x))}; }

template <typename _Abi> simd<long long, _Abi> llabs(const simd<long long, _Abi>
&__x)
{ return {__private_init, _Abi::_SimdImpl::abs(__data(__x))}; }
*/

#define _GLIBCXX_SIMD_CVTING2(_NAME) \
  template <typename _Tp, typename _Abi> \
    _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
      const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y) \
    { \
      return _NAME(__x, __y); \
    } \
\
  template <typename _Tp, typename _Abi> \
    _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
      const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y) \
    { \
      return _NAME(__x, __y); \
    }
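// These overloads (and the three-argument variants below) make one parameter
// a non-deduced context, so that an argument that is merely convertible to
// simd<_Tp, _Abi> (e.g., illustrative, a broadcastable scalar as in
// hypot(__v, 2.f) for a simd<float, _Abi> __v) can be mixed with a simd
// argument.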

#define _GLIBCXX_SIMD_CVTING3(_NAME) \
  template <typename _Tp, typename _Abi> \
    _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
      const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
      const simd<_Tp, _Abi>& __z) \
    { \
      return _NAME(__x, __y, __z); \
    } \
\
  template <typename _Tp, typename _Abi> \
    _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
      const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
      const simd<_Tp, _Abi>& __z) \
    { \
      return _NAME(__x, __y, __z); \
    } \
\
  template <typename _Tp, typename _Abi> \
    _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
      const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y, \
      const __type_identity_t<simd<_Tp, _Abi>>& __z) \
    { \
      return _NAME(__x, __y, __z); \
    } \
\
  template <typename _Tp, typename _Abi> \
    _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
      const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
      const __type_identity_t<simd<_Tp, _Abi>>& __z) \
    { \
      return _NAME(__x, __y, __z); \
    } \
\
  template <typename _Tp, typename _Abi> \
    _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
      const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
      const __type_identity_t<simd<_Tp, _Abi>>& __z) \
    { \
      return _NAME(__x, __y, __z); \
    } \
\
  template <typename _Tp, typename _Abi> \
    _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
      const __type_identity_t<simd<_Tp, _Abi>>& __x, \
      const __type_identity_t<simd<_Tp, _Abi>>& __y, const simd<_Tp, _Abi>& __z) \
    { \
      return _NAME(__x, __y, __z); \
    }

template <typename _R, typename _ToApply, typename _Tp, typename... _Tps>
  _GLIBCXX_SIMD_INTRINSIC _R
  __fixed_size_apply(_ToApply&& __apply, const _Tp& __arg0,
                     const _Tps&... __args)
  {
    return {__private_init,
            __data(__arg0)._M_apply_per_chunk(
              [&](auto __impl, const auto&... __inner) {
                using _V = typename decltype(__impl)::simd_type;
                return __data(__apply(_V(__private_init, __inner)...));
              },
              __data(__args)...)};
  }

template <typename _VV>
  __remove_cvref_t<_VV>
  __hypot(_VV __x, _VV __y)
  {
    using _V = __remove_cvref_t<_VV>;
    using _Tp = typename _V::value_type;
    if constexpr (_V::size() == 1)
      return std::hypot(_Tp(__x[0]), _Tp(__y[0]));
    else if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
      {
        return __fixed_size_apply<_V>([](auto __a,
                                         auto __b) { return hypot(__a, __b); },
                                      __x, __y);
      }
    else
      {
        // A simple solution for _Tp == float would be to cast to double and
        // simply calculate sqrt(x²+y²) as it can't over-/underflow anymore with
        // dp. It still needs the Annex F fixups though and isn't faster on
        // Skylake-AVX512 (not even for SSE and AVX vectors, and really bad for
        // AVX-512).
        using namespace __float_bitwise_operators;
        _V __absx = abs(__x);          // no error
        _V __absy = abs(__y);          // no error
        _V __hi = max(__absx, __absy); // no error
        _V __lo = min(__absy, __absx); // no error

        // round __hi down to the next power-of-2:
        _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
        if constexpr (__have_neon && !__have_neon_a32)
          { // With ARMv7 NEON, we have no subnormals and must use slightly
            // different strategy
            const _V __hi_exp = __hi & __inf;
            _V __scale_back = __hi_exp;
            // For large exponents (max & max/2) the inversion comes too close
            // to subnormals. Subtract 3 from the exponent:
            where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
            // Invert and adjust for the off-by-one error of inversion via xor:
            const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
            const _V __h1 = __hi * __scale;
            const _V __l1 = __lo * __scale;
            _V __r = __scale_back * sqrt(__h1 * __h1 + __l1 * __l1);
            // Fix up hypot(0, 0) to not be NaN:
            where(__hi == 0, __r) = 0;
            return __r;
          }
#endif

#ifdef __FAST_MATH__
        // With fast-math, ignore precision of subnormals and inputs from
        // __finite_max_v/2 to __finite_max_v. This removes all
        // branching/masking.
        if constexpr (true)
#else
        if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
                                    && all_of(isnormal(__y))))
#endif
          {
            const _V __hi_exp = __hi & __inf;
            // ((__hi + __hi) & __inf) ^ __inf almost works for computing
            // __scale, except when (__hi + __hi) & __inf == __inf, in which
            // case __scale becomes 0 (should be min/2 instead) and thus loses
            // the information from __lo.
#ifdef __FAST_MATH__
            using _Ip = __int_for_sizeof_t<_Tp>;
            using _IV = rebind_simd_t<_Ip, _V>;
            const auto __as_int = __bit_cast<_IV>(__hi_exp);
            const _V __scale
              = __bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
            const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
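            // Either way __scale equals 1 / __hi_exp: e.g. (illustrative) for
            // __hi_exp == 2^42 the xor flips the exponent bits to 2^(1-42) and
            // the multiplication by .5 gives 2^-42; the integer variant above
            // computes the same bit pattern directly.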
|
---|
1037 | _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __mant_mask
|
---|
1038 | = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
|
---|
1039 | const _V __h1 = (__hi & __mant_mask) | _V(1);
|
---|
1040 | const _V __l1 = __lo * __scale;
|
---|
1041 | return __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
|
---|
1042 | }
|
---|
1043 | else
|
---|
1044 | {
|
---|
1045 | // slower path to support subnormals
|
---|
1046 | // if __hi is subnormal, avoid scaling by inf & final mul by 0
|
---|
1047 | // (which yields NaN) by using min()
|
---|
1048 | _V __scale = _V(1 / __norm_min_v<_Tp>);
|
---|
1049 | // invert exponent w/o error and w/o using the slow divider unit:
|
---|
1050 | // xor inverts the exponent but off by 1. Multiplication with .5
|
---|
1051 | // adjusts for the discrepancy.
|
---|
1052 | where(__hi >= __norm_min_v<_Tp>, __scale)
|
---|
1053 | = ((__hi & __inf) ^ __inf) * _Tp(.5);
|
---|
1054 | // adjust final exponent for subnormal inputs
|
---|
1055 | _V __hi_exp = __norm_min_v<_Tp>;
|
---|
1056 | where(__hi >= __norm_min_v<_Tp>, __hi_exp)
|
---|
1057 | = __hi & __inf; // no error
|
---|
1058 | _V __h1 = __hi * __scale; // no error
|
---|
1059 | _V __l1 = __lo * __scale; // no error
|
---|
1060 |
|
---|
1061 | // sqrt(x²+y²) = e*sqrt((x/e)²+(y/e)²):
|
---|
1062 | // this ensures no overflow in the argument to sqrt
|
---|
1063 | _V __r = __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
|
---|
1064 | #ifdef __STDC_IEC_559__
|
---|
1065 | // fixup for Annex F requirements
|
---|
1066 | // the naive fixup goes like this:
|
---|
1067 | //
|
---|
1068 | // where(__l1 == 0, __r) = __hi;
|
---|
1069 | // where(isunordered(__x, __y), __r) = __quiet_NaN_v<_Tp>;
|
---|
1070 | // where(isinf(__absx) || isinf(__absy), __r) = __inf;
|
---|
1071 | //
|
---|
1072 | // The fixup can be prepared in parallel with the sqrt, requiring a
|
---|
1073 | // single blend step after hi_exp * sqrt, reducing latency and
|
---|
1074 | // throughput:
|
---|
1075 | _V __fixup = __hi; // __lo == 0
|
---|
1076 | where(isunordered(__x, __y), __fixup) = __quiet_NaN_v<_Tp>;
|
---|
1077 | where(isinf(__absx) || isinf(__absy), __fixup) = __inf;
|
---|
1078 | where(!(__lo == 0 || isunordered(__x, __y)
|
---|
1079 | || (isinf(__absx) || isinf(__absy))),
|
---|
1080 | __fixup)
|
---|
1081 | = __r;
|
---|
1082 | __r = __fixup;
|
---|
1083 | #endif
|
---|
1084 | return __r;
|
---|
1085 | }
|
---|
1086 | }
|
---|
1087 | }
|
---|
1088 |
|
---|
1089 | template <typename _Tp, typename _Abi>
|
---|
1090 | _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
|
---|
1091 | hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
|
---|
1092 | {
|
---|
1093 | return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
|
---|
1094 | const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x,
|
---|
1095 | __y);
|
---|
1096 | }
|
---|
1097 |
|
---|
1098 | _GLIBCXX_SIMD_CVTING2(hypot)
|
---|
1099 |
|
---|
1100 | template <typename _VV>
|
---|
1101 | __remove_cvref_t<_VV>
|
---|
1102 | __hypot(_VV __x, _VV __y, _VV __z)
|
---|
1103 | {
|
---|
1104 | using _V = __remove_cvref_t<_VV>;
|
---|
1105 | using _Abi = typename _V::abi_type;
|
---|
1106 | using _Tp = typename _V::value_type;
|
---|
1107 | /* FIXME: enable after PR77776 is resolved
|
---|
1108 | if constexpr (_V::size() == 1)
|
---|
1109 | return std::hypot(_Tp(__x[0]), _Tp(__y[0]), _Tp(__z[0]));
|
---|
1110 | else
|
---|
1111 | */
|
---|
1112 | if constexpr (__is_fixed_size_abi_v<_Abi> && _V::size() > 1)
|
---|
1113 | {
|
---|
1114 | return __fixed_size_apply<simd<_Tp, _Abi>>(
|
---|
1115 | [](auto __a, auto __b, auto __c) { return hypot(__a, __b, __c); },
|
---|
1116 | __x, __y, __z);
|
---|
1117 | }
|
---|
1118 | else
|
---|
1119 | {
|
---|
1120 | using namespace __float_bitwise_operators;
|
---|
1121 | const _V __absx = abs(__x); // no error
|
---|
1122 | const _V __absy = abs(__y); // no error
|
---|
1123 | const _V __absz = abs(__z); // no error
|
---|
1124 | _V __hi = max(max(__absx, __absy), __absz); // no error
|
---|
1125 | _V __l0 = min(__absz, max(__absx, __absy)); // no error
|
---|
1126 | _V __l1 = min(__absy, __absx); // no error
|
---|
1127 | if constexpr (__digits_v<_Tp> == 64 && __max_exponent_v<_Tp> == 0x4000
|
---|
1128 | && __min_exponent_v<_Tp> == -0x3FFD && _V::size() == 1)
|
---|
1129 | { // Seems like x87 fp80, where bit 63 is always 1 unless subnormal or
|
---|
1130 | // NaN. In this case the bit-tricks don't work, they require IEC559
|
---|
1131 | // binary32 or binary64 format.
|
---|
1132 | #ifdef __STDC_IEC_559__
|
---|
1133 | // fixup for Annex F requirements
|
---|
1134 | if (isinf(__absx[0]) || isinf(__absy[0]) || isinf(__absz[0]))
|
---|
1135 | return __infinity_v<_Tp>;
|
---|
1136 | else if (isunordered(__absx[0], __absy[0] + __absz[0]))
|
---|
1137 | return __quiet_NaN_v<_Tp>;
|
---|
1138 | else if (__l0[0] == 0 && __l1[0] == 0)
|
---|
1139 | return __hi;
|
---|
1140 | #endif
|
---|
1141 | _V __hi_exp = __hi;
|
---|
1142 | const _ULLong __tmp = 0x8000'0000'0000'0000ull;
|
---|
1143 | __builtin_memcpy(&__data(__hi_exp), &__tmp, 8);
|
---|
1144 | const _V __scale = 1 / __hi_exp;
|
---|
1145 | __hi *= __scale;
|
---|
1146 | __l0 *= __scale;
|
---|
1147 | __l1 *= __scale;
|
---|
1148 | return __hi_exp * sqrt((__l0 * __l0 + __l1 * __l1) + __hi * __hi);
|
---|
1149 | }
|
---|
1150 | else
|
---|
1151 | {
|
---|
1152 | // round __hi down to the next power-of-2:
|
---|
1153 | _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);
|
---|
1154 |
|
---|
1155 | #ifndef __FAST_MATH__
|
---|
1156 | if constexpr (_V::size() > 1 && __have_neon && !__have_neon_a32)
|
---|
1157 | { // With ARMv7 NEON, we have no subnormals and must use slightly
|
---|
1158 | // different strategy
|
---|
1159 | const _V __hi_exp = __hi & __inf;
|
---|
1160 | _V __scale_back = __hi_exp;
|
---|
1161 | // For large exponents (max & max/2) the inversion comes too
|
---|
1162 | // close to subnormals. Subtract 3 from the exponent:
|
---|
1163 | where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
|
---|
1164 | // Invert and adjust for the off-by-one error of inversion via
|
---|
1165 | // xor:
|
---|
1166 | const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
|
---|
1167 | const _V __h1 = __hi * __scale;
|
---|
1168 | __l0 *= __scale;
|
---|
1169 | __l1 *= __scale;
|
---|
1170 | _V __lo = __l0 * __l0
|
---|
1171 | + __l1 * __l1; // add the two smaller values first
|
---|
1172 | asm("" : "+m"(__lo));
|
---|
1173 | _V __r = __scale_back * sqrt(__h1 * __h1 + __lo);
|
---|
1174 | // Fix up hypot(0, 0, 0) to not be NaN:
|
---|
1175 | where(__hi == 0, __r) = 0;
|
---|
1176 | return __r;
|
---|
1177 | }
|
---|
1178 | #endif
|
---|
1179 |
|
---|
1180 | #ifdef __FAST_MATH__
|
---|
1181 | // With fast-math, ignore precision of subnormals and inputs from
|
---|
1182 | // __finite_max_v/2 to __finite_max_v. This removes all
|
---|
1183 | // branching/masking.
|
---|
1184 | if constexpr (true)
|
---|
1185 | #else
|
---|
1186 | if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
|
---|
1187 | && all_of(isnormal(__y))
|
---|
1188 | && all_of(isnormal(__z))))
|
---|
1189 | #endif
|
---|
1190 | {
|
---|
1191 | const _V __hi_exp = __hi & __inf;
|
---|
1192 | //((__hi + __hi) & __inf) ^ __inf almost works for computing
|
---|
1193 | //__scale, except when (__hi + __hi) & __inf == __inf, in which
|
---|
1194 | // case __scale
|
---|
1195 | // becomes 0 (should be min/2 instead) and thus loses the
|
---|
1196 | // information from __lo.
|
---|
1197 | #ifdef __FAST_MATH__
|
---|
1198 | using _Ip = __int_for_sizeof_t<_Tp>;
|
---|
1199 | using _IV = rebind_simd_t<_Ip, _V>;
|
---|
1200 | const auto __as_int = __bit_cast<_IV>(__hi_exp);
|
---|
1201 | const _V __scale
|
---|
1202 | = __bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
|
---|
1203 | #else
|
---|
1204 | const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
|
---|
1205 | #endif
|
---|
1206 | constexpr _Tp __mant_mask
|
---|
1207 | = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
|
---|
1208 | const _V __h1 = (__hi & _V(__mant_mask)) | _V(1);
|
---|
          __l0 *= __scale;
          __l1 *= __scale;
          const _V __lo
            = __l0 * __l0
                + __l1 * __l1; // add the two smaller values first
          return __hi_exp * sqrt(__lo + __h1 * __h1);
        }
      else
        {
          // slower path to support subnormals
          // if __hi is subnormal, avoid scaling by inf & the final mul by 0
          // (which yields NaN) by using min()
          _V __scale = _V(1 / __norm_min_v<_Tp>);
          // invert the exponent w/o error and w/o using the slow divider
          // unit: xor inverts the exponent but is off by 1. Multiplication
          // with .5 adjusts for the discrepancy.
          where(__hi >= __norm_min_v<_Tp>, __scale)
            = ((__hi & __inf) ^ __inf) * _Tp(.5);
          // adjust the final exponent for subnormal inputs
          _V __hi_exp = __norm_min_v<_Tp>;
          where(__hi >= __norm_min_v<_Tp>, __hi_exp)
            = __hi & __inf;            // no error
          _V __h1 = __hi * __scale;    // no error
          __l0 *= __scale;             // no error
          __l1 *= __scale;             // no error
          _V __lo = __l0 * __l0
                      + __l1 * __l1;   // add the two smaller values first
          _V __r = __hi_exp * sqrt(__lo + __h1 * __h1);
#ifdef __STDC_IEC_559__
          // fixup for Annex F requirements
          _V __fixup = __hi; // covers the __lo == 0 case
          // where(__lo == 0, __fixup) = __hi;
          where(isunordered(__x, __y + __z), __fixup)
            = __quiet_NaN_v<_Tp>;
          where(isinf(__absx) || isinf(__absy) || isinf(__absz), __fixup)
            = __inf;
          // Instead of __lo == 0, the following could depend on
          // __h1² == __h1² + __lo (i.e. __hi is so much larger than the other
          // two inputs that the result is exactly __hi). While this may
          // improve precision, it is likely to reduce efficiency if the ISA
          // has FMAs (because __h1² + __lo is an FMA, but the intermediate
          // __h1² must be kept).
          where(!(__lo == 0 || isunordered(__x, __y + __z)
                    || isinf(__absx) || isinf(__absy) || isinf(__absz)),
                __fixup)
            = __r;
          __r = __fixup;
#endif
          return __r;
        }
      }
    }
  }

  template <typename _Tp, typename _Abi>
    _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
    hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y,
          const simd<_Tp, _Abi>& __z)
    {
      return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
                                   const simd<_Tp, _Abi>&,
                                   simd<_Tp, _Abi>>>(__x, __y, __z);
    }

  _GLIBCXX_SIMD_CVTING3(hypot)
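
  // Illustrative usage (not part of this header): given
  //   simd<float> a(3.f), b(4.f), c(0.f);
  // hypot(a, b, c) yields 5.f in every element, while the exponent scaling in
  // __hypot avoids the spurious overflow/underflow that a naive
  // sqrt(a * a + b * b + c * c) could produce for very large or tiny inputs.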

  _GLIBCXX_SIMD_MATH_CALL2_(pow, _Tp)

  _GLIBCXX_SIMD_MATH_CALL_(sqrt)
  _GLIBCXX_SIMD_MATH_CALL_(erf)
  _GLIBCXX_SIMD_MATH_CALL_(erfc)
  _GLIBCXX_SIMD_MATH_CALL_(lgamma)
  _GLIBCXX_SIMD_MATH_CALL_(tgamma)
  _GLIBCXX_SIMD_MATH_CALL_(ceil)
  _GLIBCXX_SIMD_MATH_CALL_(floor)
  _GLIBCXX_SIMD_MATH_CALL_(nearbyint)
  _GLIBCXX_SIMD_MATH_CALL_(rint)
  _GLIBCXX_SIMD_MATH_CALL_(lrint)
  _GLIBCXX_SIMD_MATH_CALL_(llrint)

  _GLIBCXX_SIMD_MATH_CALL_(round)
  _GLIBCXX_SIMD_MATH_CALL_(lround)
  _GLIBCXX_SIMD_MATH_CALL_(llround)

  _GLIBCXX_SIMD_MATH_CALL_(trunc)

  _GLIBCXX_SIMD_MATH_CALL2_(fmod, _Tp)
  _GLIBCXX_SIMD_MATH_CALL2_(remainder, _Tp)
  _GLIBCXX_SIMD_MATH_CALL3_(remquo, _Tp, int*)

  template <typename _Tp, typename _Abi>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    copysign(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
    {
      if constexpr (simd_size_v<_Tp, _Abi> == 1)
        return std::copysign(__x[0], __y[0]);
      else if constexpr (is_same_v<_Tp, long double> && sizeof(_Tp) == 12)
        // Remove this case once __bit_cast is implemented via
        // __builtin_bit_cast. It is necessary because __signmask below cannot
        // be computed at compile time.
        return simd<_Tp, _Abi>(
                 [&](auto __i) { return std::copysign(__x[__i], __y[__i]); });
      else
        {
          using _V = simd<_Tp, _Abi>;
          using namespace std::experimental::__float_bitwise_operators;
          _GLIBCXX_SIMD_USE_CONSTEXPR_API auto __signmask = _V(1) ^ _V(-1);
          return (__x & (__x ^ __signmask)) | (__y & __signmask);
        }
    }
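
  // The branch-free formulation above is the usual bitwise copysign: for
  // binary32, _V(1) ^ _V(-1) is 0x80000000 in every element, so the result
  // combines __x with its sign bit cleared and __y reduced to its sign bit
  // (illustration; the other IEC 559 formats work the same way with their
  // respective sign-bit masks).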

  _GLIBCXX_SIMD_MATH_CALL2_(nextafter, _Tp)
  // not covered in [parallel.simd.math]:
  // _GLIBCXX_SIMD_MATH_CALL2_(nexttoward, long double)
  _GLIBCXX_SIMD_MATH_CALL2_(fdim, _Tp)
  _GLIBCXX_SIMD_MATH_CALL2_(fmax, _Tp)
  _GLIBCXX_SIMD_MATH_CALL2_(fmin, _Tp)

  _GLIBCXX_SIMD_MATH_CALL3_(fma, _Tp, _Tp)
  _GLIBCXX_SIMD_MATH_CALL_(fpclassify)
  _GLIBCXX_SIMD_MATH_CALL_(isfinite)

  // isnan and isinf require special treatment because old glibc may declare
  // `int isinf(double)`.
  template <typename _Tp, typename _Abi, typename...,
            typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
    enable_if_t<is_floating_point_v<_Tp>, _R>
    isinf(simd<_Tp, _Abi> __x)
    { return {__private_init, _Abi::_SimdImpl::_S_isinf(__data(__x))}; }

  template <typename _Tp, typename _Abi, typename...,
            typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
    enable_if_t<is_floating_point_v<_Tp>, _R>
    isnan(simd<_Tp, _Abi> __x)
    { return {__private_init, _Abi::_SimdImpl::_S_isnan(__data(__x))}; }

  _GLIBCXX_SIMD_MATH_CALL_(isnormal)

  template <typename..., typename _Tp, typename _Abi>
    simd_mask<_Tp, _Abi>
    signbit(simd<_Tp, _Abi> __x)
    {
      if constexpr (is_integral_v<_Tp>)
        {
          if constexpr (is_unsigned_v<_Tp>)
            return simd_mask<_Tp, _Abi>{}; // false
          else
            return __x < 0;
        }
      else
        return {__private_init, _Abi::_SimdImpl::_S_signbit(__data(__x))};
    }

  _GLIBCXX_SIMD_MATH_CALL2_(isgreater, _Tp)
  _GLIBCXX_SIMD_MATH_CALL2_(isgreaterequal, _Tp)
  _GLIBCXX_SIMD_MATH_CALL2_(isless, _Tp)
  _GLIBCXX_SIMD_MATH_CALL2_(islessequal, _Tp)
  _GLIBCXX_SIMD_MATH_CALL2_(islessgreater, _Tp)
  _GLIBCXX_SIMD_MATH_CALL2_(isunordered, _Tp)

  /* not covered in [parallel.simd.math]
  template <typename _Abi> __doublev<_Abi> nan(const char* tagp);
  template <typename _Abi> __floatv<_Abi> nanf(const char* tagp);
  template <typename _Abi> __ldoublev<_Abi> nanl(const char* tagp);

  template <typename _V> struct simd_div_t {
    _V quot, rem;
  };

  template <typename _Abi>
  simd_div_t<_SCharv<_Abi>> div(_SCharv<_Abi> numer, _SCharv<_Abi> denom);
  template <typename _Abi>
  simd_div_t<__shortv<_Abi>> div(__shortv<_Abi> numer, __shortv<_Abi> denom);
  template <typename _Abi>
  simd_div_t<__intv<_Abi>> div(__intv<_Abi> numer, __intv<_Abi> denom);
  template <typename _Abi>
  simd_div_t<__longv<_Abi>> div(__longv<_Abi> numer, __longv<_Abi> denom);
  template <typename _Abi>
  simd_div_t<__llongv<_Abi>> div(__llongv<_Abi> numer, __llongv<_Abi> denom);
  */

  // special math {{{
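  // The special math functions below are not vectorized; each one evaluates
  // element-wise via the generator constructor, calling the corresponding
  // scalar function from <cmath>.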
  template <typename _Tp, typename _Abi>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    assoc_laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
                   const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
                   const simd<_Tp, _Abi>& __x)
    {
      return simd<_Tp, _Abi>([&](auto __i) {
               return std::assoc_laguerre(__n[__i], __m[__i], __x[__i]);
             });
    }

  template <typename _Tp, typename _Abi>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    assoc_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
                   const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
                   const simd<_Tp, _Abi>& __x)
    {
      return simd<_Tp, _Abi>([&](auto __i) {
               return std::assoc_legendre(__n[__i], __m[__i], __x[__i]);
             });
    }

  _GLIBCXX_SIMD_MATH_CALL2_(beta, _Tp)
  _GLIBCXX_SIMD_MATH_CALL_(comp_ellint_1)
  _GLIBCXX_SIMD_MATH_CALL_(comp_ellint_2)
  _GLIBCXX_SIMD_MATH_CALL2_(comp_ellint_3, _Tp)
  _GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_i, _Tp)
  _GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_j, _Tp)
  _GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_k, _Tp)
  _GLIBCXX_SIMD_MATH_CALL2_(cyl_neumann, _Tp)
  _GLIBCXX_SIMD_MATH_CALL2_(ellint_1, _Tp)
  _GLIBCXX_SIMD_MATH_CALL2_(ellint_2, _Tp)
  _GLIBCXX_SIMD_MATH_CALL3_(ellint_3, _Tp, _Tp)
  _GLIBCXX_SIMD_MATH_CALL_(expint)

  template <typename _Tp, typename _Abi>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    hermite(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
            const simd<_Tp, _Abi>& __x)
    {
      return simd<_Tp, _Abi>(
               [&](auto __i) { return std::hermite(__n[__i], __x[__i]); });
    }

  template <typename _Tp, typename _Abi>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
             const simd<_Tp, _Abi>& __x)
    {
      return simd<_Tp, _Abi>(
               [&](auto __i) { return std::laguerre(__n[__i], __x[__i]); });
    }

  template <typename _Tp, typename _Abi>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
             const simd<_Tp, _Abi>& __x)
    {
      return simd<_Tp, _Abi>(
               [&](auto __i) { return std::legendre(__n[__i], __x[__i]); });
    }

  _GLIBCXX_SIMD_MATH_CALL_(riemann_zeta)

  template <typename _Tp, typename _Abi>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    sph_bessel(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
               const simd<_Tp, _Abi>& __x)
    {
      return simd<_Tp, _Abi>(
               [&](auto __i) { return std::sph_bessel(__n[__i], __x[__i]); });
    }

  template <typename _Tp, typename _Abi>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    sph_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __l,
                 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
                 const simd<_Tp, _Abi>& __theta)
    {
      return simd<_Tp, _Abi>([&](auto __i) {
               return std::sph_legendre(__l[__i], __m[__i], __theta[__i]);
             });
    }

  template <typename _Tp, typename _Abi>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    sph_neumann(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
                const simd<_Tp, _Abi>& __x)
    {
      return simd<_Tp, _Abi>(
               [&](auto __i) { return std::sph_neumann(__n[__i], __x[__i]); });
    }
  // }}}

#undef _GLIBCXX_SIMD_MATH_CALL_
#undef _GLIBCXX_SIMD_MATH_CALL2_
#undef _GLIBCXX_SIMD_MATH_CALL3_

_GLIBCXX_SIMD_END_NAMESPACE

#endif // __cplusplus >= 201703L
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_

// vim: foldmethod=marker sw=2 ts=8 noet sts=2