// Math overloads for simd -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
#define _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_

#if __cplusplus >= 201703L

#include <utility>
#include <iomanip>

_GLIBCXX_SIMD_BEGIN_NAMESPACE
template <typename _Tp, typename _V>
  using _Samesize = fixed_size_simd<_Tp, _V::size()>;

// _Math_return_type {{{
template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type;

template <typename _DoubleR, typename _Tp, typename _Abi>
  using _Math_return_type_t =
    typename _Math_return_type<_DoubleR, _Tp, _Abi>::type;

template <typename _Tp, typename _Abi>
  struct _Math_return_type<double, _Tp, _Abi>
  { using type = simd<_Tp, _Abi>; };

template <typename _Tp, typename _Abi>
  struct _Math_return_type<bool, _Tp, _Abi>
  { using type = simd_mask<_Tp, _Abi>; };

template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type
  { using type = fixed_size_simd<_DoubleR, simd_size_v<_Tp, _Abi>>; };

//}}}
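// For illustration, the trait above maps the scalar return type of the
// wrapped <cmath> function onto the matching simd type (sketch, with
// N = simd_size_v<_Tp, _Abi>):
//
//   _Math_return_type_t<double, _Tp, _Abi>  ->  simd<_Tp, _Abi>          (e.g. cos)
//   _Math_return_type_t<bool,   _Tp, _Abi>  ->  simd_mask<_Tp, _Abi>     (bool-returning predicates)
//   _Math_return_type_t<int,    _Tp, _Abi>  ->  fixed_size_simd<int, N>  (e.g. ilogb)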
// _GLIBCXX_SIMD_MATH_CALL_ {{{
#define _GLIBCXX_SIMD_MATH_CALL_(__name) \
  template <typename _Tp, typename _Abi, typename..., \
            typename _R = _Math_return_type_t< \
              decltype(std::__name(declval<double>())), _Tp, _Abi>> \
  enable_if_t<is_floating_point_v<_Tp>, _R> \
  __name(simd<_Tp, _Abi> __x) \
  { return {__private_init, _Abi::_SimdImpl::_S_##__name(__data(__x))}; }

// }}}
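// As a rough illustration, _GLIBCXX_SIMD_MATH_CALL_(acos) expands to an
// overload along these lines:
//
//   template <typename _Tp, typename _Abi, typename...,
//             typename _R = _Math_return_type_t<
//               decltype(std::acos(declval<double>())), _Tp, _Abi>>
//   enable_if_t<is_floating_point_v<_Tp>, _R>
//   acos(simd<_Tp, _Abi> __x)
//   { return {__private_init, _Abi::_SimdImpl::_S_acos(__data(__x))}; }
//
// i.e. each generated overload forwards to the ABI-specific implementation
// (_SimdImpl::_S_acos here) and wraps the raw result in the matching simd type.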
//_Extra_argument_type{{{
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type;

template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp*, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>*;
    static constexpr double* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Up*, _Tp, _Abi>
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>*;
    static constexpr _Up* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>;
    static constexpr double declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };

template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>;
    static constexpr _Up declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };

//}}}
// _GLIBCXX_SIMD_MATH_CALL2_ {{{
#define _GLIBCXX_SIMD_MATH_CALL2_(__name, arg2_) \
  template < \
    typename _Tp, typename _Abi, typename..., \
    typename _Arg2 = _Extra_argument_type<arg2_, _Tp, _Abi>, \
    typename _R = _Math_return_type_t< \
      decltype(std::__name(declval<double>(), _Arg2::declval())), _Tp, _Abi>> \
  enable_if_t<is_floating_point_v<_Tp>, _R> \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y) \
  { \
    return {__private_init, \
            _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y))}; \
  } \
  template <typename _Up, typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC _Math_return_type_t< \
    decltype(std::__name( \
      declval<double>(), \
      declval<enable_if_t< \
        conjunction_v< \
          is_same<arg2_, _Tp>, \
          negation<is_same<__remove_cvref_t<_Up>, simd<_Tp, _Abi>>>, \
          is_convertible<_Up, simd<_Tp, _Abi>>, is_floating_point<_Tp>>, \
        double>>())), \
    _Tp, _Abi> \
  __name(_Up&& __xx, const simd<_Tp, _Abi>& __yy) \
  { return __name(simd<_Tp, _Abi>(static_cast<_Up&&>(__xx)), __yy); }

// }}}
// _GLIBCXX_SIMD_MATH_CALL3_ {{{
#define _GLIBCXX_SIMD_MATH_CALL3_(__name, arg2_, arg3_) \
  template <typename _Tp, typename _Abi, typename..., \
            typename _Arg2 = _Extra_argument_type<arg2_, _Tp, _Abi>, \
            typename _Arg3 = _Extra_argument_type<arg3_, _Tp, _Abi>, \
            typename _R = _Math_return_type_t< \
              decltype(std::__name(declval<double>(), _Arg2::declval(), \
                                   _Arg3::declval())), \
              _Tp, _Abi>> \
  enable_if_t<is_floating_point_v<_Tp>, _R> \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y, \
         const typename _Arg3::type& __z) \
  { \
    return {__private_init, \
            _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y), \
                                         _Arg3::_S_data(__z))}; \
  } \
  template < \
    typename _T0, typename _T1, typename _T2, typename..., \
    typename _U0 = __remove_cvref_t<_T0>, \
    typename _U1 = __remove_cvref_t<_T1>, \
    typename _U2 = __remove_cvref_t<_T2>, \
    typename _Simd = conditional_t<is_simd_v<_U1>, _U1, _U2>, \
    typename = enable_if_t<conjunction_v< \
      is_simd<_Simd>, is_convertible<_T0&&, _Simd>, \
      is_convertible<_T1&&, _Simd>, is_convertible<_T2&&, _Simd>, \
      negation<conjunction< \
        is_simd<_U0>, is_floating_point<__value_type_or_identity_t<_U0>>>>>>> \
  _GLIBCXX_SIMD_INTRINSIC decltype(__name(declval<const _Simd&>(), \
                                          declval<const _Simd&>(), \
                                          declval<const _Simd&>())) \
  __name(_T0&& __xx, _T1&& __yy, _T2&& __zz) \
  { \
    return __name(_Simd(static_cast<_T0&&>(__xx)), \
                  _Simd(static_cast<_T1&&>(__yy)), \
                  _Simd(static_cast<_T2&&>(__zz))); \
  }

// }}}
// __cosSeries {{{
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
  __cosSeries(const simd<float, _Abi>& __x)
  {
    const simd<float, _Abi> __x2 = __x * __x;
    simd<float, _Abi> __y;
    __y = 0x1.ap-16f;                  //  1/8!
    __y = __y * __x2 - 0x1.6c1p-10f;   // -1/6!
    __y = __y * __x2 + 0x1.555556p-5f; //  1/4!
    return __y * (__x2 * __x2) - .5f * __x2 + 1.f;
  }

template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
  __cosSeries(const simd<double, _Abi>& __x)
  {
    const simd<double, _Abi> __x2 = __x * __x;
    simd<double, _Abi> __y;
    __y = 0x1.AC00000000000p-45;              //  1/16!
    __y = __y * __x2 - 0x1.9394000000000p-37; // -1/14!
    __y = __y * __x2 + 0x1.1EED8C0000000p-29; //  1/12!
    __y = __y * __x2 - 0x1.27E4FB7400000p-22; // -1/10!
    __y = __y * __x2 + 0x1.A01A01A018000p-16; //  1/8!
    __y = __y * __x2 - 0x1.6C16C16C16C00p-10; // -1/6!
    __y = __y * __x2 + 0x1.5555555555554p-5;  //  1/4!
    return (__y * __x2 - .5f) * __x2 + 1.f;
  }

// }}}
// __sinSeries {{{
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
  __sinSeries(const simd<float, _Abi>& __x)
  {
    const simd<float, _Abi> __x2 = __x * __x;
    simd<float, _Abi> __y;
    __y = -0x1.9CC000p-13f;            // -1/7!
    __y = __y * __x2 + 0x1.111100p-7f; //  1/5!
    __y = __y * __x2 - 0x1.555556p-3f; // -1/3!
    return __y * (__x2 * __x) + __x;
  }

template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
  __sinSeries(const simd<double, _Abi>& __x)
  {
    // __x  = [0, 0.7854 = pi/4]
    // __x² = [0, 0.6169 = pi²/8]
    const simd<double, _Abi> __x2 = __x * __x;
    simd<double, _Abi> __y;
    __y = -0x1.ACF0000000000p-41;             // -1/15!
    __y = __y * __x2 + 0x1.6124400000000p-33; //  1/13!
    __y = __y * __x2 - 0x1.AE64567000000p-26; // -1/11!
    __y = __y * __x2 + 0x1.71DE3A5540000p-19; //  1/9!
    __y = __y * __x2 - 0x1.A01A01A01A000p-13; // -1/7!
    __y = __y * __x2 + 0x1.1111111111110p-7;  //  1/5!
    __y = __y * __x2 - 0x1.5555555555555p-3;  // -1/3!
    return __y * (__x2 * __x) + __x;
  }

// }}}
// __zero_low_bits {{{
template <int _Bits, typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
  __zero_low_bits(simd<_Tp, _Abi> __x)
  {
    const simd<_Tp, _Abi> __bitmask
      = __bit_cast<_Tp>(~make_unsigned_t<__int_for_sizeof_t<_Tp>>() << _Bits);
    return {__private_init,
            _Abi::_SimdImpl::_S_bit_and(__data(__x), __data(__bitmask))};
  }

// }}}
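// For illustration: with _Tp = double and _Bits = 26 the mask above is
// __bit_cast<double>(0xFFFF'FFFF'FC00'0000ull), so __zero_low_bits<26>(__y)
// clears the 26 least significant mantissa bits of each element. The result
// is a "short" value whose products with constants that also have short
// mantissas (e.g. the split pi/2 constants in __fold_input below) are exact.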
// __fold_input {{{

/**@internal
 * Fold @p x into [-¼π, ¼π] and remember the quadrant it came from:
 * quadrant 0: [-¼π,  ¼π]
 * quadrant 1: [ ¼π,  ¾π]
 * quadrant 2: [ ¾π, 1¼π]
 * quadrant 3: [1¼π, 1¾π]
 *
 * The algorithm determines `y` as the multiple `x - y * ¼π = [-¼π, ¼π]`. Using
 * a bitmask, `y` is reduced to `quadrant`. `y` can be calculated as
 * ```
 * y = trunc(x / ¼π);
 * y += fmod(y, 2);
 * ```
 * This can be simplified by moving the (implicit) division by 2 into the
 * truncation expression. The `+= fmod` effect can then be achieved by using
 * rounding instead of truncation: `y = round(x / ½π) * 2`. If precision allows,
 * `2/π * x` is better (faster).
 */
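// Worked example of the folding (illustrative): for x = 2.5,
// round(x * 2/π) = round(1.5915) = 2, so the element lands in quadrant
// 2 & 3 = 2 and the reduced argument is x - 2 * π/2 ≈ -0.6416, which lies in
// [-¼π, ¼π] as required by __sinSeries/__cosSeries.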
template <typename _Tp, typename _Abi>
  struct _Folded
  {
    simd<_Tp, _Abi> _M_x;
    rebind_simd_t<int, simd<_Tp, _Abi>> _M_quadrant;
  };

namespace __math_float {
inline constexpr float __pi_over_4 = 0x1.921FB6p-1f; // π/4
inline constexpr float __2_over_pi = 0x1.45F306p-1f; // 2/π
inline constexpr float __pi_2_5bits0
  = 0x1.921fc0p0f; // π/2, 5 0-bits (least significant)
inline constexpr float __pi_2_5bits0_rem
  = -0x1.5777a6p-21f; // π/2 - __pi_2_5bits0
} // namespace __math_float
namespace __math_double {
inline constexpr double __pi_over_4 = 0x1.921fb54442d18p-1; // π/4
inline constexpr double __2_over_pi = 0x1.45F306DC9C883p-1; // 2/π
inline constexpr double __pi_2 = 0x1.921fb54442d18p0;       // π/2
} // namespace __math_double

template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<float, _Abi>
  __fold_input(const simd<float, _Abi>& __x)
  {
    using _V = simd<float, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_float;
    _Folded<float, _Abi> __r;
    __r._M_x = abs(__x);
#if 0
    // zero most mantissa bits:
    constexpr float __1_over_pi = 0x1.45F306p-2f; // 1/π
    const auto __y = (__r._M_x * __1_over_pi + 0x1.8p23f) - 0x1.8p23f;
    // split π into 4 parts, the first three with 13 trailing zeros (to make the
    // following multiplications precise):
    constexpr float __pi0 = 0x1.920000p1f;
    constexpr float __pi1 = 0x1.fb4000p-11f;
    constexpr float __pi2 = 0x1.444000p-23f;
    constexpr float __pi3 = 0x1.68c234p-38f;
    __r._M_x - __y*__pi0 - __y*__pi1 - __y*__pi2 - __y*__pi3
#else
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      __r._M_quadrant = 0;
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 6 * __pi_over_4)))
      {
        const _V __y = nearbyint(__r._M_x * __2_over_pi);
        __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // __y mod 4
        __r._M_x -= __y * __pi_2_5bits0;
        __r._M_x -= __y * __pi_2_5bits0_rem;
      }
    else
      {
        using __math_double::__2_over_pi;
        using __math_double::__pi_2;
        using _VD = rebind_simd_t<double, _V>;
        _VD __xd = static_simd_cast<_VD>(__r._M_x);
        _VD __y = nearbyint(__xd * __2_over_pi);
        __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // = __y mod 4
        __r._M_x = static_simd_cast<_V>(__xd - __y * __pi_2);
      }
#endif
    return __r;
  }

template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<double, _Abi>
  __fold_input(const simd<double, _Abi>& __x)
  {
    using _V = simd<double, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_double;

    _Folded<double, _Abi> __r;
    __r._M_x = abs(__x);
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      {
        __r._M_quadrant = 0;
        return __r;
      }
    const _V __y = nearbyint(__r._M_x / (2 * __pi_over_4));
    __r._M_quadrant = static_simd_cast<_IV>(__y) & 3;

    if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 1025 * __pi_over_4)))
      {
        // x - y * pi/2, y uses no more than 11 mantissa bits
        __r._M_x -= __y * 0x1.921FB54443000p0;
        __r._M_x -= __y * -0x1.73DCB3B39A000p-43;
        __r._M_x -= __y * 0x1.45C06E0E68948p-86;
      }
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__y <= 0x1.0p30)))
      {
        // x - y * pi/2, y uses no more than 29 mantissa bits
        __r._M_x -= __y * 0x1.921FB40000000p0;
        __r._M_x -= __y * 0x1.4442D00000000p-24;
        __r._M_x -= __y * 0x1.8469898CC5170p-48;
      }
    else
      {
        // x - y * pi/2, y may require all mantissa bits
        const _V __y_hi = __zero_low_bits<26>(__y);
        const _V __y_lo = __y - __y_hi;
        const auto __pi_2_1 = 0x1.921FB50000000p0;
        const auto __pi_2_2 = 0x1.110B460000000p-26;
        const auto __pi_2_3 = 0x1.1A62630000000p-54;
        const auto __pi_2_4 = 0x1.8A2E03707344Ap-81;
        __r._M_x = __r._M_x - __y_hi * __pi_2_1
                     - max(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
                     - min(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
                     - max(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
                     - min(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
                     - max(__y * __pi_2_4, __y_lo * __pi_2_3)
                     - min(__y * __pi_2_4, __y_lo * __pi_2_3);
      }
    return __r;
  }

// }}}
// __extract_exponent_as_int {{{
template <typename _Tp, typename _Abi>
  rebind_simd_t<int, simd<_Tp, _Abi>>
  __extract_exponent_as_int(const simd<_Tp, _Abi>& __v)
  {
    using _Vp = simd<_Tp, _Abi>;
    using _Up = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
    using namespace std::experimental::__float_bitwise_operators;
    const _Vp __exponent_mask
      = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
    return static_simd_cast<rebind_simd_t<int, _Vp>>(
      __bit_cast<rebind_simd_t<_Up, _Vp>>(__v & __exponent_mask)
      >> (__digits_v<_Tp> - 1));
  }

// }}}
// __impl_or_fallback {{{
template <typename ImplFun, typename FallbackFun, typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback_dispatch(int, ImplFun&& __impl_fun, FallbackFun&&,
                              _Args&&... __args)
    -> decltype(__impl_fun(static_cast<_Args&&>(__args)...))
  { return __impl_fun(static_cast<_Args&&>(__args)...); }

template <typename ImplFun, typename FallbackFun, typename... _Args>
  inline auto
  __impl_or_fallback_dispatch(float, ImplFun&&, FallbackFun&& __fallback_fun,
                              _Args&&... __args)
    -> decltype(__fallback_fun(static_cast<_Args&&>(__args)...))
  { return __fallback_fun(static_cast<_Args&&>(__args)...); }

template <typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback(_Args&&... __args)
  {
    return __impl_or_fallback_dispatch(int(), static_cast<_Args&&>(__args)...);
  }
//}}}

// trigonometric functions {{{
_GLIBCXX_SIMD_MATH_CALL_(acos)
_GLIBCXX_SIMD_MATH_CALL_(asin)
_GLIBCXX_SIMD_MATH_CALL_(atan)
_GLIBCXX_SIMD_MATH_CALL2_(atan2, _Tp)

/*
 * algorithm for sine and cosine:
 *
 * The result can be calculated with sine or cosine depending on the π/4
 * section the input is in.
 * sine   ≈ __x + __x³
 * cosine ≈ 1 - __x²
 *
 * sine:
 * Map -__x to __x and invert the output
 * Extend precision of __x - n * π/4 by calculating
 * ((__x - n * p1) - n * p2) - n * p3 (p1 + p2 + p3 = π/4)
 *
 * Calculate Taylor series with tuned coefficients.
 * Fix sign.
 */
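// A note on the sign handling in cos() below (illustrative reading):
// __sign_flip is computed as _V(-0.f) & static_simd_cast<_V>((1 + quadrant) << 30).
// For quadrant = 0, 1, 2, 3 the integer (1 + quadrant) << 30 is positive,
// negative, negative, and (after wrap-around) zero, so after conversion to
// float and masking with -0.f only quadrants 1 and 2 keep a set sign bit.
// XOR-ing that bit onto the series result yields the sign pattern
// +, -, -, + listed in the quadrant table inside cos().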
// cos{{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  cos(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_cos(__data(__x))};
    else
      {
        if constexpr (is_same_v<_Tp, float>)
          if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 393382)))
            return static_simd_cast<_V>(
              cos(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

        const auto __f = __fold_input(__x);
        // quadrant | effect
        //        0 | cosSeries, +
        //        1 | sinSeries, -
        //        2 | cosSeries, -
        //        3 | sinSeries, +
        using namespace std::experimental::__float_bitwise_operators;
        const _V __sign_flip
          = _V(-0.f) & static_simd_cast<_V>((1 + __f._M_quadrant) << 30);

        const auto __need_cos = (__f._M_quadrant & 1) == 0;
        if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_cos)))
          return __sign_flip ^ __cosSeries(__f._M_x);
        else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_cos)))
          return __sign_flip ^ __sinSeries(__f._M_x);
        else // some_of(__need_cos)
          {
            _V __r = __sinSeries(__f._M_x);
            where(__need_cos.__cvt(), __r) = __cosSeries(__f._M_x);
            return __r ^ __sign_flip;
          }
      }
  }

template <typename _Tp>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
  cos(simd<_Tp, simd_abi::scalar> __x)
  { return std::cos(__data(__x)); }

//}}}
// sin{{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sin(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_sin(__data(__x))};
    else
      {
        if constexpr (is_same_v<_Tp, float>)
          if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 527449)))
            return static_simd_cast<_V>(
              sin(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

        const auto __f = __fold_input(__x);
        // quadrant | effect
        //        0 | sinSeries
        //        1 | cosSeries
        //        2 | sinSeries, sign flip
        //        3 | cosSeries, sign flip
        using namespace std::experimental::__float_bitwise_operators;
        const auto __sign_flip
          = (__x ^ static_simd_cast<_V>(1 - __f._M_quadrant)) & _V(_Tp(-0.));

        const auto __need_sin = (__f._M_quadrant & 1) == 0;
        if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_sin)))
          return __sign_flip ^ __sinSeries(__f._M_x);
        else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_sin)))
          return __sign_flip ^ __cosSeries(__f._M_x);
        else // some_of(__need_sin)
          {
            _V __r = __cosSeries(__f._M_x);
            where(__need_sin.__cvt(), __r) = __sinSeries(__f._M_x);
            return __sign_flip ^ __r;
          }
      }
  }

template <typename _Tp>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
  sin(simd<_Tp, simd_abi::scalar> __x)
  { return std::sin(__data(__x)); }

//}}}
_GLIBCXX_SIMD_MATH_CALL_(tan)
_GLIBCXX_SIMD_MATH_CALL_(acosh)
_GLIBCXX_SIMD_MATH_CALL_(asinh)
_GLIBCXX_SIMD_MATH_CALL_(atanh)
_GLIBCXX_SIMD_MATH_CALL_(cosh)
_GLIBCXX_SIMD_MATH_CALL_(sinh)
_GLIBCXX_SIMD_MATH_CALL_(tanh)
// }}}
// exponential functions {{{
_GLIBCXX_SIMD_MATH_CALL_(exp)
_GLIBCXX_SIMD_MATH_CALL_(exp2)
_GLIBCXX_SIMD_MATH_CALL_(expm1)

// }}}
// frexp {{{
#if _GLIBCXX_SIMD_X86INTRIN
template <typename _Tp, size_t _Np>
  _SimdWrapper<_Tp, _Np>
  __getexp(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getexp_ps(__to_intrin(__x)));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getexp_ps(__auto_bitcast(__to_intrin(__x))));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getexp_ps(__x);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getexp_ps(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getexp_ps(__x);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getexp_pd(__x);
    else
      __assert_unreachable<_Tp>();
  }

template <typename _Tp, size_t _Np>
  _SimdWrapper<_Tp, _Np>
  __getmant_avx512(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getmant_ps(__to_intrin(__x), _MM_MANT_NORM_p5_1,
                                           _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getmant_ps(__auto_bitcast(__to_intrin(__x)),
                                              _MM_MANT_NORM_p5_1,
                                              _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
                                       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getmant_ps(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
                                       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
                                       _MM_MANT_SIGN_src));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else
      __assert_unreachable<_Tp>();
  }
#endif // _GLIBCXX_SIMD_X86INTRIN

/**
 * splits @p __v into exponent and mantissa; the sign is kept with the mantissa
 *
 * The return value will be in the range [0.5, 1.0[
 * The @p __e value will be an integer defining the power-of-two exponent
 */
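// For illustration: frexp(8.0) yields a mantissa of 0.5 with exponent 4
// (8 = 0.5 * 2^4), frexp(-3.0) yields -0.75 with exponent 2, and frexp(0.5)
// yields 0.5 with exponent 0; the simd overload below applies this element-wise.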
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  frexp(const simd<_Tp, _Abi>& __x, _Samesize<int, simd<_Tp, _Abi>>* __exp)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      {
        int __tmp;
        const auto __r = std::frexp(__x[0], &__tmp);
        (*__exp)[0] = __tmp;
        return __r;
      }
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      {
        return {__private_init,
                _Abi::_SimdImpl::_S_frexp(__data(__x), __data(*__exp))};
#if _GLIBCXX_SIMD_X86INTRIN
      }
    else if constexpr (__have_avx512f)
      {
        constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
        constexpr size_t _NI = _Np < 4 ? 4 : _Np;
        const auto __v = __data(__x);
        const auto __isnonzero
          = _Abi::_SimdImpl::_S_isnonzerovalue_mask(__v._M_data);
        const _SimdWrapper<int, _NI> __exp_plus1
          = 1 + __convert<_SimdWrapper<int, _NI>>(__getexp(__v))._M_data;
        const _SimdWrapper<int, _Np> __e = __wrapper_bitcast<int, _Np>(
          _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _NI>(__isnonzero),
                                      _SimdWrapper<int, _NI>(), __exp_plus1));
        simd_abi::deduce_t<int, _Np>::_CommonImpl::_S_store(__e, __exp);
        return {__private_init,
                _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _Np>(
                                              __isnonzero),
                                            __v, __getmant_avx512(__v))};
#endif // _GLIBCXX_SIMD_X86INTRIN
      }
    else
      {
        // fallback implementation
        static_assert(sizeof(_Tp) == 4 || sizeof(_Tp) == 8);
        using _V = simd<_Tp, _Abi>;
        using _IV = rebind_simd_t<int, _V>;
        using namespace std::experimental::__proposed;
        using namespace std::experimental::__float_bitwise_operators;

        constexpr int __exp_adjust = sizeof(_Tp) == 4 ? 0x7e : 0x3fe;
        constexpr int __exp_offset = sizeof(_Tp) == 4 ? 0x70 : 0x200;
        constexpr _Tp __subnorm_scale = sizeof(_Tp) == 4 ? 0x1p112 : 0x1p512;
        _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __exponent_mask
          = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
        _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __p5_1_exponent
          = -(2 - __epsilon_v<_Tp>) / 2; // 0xbf7fffff or 0xbfefffffffffffff

        _V __mant = __p5_1_exponent & (__exponent_mask | __x); // +/-[.5, 1)
        const _IV __exponent_bits = __extract_exponent_as_int(__x);
        if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))))
          {
            *__exp
              = simd_cast<_Samesize<int, _V>>(__exponent_bits - __exp_adjust);
            return __mant;
          }

#if __FINITE_MATH_ONLY__
        // at least one element of __x is 0 or subnormal, the rest is normal
        // (inf and NaN are excluded by -ffinite-math-only)
        const auto __iszero_inf_nan = __x == 0;
#else
        const auto __as_int
          = __bit_cast<rebind_simd_t<__int_for_sizeof_t<_Tp>, _V>>(abs(__x));
        const auto __inf
          = __bit_cast<rebind_simd_t<__int_for_sizeof_t<_Tp>, _V>>(
              _V(__infinity_v<_Tp>));
        const auto __iszero_inf_nan = static_simd_cast<typename _V::mask_type>(
          __as_int == 0 || __as_int >= __inf);
#endif

        const _V __scaled_subnormal = __x * __subnorm_scale;
        const _V __mant_subnormal
          = __p5_1_exponent & (__exponent_mask | __scaled_subnormal);
        where(!isnormal(__x), __mant) = __mant_subnormal;
        where(__iszero_inf_nan, __mant) = __x;
        _IV __e = __extract_exponent_as_int(__scaled_subnormal);
        using _MaskType =
          typename conditional_t<sizeof(typename _V::value_type) == sizeof(int),
                                 _V, _IV>::mask_type;
        const _MaskType __value_isnormal = isnormal(__x).__cvt();
        where(__value_isnormal.__cvt(), __e) = __exponent_bits;
        static_assert(sizeof(_IV) == sizeof(__value_isnormal));
        const _IV __offset
          = (__bit_cast<_IV>(__value_isnormal) & _IV(__exp_adjust))
            | (__bit_cast<_IV>(static_simd_cast<_MaskType>(__exponent_bits == 0)
                               & static_simd_cast<_MaskType>(__x != 0))
               & _IV(__exp_adjust + __exp_offset));
        *__exp = simd_cast<_Samesize<int, _V>>(__e - __offset);
        return __mant;
      }
  }

// }}}
_GLIBCXX_SIMD_MATH_CALL2_(ldexp, int)
_GLIBCXX_SIMD_MATH_CALL_(ilogb)

// logarithms {{{
_GLIBCXX_SIMD_MATH_CALL_(log)
_GLIBCXX_SIMD_MATH_CALL_(log10)
_GLIBCXX_SIMD_MATH_CALL_(log1p)
_GLIBCXX_SIMD_MATH_CALL_(log2)

//}}}
// logb{{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, _Abi>>
  logb(const simd<_Tp, _Abi>& __x)
  {
    constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
    if constexpr (_Np == 1)
      return std::logb(__x[0]);
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      {
        return {__private_init,
                __data(__x)._M_apply_per_chunk([](auto __impl, auto __xx) {
                  using _V = typename decltype(__impl)::simd_type;
                  return __data(
                    std::experimental::logb(_V(__private_init, __xx)));
                })};
      }
#if _GLIBCXX_SIMD_X86INTRIN // {{{
    else if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return {__private_init,
              __auto_bitcast(_mm_getexp_ps(__to_intrin(__as_vector(__x))))};
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return {__private_init, _mm_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return {__private_init, _mm256_getexp_ps(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return {__private_init, _mm256_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return {__private_init,
              __lo256(_mm512_getexp_ps(__auto_bitcast(__data(__x))))};
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return {__private_init,
              __lo256(_mm512_getexp_pd(__auto_bitcast(__data(__x))))};
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return {__private_init, _mm512_getexp_ps(__data(__x))};
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return {__private_init, _mm512_getexp_pd(__data(__x))};
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
    else
      {
        using _V = simd<_Tp, _Abi>;
        using namespace std::experimental::__proposed;
        auto __is_normal = isnormal(__x);

        // work on abs(__x) to reflect the return value on Linux for negative
        // inputs (domain-error => implementation-defined value is returned)
        const _V abs_x = abs(__x);

        // __exponent(__x) returns the exponent value (bias removed) as
        // simd<_Up> with integral _Up
        auto&& __exponent = [](const _V& __v) {
          using namespace std::experimental::__proposed;
          using _IV = rebind_simd_t<
            conditional_t<sizeof(_Tp) == sizeof(_LLong), _LLong, int>, _V>;
          return (__bit_cast<_IV>(__v) >> (__digits_v<_Tp> - 1))
                 - (__max_exponent_v<_Tp> - 1);
        };
        _V __r = static_simd_cast<_V>(__exponent(abs_x));
        if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__is_normal)))
          // without corner cases (nan, inf, subnormal, zero) we have our
          // answer:
          return __r;
        const auto __is_zero = __x == 0;
        const auto __is_nan = isnan(__x);
        const auto __is_inf = isinf(__x);
        where(__is_zero, __r) = -__infinity_v<_Tp>;
        where(__is_nan, __r) = __x;
        where(__is_inf, __r) = __infinity_v<_Tp>;
        __is_normal |= __is_zero || __is_nan || __is_inf;
        if (all_of(__is_normal))
          // at this point everything but subnormals is handled
          return __r;
        // subnormals repeat the exponent extraction after multiplication of
        // the input with a floating point value that has 112 (0x70) in its
        // exponent (not too big for sp and large enough for dp)
        const _V __scaled = abs_x * _Tp(0x1p112);
        _V __scaled_exp = static_simd_cast<_V>(__exponent(__scaled) - 112);
        where(__is_normal, __scaled_exp) = __r;
        return __scaled_exp;
      }
  }

//}}}
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  modf(const simd<_Tp, _Abi>& __x, simd<_Tp, _Abi>* __iptr)
  {
    if constexpr (__is_scalar_abi<_Abi>()
                  || (__is_fixed_size_abi_v<_Abi>
                      && simd_size_v<_Tp, _Abi> == 1))
      {
        _Tp __tmp;
        _Tp __r = std::modf(__x[0], &__tmp);
        __iptr[0] = __tmp;
        return __r;
      }
    else
      {
        const auto __integral = trunc(__x);
        *__iptr = __integral;
        auto __r = __x - __integral;
#if !__FINITE_MATH_ONLY__
        where(isinf(__x), __r) = _Tp();
#endif
        return copysign(__r, __x);
      }
  }

_GLIBCXX_SIMD_MATH_CALL2_(scalbn, int)
_GLIBCXX_SIMD_MATH_CALL2_(scalbln, long)

_GLIBCXX_SIMD_MATH_CALL_(cbrt)

_GLIBCXX_SIMD_MATH_CALL_(abs)
_GLIBCXX_SIMD_MATH_CALL_(fabs)

// [parallel.simd.math] only asks for is_floating_point_v<_Tp> and forgot to
// allow signed integral _Tp
template <typename _Tp, typename _Abi>
  enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
  abs(const simd<_Tp, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }

template <typename _Tp, typename _Abi>
  enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
  fabs(const simd<_Tp, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }

// the following are overloads for functions in <cstdlib> and not covered by
// [parallel.simd.math]. I don't see much value in making them work, though
/*
template <typename _Abi> simd<long, _Abi> labs(const simd<long, _Abi> &__x)
{ return {__private_init, _Abi::_SimdImpl::abs(__data(__x))}; }

template <typename _Abi> simd<long long, _Abi> llabs(const simd<long long, _Abi>
&__x)
{ return {__private_init, _Abi::_SimdImpl::abs(__data(__x))}; }
*/

#define _GLIBCXX_SIMD_CVTING2(_NAME) \
  template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y) \
  { \
    return _NAME(__x, __y); \
  } \
 \
  template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y) \
  { \
    return _NAME(__x, __y); \
  }

#define _GLIBCXX_SIMD_CVTING3(_NAME) \
  template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
    const simd<_Tp, _Abi>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
  template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
    const simd<_Tp, _Abi>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
  template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
  template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
  template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
  template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, \
    const __type_identity_t<simd<_Tp, _Abi>>& __y, const simd<_Tp, _Abi>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  }

template <typename _R, typename _ToApply, typename _Tp, typename... _Tps>
  _GLIBCXX_SIMD_INTRINSIC _R
  __fixed_size_apply(_ToApply&& __apply, const _Tp& __arg0,
                     const _Tps&... __args)
  {
    return {__private_init,
            __data(__arg0)._M_apply_per_chunk(
              [&](auto __impl, const auto&... __inner) {
                using _V = typename decltype(__impl)::simd_type;
                return __data(__apply(_V(__private_init, __inner)...));
              },
              __data(__args)...)};
  }

template <typename _VV>
  __remove_cvref_t<_VV>
  __hypot(_VV __x, _VV __y)
  {
    using _V = __remove_cvref_t<_VV>;
    using _Tp = typename _V::value_type;
    if constexpr (_V::size() == 1)
      return std::hypot(_Tp(__x[0]), _Tp(__y[0]));
    else if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
      {
        return __fixed_size_apply<_V>(
                 [](auto __a, auto __b) { return hypot(__a, __b); }, __x, __y);
      }
    else
      {
        // A simple solution for _Tp == float would be to cast to double and
        // simply calculate sqrt(x²+y²) as it can't over-/underflow anymore
        // with dp. It still needs the Annex F fixups though and isn't faster
        // on Skylake-AVX512 (not even for SSE and AVX vectors, and really bad
        // for AVX-512).
        using namespace __float_bitwise_operators;
        _V __absx = abs(__x);          // no error
        _V __absy = abs(__y);          // no error
        _V __hi = max(__absx, __absy); // no error
        _V __lo = min(__absy, __absx); // no error

        // round __hi down to the next power-of-2:
        _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
        if constexpr (__have_neon && !__have_neon_a32)
          { // With ARMv7 NEON, we have no subnormals and must use a slightly
            // different strategy
            const _V __hi_exp = __hi & __inf;
            _V __scale_back = __hi_exp;
            // For large exponents (max & max/2) the inversion comes too close
            // to subnormals. Subtract 3 from the exponent:
            where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
            // Invert and adjust for the off-by-one error of inversion via xor:
            const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
            const _V __h1 = __hi * __scale;
            const _V __l1 = __lo * __scale;
            _V __r = __scale_back * sqrt(__h1 * __h1 + __l1 * __l1);
            // Fix up hypot(0, 0) to not be NaN:
            where(__hi == 0, __r) = 0;
            return __r;
          }
#endif

#ifdef __FAST_MATH__
        // With fast-math, ignore precision of subnormals and inputs from
        // __finite_max_v/2 to __finite_max_v. This removes all
        // branching/masking.
        if constexpr (true)
#else
        if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
                                    && all_of(isnormal(__y))))
#endif
          {
            const _V __hi_exp = __hi & __inf;
            // ((__hi + __hi) & __inf) ^ __inf almost works for computing
            // __scale, except when (__hi + __hi) & __inf == __inf, in which
            // case __scale becomes 0 (should be min/2 instead) and thus loses
            // the information from __lo.
#ifdef __FAST_MATH__
            using _Ip = __int_for_sizeof_t<_Tp>;
            using _IV = rebind_simd_t<_Ip, _V>;
            const auto __as_int = __bit_cast<_IV>(__hi_exp);
            const _V __scale
              = __bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
            const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
            _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __mant_mask
              = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
            const _V __h1 = (__hi & __mant_mask) | _V(1);
            const _V __l1 = __lo * __scale;
            return __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
          }
        else
          {
            // slower path to support subnormals
            // if __hi is subnormal, avoid scaling by inf & final mul by 0
            // (which yields NaN) by using min()
            _V __scale = _V(1 / __norm_min_v<_Tp>);
            // invert exponent w/o error and w/o using the slow divider unit:
            // xor inverts the exponent but is off by 1. Multiplication with .5
            // adjusts for the discrepancy.
            where(__hi >= __norm_min_v<_Tp>, __scale)
              = ((__hi & __inf) ^ __inf) * _Tp(.5);
            // adjust final exponent for subnormal inputs
            _V __hi_exp = __norm_min_v<_Tp>;
            where(__hi >= __norm_min_v<_Tp>, __hi_exp)
              = __hi & __inf;           // no error
            _V __h1 = __hi * __scale;   // no error
            _V __l1 = __lo * __scale;   // no error

            // sqrt(x²+y²) = e*sqrt((x/e)²+(y/e)²):
            // this ensures no overflow in the argument to sqrt
            _V __r = __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
#ifdef __STDC_IEC_559__
            // fixup for Annex F requirements
            // the naive fixup goes like this:
            //
            // where(__l1 == 0, __r) = __hi;
            // where(isunordered(__x, __y), __r) = __quiet_NaN_v<_Tp>;
            // where(isinf(__absx) || isinf(__absy), __r) = __inf;
            //
            // The fixup can be prepared in parallel with the sqrt, requiring a
            // single blend step after hi_exp * sqrt, reducing latency and
            // throughput:
            _V __fixup = __hi; // __lo == 0
            where(isunordered(__x, __y), __fixup) = __quiet_NaN_v<_Tp>;
            where(isinf(__absx) || isinf(__absy), __fixup) = __inf;
            where(!(__lo == 0 || isunordered(__x, __y)
                    || (isinf(__absx) || isinf(__absy))),
                  __fixup)
              = __r;
            __r = __fixup;
#endif
            return __r;
          }
      }
  }
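// Worked example of the scaling identity used above (illustrative): for
// x = 3 and y = 4, __hi = 4 and __hi_exp = 4 (the power of two taken from
// __hi's exponent bits), so __scale = 1/4, __h1 = 1, __l1 = 0.75 and the
// result is 4 * sqrt(1 + 0.5625) = 5. Because the squares are formed on the
// scaled values, x² + y² cannot overflow even when x and y are close to
// __finite_max_v<_Tp>.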
| 1088 |
|
---|
| 1089 | template <typename _Tp, typename _Abi>
|
---|
| 1090 | _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
|
---|
| 1091 | hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
|
---|
| 1092 | {
|
---|
| 1093 | return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
|
---|
| 1094 | const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x,
|
---|
| 1095 | __y);
|
---|
| 1096 | }
|
---|
| 1097 |
|
---|
| 1098 | _GLIBCXX_SIMD_CVTING2(hypot)
|
---|
| 1099 |
|
---|
| 1100 | template <typename _VV>
|
---|
| 1101 | __remove_cvref_t<_VV>
|
---|
| 1102 | __hypot(_VV __x, _VV __y, _VV __z)
|
---|
| 1103 | {
|
---|
| 1104 | using _V = __remove_cvref_t<_VV>;
|
---|
| 1105 | using _Abi = typename _V::abi_type;
|
---|
| 1106 | using _Tp = typename _V::value_type;
|
---|
| 1107 | /* FIXME: enable after PR77776 is resolved
|
---|
| 1108 | if constexpr (_V::size() == 1)
|
---|
| 1109 | return std::hypot(_Tp(__x[0]), _Tp(__y[0]), _Tp(__z[0]));
|
---|
| 1110 | else
|
---|
| 1111 | */
|
---|
| 1112 | if constexpr (__is_fixed_size_abi_v<_Abi> && _V::size() > 1)
|
---|
| 1113 | {
|
---|
| 1114 | return __fixed_size_apply<simd<_Tp, _Abi>>(
|
---|
| 1115 | [](auto __a, auto __b, auto __c) { return hypot(__a, __b, __c); },
|
---|
| 1116 | __x, __y, __z);
|
---|
| 1117 | }
|
---|
| 1118 | else
|
---|
| 1119 | {
|
---|
| 1120 | using namespace __float_bitwise_operators;
|
---|
| 1121 | const _V __absx = abs(__x); // no error
|
---|
| 1122 | const _V __absy = abs(__y); // no error
|
---|
| 1123 | const _V __absz = abs(__z); // no error
|
---|
| 1124 | _V __hi = max(max(__absx, __absy), __absz); // no error
|
---|
| 1125 | _V __l0 = min(__absz, max(__absx, __absy)); // no error
|
---|
| 1126 | _V __l1 = min(__absy, __absx); // no error
|
---|
| 1127 | if constexpr (__digits_v<_Tp> == 64 && __max_exponent_v<_Tp> == 0x4000
|
---|
| 1128 | && __min_exponent_v<_Tp> == -0x3FFD && _V::size() == 1)
|
---|
| 1129 | { // Seems like x87 fp80, where bit 63 is always 1 unless subnormal or
|
---|
| 1130 | // NaN. In this case the bit-tricks don't work, they require IEC559
|
---|
| 1131 | // binary32 or binary64 format.
|
---|
| 1132 | #ifdef __STDC_IEC_559__
|
---|
| 1133 | // fixup for Annex F requirements
|
---|
| 1134 | if (isinf(__absx[0]) || isinf(__absy[0]) || isinf(__absz[0]))
|
---|
| 1135 | return __infinity_v<_Tp>;
|
---|
| 1136 | else if (isunordered(__absx[0], __absy[0] + __absz[0]))
|
---|
| 1137 | return __quiet_NaN_v<_Tp>;
|
---|
| 1138 | else if (__l0[0] == 0 && __l1[0] == 0)
|
---|
| 1139 | return __hi;
|
---|
| 1140 | #endif
|
---|
| 1141 | _V __hi_exp = __hi;
|
---|
| 1142 | const _ULLong __tmp = 0x8000'0000'0000'0000ull;
|
---|
| 1143 | __builtin_memcpy(&__data(__hi_exp), &__tmp, 8);
|
---|
            const _V __scale = 1 / __hi_exp;
            __hi *= __scale;
            __l0 *= __scale;
            __l1 *= __scale;
            return __hi_exp * sqrt((__l0 * __l0 + __l1 * __l1) + __hi * __hi);
          }
        else
          {
            // round __hi down to the next power-of-2:
            _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
            if constexpr (_V::size() > 1 && __have_neon && !__have_neon_a32)
              { // With ARMv7 NEON, we have no subnormals and must use a
                // slightly different strategy.
                const _V __hi_exp = __hi & __inf;
                _V __scale_back = __hi_exp;
                // For large exponents (max & max/2) the inversion comes too
                // close to subnormals. Subtract 3 from the exponent:
                where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
                // Invert and adjust for the off-by-one error of inversion via
                // xor:
                const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
                const _V __h1 = __hi * __scale;
                __l0 *= __scale;
                __l1 *= __scale;
                _V __lo = __l0 * __l0
                            + __l1 * __l1; // add the two smaller values first
                asm("" : "+m"(__lo));
                _V __r = __scale_back * sqrt(__h1 * __h1 + __lo);
                // Fix up hypot(0, 0, 0) to not be NaN:
                where(__hi == 0, __r) = 0;
                return __r;
              }
#endif

#ifdef __FAST_MATH__
            // With fast-math, ignore the precision of subnormals and of
            // inputs between __finite_max_v/2 and __finite_max_v. This
            // removes all branching/masking.
            if constexpr (true)
#else
            if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
                                          && all_of(isnormal(__y))
                                          && all_of(isnormal(__z))))
#endif
              {
                const _V __hi_exp = __hi & __inf;
                // ((__hi + __hi) & __inf) ^ __inf almost works for computing
                // __scale, except when (__hi + __hi) & __inf == __inf, in
                // which case __scale becomes 0 (it should be min/2 instead)
                // and thus loses the information from __lo.
#ifdef __FAST_MATH__
                using _Ip = __int_for_sizeof_t<_Tp>;
                using _IV = rebind_simd_t<_Ip, _V>;
                const auto __as_int = __bit_cast<_IV>(__hi_exp);
                const _V __scale
                  = __bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
                const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
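                // Worked example (illustrative, binary64): for __hi == 6.0,
                // __hi_exp == 4.0 == 2^2 and both variants above yield
                // __scale == 0.25 == 1 / __hi_exp exactly: the xor flips the
                // exponent bits (4.0 -> 0.5, off by one) and * _Tp(.5)
                // corrects it, while the fast-math variant performs the same
                // negation of the exponent in the integer domain.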
                constexpr _Tp __mant_mask
                  = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
                const _V __h1 = (__hi & _V(__mant_mask)) | _V(1);
                __l0 *= __scale;
                __l1 *= __scale;
                const _V __lo
                  = __l0 * __l0
                      + __l1 * __l1; // add the two smaller values first
                return __hi_exp * sqrt(__lo + __h1 * __h1);
              }
            else
              {
                // slower path to support subnormals
                // if __hi is subnormal, avoid scaling by inf & a final mul
                // by 0 (which yields NaN) by using min()
                _V __scale = _V(1 / __norm_min_v<_Tp>);
                // Invert the exponent w/o error and w/o using the slow
                // divider unit: xor inverts the exponent but is off by one;
                // the multiplication with .5 adjusts for the discrepancy.
                where(__hi >= __norm_min_v<_Tp>, __scale)
                  = ((__hi & __inf) ^ __inf) * _Tp(.5);
                // adjust the final exponent for subnormal inputs
                _V __hi_exp = __norm_min_v<_Tp>;
                where(__hi >= __norm_min_v<_Tp>, __hi_exp)
                  = __hi & __inf;           // no error
                _V __h1 = __hi * __scale;   // no error
                __l0 *= __scale;            // no error
                __l1 *= __scale;            // no error
                _V __lo = __l0 * __l0
                            + __l1 * __l1; // add the two smaller values first
                _V __r = __hi_exp * sqrt(__lo + __h1 * __h1);
#ifdef __STDC_IEC_559__
                // fixup for Annex F requirements
                _V __fixup = __hi; // __lo == 0
                // where(__lo == 0, __fixup) = __hi;
                where(isunordered(__x, __y + __z), __fixup)
                  = __quiet_NaN_v<_Tp>;
                where(isinf(__absx) || isinf(__absy) || isinf(__absz), __fixup)
                  = __inf;
                // Instead of __lo == 0, the following could depend on
                // __h1² == __h1² + __lo (i.e. __hi is so much larger than the
                // other two inputs that the result is exactly __hi). While
                // this may improve precision, it is likely to reduce
                // efficiency if the ISA has FMAs (because __h1² + __lo is an
                // FMA, but the intermediate __h1² must be kept).
                where(!(__lo == 0 || isunordered(__x, __y + __z)
                          || isinf(__absx) || isinf(__absy) || isinf(__absz)),
                      __fixup)
                  = __r;
                __r = __fixup;
#endif
                return __r;
              }
          }
      }
  }

template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
  hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y,
        const simd<_Tp, _Abi>& __z)
  {
    return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
                                 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(
      __x, __y, __z);
  }

_GLIBCXX_SIMD_CVTING3(hypot)
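// Illustrative usage (not normative): every element of r becomes 13.f,
// computed without premature overflow/underflow.
//   using V = std::experimental::native_simd<float>;
//   V x = 3.f, y = 4.f, z = 12.f;
//   V r = hypot(x, y, z);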

_GLIBCXX_SIMD_MATH_CALL2_(pow, _Tp)

_GLIBCXX_SIMD_MATH_CALL_(sqrt)
_GLIBCXX_SIMD_MATH_CALL_(erf)
_GLIBCXX_SIMD_MATH_CALL_(erfc)
_GLIBCXX_SIMD_MATH_CALL_(lgamma)
_GLIBCXX_SIMD_MATH_CALL_(tgamma)
_GLIBCXX_SIMD_MATH_CALL_(ceil)
_GLIBCXX_SIMD_MATH_CALL_(floor)
_GLIBCXX_SIMD_MATH_CALL_(nearbyint)
_GLIBCXX_SIMD_MATH_CALL_(rint)
_GLIBCXX_SIMD_MATH_CALL_(lrint)
_GLIBCXX_SIMD_MATH_CALL_(llrint)

_GLIBCXX_SIMD_MATH_CALL_(round)
_GLIBCXX_SIMD_MATH_CALL_(lround)
_GLIBCXX_SIMD_MATH_CALL_(llround)

_GLIBCXX_SIMD_MATH_CALL_(trunc)

_GLIBCXX_SIMD_MATH_CALL2_(fmod, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(remainder, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(remquo, _Tp, int*)
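// The overloads generated above behave as if the corresponding <cmath>
// function were applied element-wise, e.g. (illustrative):
//   std::experimental::native_simd<double> v = 2.25;
//   auto s = sqrt(v);                   // every element == 1.5
//   auto m = fmod(v, decltype(v)(1.0)); // every element == 0.25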

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  copysign(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      return std::copysign(__x[0], __y[0]);
    else if constexpr (is_same_v<_Tp, long double> && sizeof(_Tp) == 12)
      // Remove this case once __bit_cast is implemented via
      // __builtin_bit_cast. It is necessary because __signmask below cannot
      // be computed at compile time.
      return simd<_Tp, _Abi>(
        [&](auto __i) { return std::copysign(__x[__i], __y[__i]); });
    else
      {
        using _V = simd<_Tp, _Abi>;
        using namespace std::experimental::__float_bitwise_operators;
        _GLIBCXX_SIMD_USE_CONSTEXPR_API auto __signmask = _V(1) ^ _V(-1);
        return (__x & (__x ^ __signmask)) | (__y & __signmask);
      }
  }
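// The bit trick above relies on _V(1) ^ _V(-1) having only the sign bit set:
// __x ^ __signmask flips the sign of __x, ANDing that with __x clears the
// sign bit while keeping all other bits, and OR-ing in (__y & __signmask)
// copies __y's sign. E.g. elements -3.5 in __x and +2.0 in __y produce +3.5.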

_GLIBCXX_SIMD_MATH_CALL2_(nextafter, _Tp)
// not covered in [parallel.simd.math]:
// _GLIBCXX_SIMD_MATH_CALL2_(nexttoward, long double)
_GLIBCXX_SIMD_MATH_CALL2_(fdim, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmax, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmin, _Tp)

_GLIBCXX_SIMD_MATH_CALL3_(fma, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(fpclassify)
_GLIBCXX_SIMD_MATH_CALL_(isfinite)

// isnan and isinf require special treatment because old glibc may declare
// `int isinf(double)`.
template <typename _Tp, typename _Abi, typename...,
          typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isinf(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isinf(__data(__x))}; }

template <typename _Tp, typename _Abi, typename...,
          typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isnan(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isnan(__data(__x))}; }

_GLIBCXX_SIMD_MATH_CALL_(isnormal)

template <typename..., typename _Tp, typename _Abi>
  simd_mask<_Tp, _Abi>
  signbit(simd<_Tp, _Abi> __x)
  {
    if constexpr (is_integral_v<_Tp>)
      {
        if constexpr (is_unsigned_v<_Tp>)
          return simd_mask<_Tp, _Abi>{}; // false
        else
          return __x < 0;
      }
    else
      return {__private_init, _Abi::_SimdImpl::_S_signbit(__data(__x))};
  }
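// Illustrative example: unlike (__x < 0), signbit is true for negative zero:
//   std::experimental::native_simd<float> v = -0.f;
//   all_of(signbit(v)); // true
//   all_of(v < 0);      // false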

_GLIBCXX_SIMD_MATH_CALL2_(isgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isgreaterequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isless, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isunordered, _Tp)

/* not covered in [parallel.simd.math]
template <typename _Abi> __doublev<_Abi> nan(const char* tagp);
template <typename _Abi> __floatv<_Abi> nanf(const char* tagp);
template <typename _Abi> __ldoublev<_Abi> nanl(const char* tagp);

template <typename _V> struct simd_div_t {
  _V quot, rem;
};

template <typename _Abi>
  simd_div_t<_SCharv<_Abi>> div(_SCharv<_Abi> numer, _SCharv<_Abi> denom);
template <typename _Abi>
  simd_div_t<__shortv<_Abi>> div(__shortv<_Abi> numer, __shortv<_Abi> denom);
template <typename _Abi>
  simd_div_t<__intv<_Abi>> div(__intv<_Abi> numer, __intv<_Abi> denom);
template <typename _Abi>
  simd_div_t<__longv<_Abi>> div(__longv<_Abi> numer, __longv<_Abi> denom);
template <typename _Abi>
  simd_div_t<__llongv<_Abi>> div(__llongv<_Abi> numer, __llongv<_Abi> denom);
*/
// special math {{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  assoc_laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
                 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
                 const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>([&](auto __i) {
      return std::assoc_laguerre(__n[__i], __m[__i], __x[__i]);
    });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  assoc_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
                 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
                 const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>([&](auto __i) {
      return std::assoc_legendre(__n[__i], __m[__i], __x[__i]);
    });
  }
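// The wrappers above (and the similar ones below) have no dedicated vector
// implementation; they evaluate element-wise via the simd generator
// constructor, forwarding to the scalar <cmath> special functions.
// Illustrative usage:
//   using V = std::experimental::native_simd<double>;
//   std::experimental::fixed_size_simd<unsigned, V::size()> n = 2u, m = 1u;
//   V x = 0.5;
//   V r = assoc_legendre(n, m, x); // r[i] == std::assoc_legendre(2, 1, 0.5)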

_GLIBCXX_SIMD_MATH_CALL2_(beta, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_1)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_2)
_GLIBCXX_SIMD_MATH_CALL2_(comp_ellint_3, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_i, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_j, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_k, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_neumann, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_1, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_2, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(ellint_3, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(expint)

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  hermite(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
          const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
      [&](auto __i) { return std::hermite(__n[__i], __x[__i]); });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
           const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
      [&](auto __i) { return std::laguerre(__n[__i], __x[__i]); });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
           const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
      [&](auto __i) { return std::legendre(__n[__i], __x[__i]); });
  }

_GLIBCXX_SIMD_MATH_CALL_(riemann_zeta)

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sph_bessel(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
             const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
      [&](auto __i) { return std::sph_bessel(__n[__i], __x[__i]); });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sph_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __l,
               const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
               const simd<_Tp, _Abi>& __theta)
  {
    return simd<_Tp, _Abi>([&](auto __i) {
      return std::sph_legendre(__l[__i], __m[__i], __theta[__i]);
    });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sph_neumann(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
              const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
      [&](auto __i) { return std::sph_neumann(__n[__i], __x[__i]); });
  }
// }}}

#undef _GLIBCXX_SIMD_MATH_CALL_
#undef _GLIBCXX_SIMD_MATH_CALL2_
#undef _GLIBCXX_SIMD_MATH_CALL3_

_GLIBCXX_SIMD_END_NAMESPACE

#endif // __cplusplus >= 201703L
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_

// vim: foldmethod=marker sw=2 ts=8 noet sts=2