// Math overloads for simd -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
#define _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_

#if __cplusplus >= 201703L

#include <utility>
#include <iomanip>

_GLIBCXX_SIMD_BEGIN_NAMESPACE
template <typename _Tp, typename _V>
  using _Samesize = fixed_size_simd<_Tp, _V::size()>;

// _Math_return_type {{{
template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type;

template <typename _DoubleR, typename _Tp, typename _Abi>
  using _Math_return_type_t =
    typename _Math_return_type<_DoubleR, _Tp, _Abi>::type;

template <typename _Tp, typename _Abi>
  struct _Math_return_type<double, _Tp, _Abi>
  { using type = simd<_Tp, _Abi>; };

template <typename _Tp, typename _Abi>
  struct _Math_return_type<bool, _Tp, _Abi>
  { using type = simd_mask<_Tp, _Abi>; };

template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type
  { using type = fixed_size_simd<_DoubleR, simd_size_v<_Tp, _Abi>>; };

//}}}
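// For illustration, the trait above maps the scalar return type of the
// wrapped <cmath> function onto the matching simd type (sketch, with
// N = simd_size_v<_Tp, _Abi>):
//
//   _Math_return_type_t<double, _Tp, _Abi>  ->  simd<_Tp, _Abi>          (e.g. cos)
//   _Math_return_type_t<bool,   _Tp, _Abi>  ->  simd_mask<_Tp, _Abi>     (bool-returning predicates)
//   _Math_return_type_t<int,    _Tp, _Abi>  ->  fixed_size_simd<int, N>  (e.g. ilogb)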
// _GLIBCXX_SIMD_MATH_CALL_ {{{
#define _GLIBCXX_SIMD_MATH_CALL_(__name) \
  template <typename _Tp, typename _Abi, typename..., \
            typename _R = _Math_return_type_t< \
              decltype(std::__name(declval<double>())), _Tp, _Abi>> \
  enable_if_t<is_floating_point_v<_Tp>, _R> \
  __name(simd<_Tp, _Abi> __x) \
  { return {__private_init, _Abi::_SimdImpl::_S_##__name(__data(__x))}; }

// }}}
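// As a rough illustration, _GLIBCXX_SIMD_MATH_CALL_(acos) expands to an
// overload along these lines:
//
//   template <typename _Tp, typename _Abi, typename...,
//             typename _R = _Math_return_type_t<
//               decltype(std::acos(declval<double>())), _Tp, _Abi>>
//   enable_if_t<is_floating_point_v<_Tp>, _R>
//   acos(simd<_Tp, _Abi> __x)
//   { return {__private_init, _Abi::_SimdImpl::_S_acos(__data(__x))}; }
//
// i.e. each generated overload forwards to the ABI-specific implementation
// (_SimdImpl::_S_acos here) and wraps the raw result in the matching simd type.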
//_Extra_argument_type{{{
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type;

template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp*, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>*;
    static constexpr double* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Up*, _Tp, _Abi>
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>*;
    static constexpr _Up* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>;
    static constexpr double declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };

template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>;
    static constexpr _Up declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };

//}}}
// _GLIBCXX_SIMD_MATH_CALL2_ {{{
#define _GLIBCXX_SIMD_MATH_CALL2_(__name, arg2_) \
  template < \
    typename _Tp, typename _Abi, typename..., \
    typename _Arg2 = _Extra_argument_type<arg2_, _Tp, _Abi>, \
    typename _R = _Math_return_type_t< \
      decltype(std::__name(declval<double>(), _Arg2::declval())), _Tp, _Abi>> \
  enable_if_t<is_floating_point_v<_Tp>, _R> \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y) \
  { \
    return {__private_init, \
            _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y))}; \
  } \
  template <typename _Up, typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC _Math_return_type_t< \
    decltype(std::__name( \
      declval<double>(), \
      declval<enable_if_t< \
        conjunction_v< \
          is_same<arg2_, _Tp>, \
          negation<is_same<__remove_cvref_t<_Up>, simd<_Tp, _Abi>>>, \
          is_convertible<_Up, simd<_Tp, _Abi>>, is_floating_point<_Tp>>, \
        double>>())), \
    _Tp, _Abi> \
  __name(_Up&& __xx, const simd<_Tp, _Abi>& __yy) \
  { return __name(simd<_Tp, _Abi>(static_cast<_Up&&>(__xx)), __yy); }

// }}}
// _GLIBCXX_SIMD_MATH_CALL3_ {{{
#define _GLIBCXX_SIMD_MATH_CALL3_(__name, arg2_, arg3_) \
  template <typename _Tp, typename _Abi, typename..., \
            typename _Arg2 = _Extra_argument_type<arg2_, _Tp, _Abi>, \
            typename _Arg3 = _Extra_argument_type<arg3_, _Tp, _Abi>, \
            typename _R = _Math_return_type_t< \
              decltype(std::__name(declval<double>(), _Arg2::declval(), \
                                   _Arg3::declval())), \
              _Tp, _Abi>> \
  enable_if_t<is_floating_point_v<_Tp>, _R> \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y, \
         const typename _Arg3::type& __z) \
  { \
    return {__private_init, \
            _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y), \
                                         _Arg3::_S_data(__z))}; \
  } \
  template < \
    typename _T0, typename _T1, typename _T2, typename..., \
    typename _U0 = __remove_cvref_t<_T0>, \
    typename _U1 = __remove_cvref_t<_T1>, \
    typename _U2 = __remove_cvref_t<_T2>, \
    typename _Simd = conditional_t<is_simd_v<_U1>, _U1, _U2>, \
    typename = enable_if_t<conjunction_v< \
      is_simd<_Simd>, is_convertible<_T0&&, _Simd>, \
      is_convertible<_T1&&, _Simd>, is_convertible<_T2&&, _Simd>, \
      negation<conjunction< \
        is_simd<_U0>, is_floating_point<__value_type_or_identity_t<_U0>>>>>>> \
  _GLIBCXX_SIMD_INTRINSIC decltype(__name(declval<const _Simd&>(), \
                                          declval<const _Simd&>(), \
                                          declval<const _Simd&>())) \
  __name(_T0&& __xx, _T1&& __yy, _T2&& __zz) \
  { \
    return __name(_Simd(static_cast<_T0&&>(__xx)), \
                  _Simd(static_cast<_T1&&>(__yy)), \
                  _Simd(static_cast<_T2&&>(__zz))); \
  }

// }}}
// __cosSeries {{{
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
  __cosSeries(const simd<float, _Abi>& __x)
  {
    const simd<float, _Abi> __x2 = __x * __x;
    simd<float, _Abi> __y;
    __y = 0x1.ap-16f;                  //  1/8!
    __y = __y * __x2 - 0x1.6c1p-10f;   // -1/6!
    __y = __y * __x2 + 0x1.555556p-5f; //  1/4!
    return __y * (__x2 * __x2) - .5f * __x2 + 1.f;
  }

template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
  __cosSeries(const simd<double, _Abi>& __x)
  {
    const simd<double, _Abi> __x2 = __x * __x;
    simd<double, _Abi> __y;
    __y = 0x1.AC00000000000p-45;              //  1/16!
    __y = __y * __x2 - 0x1.9394000000000p-37; // -1/14!
    __y = __y * __x2 + 0x1.1EED8C0000000p-29; //  1/12!
    __y = __y * __x2 - 0x1.27E4FB7400000p-22; // -1/10!
    __y = __y * __x2 + 0x1.A01A01A018000p-16; //  1/8!
    __y = __y * __x2 - 0x1.6C16C16C16C00p-10; // -1/6!
    __y = __y * __x2 + 0x1.5555555555554p-5;  //  1/4!
    return (__y * __x2 - .5f) * __x2 + 1.f;
  }

// }}}
// __sinSeries {{{
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
  __sinSeries(const simd<float, _Abi>& __x)
  {
    const simd<float, _Abi> __x2 = __x * __x;
    simd<float, _Abi> __y;
    __y = -0x1.9CC000p-13f;            // -1/7!
    __y = __y * __x2 + 0x1.111100p-7f; //  1/5!
    __y = __y * __x2 - 0x1.555556p-3f; // -1/3!
    return __y * (__x2 * __x) + __x;
  }

template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
  __sinSeries(const simd<double, _Abi>& __x)
  {
    // __x  = [0, 0.7854 = pi/4]
    // __x² = [0, 0.6169 = pi²/8]
    const simd<double, _Abi> __x2 = __x * __x;
    simd<double, _Abi> __y;
    __y = -0x1.ACF0000000000p-41;             // -1/15!
    __y = __y * __x2 + 0x1.6124400000000p-33; //  1/13!
    __y = __y * __x2 - 0x1.AE64567000000p-26; // -1/11!
    __y = __y * __x2 + 0x1.71DE3A5540000p-19; //  1/9!
    __y = __y * __x2 - 0x1.A01A01A01A000p-13; // -1/7!
    __y = __y * __x2 + 0x1.1111111111110p-7;  //  1/5!
    __y = __y * __x2 - 0x1.5555555555555p-3;  // -1/3!
    return __y * (__x2 * __x) + __x;
  }

// }}}
// __zero_low_bits {{{
template <int _Bits, typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
  __zero_low_bits(simd<_Tp, _Abi> __x)
  {
    const simd<_Tp, _Abi> __bitmask
      = __bit_cast<_Tp>(~make_unsigned_t<__int_for_sizeof_t<_Tp>>() << _Bits);
    return {__private_init,
            _Abi::_SimdImpl::_S_bit_and(__data(__x), __data(__bitmask))};
  }

// }}}
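// For illustration: with _Tp = double and _Bits = 26 the mask above is
// __bit_cast<double>(0xFFFF'FFFF'FC00'0000ull), so __zero_low_bits<26>(__y)
// clears the 26 least significant mantissa bits of each element. The result
// is a "short" value whose products with constants that also have short
// mantissas (e.g. the split pi/2 constants in __fold_input below) are exact.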
// __fold_input {{{

/**@internal
 * Fold @p x into [-¼π, ¼π] and remember the quadrant it came from:
 * quadrant 0: [-¼π,  ¼π]
 * quadrant 1: [ ¼π,  ¾π]
 * quadrant 2: [ ¾π, 1¼π]
 * quadrant 3: [1¼π, 1¾π]
 *
 * The algorithm determines `y` as the multiple `x - y * ¼π = [-¼π, ¼π]`. Using
 * a bitmask, `y` is reduced to `quadrant`. `y` can be calculated as
 * ```
 * y = trunc(x / ¼π);
 * y += fmod(y, 2);
 * ```
 * This can be simplified by moving the (implicit) division by 2 into the
 * truncation expression. The `+= fmod` effect can then be achieved by using
 * rounding instead of truncation: `y = round(x / ½π) * 2`. If precision allows,
 * `2/π * x` is better (faster).
 */
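// Worked example of the folding (illustrative): for x = 2.5,
// round(x * 2/π) = round(1.5915) = 2, so the element lands in quadrant
// 2 & 3 = 2 and the reduced argument is x - 2 * π/2 ≈ -0.6416, which lies in
// [-¼π, ¼π] as required by __sinSeries/__cosSeries.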
template <typename _Tp, typename _Abi>
  struct _Folded
  {
    simd<_Tp, _Abi> _M_x;
    rebind_simd_t<int, simd<_Tp, _Abi>> _M_quadrant;
  };

namespace __math_float {
inline constexpr float __pi_over_4 = 0x1.921FB6p-1f; // π/4
inline constexpr float __2_over_pi = 0x1.45F306p-1f; // 2/π
inline constexpr float __pi_2_5bits0
  = 0x1.921fc0p0f; // π/2, 5 0-bits (least significant)
inline constexpr float __pi_2_5bits0_rem
  = -0x1.5777a6p-21f; // π/2 - __pi_2_5bits0
} // namespace __math_float
namespace __math_double {
inline constexpr double __pi_over_4 = 0x1.921fb54442d18p-1; // π/4
inline constexpr double __2_over_pi = 0x1.45F306DC9C883p-1; // 2/π
inline constexpr double __pi_2 = 0x1.921fb54442d18p0;       // π/2
} // namespace __math_double

template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<float, _Abi>
  __fold_input(const simd<float, _Abi>& __x)
  {
    using _V = simd<float, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_float;
    _Folded<float, _Abi> __r;
    __r._M_x = abs(__x);
#if 0
    // zero most mantissa bits:
    constexpr float __1_over_pi = 0x1.45F306p-2f; // 1/π
    const auto __y = (__r._M_x * __1_over_pi + 0x1.8p23f) - 0x1.8p23f;
    // split π into 4 parts, the first three with 13 trailing zeros (to make the
    // following multiplications precise):
    constexpr float __pi0 = 0x1.920000p1f;
    constexpr float __pi1 = 0x1.fb4000p-11f;
    constexpr float __pi2 = 0x1.444000p-23f;
    constexpr float __pi3 = 0x1.68c234p-38f;
    __r._M_x - __y*__pi0 - __y*__pi1 - __y*__pi2 - __y*__pi3
#else
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      __r._M_quadrant = 0;
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 6 * __pi_over_4)))
      {
        const _V __y = nearbyint(__r._M_x * __2_over_pi);
        __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // __y mod 4
        __r._M_x -= __y * __pi_2_5bits0;
        __r._M_x -= __y * __pi_2_5bits0_rem;
      }
    else
      {
        using __math_double::__2_over_pi;
        using __math_double::__pi_2;
        using _VD = rebind_simd_t<double, _V>;
        _VD __xd = static_simd_cast<_VD>(__r._M_x);
        _VD __y = nearbyint(__xd * __2_over_pi);
        __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // = __y mod 4
        __r._M_x = static_simd_cast<_V>(__xd - __y * __pi_2);
      }
#endif
    return __r;
  }

template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<double, _Abi>
  __fold_input(const simd<double, _Abi>& __x)
  {
    using _V = simd<double, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_double;

    _Folded<double, _Abi> __r;
    __r._M_x = abs(__x);
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      {
        __r._M_quadrant = 0;
        return __r;
      }
    const _V __y = nearbyint(__r._M_x / (2 * __pi_over_4));
    __r._M_quadrant = static_simd_cast<_IV>(__y) & 3;

    if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 1025 * __pi_over_4)))
      {
        // x - y * pi/2, y uses no more than 11 mantissa bits
        __r._M_x -= __y * 0x1.921FB54443000p0;
        __r._M_x -= __y * -0x1.73DCB3B39A000p-43;
        __r._M_x -= __y * 0x1.45C06E0E68948p-86;
      }
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__y <= 0x1.0p30)))
      {
        // x - y * pi/2, y uses no more than 29 mantissa bits
        __r._M_x -= __y * 0x1.921FB40000000p0;
        __r._M_x -= __y * 0x1.4442D00000000p-24;
        __r._M_x -= __y * 0x1.8469898CC5170p-48;
      }
    else
      {
        // x - y * pi/2, y may require all mantissa bits
        const _V __y_hi = __zero_low_bits<26>(__y);
        const _V __y_lo = __y - __y_hi;
        const auto __pi_2_1 = 0x1.921FB50000000p0;
        const auto __pi_2_2 = 0x1.110B460000000p-26;
        const auto __pi_2_3 = 0x1.1A62630000000p-54;
        const auto __pi_2_4 = 0x1.8A2E03707344Ap-81;
        __r._M_x = __r._M_x - __y_hi * __pi_2_1
                     - max(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
                     - min(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
                     - max(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
                     - min(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
                     - max(__y * __pi_2_4, __y_lo * __pi_2_3)
                     - min(__y * __pi_2_4, __y_lo * __pi_2_3);
      }
    return __r;
  }

// }}}
// __extract_exponent_as_int {{{
template <typename _Tp, typename _Abi>
  rebind_simd_t<int, simd<_Tp, _Abi>>
  __extract_exponent_as_int(const simd<_Tp, _Abi>& __v)
  {
    using _Vp = simd<_Tp, _Abi>;
    using _Up = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
    using namespace std::experimental::__float_bitwise_operators;
    const _Vp __exponent_mask
      = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
    return static_simd_cast<rebind_simd_t<int, _Vp>>(
      __bit_cast<rebind_simd_t<_Up, _Vp>>(__v & __exponent_mask)
      >> (__digits_v<_Tp> - 1));
  }

// }}}
// __impl_or_fallback {{{
template <typename ImplFun, typename FallbackFun, typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback_dispatch(int, ImplFun&& __impl_fun, FallbackFun&&,
                              _Args&&... __args)
    -> decltype(__impl_fun(static_cast<_Args&&>(__args)...))
  { return __impl_fun(static_cast<_Args&&>(__args)...); }

template <typename ImplFun, typename FallbackFun, typename... _Args>
  inline auto
  __impl_or_fallback_dispatch(float, ImplFun&&, FallbackFun&& __fallback_fun,
                              _Args&&... __args)
    -> decltype(__fallback_fun(static_cast<_Args&&>(__args)...))
  { return __fallback_fun(static_cast<_Args&&>(__args)...); }

template <typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback(_Args&&... __args)
  {
    return __impl_or_fallback_dispatch(int(), static_cast<_Args&&>(__args)...);
  }
//}}}

// trigonometric functions {{{
_GLIBCXX_SIMD_MATH_CALL_(acos)
_GLIBCXX_SIMD_MATH_CALL_(asin)
_GLIBCXX_SIMD_MATH_CALL_(atan)
_GLIBCXX_SIMD_MATH_CALL2_(atan2, _Tp)

/*
 * algorithm for sine and cosine:
 *
 * The result can be calculated with sine or cosine depending on the π/4
 * section the input is in.
 * sine   ≈ __x + __x³
 * cosine ≈ 1 - __x²
 *
 * sine:
 * Map -__x to __x and invert the output
 * Extend precision of __x - n * π/4 by calculating
 * ((__x - n * p1) - n * p2) - n * p3 (p1 + p2 + p3 = π/4)
 *
 * Calculate Taylor series with tuned coefficients.
 * Fix sign.
 */
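// A note on the sign handling in cos() below (illustrative reading):
// __sign_flip is computed as _V(-0.f) & static_simd_cast<_V>((1 + quadrant) << 30).
// For quadrant = 0, 1, 2, 3 the integer (1 + quadrant) << 30 is positive,
// negative, negative, and (after wrap-around) zero, so after conversion to
// float and masking with -0.f only quadrants 1 and 2 keep a set sign bit.
// XOR-ing that bit onto the series result yields the sign pattern
// +, -, -, + listed in the quadrant table inside cos().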
// cos{{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  cos(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_cos(__data(__x))};
    else
      {
        if constexpr (is_same_v<_Tp, float>)
          if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 393382)))
            return static_simd_cast<_V>(
              cos(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

        const auto __f = __fold_input(__x);
        // quadrant | effect
        //        0 | cosSeries, +
        //        1 | sinSeries, -
        //        2 | cosSeries, -
        //        3 | sinSeries, +
        using namespace std::experimental::__float_bitwise_operators;
        const _V __sign_flip
          = _V(-0.f) & static_simd_cast<_V>((1 + __f._M_quadrant) << 30);

        const auto __need_cos = (__f._M_quadrant & 1) == 0;
        if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_cos)))
          return __sign_flip ^ __cosSeries(__f._M_x);
        else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_cos)))
          return __sign_flip ^ __sinSeries(__f._M_x);
        else // some_of(__need_cos)
          {
            _V __r = __sinSeries(__f._M_x);
            where(__need_cos.__cvt(), __r) = __cosSeries(__f._M_x);
            return __r ^ __sign_flip;
          }
      }
  }

template <typename _Tp>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
  cos(simd<_Tp, simd_abi::scalar> __x)
  { return std::cos(__data(__x)); }

//}}}
// sin{{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sin(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_sin(__data(__x))};
    else
      {
        if constexpr (is_same_v<_Tp, float>)
          if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 527449)))
            return static_simd_cast<_V>(
              sin(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

        const auto __f = __fold_input(__x);
        // quadrant | effect
        //        0 | sinSeries
        //        1 | cosSeries
        //        2 | sinSeries, sign flip
        //        3 | cosSeries, sign flip
        using namespace std::experimental::__float_bitwise_operators;
        const auto __sign_flip
          = (__x ^ static_simd_cast<_V>(1 - __f._M_quadrant)) & _V(_Tp(-0.));

        const auto __need_sin = (__f._M_quadrant & 1) == 0;
        if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_sin)))
          return __sign_flip ^ __sinSeries(__f._M_x);
        else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_sin)))
          return __sign_flip ^ __cosSeries(__f._M_x);
        else // some_of(__need_sin)
          {
            _V __r = __cosSeries(__f._M_x);
            where(__need_sin.__cvt(), __r) = __sinSeries(__f._M_x);
            return __sign_flip ^ __r;
          }
      }
  }

template <typename _Tp>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
  sin(simd<_Tp, simd_abi::scalar> __x)
  { return std::sin(__data(__x)); }

//}}}
_GLIBCXX_SIMD_MATH_CALL_(tan)
_GLIBCXX_SIMD_MATH_CALL_(acosh)
_GLIBCXX_SIMD_MATH_CALL_(asinh)
_GLIBCXX_SIMD_MATH_CALL_(atanh)
_GLIBCXX_SIMD_MATH_CALL_(cosh)
_GLIBCXX_SIMD_MATH_CALL_(sinh)
_GLIBCXX_SIMD_MATH_CALL_(tanh)
// }}}
// exponential functions {{{
_GLIBCXX_SIMD_MATH_CALL_(exp)
_GLIBCXX_SIMD_MATH_CALL_(exp2)
_GLIBCXX_SIMD_MATH_CALL_(expm1)

// }}}
// frexp {{{
#if _GLIBCXX_SIMD_X86INTRIN
template <typename _Tp, size_t _Np>
  _SimdWrapper<_Tp, _Np>
  __getexp(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getexp_ps(__to_intrin(__x)));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getexp_ps(__auto_bitcast(__to_intrin(__x))));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getexp_ps(__x);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getexp_ps(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getexp_ps(__x);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getexp_pd(__x);
    else
      __assert_unreachable<_Tp>();
  }

template <typename _Tp, size_t _Np>
  _SimdWrapper<_Tp, _Np>
  __getmant_avx512(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getmant_ps(__to_intrin(__x), _MM_MANT_NORM_p5_1,
                                           _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getmant_ps(__auto_bitcast(__to_intrin(__x)),
                                              _MM_MANT_NORM_p5_1,
                                              _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
                                       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getmant_ps(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
                                       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
                                       _MM_MANT_SIGN_src));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else
      __assert_unreachable<_Tp>();
  }
#endif // _GLIBCXX_SIMD_X86INTRIN

/**
 * splits @p __v into exponent and mantissa; the sign is kept with the mantissa
 *
 * The return value will be in the range [0.5, 1.0[
 * The @p __e value will be an integer defining the power-of-two exponent
 */
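// For illustration: frexp(8.0) yields a mantissa of 0.5 with exponent 4
// (8 = 0.5 * 2^4), frexp(-3.0) yields -0.75 with exponent 2, and frexp(0.5)
// yields 0.5 with exponent 0; the simd overload below applies this element-wise.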
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  frexp(const simd<_Tp, _Abi>& __x, _Samesize<int, simd<_Tp, _Abi>>* __exp)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      {
        int __tmp;
        const auto __r = std::frexp(__x[0], &__tmp);
        (*__exp)[0] = __tmp;
        return __r;
      }
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      {
        return {__private_init,
                _Abi::_SimdImpl::_S_frexp(__data(__x), __data(*__exp))};
#if _GLIBCXX_SIMD_X86INTRIN
      }
    else if constexpr (__have_avx512f)
      {
        constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
        constexpr size_t _NI = _Np < 4 ? 4 : _Np;
        const auto __v = __data(__x);
        const auto __isnonzero
          = _Abi::_SimdImpl::_S_isnonzerovalue_mask(__v._M_data);
        const _SimdWrapper<int, _NI> __exp_plus1
          = 1 + __convert<_SimdWrapper<int, _NI>>(__getexp(__v))._M_data;
        const _SimdWrapper<int, _Np> __e = __wrapper_bitcast<int, _Np>(
          _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _NI>(__isnonzero),
                                      _SimdWrapper<int, _NI>(), __exp_plus1));
        simd_abi::deduce_t<int, _Np>::_CommonImpl::_S_store(__e, __exp);
        return {__private_init,
                _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _Np>(
                                              __isnonzero),
                                            __v, __getmant_avx512(__v))};
#endif // _GLIBCXX_SIMD_X86INTRIN
      }
    else
      {
        // fallback implementation
        static_assert(sizeof(_Tp) == 4 || sizeof(_Tp) == 8);
        using _V = simd<_Tp, _Abi>;
        using _IV = rebind_simd_t<int, _V>;
        using namespace std::experimental::__proposed;
        using namespace std::experimental::__float_bitwise_operators;

        constexpr int __exp_adjust = sizeof(_Tp) == 4 ? 0x7e : 0x3fe;
        constexpr int __exp_offset = sizeof(_Tp) == 4 ? 0x70 : 0x200;
        constexpr _Tp __subnorm_scale = sizeof(_Tp) == 4 ? 0x1p112 : 0x1p512;
        _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __exponent_mask
          = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
        _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __p5_1_exponent
          = -(2 - __epsilon_v<_Tp>) / 2; // 0xbf7fffff or 0xbfefffffffffffff

        _V __mant = __p5_1_exponent & (__exponent_mask | __x); // +/-[.5, 1)
        const _IV __exponent_bits = __extract_exponent_as_int(__x);
        if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))))
          {
            *__exp
              = simd_cast<_Samesize<int, _V>>(__exponent_bits - __exp_adjust);
            return __mant;
          }

#if __FINITE_MATH_ONLY__
        // at least one element of __x is 0 or subnormal, the rest is normal
        // (inf and NaN are excluded by -ffinite-math-only)
        const auto __iszero_inf_nan = __x == 0;
#else
        const auto __as_int
          = __bit_cast<rebind_simd_t<__int_for_sizeof_t<_Tp>, _V>>(abs(__x));
        const auto __inf
          = __bit_cast<rebind_simd_t<__int_for_sizeof_t<_Tp>, _V>>(
              _V(__infinity_v<_Tp>));
        const auto __iszero_inf_nan = static_simd_cast<typename _V::mask_type>(
          __as_int == 0 || __as_int >= __inf);
#endif

        const _V __scaled_subnormal = __x * __subnorm_scale;
        const _V __mant_subnormal
          = __p5_1_exponent & (__exponent_mask | __scaled_subnormal);
        where(!isnormal(__x), __mant) = __mant_subnormal;
        where(__iszero_inf_nan, __mant) = __x;
        _IV __e = __extract_exponent_as_int(__scaled_subnormal);
        using _MaskType =
          typename conditional_t<sizeof(typename _V::value_type) == sizeof(int),
                                 _V, _IV>::mask_type;
        const _MaskType __value_isnormal = isnormal(__x).__cvt();
        where(__value_isnormal.__cvt(), __e) = __exponent_bits;
        static_assert(sizeof(_IV) == sizeof(__value_isnormal));
        const _IV __offset
          = (__bit_cast<_IV>(__value_isnormal) & _IV(__exp_adjust))
            | (__bit_cast<_IV>(static_simd_cast<_MaskType>(__exponent_bits == 0)
                               & static_simd_cast<_MaskType>(__x != 0))
               & _IV(__exp_adjust + __exp_offset));
        *__exp = simd_cast<_Samesize<int, _V>>(__e - __offset);
        return __mant;
      }
  }

// }}}
_GLIBCXX_SIMD_MATH_CALL2_(ldexp, int)
_GLIBCXX_SIMD_MATH_CALL_(ilogb)

// logarithms {{{
_GLIBCXX_SIMD_MATH_CALL_(log)
_GLIBCXX_SIMD_MATH_CALL_(log10)
_GLIBCXX_SIMD_MATH_CALL_(log1p)
_GLIBCXX_SIMD_MATH_CALL_(log2)

//}}}
// logb{{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, _Abi>>
  logb(const simd<_Tp, _Abi>& __x)
  {
    constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
    if constexpr (_Np == 1)
      return std::logb(__x[0]);
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      {
        return {__private_init,
                __data(__x)._M_apply_per_chunk([](auto __impl, auto __xx) {
                  using _V = typename decltype(__impl)::simd_type;
                  return __data(
                    std::experimental::logb(_V(__private_init, __xx)));
                })};
      }
#if _GLIBCXX_SIMD_X86INTRIN // {{{
    else if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return {__private_init,
              __auto_bitcast(_mm_getexp_ps(__to_intrin(__as_vector(__x))))};
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return {__private_init, _mm_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return {__private_init, _mm256_getexp_ps(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return {__private_init, _mm256_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return {__private_init,
              __lo256(_mm512_getexp_ps(__auto_bitcast(__data(__x))))};
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return {__private_init,
              __lo256(_mm512_getexp_pd(__auto_bitcast(__data(__x))))};
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return {__private_init, _mm512_getexp_ps(__data(__x))};
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return {__private_init, _mm512_getexp_pd(__data(__x))};
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
    else
      {
        using _V = simd<_Tp, _Abi>;
        using namespace std::experimental::__proposed;
        auto __is_normal = isnormal(__x);

        // work on abs(__x) to reflect the return value on Linux for negative
        // inputs (domain-error => implementation-defined value is returned)
        const _V abs_x = abs(__x);

        // __exponent(__x) returns the exponent value (bias removed) as
        // simd<_Up> with integral _Up
        auto&& __exponent = [](const _V& __v) {
          using namespace std::experimental::__proposed;
          using _IV = rebind_simd_t<
            conditional_t<sizeof(_Tp) == sizeof(_LLong), _LLong, int>, _V>;
          return (__bit_cast<_IV>(__v) >> (__digits_v<_Tp> - 1))
                 - (__max_exponent_v<_Tp> - 1);
        };
        _V __r = static_simd_cast<_V>(__exponent(abs_x));
        if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__is_normal)))
          // without corner cases (nan, inf, subnormal, zero) we have our
          // answer:
          return __r;
        const auto __is_zero = __x == 0;
        const auto __is_nan = isnan(__x);
        const auto __is_inf = isinf(__x);
        where(__is_zero, __r) = -__infinity_v<_Tp>;
        where(__is_nan, __r) = __x;
        where(__is_inf, __r) = __infinity_v<_Tp>;
        __is_normal |= __is_zero || __is_nan || __is_inf;
        if (all_of(__is_normal))
          // at this point everything but subnormals is handled
          return __r;
        // subnormals repeat the exponent extraction after multiplication of
        // the input with a floating point value that has 112 (0x70) in its
        // exponent (not too big for sp and large enough for dp)
        const _V __scaled = abs_x * _Tp(0x1p112);
        _V __scaled_exp = static_simd_cast<_V>(__exponent(__scaled) - 112);
        where(__is_normal, __scaled_exp) = __r;
        return __scaled_exp;
      }
  }

//}}}
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  modf(const simd<_Tp, _Abi>& __x, simd<_Tp, _Abi>* __iptr)
  {
    if constexpr (__is_scalar_abi<_Abi>()
                  || (__is_fixed_size_abi_v<_Abi>
                      && simd_size_v<_Tp, _Abi> == 1))
      {
        _Tp __tmp;
        _Tp __r = std::modf(__x[0], &__tmp);
        __iptr[0] = __tmp;
        return __r;
      }
    else
      {
        const auto __integral = trunc(__x);
        *__iptr = __integral;
        auto __r = __x - __integral;
#if !__FINITE_MATH_ONLY__
        where(isinf(__x), __r) = _Tp();
#endif
        return copysign(__r, __x);
      }
  }

_GLIBCXX_SIMD_MATH_CALL2_(scalbn, int)
_GLIBCXX_SIMD_MATH_CALL2_(scalbln, long)

_GLIBCXX_SIMD_MATH_CALL_(cbrt)

_GLIBCXX_SIMD_MATH_CALL_(abs)
_GLIBCXX_SIMD_MATH_CALL_(fabs)

// [parallel.simd.math] only asks for is_floating_point_v<_Tp> and forgot to
// allow signed integral _Tp
template <typename _Tp, typename _Abi>
  enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
  abs(const simd<_Tp, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }

template <typename _Tp, typename _Abi>
  enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
  fabs(const simd<_Tp, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }

// the following are overloads for functions in <cstdlib> and not covered by
// [parallel.simd.math]. I don't see much value in making them work, though
/*
template <typename _Abi> simd<long, _Abi> labs(const simd<long, _Abi> &__x)
{ return {__private_init, _Abi::_SimdImpl::abs(__data(__x))}; }

template <typename _Abi> simd<long long, _Abi> llabs(const simd<long long, _Abi>
&__x)
{ return {__private_init, _Abi::_SimdImpl::abs(__data(__x))}; }
*/

#define _GLIBCXX_SIMD_CVTING2(_NAME) \
  template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y) \
  { \
    return _NAME(__x, __y); \
  } \
 \
  template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y) \
  { \
    return _NAME(__x, __y); \
  }

#define _GLIBCXX_SIMD_CVTING3(_NAME) \
  template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
    const simd<_Tp, _Abi>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
  template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
    const simd<_Tp, _Abi>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
  template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
  template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
  template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
  template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, \
    const __type_identity_t<simd<_Tp, _Abi>>& __y, const simd<_Tp, _Abi>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  }

template <typename _R, typename _ToApply, typename _Tp, typename... _Tps>
  _GLIBCXX_SIMD_INTRINSIC _R
  __fixed_size_apply(_ToApply&& __apply, const _Tp& __arg0,
                     const _Tps&... __args)
  {
    return {__private_init,
            __data(__arg0)._M_apply_per_chunk(
              [&](auto __impl, const auto&... __inner) {
                using _V = typename decltype(__impl)::simd_type;
                return __data(__apply(_V(__private_init, __inner)...));
              },
              __data(__args)...)};
  }

template <typename _VV>
  __remove_cvref_t<_VV>
  __hypot(_VV __x, _VV __y)
  {
    using _V = __remove_cvref_t<_VV>;
    using _Tp = typename _V::value_type;
    if constexpr (_V::size() == 1)
      return std::hypot(_Tp(__x[0]), _Tp(__y[0]));
    else if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
      {
        return __fixed_size_apply<_V>(
                 [](auto __a, auto __b) { return hypot(__a, __b); }, __x, __y);
      }
    else
      {
        // A simple solution for _Tp == float would be to cast to double and
        // simply calculate sqrt(x²+y²) as it can't over-/underflow anymore
        // with dp. It still needs the Annex F fixups though and isn't faster
        // on Skylake-AVX512 (not even for SSE and AVX vectors, and really bad
        // for AVX-512).
        using namespace __float_bitwise_operators;
        _V __absx = abs(__x);          // no error
        _V __absy = abs(__y);          // no error
        _V __hi = max(__absx, __absy); // no error
        _V __lo = min(__absy, __absx); // no error

        // round __hi down to the next power-of-2:
        _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
        if constexpr (__have_neon && !__have_neon_a32)
          { // With ARMv7 NEON, we have no subnormals and must use a slightly
            // different strategy
            const _V __hi_exp = __hi & __inf;
            _V __scale_back = __hi_exp;
            // For large exponents (max & max/2) the inversion comes too close
            // to subnormals. Subtract 3 from the exponent:
            where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
            // Invert and adjust for the off-by-one error of inversion via xor:
            const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
            const _V __h1 = __hi * __scale;
            const _V __l1 = __lo * __scale;
            _V __r = __scale_back * sqrt(__h1 * __h1 + __l1 * __l1);
            // Fix up hypot(0, 0) to not be NaN:
            where(__hi == 0, __r) = 0;
            return __r;
          }
#endif

#ifdef __FAST_MATH__
        // With fast-math, ignore precision of subnormals and inputs from
        // __finite_max_v/2 to __finite_max_v. This removes all
        // branching/masking.
        if constexpr (true)
#else
        if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
                                    && all_of(isnormal(__y))))
#endif
          {
            const _V __hi_exp = __hi & __inf;
            // ((__hi + __hi) & __inf) ^ __inf almost works for computing
            // __scale, except when (__hi + __hi) & __inf == __inf, in which
            // case __scale becomes 0 (should be min/2 instead) and thus loses
            // the information from __lo.
#ifdef __FAST_MATH__
            using _Ip = __int_for_sizeof_t<_Tp>;
            using _IV = rebind_simd_t<_Ip, _V>;
            const auto __as_int = __bit_cast<_IV>(__hi_exp);
            const _V __scale
              = __bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
            const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
            _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __mant_mask
              = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
            const _V __h1 = (__hi & __mant_mask) | _V(1);
            const _V __l1 = __lo * __scale;
            return __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
          }
        else
          {
            // slower path to support subnormals
            // if __hi is subnormal, avoid scaling by inf & final mul by 0
            // (which yields NaN) by using min()
            _V __scale = _V(1 / __norm_min_v<_Tp>);
            // invert exponent w/o error and w/o using the slow divider unit:
            // xor inverts the exponent but is off by 1. Multiplication with .5
            // adjusts for the discrepancy.
            where(__hi >= __norm_min_v<_Tp>, __scale)
              = ((__hi & __inf) ^ __inf) * _Tp(.5);
            // adjust final exponent for subnormal inputs
            _V __hi_exp = __norm_min_v<_Tp>;
            where(__hi >= __norm_min_v<_Tp>, __hi_exp)
              = __hi & __inf;           // no error
            _V __h1 = __hi * __scale;   // no error
            _V __l1 = __lo * __scale;   // no error

            // sqrt(x²+y²) = e*sqrt((x/e)²+(y/e)²):
            // this ensures no overflow in the argument to sqrt
            _V __r = __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
#ifdef __STDC_IEC_559__
            // fixup for Annex F requirements
            // the naive fixup goes like this:
            //
            // where(__l1 == 0, __r) = __hi;
            // where(isunordered(__x, __y), __r) = __quiet_NaN_v<_Tp>;
            // where(isinf(__absx) || isinf(__absy), __r) = __inf;
            //
            // The fixup can be prepared in parallel with the sqrt, requiring a
            // single blend step after hi_exp * sqrt, reducing latency and
            // throughput:
            _V __fixup = __hi; // __lo == 0
            where(isunordered(__x, __y), __fixup) = __quiet_NaN_v<_Tp>;
            where(isinf(__absx) || isinf(__absy), __fixup) = __inf;
            where(!(__lo == 0 || isunordered(__x, __y)
                    || (isinf(__absx) || isinf(__absy))),
                  __fixup)
              = __r;
            __r = __fixup;
#endif
            return __r;
          }
      }
  }
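// Worked example of the scaling identity used above (illustrative): for
// x = 3 and y = 4, __hi = 4 and __hi_exp = 4 (the power of two taken from
// __hi's exponent bits), so __scale = 1/4, __h1 = 1, __l1 = 0.75 and the
// result is 4 * sqrt(1 + 0.5625) = 5. Because the squares are formed on the
// scaled values, x² + y² cannot overflow even when x and y are close to
// __finite_max_v<_Tp>.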
| 1088 |
|
---|
| 1089 | template <typename _Tp, typename _Abi>
|
---|
| 1090 | _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
|
---|
| 1091 | hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
|
---|
| 1092 | {
|
---|
| 1093 | return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
|
---|
| 1094 | const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x,
|
---|
| 1095 | __y);
|
---|
| 1096 | }
|
---|
| 1097 |
|
---|
| 1098 | _GLIBCXX_SIMD_CVTING2(hypot)
|
---|
| 1099 |
|
---|
| 1100 | template <typename _VV>
|
---|
| 1101 | __remove_cvref_t<_VV>
|
---|
| 1102 | __hypot(_VV __x, _VV __y, _VV __z)
|
---|
| 1103 | {
|
---|
| 1104 | using _V = __remove_cvref_t<_VV>;
|
---|
| 1105 | using _Abi = typename _V::abi_type;
|
---|
| 1106 | using _Tp = typename _V::value_type;
|
---|
| 1107 | /* FIXME: enable after PR77776 is resolved
|
---|
| 1108 | if constexpr (_V::size() == 1)
|
---|
| 1109 | return std::hypot(_Tp(__x[0]), _Tp(__y[0]), _Tp(__z[0]));
|
---|
| 1110 | else
|
---|
| 1111 | */
|
---|
| 1112 | if constexpr (__is_fixed_size_abi_v<_Abi> && _V::size() > 1)
|
---|
| 1113 | {
|
---|
| 1114 | return __fixed_size_apply<simd<_Tp, _Abi>>(
|
---|
| 1115 | [](auto __a, auto __b, auto __c) { return hypot(__a, __b, __c); },
|
---|
| 1116 | __x, __y, __z);
|
---|
| 1117 | }
|
---|
| 1118 | else
|
---|
| 1119 | {
|
---|
| 1120 | using namespace __float_bitwise_operators;
|
---|
| 1121 | const _V __absx = abs(__x); // no error
|
---|
| 1122 | const _V __absy = abs(__y); // no error
|
---|
| 1123 | const _V __absz = abs(__z); // no error
|
---|
| 1124 | _V __hi = max(max(__absx, __absy), __absz); // no error
|
---|
| 1125 | _V __l0 = min(__absz, max(__absx, __absy)); // no error
|
---|
| 1126 | _V __l1 = min(__absy, __absx); // no error
|
---|
| 1127 | if constexpr (__digits_v<_Tp> == 64 && __max_exponent_v<_Tp> == 0x4000
|
---|
| 1128 | && __min_exponent_v<_Tp> == -0x3FFD && _V::size() == 1)
|
---|
| 1129 | { // Seems like x87 fp80, where bit 63 is always 1 unless subnormal or
|
---|
| 1130 | // NaN. In this case the bit-tricks don't work, they require IEC559
|
---|
| 1131 | // binary32 or binary64 format.
|
---|
| 1132 | #ifdef __STDC_IEC_559__
|
---|
| 1133 | // fixup for Annex F requirements
|
---|
| 1134 | if (isinf(__absx[0]) || isinf(__absy[0]) || isinf(__absz[0]))
|
---|
| 1135 | return __infinity_v<_Tp>;
|
---|
| 1136 | else if (isunordered(__absx[0], __absy[0] + __absz[0]))
|
---|
| 1137 | return __quiet_NaN_v<_Tp>;
|
---|
| 1138 | else if (__l0[0] == 0 && __l1[0] == 0)
|
---|
| 1139 | return __hi;
|
---|
| 1140 | #endif
|
---|
| 1141 | _V __hi_exp = __hi;
|
---|
| 1142 | const _ULLong __tmp = 0x8000'0000'0000'0000ull;
|
---|
| 1143 | __builtin_memcpy(&__data(__hi_exp), &__tmp, 8);
|
---|
            const _V __scale = 1 / __hi_exp;
            __hi *= __scale;
            __l0 *= __scale;
            __l1 *= __scale;
            return __hi_exp * sqrt((__l0 * __l0 + __l1 * __l1) + __hi * __hi);
          }
        else
          {
            // round __hi down to the next power-of-2:
            _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
            if constexpr (_V::size() > 1 && __have_neon && !__have_neon_a32)
              { // With ARMv7 NEON, we have no subnormals and must use a
                // slightly different strategy.
                const _V __hi_exp = __hi & __inf;
                _V __scale_back = __hi_exp;
                // For large exponents (max & max/2) the inversion comes too
                // close to subnormals. Subtract 3 from the exponent:
                where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
                // Invert and adjust for the off-by-one error of inversion via
                // xor:
                const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
                const _V __h1 = __hi * __scale;
                __l0 *= __scale;
                __l1 *= __scale;
                _V __lo = __l0 * __l0
                            + __l1 * __l1; // add the two smaller values first
                asm("" : "+m"(__lo));
                _V __r = __scale_back * sqrt(__h1 * __h1 + __lo);
                // Fix up hypot(0, 0, 0) to not be NaN:
                where(__hi == 0, __r) = 0;
                return __r;
              }
#endif

#ifdef __FAST_MATH__
            // With fast-math, ignore the precision of subnormals and of
            // inputs between __finite_max_v/2 and __finite_max_v. This
            // removes all branching/masking.
            if constexpr (true)
#else
            if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
                                          && all_of(isnormal(__y))
                                          && all_of(isnormal(__z))))
#endif
              {
                const _V __hi_exp = __hi & __inf;
                // ((__hi + __hi) & __inf) ^ __inf almost works for computing
                // __scale, except when (__hi + __hi) & __inf == __inf, in
                // which case __scale becomes 0 (it should be min/2 instead)
                // and thus loses the information from __lo.
#ifdef __FAST_MATH__
                using _Ip = __int_for_sizeof_t<_Tp>;
                using _IV = rebind_simd_t<_Ip, _V>;
                const auto __as_int = __bit_cast<_IV>(__hi_exp);
                const _V __scale
                  = __bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
                const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
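                // Worked example (illustrative, binary64): for __hi == 6.0,
                // __hi_exp == 4.0 == 2^2 and both variants above yield
                // __scale == 0.25 == 1 / __hi_exp exactly: the xor flips the
                // exponent bits (4.0 -> 0.5, off by one) and * _Tp(.5)
                // corrects it, while the fast-math variant performs the same
                // negation of the exponent in the integer domain.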
                constexpr _Tp __mant_mask
                  = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
                const _V __h1 = (__hi & _V(__mant_mask)) | _V(1);
                __l0 *= __scale;
                __l1 *= __scale;
                const _V __lo
                  = __l0 * __l0
                      + __l1 * __l1; // add the two smaller values first
                return __hi_exp * sqrt(__lo + __h1 * __h1);
              }
            else
              {
                // slower path to support subnormals
                // if __hi is subnormal, avoid scaling by inf & a final mul
                // by 0 (which yields NaN) by using min()
                _V __scale = _V(1 / __norm_min_v<_Tp>);
                // Invert the exponent w/o error and w/o using the slow
                // divider unit: xor inverts the exponent but is off by one;
                // the multiplication with .5 adjusts for the discrepancy.
                where(__hi >= __norm_min_v<_Tp>, __scale)
                  = ((__hi & __inf) ^ __inf) * _Tp(.5);
                // adjust the final exponent for subnormal inputs
                _V __hi_exp = __norm_min_v<_Tp>;
                where(__hi >= __norm_min_v<_Tp>, __hi_exp)
                  = __hi & __inf;           // no error
                _V __h1 = __hi * __scale;   // no error
                __l0 *= __scale;            // no error
                __l1 *= __scale;            // no error
                _V __lo = __l0 * __l0
                            + __l1 * __l1; // add the two smaller values first
                _V __r = __hi_exp * sqrt(__lo + __h1 * __h1);
#ifdef __STDC_IEC_559__
                // fixup for Annex F requirements
                _V __fixup = __hi; // __lo == 0
                // where(__lo == 0, __fixup) = __hi;
                where(isunordered(__x, __y + __z), __fixup)
                  = __quiet_NaN_v<_Tp>;
                where(isinf(__absx) || isinf(__absy) || isinf(__absz), __fixup)
                  = __inf;
                // Instead of __lo == 0, the following could depend on
                // __h1² == __h1² + __lo (i.e. __hi is so much larger than the
                // other two inputs that the result is exactly __hi). While
                // this may improve precision, it is likely to reduce
                // efficiency if the ISA has FMAs (because __h1² + __lo is an
                // FMA, but the intermediate __h1² must be kept).
                where(!(__lo == 0 || isunordered(__x, __y + __z)
                          || isinf(__absx) || isinf(__absy) || isinf(__absz)),
                      __fixup)
                  = __r;
                __r = __fixup;
#endif
                return __r;
              }
          }
      }
  }

template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
  hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y,
        const simd<_Tp, _Abi>& __z)
  {
    return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
                                 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(
      __x, __y, __z);
  }

_GLIBCXX_SIMD_CVTING3(hypot)
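// Illustrative usage (not normative): every element of r becomes 13.f,
// computed without premature overflow/underflow.
//   using V = std::experimental::native_simd<float>;
//   V x = 3.f, y = 4.f, z = 12.f;
//   V r = hypot(x, y, z);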

_GLIBCXX_SIMD_MATH_CALL2_(pow, _Tp)

_GLIBCXX_SIMD_MATH_CALL_(sqrt)
_GLIBCXX_SIMD_MATH_CALL_(erf)
_GLIBCXX_SIMD_MATH_CALL_(erfc)
_GLIBCXX_SIMD_MATH_CALL_(lgamma)
_GLIBCXX_SIMD_MATH_CALL_(tgamma)
_GLIBCXX_SIMD_MATH_CALL_(ceil)
_GLIBCXX_SIMD_MATH_CALL_(floor)
_GLIBCXX_SIMD_MATH_CALL_(nearbyint)
_GLIBCXX_SIMD_MATH_CALL_(rint)
_GLIBCXX_SIMD_MATH_CALL_(lrint)
_GLIBCXX_SIMD_MATH_CALL_(llrint)

_GLIBCXX_SIMD_MATH_CALL_(round)
_GLIBCXX_SIMD_MATH_CALL_(lround)
_GLIBCXX_SIMD_MATH_CALL_(llround)

_GLIBCXX_SIMD_MATH_CALL_(trunc)

_GLIBCXX_SIMD_MATH_CALL2_(fmod, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(remainder, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(remquo, _Tp, int*)
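// The overloads generated above behave as if the corresponding <cmath>
// function were applied element-wise, e.g. (illustrative):
//   std::experimental::native_simd<double> v = 2.25;
//   auto s = sqrt(v);                   // every element == 1.5
//   auto m = fmod(v, decltype(v)(1.0)); // every element == 0.25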

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  copysign(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      return std::copysign(__x[0], __y[0]);
    else if constexpr (is_same_v<_Tp, long double> && sizeof(_Tp) == 12)
      // Remove this case once __bit_cast is implemented via
      // __builtin_bit_cast. It is necessary because __signmask below cannot
      // be computed at compile time.
      return simd<_Tp, _Abi>(
        [&](auto __i) { return std::copysign(__x[__i], __y[__i]); });
    else
      {
        using _V = simd<_Tp, _Abi>;
        using namespace std::experimental::__float_bitwise_operators;
        _GLIBCXX_SIMD_USE_CONSTEXPR_API auto __signmask = _V(1) ^ _V(-1);
        return (__x & (__x ^ __signmask)) | (__y & __signmask);
      }
  }
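// The bit trick above relies on _V(1) ^ _V(-1) having only the sign bit set:
// __x ^ __signmask flips the sign of __x, ANDing that with __x clears the
// sign bit while keeping all other bits, and OR-ing in (__y & __signmask)
// copies __y's sign. E.g. elements -3.5 in __x and +2.0 in __y produce +3.5.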

_GLIBCXX_SIMD_MATH_CALL2_(nextafter, _Tp)
// not covered in [parallel.simd.math]:
// _GLIBCXX_SIMD_MATH_CALL2_(nexttoward, long double)
_GLIBCXX_SIMD_MATH_CALL2_(fdim, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmax, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmin, _Tp)

_GLIBCXX_SIMD_MATH_CALL3_(fma, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(fpclassify)
_GLIBCXX_SIMD_MATH_CALL_(isfinite)

// isnan and isinf require special treatment because old glibc may declare
// `int isinf(double)`.
template <typename _Tp, typename _Abi, typename...,
          typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isinf(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isinf(__data(__x))}; }

template <typename _Tp, typename _Abi, typename...,
          typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isnan(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isnan(__data(__x))}; }

_GLIBCXX_SIMD_MATH_CALL_(isnormal)

template <typename..., typename _Tp, typename _Abi>
  simd_mask<_Tp, _Abi>
  signbit(simd<_Tp, _Abi> __x)
  {
    if constexpr (is_integral_v<_Tp>)
      {
        if constexpr (is_unsigned_v<_Tp>)
          return simd_mask<_Tp, _Abi>{}; // false
        else
          return __x < 0;
      }
    else
      return {__private_init, _Abi::_SimdImpl::_S_signbit(__data(__x))};
  }
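// Illustrative example: unlike (__x < 0), signbit is true for negative zero:
//   std::experimental::native_simd<float> v = -0.f;
//   all_of(signbit(v)); // true
//   all_of(v < 0);      // false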

_GLIBCXX_SIMD_MATH_CALL2_(isgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isgreaterequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isless, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isunordered, _Tp)

/* not covered in [parallel.simd.math]
template <typename _Abi> __doublev<_Abi> nan(const char* tagp);
template <typename _Abi> __floatv<_Abi> nanf(const char* tagp);
template <typename _Abi> __ldoublev<_Abi> nanl(const char* tagp);

template <typename _V> struct simd_div_t {
  _V quot, rem;
};

template <typename _Abi>
  simd_div_t<_SCharv<_Abi>> div(_SCharv<_Abi> numer, _SCharv<_Abi> denom);
template <typename _Abi>
  simd_div_t<__shortv<_Abi>> div(__shortv<_Abi> numer, __shortv<_Abi> denom);
template <typename _Abi>
  simd_div_t<__intv<_Abi>> div(__intv<_Abi> numer, __intv<_Abi> denom);
template <typename _Abi>
  simd_div_t<__longv<_Abi>> div(__longv<_Abi> numer, __longv<_Abi> denom);
template <typename _Abi>
  simd_div_t<__llongv<_Abi>> div(__llongv<_Abi> numer, __llongv<_Abi> denom);
*/
// special math {{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  assoc_laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
                 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
                 const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>([&](auto __i) {
      return std::assoc_laguerre(__n[__i], __m[__i], __x[__i]);
    });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  assoc_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
                 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
                 const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>([&](auto __i) {
      return std::assoc_legendre(__n[__i], __m[__i], __x[__i]);
    });
  }
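// The wrappers above (and the similar ones below) have no dedicated vector
// implementation; they evaluate element-wise via the simd generator
// constructor, forwarding to the scalar <cmath> special functions.
// Illustrative usage:
//   using V = std::experimental::native_simd<double>;
//   std::experimental::fixed_size_simd<unsigned, V::size()> n = 2u, m = 1u;
//   V x = 0.5;
//   V r = assoc_legendre(n, m, x); // r[i] == std::assoc_legendre(2, 1, 0.5)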

_GLIBCXX_SIMD_MATH_CALL2_(beta, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_1)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_2)
_GLIBCXX_SIMD_MATH_CALL2_(comp_ellint_3, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_i, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_j, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_k, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_neumann, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_1, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_2, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(ellint_3, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(expint)

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  hermite(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
          const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
      [&](auto __i) { return std::hermite(__n[__i], __x[__i]); });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
           const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
      [&](auto __i) { return std::laguerre(__n[__i], __x[__i]); });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
           const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
      [&](auto __i) { return std::legendre(__n[__i], __x[__i]); });
  }

_GLIBCXX_SIMD_MATH_CALL_(riemann_zeta)

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sph_bessel(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
             const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
      [&](auto __i) { return std::sph_bessel(__n[__i], __x[__i]); });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sph_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __l,
               const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
               const simd<_Tp, _Abi>& __theta)
  {
    return simd<_Tp, _Abi>([&](auto __i) {
      return std::sph_legendre(__l[__i], __m[__i], __theta[__i]);
    });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sph_neumann(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
              const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
      [&](auto __i) { return std::sph_neumann(__n[__i], __x[__i]); });
  }
// }}}

#undef _GLIBCXX_SIMD_MATH_CALL_
#undef _GLIBCXX_SIMD_MATH_CALL2_
#undef _GLIBCXX_SIMD_MATH_CALL3_

_GLIBCXX_SIMD_END_NAMESPACE

#endif // __cplusplus >= 201703L
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_

// vim: foldmethod=marker sw=2 ts=8 noet sts=2