source: Daodan/MSYS2/mingw32/include/c++/11.2.0/experimental/bits/simd.h

1// Definition of the public simd interfaces -*- C++ -*-
2
3// Copyright (C) 2020-2021 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_H
26#define _GLIBCXX_EXPERIMENTAL_SIMD_H
27
28#if __cplusplus >= 201703L
29
30#include "simd_detail.h"
31#include "numeric_traits.h"
32#include <bit>
33#include <bitset>
34#ifdef _GLIBCXX_DEBUG_UB
35#include <cstdio> // for stderr
36#endif
37#include <cstring>
38#include <functional>
39#include <iosfwd>
40#include <utility>
41
42#if _GLIBCXX_SIMD_X86INTRIN
43#include <x86intrin.h>
44#elif _GLIBCXX_SIMD_HAVE_NEON
45#include <arm_neon.h>
46#endif
47
48/* There are several closely related types, with the following naming
49 * convention:
50 * _Tp: vectorizable (arithmetic) type (or any type)
51 * _TV: __vector_type_t<_Tp, _Np>
52 * _TW: _SimdWrapper<_Tp, _Np>
53 * _TI: __intrinsic_type_t<_Tp, _Np>
54 * _TVT: _VectorTraits<_TV> or _VectorTraits<_TW>
55 * If one additional type is needed use _U instead of _T.
56 * Otherwise use _T\d, _TV\d, _TW\d, _TI\d, _TVT\d.
57 *
58 * More naming conventions:
59 * _Ap or _Abi: An ABI tag from the simd_abi namespace
60 * _Ip: often used for integer types with sizeof(_Ip) == sizeof(_Tp),
61 * _IV, _IW as for _TV, _TW
62 * _Np: number of elements (not bytes)
63 * _Bytes: number of bytes
64 *
65 * Variable names:
66 * __k: mask object (vector- or bitmask)
67 */
68_GLIBCXX_SIMD_BEGIN_NAMESPACE
69
70#if !_GLIBCXX_SIMD_X86INTRIN
71using __m128 [[__gnu__::__vector_size__(16)]] = float;
72using __m128d [[__gnu__::__vector_size__(16)]] = double;
73using __m128i [[__gnu__::__vector_size__(16)]] = long long;
74using __m256 [[__gnu__::__vector_size__(32)]] = float;
75using __m256d [[__gnu__::__vector_size__(32)]] = double;
76using __m256i [[__gnu__::__vector_size__(32)]] = long long;
77using __m512 [[__gnu__::__vector_size__(64)]] = float;
78using __m512d [[__gnu__::__vector_size__(64)]] = double;
79using __m512i [[__gnu__::__vector_size__(64)]] = long long;
80#endif
81
82namespace simd_abi {
83// simd_abi forward declarations {{{
84// implementation details:
85struct _Scalar;
86
87template <int _Np>
88 struct _Fixed;
89
90// There are two major ABIs that appear on different architectures.
91// Both have non-boolean values packed into an N Byte register
92// -> #elements = N / sizeof(T)
93// Masks differ:
94// 1. Use value vector registers for masks (all 0 or all 1)
95// 2. Use bitmasks (mask registers) with one bit per value in the corresponding
96// value vector
97//
98// Both can be partially used, masking off the rest when doing horizontal
99// operations or operations that can trap (e.g. FP_INVALID or integer division
100// by 0). This is encoded as the number of used bytes.
101template <int _UsedBytes>
102 struct _VecBuiltin;
103
104template <int _UsedBytes>
105 struct _VecBltnBtmsk;
106
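// Illustration (hypothetical values): for a 4-element float simd with mask
// {true, false, true, true}, ABI 1 (_VecBuiltin) stores the mask as a value
// vector {-1, 0, -1, -1} (each 32-bit lane all-ones or all-zeros), while
// ABI 2 (_VecBltnBtmsk, e.g. AVX-512) stores the bitmask 0b1101, one bit per
// lane (lane 0 in the LSB).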
107template <typename _Tp, int _Np>
108 using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>;
109
110template <int _UsedBytes = 16>
111 using _Sse = _VecBuiltin<_UsedBytes>;
112
113template <int _UsedBytes = 32>
114 using _Avx = _VecBuiltin<_UsedBytes>;
115
116template <int _UsedBytes = 64>
117 using _Avx512 = _VecBltnBtmsk<_UsedBytes>;
118
119template <int _UsedBytes = 16>
120 using _Neon = _VecBuiltin<_UsedBytes>;
121
122// implementation-defined:
123using __sse = _Sse<>;
124using __avx = _Avx<>;
125using __avx512 = _Avx512<>;
126using __neon = _Neon<>;
127using __neon128 = _Neon<16>;
128using __neon64 = _Neon<8>;
129
130// standard:
131template <typename _Tp, size_t _Np, typename...>
132 struct deduce;
133
134template <int _Np>
135 using fixed_size = _Fixed<_Np>;
136
137using scalar = _Scalar;
138
139// }}}
140} // namespace simd_abi
141// forward declarations is_simd(_mask), simd(_mask), simd_size {{{
142template <typename _Tp>
143 struct is_simd;
144
145template <typename _Tp>
146 struct is_simd_mask;
147
148template <typename _Tp, typename _Abi>
149 class simd;
150
151template <typename _Tp, typename _Abi>
152 class simd_mask;
153
154template <typename _Tp, typename _Abi>
155 struct simd_size;
156
157// }}}
158// load/store flags {{{
159struct element_aligned_tag
160{
161 template <typename _Tp, typename _Up = typename _Tp::value_type>
162 static constexpr size_t _S_alignment = alignof(_Up);
163
164 template <typename _Tp, typename _Up>
165 _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
166 _S_apply(_Up* __ptr)
167 { return __ptr; }
168};
169
170struct vector_aligned_tag
171{
172 template <typename _Tp, typename _Up = typename _Tp::value_type>
173 static constexpr size_t _S_alignment
174 = std::__bit_ceil(sizeof(_Up) * _Tp::size());
175
176 template <typename _Tp, typename _Up>
177 _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
178 _S_apply(_Up* __ptr)
179 {
180 return static_cast<_Up*>(
181 __builtin_assume_aligned(__ptr, _S_alignment<_Tp, _Up>));
182 }
183};
184
185template <size_t _Np> struct overaligned_tag
186{
187 template <typename _Tp, typename _Up = typename _Tp::value_type>
188 static constexpr size_t _S_alignment = _Np;
189
190 template <typename _Tp, typename _Up>
191 _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
192 _S_apply(_Up* __ptr)
193 { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _Np)); }
194};
195
196inline constexpr element_aligned_tag element_aligned = {};
197
198inline constexpr vector_aligned_tag vector_aligned = {};
199
200template <size_t _Np>
201 inline constexpr overaligned_tag<_Np> overaligned = {};
202
203// }}}
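// Illustration (hypothetical usage; simd is only forward-declared here, and
// memory_alignment_v comes from the TS interface declared elsewhere):
//   float __mem[simd<float>::size()];
//   simd<float> __v(__mem, element_aligned); // needs alignof(float) only
//   alignas(memory_alignment_v<simd<float>>)
//     float __am[simd<float>::size()];
//   simd<float> __w(__am, vector_aligned);   // full-vector alignment
//   __w.copy_to(__am, overaligned<16>);      // caller promises 16 bytes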
204template <size_t _Xp>
205 using _SizeConstant = integral_constant<size_t, _Xp>;
206
207namespace __detail
208{
209 struct _Minimum
210 {
211 template <typename _Tp>
212 _GLIBCXX_SIMD_INTRINSIC constexpr
213 _Tp
214 operator()(_Tp __a, _Tp __b) const
215 {
216 using std::min;
217 return min(__a, __b);
218 }
219 };
220
221 struct _Maximum
222 {
223 template <typename _Tp>
224 _GLIBCXX_SIMD_INTRINSIC constexpr
225 _Tp
226 operator()(_Tp __a, _Tp __b) const
227 {
228 using std::max;
229 return max(__a, __b);
230 }
231 };
232} // namespace __detail
233
234// unrolled/pack execution helpers
235// __execute_n_times{{{
236template <typename _Fp, size_t... _I>
237 _GLIBCXX_SIMD_INTRINSIC constexpr void
238 __execute_on_index_sequence(_Fp&& __f, index_sequence<_I...>)
239 { ((void)__f(_SizeConstant<_I>()), ...); }
240
241template <typename _Fp>
242 _GLIBCXX_SIMD_INTRINSIC constexpr void
243 __execute_on_index_sequence(_Fp&&, index_sequence<>)
244 { }
245
246template <size_t _Np, typename _Fp>
247 _GLIBCXX_SIMD_INTRINSIC constexpr void
248 __execute_n_times(_Fp&& __f)
249 {
250 __execute_on_index_sequence(static_cast<_Fp&&>(__f),
251 make_index_sequence<_Np>{});
252 }
253
254// }}}
255// __generate_from_n_evaluations{{{
256template <typename _R, typename _Fp, size_t... _I>
257 _GLIBCXX_SIMD_INTRINSIC constexpr _R
258 __execute_on_index_sequence_with_return(_Fp&& __f, index_sequence<_I...>)
259 { return _R{__f(_SizeConstant<_I>())...}; }
260
261template <size_t _Np, typename _R, typename _Fp>
262 _GLIBCXX_SIMD_INTRINSIC constexpr _R
263 __generate_from_n_evaluations(_Fp&& __f)
264 {
265 return __execute_on_index_sequence_with_return<_R>(
266 static_cast<_Fp&&>(__f), make_index_sequence<_Np>{});
267 }
268
269// }}}
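// Illustration (hypothetical usage of the two helpers above):
//   int __sum = 0;
//   __execute_n_times<4>([&](auto __i) { __sum += __i; }); // __sum == 6
//   struct _Two { int _M_a, _M_b; };                       // hypothetical
//   auto __t = __generate_from_n_evaluations<2, _Two>(
//     [](auto __i) { return int(__i) * 10; });             // {0, 10}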
270// __call_with_n_evaluations{{{
271template <size_t... _I, typename _F0, typename _FArgs>
272 _GLIBCXX_SIMD_INTRINSIC constexpr auto
273 __call_with_n_evaluations(index_sequence<_I...>, _F0&& __f0, _FArgs&& __fargs)
274 { return __f0(__fargs(_SizeConstant<_I>())...); }
275
276template <size_t _Np, typename _F0, typename _FArgs>
277 _GLIBCXX_SIMD_INTRINSIC constexpr auto
278 __call_with_n_evaluations(_F0&& __f0, _FArgs&& __fargs)
279 {
280 return __call_with_n_evaluations(make_index_sequence<_Np>{},
281 static_cast<_F0&&>(__f0),
282 static_cast<_FArgs&&>(__fargs));
283 }
284
285// }}}
286// __call_with_subscripts{{{
287template <size_t _First = 0, size_t... _It, typename _Tp, typename _Fp>
288 _GLIBCXX_SIMD_INTRINSIC constexpr auto
289 __call_with_subscripts(_Tp&& __x, index_sequence<_It...>, _Fp&& __fun)
290 { return __fun(__x[_First + _It]...); }
291
292template <size_t _Np, size_t _First = 0, typename _Tp, typename _Fp>
293 _GLIBCXX_SIMD_INTRINSIC constexpr auto
294 __call_with_subscripts(_Tp&& __x, _Fp&& __fun)
295 {
296 return __call_with_subscripts<_First>(static_cast<_Tp&&>(__x),
297 make_index_sequence<_Np>(),
298 static_cast<_Fp&&>(__fun));
299 }
300
301// }}}
302
303// vvv ---- type traits ---- vvv
304// integer type aliases{{{
305using _UChar = unsigned char;
306using _SChar = signed char;
307using _UShort = unsigned short;
308using _UInt = unsigned int;
309using _ULong = unsigned long;
310using _ULLong = unsigned long long;
311using _LLong = long long;
312
313//}}}
314// __first_of_pack{{{
315template <typename _T0, typename...>
316 struct __first_of_pack
317 { using type = _T0; };
318
319template <typename... _Ts>
320 using __first_of_pack_t = typename __first_of_pack<_Ts...>::type;
321
322//}}}
323// __value_type_or_identity_t {{{
324template <typename _Tp>
325 typename _Tp::value_type
326 __value_type_or_identity_impl(int);
327
328template <typename _Tp>
329 _Tp
330 __value_type_or_identity_impl(float);
331
332template <typename _Tp>
333 using __value_type_or_identity_t
334 = decltype(__value_type_or_identity_impl<_Tp>(int()));
335
336// }}}
337// __is_vectorizable {{{
338template <typename _Tp>
339 struct __is_vectorizable : public is_arithmetic<_Tp> {};
340
341template <>
342 struct __is_vectorizable<bool> : public false_type {};
343
344template <typename _Tp>
345 inline constexpr bool __is_vectorizable_v = __is_vectorizable<_Tp>::value;
346
347// Deduces to a vectorizable type
348template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
349 using _Vectorizable = _Tp;
350
351// }}}
352// _LoadStorePtr / __is_possible_loadstore_conversion {{{
353template <typename _Ptr, typename _ValueType>
354 struct __is_possible_loadstore_conversion
355 : conjunction<__is_vectorizable<_Ptr>, __is_vectorizable<_ValueType>> {};
356
357template <>
358 struct __is_possible_loadstore_conversion<bool, bool> : true_type {};
359
360// Deduces to a type allowed for load/store with the given value type.
361template <typename _Ptr, typename _ValueType,
362 typename = enable_if_t<
363 __is_possible_loadstore_conversion<_Ptr, _ValueType>::value>>
364 using _LoadStorePtr = _Ptr;
365
366// }}}
367// __is_bitmask{{{
368template <typename _Tp, typename = void_t<>>
369 struct __is_bitmask : false_type {};
370
371template <typename _Tp>
372 inline constexpr bool __is_bitmask_v = __is_bitmask<_Tp>::value;
373
374// the __mmaskXX case:
375template <typename _Tp>
376 struct __is_bitmask<_Tp,
377 void_t<decltype(declval<unsigned&>() = declval<_Tp>() & 1u)>>
378 : true_type {};
379
380// }}}
381// __int_for_sizeof{{{
382#pragma GCC diagnostic push
383#pragma GCC diagnostic ignored "-Wpedantic"
384template <size_t _Bytes>
385 constexpr auto
386 __int_for_sizeof()
387 {
388 if constexpr (_Bytes == sizeof(int))
389 return int();
390 #ifdef __clang__
391 else if constexpr (_Bytes == sizeof(char))
392 return char();
393 #else
394 else if constexpr (_Bytes == sizeof(_SChar))
395 return _SChar();
396 #endif
397 else if constexpr (_Bytes == sizeof(short))
398 return short();
399 #ifndef __clang__
400 else if constexpr (_Bytes == sizeof(long))
401 return long();
402 #endif
403 else if constexpr (_Bytes == sizeof(_LLong))
404 return _LLong();
405 #ifdef __SIZEOF_INT128__
406 else if constexpr (_Bytes == sizeof(__int128))
407 return __int128();
408 #endif // __SIZEOF_INT128__
409 else if constexpr (_Bytes % sizeof(int) == 0)
410 {
411 constexpr size_t _Np = _Bytes / sizeof(int);
412 struct _Ip
413 {
414 int _M_data[_Np];
415
416 _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
417 operator&(_Ip __rhs) const
418 {
419 return __generate_from_n_evaluations<_Np, _Ip>(
420 [&](auto __i) { return __rhs._M_data[__i] & _M_data[__i]; });
421 }
422
423 _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
424 operator|(_Ip __rhs) const
425 {
426 return __generate_from_n_evaluations<_Np, _Ip>(
427 [&](auto __i) { return __rhs._M_data[__i] | _M_data[__i]; });
428 }
429
430 _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
431 operator^(_Ip __rhs) const
432 {
433 return __generate_from_n_evaluations<_Np, _Ip>(
434 [&](auto __i) { return __rhs._M_data[__i] ^ _M_data[__i]; });
435 }
436
437 _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
438 operator~() const
439 {
440 return __generate_from_n_evaluations<_Np, _Ip>(
441 [&](auto __i) { return ~_M_data[__i]; });
442 }
443 };
444 return _Ip{};
445 }
446 else
447 static_assert(_Bytes != _Bytes, "this should be unreachable");
448 }
449#pragma GCC diagnostic pop
450
451template <typename _Tp>
452 using __int_for_sizeof_t = decltype(__int_for_sizeof<sizeof(_Tp)>());
453
454template <size_t _Np>
455 using __int_with_sizeof_t = decltype(__int_for_sizeof<_Np>());
456
457// }}}
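// Illustration: spot checks of the mapping above. The 16-byte case is
// __int128 where available, otherwise the generated struct of four ints.
static_assert(sizeof(__int_for_sizeof_t<double>) == sizeof(double));
static_assert(sizeof(__int_with_sizeof_t<16>) == 16);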
458// __is_fixed_size_abi{{{
459template <typename _Tp>
460 struct __is_fixed_size_abi : false_type {};
461
462template <int _Np>
463 struct __is_fixed_size_abi<simd_abi::fixed_size<_Np>> : true_type {};
464
465template <typename _Tp>
466 inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value;
467
468// }}}
469// constexpr feature detection{{{
470constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX;
471constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE;
472constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2;
473constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3;
474constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3;
475constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1;
476constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2;
477constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP;
478constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX;
479constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2;
480constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1;
481constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2;
482constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT;
483constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A;
484constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA;
485constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4;
486constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C;
487constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT;
488constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F;
489constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ;
490constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL;
491constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW;
492constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl;
493constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl;
494
495constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON;
496constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32;
497constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64;
498constexpr inline bool __support_neon_float =
499#if defined __GCC_IEC_559
500 __GCC_IEC_559 == 0;
501#elif defined __FAST_MATH__
502 true;
503#else
504 false;
505#endif
506
507#ifdef _ARCH_PWR10
508constexpr inline bool __have_power10vec = true;
509#else
510constexpr inline bool __have_power10vec = false;
511#endif
512#ifdef __POWER9_VECTOR__
513constexpr inline bool __have_power9vec = true;
514#else
515constexpr inline bool __have_power9vec = false;
516#endif
517#if defined __POWER8_VECTOR__
518constexpr inline bool __have_power8vec = true;
519#else
520constexpr inline bool __have_power8vec = __have_power9vec;
521#endif
522#if defined __VSX__
523constexpr inline bool __have_power_vsx = true;
524#else
525constexpr inline bool __have_power_vsx = __have_power8vec;
526#endif
527#if defined __ALTIVEC__
528constexpr inline bool __have_power_vmx = true;
529#else
530constexpr inline bool __have_power_vmx = __have_power_vsx;
531#endif
532
533// }}}
534// __is_scalar_abi {{{
535template <typename _Abi>
536 constexpr bool
537 __is_scalar_abi()
538 { return is_same_v<simd_abi::scalar, _Abi>; }
539
540// }}}
541// __abi_bytes_v {{{
542template <template <int> class _Abi, int _Bytes>
543 constexpr int
544 __abi_bytes_impl(_Abi<_Bytes>*)
545 { return _Bytes; }
546
547template <typename _Tp>
548 constexpr int
549 __abi_bytes_impl(_Tp*)
550 { return -1; }
551
552template <typename _Abi>
553 inline constexpr int __abi_bytes_v
554 = __abi_bytes_impl(static_cast<_Abi*>(nullptr));
555
556// }}}
557// __is_builtin_bitmask_abi {{{
558template <typename _Abi>
559 constexpr bool
560 __is_builtin_bitmask_abi()
561 { return is_same_v<simd_abi::_VecBltnBtmsk<__abi_bytes_v<_Abi>>, _Abi>; }
562
563// }}}
564// __is_sse_abi {{{
565template <typename _Abi>
566 constexpr bool
567 __is_sse_abi()
568 {
569 constexpr auto _Bytes = __abi_bytes_v<_Abi>;
570 return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
571 }
572
573// }}}
574// __is_avx_abi {{{
575template <typename _Abi>
576 constexpr bool
577 __is_avx_abi()
578 {
579 constexpr auto _Bytes = __abi_bytes_v<_Abi>;
580 return _Bytes > 16 && _Bytes <= 32
581 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
582 }
583
584// }}}
585// __is_avx512_abi {{{
586template <typename _Abi>
587 constexpr bool
588 __is_avx512_abi()
589 {
590 constexpr auto _Bytes = __abi_bytes_v<_Abi>;
591 return _Bytes <= 64 && is_same_v<simd_abi::_Avx512<_Bytes>, _Abi>;
592 }
593
594// }}}
595// __is_neon_abi {{{
596template <typename _Abi>
597 constexpr bool
598 __is_neon_abi()
599 {
600 constexpr auto _Bytes = __abi_bytes_v<_Abi>;
601 return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
602 }
603
604// }}}
605// __make_dependent_t {{{
606template <typename, typename _Up>
607 struct __make_dependent
608 { using type = _Up; };
609
610template <typename _Tp, typename _Up>
611 using __make_dependent_t = typename __make_dependent<_Tp, _Up>::type;
612
613// }}}
614// ^^^ ---- type traits ---- ^^^
615
616// __invoke_ub{{{
617template <typename... _Args>
618 [[noreturn]] _GLIBCXX_SIMD_ALWAYS_INLINE void
619 __invoke_ub([[maybe_unused]] const char* __msg,
620 [[maybe_unused]] const _Args&... __args)
621 {
622#ifdef _GLIBCXX_DEBUG_UB
623 __builtin_fprintf(stderr, __msg, __args...);
624 __builtin_trap();
625#else
626 __builtin_unreachable();
627#endif
628 }
629
630// }}}
631// __assert_unreachable{{{
632template <typename _Tp>
633 struct __assert_unreachable
634 { static_assert(!is_same_v<_Tp, _Tp>, "this should be unreachable"); };
635
636// }}}
637// __size_or_zero_v {{{
638template <typename _Tp, typename _Ap, size_t _Np = simd_size<_Tp, _Ap>::value>
639 constexpr size_t
640 __size_or_zero_dispatch(int)
641 { return _Np; }
642
643template <typename _Tp, typename _Ap>
644 constexpr size_t
645 __size_or_zero_dispatch(float)
646 { return 0; }
647
648template <typename _Tp, typename _Ap>
649 inline constexpr size_t __size_or_zero_v
650 = __size_or_zero_dispatch<_Tp, _Ap>(0);
651
652// }}}
653// __div_roundup {{{
654inline constexpr size_t
655__div_roundup(size_t __a, size_t __b)
656{ return (__a + __b - 1) / __b; }
657
658// }}}
659// _ExactBool{{{
660class _ExactBool
661{
662 const bool _M_data;
663
664public:
665 _GLIBCXX_SIMD_INTRINSIC constexpr _ExactBool(bool __b) : _M_data(__b) {}
666
667 _ExactBool(int) = delete;
668
669 _GLIBCXX_SIMD_INTRINSIC constexpr operator bool() const { return _M_data; }
670};
671
672// }}}
673// __may_alias{{{
674/**@internal
675 * Helper __may_alias<_Tp> that turns _Tp into the type to be used for an
676 * aliasing pointer. This adds the __may_alias attribute to _Tp (with compilers
677 * that support it).
678 */
679template <typename _Tp>
680 using __may_alias [[__gnu__::__may_alias__]] = _Tp;
681
682// }}}
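// Illustration (hypothetical usage): load a whole vector through a pointer to
// its element type without violating the strict aliasing rules:
//   const float* __mem = /* ... */;
//   using _FloatV = __vector_type_t<float, 4>; // defined further down
//   _FloatV __v = *reinterpret_cast<const __may_alias<_FloatV>*>(__mem);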
683// _UnsupportedBase {{{
684// simd and simd_mask base for unsupported <_Tp, _Abi>
685struct _UnsupportedBase
686{
687 _UnsupportedBase() = delete;
688 _UnsupportedBase(const _UnsupportedBase&) = delete;
689 _UnsupportedBase& operator=(const _UnsupportedBase&) = delete;
690 ~_UnsupportedBase() = delete;
691};
692
693// }}}
694// _InvalidTraits {{{
695/**
696 * @internal
697 * Defines the implementation of a given <_Tp, _Abi>.
698 *
699 * Implementations must ensure that only valid <_Tp, _Abi> instantiations are
700 * possible. Static assertions in the type definition do not suffice. It is
701 * important that SFINAE works.
702 */
703struct _InvalidTraits
704{
705 using _IsValid = false_type;
706 using _SimdBase = _UnsupportedBase;
707 using _MaskBase = _UnsupportedBase;
708
709 static constexpr size_t _S_full_size = 0;
710 static constexpr bool _S_is_partial = false;
711
712 static constexpr size_t _S_simd_align = 1;
713 struct _SimdImpl;
714 struct _SimdMember {};
715 struct _SimdCastType;
716
717 static constexpr size_t _S_mask_align = 1;
718 struct _MaskImpl;
719 struct _MaskMember {};
720 struct _MaskCastType;
721};
722
723// }}}
724// _SimdTraits {{{
725template <typename _Tp, typename _Abi, typename = void_t<>>
726 struct _SimdTraits : _InvalidTraits {};
727
728// }}}
729// __private_init, __bitset_init{{{
730/**
731 * @internal
732 * Tag used for private init constructor of simd and simd_mask
733 */
734inline constexpr struct _PrivateInit {} __private_init = {};
735
736inline constexpr struct _BitsetInit {} __bitset_init = {};
737
738// }}}
739// __is_narrowing_conversion<_From, _To>{{{
740template <typename _From, typename _To, bool = is_arithmetic_v<_From>,
741 bool = is_arithmetic_v<_To>>
742 struct __is_narrowing_conversion;
743
744// ignore "signed/unsigned mismatch" in the following trait.
745// The implicit conversions will do the right thing here.
746template <typename _From, typename _To>
747 struct __is_narrowing_conversion<_From, _To, true, true>
748 : public __bool_constant<(
749 __digits_v<_From> > __digits_v<_To>
750 || __finite_max_v<_From> > __finite_max_v<_To>
751 || __finite_min_v<_From> < __finite_min_v<_To>
752 || (is_signed_v<_From> && is_unsigned_v<_To>))> {};
753
754template <typename _Tp>
755 struct __is_narrowing_conversion<_Tp, bool, true, true>
756 : public true_type {};
757
758template <>
759 struct __is_narrowing_conversion<bool, bool, true, true>
760 : public false_type {};
761
762template <typename _Tp>
763 struct __is_narrowing_conversion<_Tp, _Tp, true, true>
764 : public false_type {};
765
766template <typename _From, typename _To>
767 struct __is_narrowing_conversion<_From, _To, false, true>
768 : public negation<is_convertible<_From, _To>> {};
769
770// }}}
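// Illustration: spot checks of the trait above.
static_assert(__is_narrowing_conversion<int, short>::value);  // fewer digits
static_assert(!__is_narrowing_conversion<short, int>::value);
static_assert(__is_narrowing_conversion<int, _UInt>::value);  // signed -> unsigned
static_assert(!__is_narrowing_conversion<bool, bool>::value);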
771// __converts_to_higher_integer_rank{{{
772template <typename _From, typename _To, bool = (sizeof(_From) < sizeof(_To))>
773 struct __converts_to_higher_integer_rank : public true_type {};
774
775// this may fail for char -> short if sizeof(char) == sizeof(short)
776template <typename _From, typename _To>
777 struct __converts_to_higher_integer_rank<_From, _To, false>
778 : public is_same<decltype(declval<_From>() + declval<_To>()), _To> {};
779
780// }}}
781// __data(simd/simd_mask) {{{
782template <typename _Tp, typename _Ap>
783 _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
784 __data(const simd<_Tp, _Ap>& __x);
785
786template <typename _Tp, typename _Ap>
787 _GLIBCXX_SIMD_INTRINSIC constexpr auto&
788 __data(simd<_Tp, _Ap>& __x);
789
790template <typename _Tp, typename _Ap>
791 _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
792 __data(const simd_mask<_Tp, _Ap>& __x);
793
794template <typename _Tp, typename _Ap>
795 _GLIBCXX_SIMD_INTRINSIC constexpr auto&
796 __data(simd_mask<_Tp, _Ap>& __x);
797
798// }}}
799// _SimdConverter {{{
800template <typename _FromT, typename _FromA, typename _ToT, typename _ToA,
801 typename = void>
802 struct _SimdConverter;
803
804template <typename _Tp, typename _Ap>
805 struct _SimdConverter<_Tp, _Ap, _Tp, _Ap, void>
806 {
807 template <typename _Up>
808 _GLIBCXX_SIMD_INTRINSIC const _Up&
809 operator()(const _Up& __x)
810 { return __x; }
811 };
812
813// }}}
814// __to_value_type_or_member_type {{{
815template <typename _V>
816 _GLIBCXX_SIMD_INTRINSIC constexpr auto
817 __to_value_type_or_member_type(const _V& __x) -> decltype(__data(__x))
818 { return __data(__x); }
819
820template <typename _V>
821 _GLIBCXX_SIMD_INTRINSIC constexpr const typename _V::value_type&
822 __to_value_type_or_member_type(const typename _V::value_type& __x)
823 { return __x; }
824
825// }}}
826// __bool_storage_member_type{{{
827template <size_t _Size>
828 struct __bool_storage_member_type;
829
830template <size_t _Size>
831 using __bool_storage_member_type_t =
832 typename __bool_storage_member_type<_Size>::type;
833
834// }}}
835// _SimdTuple {{{
836// why not tuple?
837// 1. tuple gives no guarantee about the storage order, but I require
838//    storage equivalent to array<_Tp, _Np>
840// 2. direct access to the element type (first template argument)
841// 3. enforces equal element type, only different _Abi types are allowed
842template <typename _Tp, typename... _Abis>
843 struct _SimdTuple;
844
845//}}}
846// __fixed_size_storage_t {{{
847template <typename _Tp, int _Np>
848 struct __fixed_size_storage;
849
850template <typename _Tp, int _Np>
851 using __fixed_size_storage_t = typename __fixed_size_storage<_Tp, _Np>::type;
852
853// }}}
854// _SimdWrapper fwd decl{{{
855template <typename _Tp, size_t _Size, typename = void_t<>>
856 struct _SimdWrapper;
857
858template <typename _Tp>
859 using _SimdWrapper8 = _SimdWrapper<_Tp, 8 / sizeof(_Tp)>;
860template <typename _Tp>
861 using _SimdWrapper16 = _SimdWrapper<_Tp, 16 / sizeof(_Tp)>;
862template <typename _Tp>
863 using _SimdWrapper32 = _SimdWrapper<_Tp, 32 / sizeof(_Tp)>;
864template <typename _Tp>
865 using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>;
866
867// }}}
868// __is_simd_wrapper {{{
869template <typename _Tp>
870 struct __is_simd_wrapper : false_type {};
871
872template <typename _Tp, size_t _Np>
873 struct __is_simd_wrapper<_SimdWrapper<_Tp, _Np>> : true_type {};
874
875template <typename _Tp>
876 inline constexpr bool __is_simd_wrapper_v = __is_simd_wrapper<_Tp>::value;
877
878// }}}
879// _BitOps {{{
880struct _BitOps
881{
882 // _S_bit_iteration {{{
883 template <typename _Tp, typename _Fp>
884 static void
885 _S_bit_iteration(_Tp __mask, _Fp&& __f)
886 {
887 static_assert(sizeof(_ULLong) >= sizeof(_Tp));
888 conditional_t<sizeof(_Tp) <= sizeof(_UInt), _UInt, _ULLong> __k;
889 if constexpr (is_convertible_v<_Tp, decltype(__k)>)
890 __k = __mask;
891 else
892 __k = __mask.to_ullong();
893 while(__k)
894 {
895 __f(std::__countr_zero(__k));
896 __k &= (__k - 1);
897 }
898 }
899
900 //}}}
901};
902
903//}}}
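// Illustration (hypothetical usage): _S_bit_iteration invokes the callback
// with the index of every set bit, least significant first:
//   _BitOps::_S_bit_iteration(0b1010u, [](auto __bit) {
//     // called with __bit == 1, then __bit == 3
//   });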
904// __increment, __decrement {{{
905template <typename _Tp = void>
906 struct __increment
907 { constexpr _Tp operator()(_Tp __a) const { return ++__a; } };
908
909template <>
910 struct __increment<void>
911 {
912 template <typename _Tp>
913 constexpr _Tp
914 operator()(_Tp __a) const
915 { return ++__a; }
916 };
917
918template <typename _Tp = void>
919 struct __decrement
920 { constexpr _Tp operator()(_Tp __a) const { return --__a; } };
921
922template <>
923 struct __decrement<void>
924 {
925 template <typename _Tp>
926 constexpr _Tp
927 operator()(_Tp __a) const
928 { return --__a; }
929 };
930
931// }}}
932// _ValuePreserving(OrInt) {{{
933template <typename _From, typename _To,
934 typename = enable_if_t<negation<
935 __is_narrowing_conversion<__remove_cvref_t<_From>, _To>>::value>>
936 using _ValuePreserving = _From;
937
938template <typename _From, typename _To,
939 typename _DecayedFrom = __remove_cvref_t<_From>,
940 typename = enable_if_t<conjunction<
941 is_convertible<_From, _To>,
942 disjunction<
943 is_same<_DecayedFrom, _To>, is_same<_DecayedFrom, int>,
944 conjunction<is_same<_DecayedFrom, _UInt>, is_unsigned<_To>>,
945 negation<__is_narrowing_conversion<_DecayedFrom, _To>>>>::value>>
946 using _ValuePreservingOrInt = _From;
947
948// }}}
949// __intrinsic_type {{{
950template <typename _Tp, size_t _Bytes, typename = void_t<>>
951 struct __intrinsic_type;
952
953template <typename _Tp, size_t _Size>
954 using __intrinsic_type_t =
955 typename __intrinsic_type<_Tp, _Size * sizeof(_Tp)>::type;
956
957template <typename _Tp>
958 using __intrinsic_type2_t = typename __intrinsic_type<_Tp, 2>::type;
959template <typename _Tp>
960 using __intrinsic_type4_t = typename __intrinsic_type<_Tp, 4>::type;
961template <typename _Tp>
962 using __intrinsic_type8_t = typename __intrinsic_type<_Tp, 8>::type;
963template <typename _Tp>
964 using __intrinsic_type16_t = typename __intrinsic_type<_Tp, 16>::type;
965template <typename _Tp>
966 using __intrinsic_type32_t = typename __intrinsic_type<_Tp, 32>::type;
967template <typename _Tp>
968 using __intrinsic_type64_t = typename __intrinsic_type<_Tp, 64>::type;
969
970// }}}
971// _BitMask {{{
972template <size_t _Np, bool _Sanitized = false>
973 struct _BitMask;
974
975template <size_t _Np, bool _Sanitized>
976 struct __is_bitmask<_BitMask<_Np, _Sanitized>, void> : true_type {};
977
978template <size_t _Np>
979 using _SanitizedBitMask = _BitMask<_Np, true>;
980
981template <size_t _Np, bool _Sanitized>
982 struct _BitMask
983 {
984 static_assert(_Np > 0);
985
986 static constexpr size_t _NBytes = __div_roundup(_Np, __CHAR_BIT__);
987
988 using _Tp = conditional_t<_Np == 1, bool,
989 make_unsigned_t<__int_with_sizeof_t<std::min(
990 sizeof(_ULLong), std::__bit_ceil(_NBytes))>>>;
991
992 static constexpr int _S_array_size = __div_roundup(_NBytes, sizeof(_Tp));
993
994 _Tp _M_bits[_S_array_size];
995
996 static constexpr int _S_unused_bits
997 = _Np == 1 ? 0 : _S_array_size * sizeof(_Tp) * __CHAR_BIT__ - _Np;
998
999 static constexpr _Tp _S_bitmask = +_Tp(~_Tp()) >> _S_unused_bits;
1000
1001 constexpr _BitMask() noexcept = default;
1002
1003 constexpr _BitMask(unsigned long long __x) noexcept
1004 : _M_bits{static_cast<_Tp>(__x)} {}
1005
1006 _BitMask(bitset<_Np> __x) noexcept : _BitMask(__x.to_ullong()) {}
1007
1008 constexpr _BitMask(const _BitMask&) noexcept = default;
1009
1010 template <bool _RhsSanitized, typename = enable_if_t<_RhsSanitized == false
1011 && _Sanitized == true>>
1012 constexpr _BitMask(const _BitMask<_Np, _RhsSanitized>& __rhs) noexcept
1013 : _BitMask(__rhs._M_sanitized()) {}
1014
1015 constexpr operator _SimdWrapper<bool, _Np>() const noexcept
1016 {
1017 static_assert(_S_array_size == 1);
1018 return _M_bits[0];
1019 }
1020
1021 // precondition: is sanitized
1022 constexpr _Tp
1023 _M_to_bits() const noexcept
1024 {
1025 static_assert(_S_array_size == 1);
1026 return _M_bits[0];
1027 }
1028
1029 // precondition: is sanitized
1030 constexpr unsigned long long
1031 to_ullong() const noexcept
1032 {
1033 static_assert(_S_array_size == 1);
1034 return _M_bits[0];
1035 }
1036
1037 // precondition: is sanitized
1038 constexpr unsigned long
1039 to_ulong() const noexcept
1040 {
1041 static_assert(_S_array_size == 1);
1042 return _M_bits[0];
1043 }
1044
1045 constexpr bitset<_Np>
1046 _M_to_bitset() const noexcept
1047 {
1048 static_assert(_S_array_size == 1);
1049 return _M_bits[0];
1050 }
1051
1052 constexpr decltype(auto)
1053 _M_sanitized() const noexcept
1054 {
1055 if constexpr (_Sanitized)
1056 return *this;
1057 else if constexpr (_Np == 1)
1058 return _SanitizedBitMask<_Np>(_M_bits[0]);
1059 else
1060 {
1061 _SanitizedBitMask<_Np> __r = {};
1062 for (int __i = 0; __i < _S_array_size; ++__i)
1063 __r._M_bits[__i] = _M_bits[__i];
1064 if constexpr (_S_unused_bits > 0)
1065 __r._M_bits[_S_array_size - 1] &= _S_bitmask;
1066 return __r;
1067 }
1068 }
1069
1070 template <size_t _Mp, bool _LSanitized>
1071 constexpr _BitMask<_Np + _Mp, _Sanitized>
1072 _M_prepend(_BitMask<_Mp, _LSanitized> __lsb) const noexcept
1073 {
1074 constexpr size_t _RN = _Np + _Mp;
1075 using _Rp = _BitMask<_RN, _Sanitized>;
1076 if constexpr (_Rp::_S_array_size == 1)
1077 {
1078 _Rp __r{{_M_bits[0]}};
1079 __r._M_bits[0] <<= _Mp;
1080 __r._M_bits[0] |= __lsb._M_sanitized()._M_bits[0];
1081 return __r;
1082 }
1083 else
1084 __assert_unreachable<_Rp>();
1085 }
1086
1087 // Return a new _BitMask with size _NewSize while dropping _DropLsb least
1088 // significant bits. If the operation implicitly produces a sanitized bitmask,
1089 // the result type will have _Sanitized set.
1090 template <size_t _DropLsb, size_t _NewSize = _Np - _DropLsb>
1091 constexpr auto
1092 _M_extract() const noexcept
1093 {
1094 static_assert(_Np > _DropLsb);
1095 static_assert(_DropLsb + _NewSize <= sizeof(_ULLong) * __CHAR_BIT__,
1096 "not implemented for bitmasks larger than one ullong");
1097 if constexpr (_NewSize == 1)
1098 // must sanitize because the return _Tp is bool
1099 return _SanitizedBitMask<1>(_M_bits[0] & (_Tp(1) << _DropLsb));
1100 else
1101 return _BitMask<_NewSize,
1102 ((_NewSize + _DropLsb == sizeof(_Tp) * __CHAR_BIT__
1103 && _NewSize + _DropLsb <= _Np)
1104 || ((_Sanitized || _Np == sizeof(_Tp) * __CHAR_BIT__)
1105 && _NewSize + _DropLsb >= _Np))>(_M_bits[0]
1106 >> _DropLsb);
1107 }
1108
1109 // True if all bits are set. Implicitly sanitizes if _Sanitized == false.
1110 constexpr bool
1111 all() const noexcept
1112 {
1113 if constexpr (_Np == 1)
1114 return _M_bits[0];
1115 else if constexpr (!_Sanitized)
1116 return _M_sanitized().all();
1117 else
1118 {
1119 constexpr _Tp __allbits = ~_Tp();
1120 for (int __i = 0; __i < _S_array_size - 1; ++__i)
1121 if (_M_bits[__i] != __allbits)
1122 return false;
1123 return _M_bits[_S_array_size - 1] == _S_bitmask;
1124 }
1125 }
1126
1127 // True if at least one bit is set. Implicitly sanitizes if _Sanitized ==
1128 // false.
1129 constexpr bool
1130 any() const noexcept
1131 {
1132 if constexpr (_Np == 1)
1133 return _M_bits[0];
1134 else if constexpr (!_Sanitized)
1135 return _M_sanitized().any();
1136 else
1137 {
1138 for (int __i = 0; __i < _S_array_size - 1; ++__i)
1139 if (_M_bits[__i] != 0)
1140 return true;
1141 return _M_bits[_S_array_size - 1] != 0;
1142 }
1143 }
1144
1145 // True if no bit is set. Implicitly sanitizes if _Sanitized == false.
1146 constexpr bool
1147 none() const noexcept
1148 {
1149 if constexpr (_Np == 1)
1150 return !_M_bits[0];
1151 else if constexpr (!_Sanitized)
1152 return _M_sanitized().none();
1153 else
1154 {
1155 for (int __i = 0; __i < _S_array_size - 1; ++__i)
1156 if (_M_bits[__i] != 0)
1157 return false;
1158 return _M_bits[_S_array_size - 1] == 0;
1159 }
1160 }
1161
1162 // Returns the number of set bits. Implicitly sanitizes if _Sanitized ==
1163 // false.
1164 constexpr int
1165 count() const noexcept
1166 {
1167 if constexpr (_Np == 1)
1168 return _M_bits[0];
1169 else if constexpr (!_Sanitized)
1170 return _M_sanitized().count();
1171 else
1172 {
1173 int __result = __builtin_popcountll(_M_bits[0]);
1174 for (int __i = 1; __i < _S_array_size; ++__i)
1175 __result += __builtin_popcountll(_M_bits[__i]);
1176 return __result;
1177 }
1178 }
1179
1180 // Returns the bit at offset __i as bool.
1181 constexpr bool
1182 operator[](size_t __i) const noexcept
1183 {
1184 if constexpr (_Np == 1)
1185 return _M_bits[0];
1186 else if constexpr (_S_array_size == 1)
1187 return (_M_bits[0] >> __i) & 1;
1188 else
1189 {
1190 const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1191 const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1192 return (_M_bits[__j] >> __shift) & 1;
1193 }
1194 }
1195
1196 template <size_t __i>
1197 constexpr bool
1198 operator[](_SizeConstant<__i>) const noexcept
1199 {
1200 static_assert(__i < _Np);
1201 constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1202 constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1203 return static_cast<bool>(_M_bits[__j] & (_Tp(1) << __shift));
1204 }
1205
1206 // Set the bit at offset __i to __x.
1207 constexpr void
1208 set(size_t __i, bool __x) noexcept
1209 {
1210 if constexpr (_Np == 1)
1211 _M_bits[0] = __x;
1212 else if constexpr (_S_array_size == 1)
1213 {
1214 _M_bits[0] &= ~_Tp(_Tp(1) << __i);
1215 _M_bits[0] |= _Tp(_Tp(__x) << __i);
1216 }
1217 else
1218 {
1219 const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1220 const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1221 _M_bits[__j] &= ~_Tp(_Tp(1) << __shift);
1222 _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
1223 }
1224 }
1225
1226 template <size_t __i>
1227 constexpr void
1228 set(_SizeConstant<__i>, bool __x) noexcept
1229 {
1230 static_assert(__i < _Np);
1231 if constexpr (_Np == 1)
1232 _M_bits[0] = __x;
1233 else
1234 {
1235 constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1236 constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1237 constexpr _Tp __mask = ~_Tp(_Tp(1) << __shift);
1238 _M_bits[__j] &= __mask;
1239 _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
1240 }
1241 }
1242
1243 // Inverts all bits. Sanitized input leads to sanitized output.
1244 constexpr _BitMask
1245 operator~() const noexcept
1246 {
1247 if constexpr (_Np == 1)
1248 return !_M_bits[0];
1249 else
1250 {
1251 _BitMask __result{};
1252 for (int __i = 0; __i < _S_array_size - 1; ++__i)
1253 __result._M_bits[__i] = ~_M_bits[__i];
1254 if constexpr (_Sanitized)
1255 __result._M_bits[_S_array_size - 1]
1256 = _M_bits[_S_array_size - 1] ^ _S_bitmask;
1257 else
1258 __result._M_bits[_S_array_size - 1] = ~_M_bits[_S_array_size - 1];
1259 return __result;
1260 }
1261 }
1262
1263 constexpr _BitMask&
1264 operator^=(const _BitMask& __b) & noexcept
1265 {
1266 __execute_n_times<_S_array_size>(
1267 [&](auto __i) { _M_bits[__i] ^= __b._M_bits[__i]; });
1268 return *this;
1269 }
1270
1271 constexpr _BitMask&
1272 operator|=(const _BitMask& __b) & noexcept
1273 {
1274 __execute_n_times<_S_array_size>(
1275 [&](auto __i) { _M_bits[__i] |= __b._M_bits[__i]; });
1276 return *this;
1277 }
1278
1279 constexpr _BitMask&
1280 operator&=(const _BitMask& __b) & noexcept
1281 {
1282 __execute_n_times<_S_array_size>(
1283 [&](auto __i) { _M_bits[__i] &= __b._M_bits[__i]; });
1284 return *this;
1285 }
1286
1287 friend constexpr _BitMask
1288 operator^(const _BitMask& __a, const _BitMask& __b) noexcept
1289 {
1290 _BitMask __r = __a;
1291 __r ^= __b;
1292 return __r;
1293 }
1294
1295 friend constexpr _BitMask
1296 operator|(const _BitMask& __a, const _BitMask& __b) noexcept
1297 {
1298 _BitMask __r = __a;
1299 __r |= __b;
1300 return __r;
1301 }
1302
1303 friend constexpr _BitMask
1304 operator&(const _BitMask& __a, const _BitMask& __b) noexcept
1305 {
1306 _BitMask __r = __a;
1307 __r &= __b;
1308 return __r;
1309 }
1310
1311 _GLIBCXX_SIMD_INTRINSIC
1312 constexpr bool
1313 _M_is_constprop() const
1314 {
1315 if constexpr (_S_array_size == 1)
1316 return __builtin_constant_p(_M_bits[0]);
1317 else
1318 {
1319 for (int __i = 0; __i < _S_array_size; ++__i)
1320 if (!__builtin_constant_p(_M_bits[__i]))
1321 return false;
1322 return true;
1323 }
1324 }
1325 };
1326
1327// }}}
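// Illustration: spot checks of _BitMask<4>. Its storage type _Tp is a _UChar,
// so the top four bits are the unused bits that sanitizing clears.
static_assert(_BitMask<4>(0b1111'0101)._M_sanitized().to_ullong() == 0b0101);
static_assert(_BitMask<4>(0b1111'0101)._M_sanitized().count() == 2);
static_assert(_BitMask<4>(0b1111'0101)[2] && !_BitMask<4>(0b1111'0101)[1]);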
1328
1329// vvv ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- vvv
1330// __min_vector_size {{{
1331template <typename _Tp = void>
1332 static inline constexpr int __min_vector_size = 2 * sizeof(_Tp);
1333
1334#if _GLIBCXX_SIMD_HAVE_NEON
1335template <>
1336 inline constexpr int __min_vector_size<void> = 8;
1337#else
1338template <>
1339 inline constexpr int __min_vector_size<void> = 16;
1340#endif
1341
1342// }}}
1343// __vector_type {{{
1344template <typename _Tp, size_t _Np, typename = void>
1345 struct __vector_type_n {};
1346
1347// substitution failure for the 0-element case
1348template <typename _Tp>
1349 struct __vector_type_n<_Tp, 0, void> {};
1350
1351// special case 1-element to be _Tp itself
1352template <typename _Tp>
1353 struct __vector_type_n<_Tp, 1, enable_if_t<__is_vectorizable_v<_Tp>>>
1354 { using type = _Tp; };
1355
1356// else, use GNU-style builtin vector types
1357template <typename _Tp, size_t _Np>
1358 struct __vector_type_n<_Tp, _Np,
1359 enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>>
1360 {
1361 static constexpr size_t _S_Np2 = std::__bit_ceil(_Np * sizeof(_Tp));
1362
1363 static constexpr size_t _S_Bytes =
1364#ifdef __i386__
1365 // Using [[gnu::vector_size(8)]] would wreak havoc on the FPU because
1366 // those objects are passed via MMX registers and nothing ever calls EMMS.
1367 _S_Np2 == 8 ? 16 :
1368#endif
1369 _S_Np2 < __min_vector_size<_Tp> ? __min_vector_size<_Tp>
1370 : _S_Np2;
1371
1372 using type [[__gnu__::__vector_size__(_S_Bytes)]] = _Tp;
1373 };
1374
1375template <typename _Tp, size_t _Bytes, size_t = _Bytes % sizeof(_Tp)>
1376 struct __vector_type;
1377
1378template <typename _Tp, size_t _Bytes>
1379 struct __vector_type<_Tp, _Bytes, 0>
1380 : __vector_type_n<_Tp, _Bytes / sizeof(_Tp)> {};
1381
1382template <typename _Tp, size_t _Size>
1383 using __vector_type_t = typename __vector_type_n<_Tp, _Size>::type;
1384
1385template <typename _Tp>
1386 using __vector_type2_t = typename __vector_type<_Tp, 2>::type;
1387template <typename _Tp>
1388 using __vector_type4_t = typename __vector_type<_Tp, 4>::type;
1389template <typename _Tp>
1390 using __vector_type8_t = typename __vector_type<_Tp, 8>::type;
1391template <typename _Tp>
1392 using __vector_type16_t = typename __vector_type<_Tp, 16>::type;
1393template <typename _Tp>
1394 using __vector_type32_t = typename __vector_type<_Tp, 32>::type;
1395template <typename _Tp>
1396 using __vector_type64_t = typename __vector_type<_Tp, 64>::type;
1397
1398// }}}
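// Illustration: requested element counts are padded to the next power-of-two
// byte size (at least __min_vector_size), and one element is the scalar type:
static_assert(sizeof(__vector_type_t<float, 3>) == 16);
static_assert(is_same_v<__vector_type_t<float, 1>, float>);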
1399// __is_vector_type {{{
1400template <typename _Tp, typename = void_t<>>
1401 struct __is_vector_type : false_type {};
1402
1403template <typename _Tp>
1404 struct __is_vector_type<
1405 _Tp, void_t<typename __vector_type<
1406 remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
1407 : is_same<_Tp, typename __vector_type<
1408 remove_reference_t<decltype(declval<_Tp>()[0])>,
1409 sizeof(_Tp)>::type> {};
1410
1411template <typename _Tp>
1412 inline constexpr bool __is_vector_type_v = __is_vector_type<_Tp>::value;
1413
1414// }}}
1415// __is_intrinsic_type {{{
1416#if _GLIBCXX_SIMD_HAVE_SSE_ABI
1417template <typename _Tp>
1418 using __is_intrinsic_type = __is_vector_type<_Tp>;
1419#else // not SSE (x86)
1420template <typename _Tp, typename = void_t<>>
1421 struct __is_intrinsic_type : false_type {};
1422
1423template <typename _Tp>
1424 struct __is_intrinsic_type<
1425 _Tp, void_t<typename __intrinsic_type<
1426 remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
1427 : is_same<_Tp, typename __intrinsic_type<
1428 remove_reference_t<decltype(declval<_Tp>()[0])>,
1429 sizeof(_Tp)>::type> {};
1430#endif
1431
1432template <typename _Tp>
1433 inline constexpr bool __is_intrinsic_type_v = __is_intrinsic_type<_Tp>::value;
1434
1435// }}}
1436// _VectorTraits{{{
1437template <typename _Tp, typename = void_t<>>
1438 struct _VectorTraitsImpl;
1439
1440template <typename _Tp>
1441 struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp>
1442 || __is_intrinsic_type_v<_Tp>>>
1443 {
1444 using type = _Tp;
1445 using value_type = remove_reference_t<decltype(declval<_Tp>()[0])>;
1446 static constexpr int _S_full_size = sizeof(_Tp) / sizeof(value_type);
1447 using _Wrapper = _SimdWrapper<value_type, _S_full_size>;
1448 template <typename _Up, int _W = _S_full_size>
1449 static constexpr bool _S_is
1450 = is_same_v<value_type, _Up> && _W == _S_full_size;
1451 };
1452
1453template <typename _Tp, size_t _Np>
1454 struct _VectorTraitsImpl<_SimdWrapper<_Tp, _Np>,
1455 void_t<__vector_type_t<_Tp, _Np>>>
1456 {
1457 using type = __vector_type_t<_Tp, _Np>;
1458 using value_type = _Tp;
1459 static constexpr int _S_full_size = sizeof(type) / sizeof(value_type);
1460 using _Wrapper = _SimdWrapper<_Tp, _Np>;
1461 static constexpr bool _S_is_partial = (_Np < _S_full_size);
1462 static constexpr int _S_partial_width = _Np;
1463 template <typename _Up, int _W = _S_full_size>
1464 static constexpr bool _S_is
1465 = is_same_v<value_type, _Up> && _W == _S_full_size;
1466 };
1467
1468template <typename _Tp, typename = typename _VectorTraitsImpl<_Tp>::type>
1469 using _VectorTraits = _VectorTraitsImpl<_Tp>;
1470
1471// }}}
1472// __as_vector{{{
1473template <typename _V>
1474 _GLIBCXX_SIMD_INTRINSIC constexpr auto
1475 __as_vector(_V __x)
1476 {
1477 if constexpr (__is_vector_type_v<_V>)
1478 return __x;
1479 else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
1480 return __data(__x)._M_data;
1481 else if constexpr (__is_vectorizable_v<_V>)
1482 return __vector_type_t<_V, 2>{__x};
1483 else
1484 return __x._M_data;
1485 }
1486
1487// }}}
1488// __as_wrapper{{{
1489template <size_t _Np = 0, typename _V>
1490 _GLIBCXX_SIMD_INTRINSIC constexpr auto
1491 __as_wrapper(_V __x)
1492 {
1493 if constexpr (__is_vector_type_v<_V>)
1494 return _SimdWrapper<typename _VectorTraits<_V>::value_type,
1495 (_Np > 0 ? _Np : _VectorTraits<_V>::_S_full_size)>(__x);
1496 else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
1497 {
1498 static_assert(_V::size() == _Np);
1499 return __data(__x);
1500 }
1501 else
1502 {
1503 static_assert(_V::_S_size == _Np);
1504 return __x;
1505 }
1506 }
1507
1508// }}}
1509// __intrin_bitcast{{{
1510template <typename _To, typename _From>
1511 _GLIBCXX_SIMD_INTRINSIC constexpr _To
1512 __intrin_bitcast(_From __v)
1513 {
1514 static_assert((__is_vector_type_v<_From> || __is_intrinsic_type_v<_From>)
1515 && (__is_vector_type_v<_To> || __is_intrinsic_type_v<_To>));
1516 if constexpr (sizeof(_To) == sizeof(_From))
1517 return reinterpret_cast<_To>(__v);
1518 else if constexpr (sizeof(_From) > sizeof(_To))
1519 if constexpr (sizeof(_To) >= 16)
1520 return reinterpret_cast<const __may_alias<_To>&>(__v);
1521 else
1522 {
1523 _To __r;
1524 __builtin_memcpy(&__r, &__v, sizeof(_To));
1525 return __r;
1526 }
1527#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1528 else if constexpr (__have_avx && sizeof(_From) == 16 && sizeof(_To) == 32)
1529 return reinterpret_cast<_To>(__builtin_ia32_ps256_ps(
1530 reinterpret_cast<__vector_type_t<float, 4>>(__v)));
1531 else if constexpr (__have_avx512f && sizeof(_From) == 16
1532 && sizeof(_To) == 64)
1533 return reinterpret_cast<_To>(__builtin_ia32_ps512_ps(
1534 reinterpret_cast<__vector_type_t<float, 4>>(__v)));
1535 else if constexpr (__have_avx512f && sizeof(_From) == 32
1536 && sizeof(_To) == 64)
1537 return reinterpret_cast<_To>(__builtin_ia32_ps512_256ps(
1538 reinterpret_cast<__vector_type_t<float, 8>>(__v)));
1539#endif // _GLIBCXX_SIMD_X86INTRIN
1540 else if constexpr (sizeof(__v) <= 8)
1541 return reinterpret_cast<_To>(
1542 __vector_type_t<__int_for_sizeof_t<_From>, sizeof(_To) / sizeof(_From)>{
1543 reinterpret_cast<__int_for_sizeof_t<_From>>(__v)});
1544 else
1545 {
1546 static_assert(sizeof(_To) > sizeof(_From));
1547 _To __r = {};
1548 __builtin_memcpy(&__r, &__v, sizeof(_From));
1549 return __r;
1550 }
1551 }
1552
1553// }}}
1554// __vector_bitcast{{{
1555template <typename _To, size_t _NN = 0, typename _From,
1556 typename _FromVT = _VectorTraits<_From>,
1557 size_t _Np = _NN == 0 ? sizeof(_From) / sizeof(_To) : _NN>
1558 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
1559 __vector_bitcast(_From __x)
1560 {
1561 using _R = __vector_type_t<_To, _Np>;
1562 return __intrin_bitcast<_R>(__x);
1563 }
1564
1565template <typename _To, size_t _NN = 0, typename _Tp, size_t _Nx,
1566 size_t _Np
1567 = _NN == 0 ? sizeof(_SimdWrapper<_Tp, _Nx>) / sizeof(_To) : _NN>
1568 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
1569 __vector_bitcast(const _SimdWrapper<_Tp, _Nx>& __x)
1570 {
1571 static_assert(_Np > 1);
1572 return __intrin_bitcast<__vector_type_t<_To, _Np>>(__x._M_data);
1573 }
1574
1575// }}}
1576// __convert_x86 declarations {{{
1577#ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
1578template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1579 _To __convert_x86(_Tp);
1580
1581template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1582 _To __convert_x86(_Tp, _Tp);
1583
1584template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1585 _To __convert_x86(_Tp, _Tp, _Tp, _Tp);
1586
1587template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1588 _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp);
1589
1590template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1591 _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp,
1592 _Tp, _Tp, _Tp, _Tp);
1593#endif // _GLIBCXX_SIMD_WORKAROUND_PR85048
1594
1595//}}}
1596// __bit_cast {{{
1597template <typename _To, typename _From>
1598 _GLIBCXX_SIMD_INTRINSIC constexpr _To
1599 __bit_cast(const _From __x)
1600 {
1601 // TODO: implement with / replace by __builtin_bit_cast ASAP
1602 static_assert(sizeof(_To) == sizeof(_From));
1603 constexpr bool __to_is_vectorizable
1604 = is_arithmetic_v<_To> || is_enum_v<_To>;
1605 constexpr bool __from_is_vectorizable
1606 = is_arithmetic_v<_From> || is_enum_v<_From>;
1607 if constexpr (__is_vector_type_v<_To> && __is_vector_type_v<_From>)
1608 return reinterpret_cast<_To>(__x);
1609 else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
1610 {
1611 using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
1612 return reinterpret_cast<_To>(_FV{__x});
1613 }
1614 else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
1615 {
1616 using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
1617 using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
1618 return reinterpret_cast<_TV>(_FV{__x})[0];
1619 }
1620 else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
1621 {
1622 using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
1623 return reinterpret_cast<_TV>(__x)[0];
1624 }
1625 else
1626 {
1627 _To __r;
1628 __builtin_memcpy(reinterpret_cast<char*>(&__r),
1629 reinterpret_cast<const char*>(&__x), sizeof(_To));
1630 return __r;
1631 }
1632 }
1633
1634// }}}
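// Illustration (hypothetical usage; the bit pattern assumes IEC 559 float):
//   const auto __bits = __bit_cast<_UInt>(1.f);       // 0x3f800000
//   const auto __one = __bit_cast<float>(0x3f800000u);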
1635// __to_intrin {{{
1636template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
1637 typename _R
1638 = __intrinsic_type_t<typename _TVT::value_type, _TVT::_S_full_size>>
1639 _GLIBCXX_SIMD_INTRINSIC constexpr _R
1640 __to_intrin(_Tp __x)
1641 {
1642 static_assert(sizeof(__x) <= sizeof(_R),
1643 "__to_intrin may never drop values off the end");
1644 if constexpr (sizeof(__x) == sizeof(_R))
1645 return reinterpret_cast<_R>(__as_vector(__x));
1646 else
1647 {
1648 using _Up = __int_for_sizeof_t<_Tp>;
1649 return reinterpret_cast<_R>(
1650 __vector_type_t<_Up, sizeof(_R) / sizeof(_Up)>{__bit_cast<_Up>(__x)});
1651 }
1652 }
1653
1654// }}}
1655// __make_vector{{{
1656template <typename _Tp, typename... _Args>
1657 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, sizeof...(_Args)>
1658 __make_vector(const _Args&... __args)
1659 {
1660 return __vector_type_t<_Tp, sizeof...(_Args)>{static_cast<_Tp>(__args)...};
1661 }
1662
1663// }}}
1664// __vector_broadcast{{{
1665template <size_t _Np, typename _Tp>
1666 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1667 __vector_broadcast(_Tp __x)
1668 {
1669 return __call_with_n_evaluations<_Np>(
1670 [](auto... __xx) { return __vector_type_t<_Tp, _Np>{__xx...}; },
1671 [&__x](int) { return __x; });
1672 }
1673
1674// }}}
1675// __generate_vector{{{
1676 template <typename _Tp, size_t _Np, typename _Gp, size_t... _I>
1677 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1678 __generate_vector_impl(_Gp&& __gen, index_sequence<_I...>)
1679 {
1680 return __vector_type_t<_Tp, _Np>{
1681 static_cast<_Tp>(__gen(_SizeConstant<_I>()))...};
1682 }
1683
1684template <typename _V, typename _VVT = _VectorTraits<_V>, typename _Gp>
1685 _GLIBCXX_SIMD_INTRINSIC constexpr _V
1686 __generate_vector(_Gp&& __gen)
1687 {
1688 if constexpr (__is_vector_type_v<_V>)
1689 return __generate_vector_impl<typename _VVT::value_type,
1690 _VVT::_S_full_size>(
1691 static_cast<_Gp&&>(__gen), make_index_sequence<_VVT::_S_full_size>());
1692 else
1693 return __generate_vector_impl<typename _VVT::value_type,
1694 _VVT::_S_partial_width>(
1695 static_cast<_Gp&&>(__gen),
1696 make_index_sequence<_VVT::_S_partial_width>());
1697 }
1698
1699template <typename _Tp, size_t _Np, typename _Gp>
1700 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1701 __generate_vector(_Gp&& __gen)
1702 {
1703 return __generate_vector_impl<_Tp, _Np>(static_cast<_Gp&&>(__gen),
1704 make_index_sequence<_Np>());
1705 }
1706
1707// }}}
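// Illustration (hypothetical usage of the two generators above):
//   auto __ones = __vector_broadcast<4>(1.f);   // {1, 1, 1, 1}
//   auto __iota = __generate_vector<float, 4>(
//     [](auto __i) { return float(__i); });     // {0, 1, 2, 3}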
1708// __xor{{{
1709template <typename _TW>
1710 _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1711 __xor(_TW __a, _TW __b) noexcept
1712 {
1713 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1714 {
1715 using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1716 _VectorTraitsImpl<_TW>>::value_type;
1717 if constexpr (is_floating_point_v<_Tp>)
1718 {
1719 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1720 return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1721 ^ __vector_bitcast<_Ip>(__b));
1722 }
1723 else if constexpr (__is_vector_type_v<_TW>)
1724 return __a ^ __b;
1725 else
1726 return __a._M_data ^ __b._M_data;
1727 }
1728 else
1729 return __a ^ __b;
1730 }
1731
1732// }}}
1733// __or{{{
1734template <typename _TW>
1735 _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1736 __or(_TW __a, _TW __b) noexcept
1737 {
1738 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1739 {
1740 using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1741 _VectorTraitsImpl<_TW>>::value_type;
1742 if constexpr (is_floating_point_v<_Tp>)
1743 {
1744 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1745 return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1746 | __vector_bitcast<_Ip>(__b));
1747 }
1748 else if constexpr (__is_vector_type_v<_TW>)
1749 return __a | __b;
1750 else
1751 return __a._M_data | __b._M_data;
1752 }
1753 else
1754 return __a | __b;
1755 }
1756
1757// }}}
1758// __and{{{
1759template <typename _TW>
1760 _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1761 __and(_TW __a, _TW __b) noexcept
1762 {
1763 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1764 {
1765 using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1766 _VectorTraitsImpl<_TW>>::value_type;
1767 if constexpr (is_floating_point_v<_Tp>)
1768 {
1769 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1770 return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1771 & __vector_bitcast<_Ip>(__b));
1772 }
1773 else if constexpr (__is_vector_type_v<_TW>)
1774 return __a & __b;
1775 else
1776 return __a._M_data & __b._M_data;
1777 }
1778 else
1779 return __a & __b;
1780 }
1781
1782// }}}
1783// __andnot{{{
1784#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1785static constexpr struct
1786{
1787 _GLIBCXX_SIMD_INTRINSIC __v4sf
1788 operator()(__v4sf __a, __v4sf __b) const noexcept
1789 { return __builtin_ia32_andnps(__a, __b); }
1790
1791 _GLIBCXX_SIMD_INTRINSIC __v2df
1792 operator()(__v2df __a, __v2df __b) const noexcept
1793 { return __builtin_ia32_andnpd(__a, __b); }
1794
1795 _GLIBCXX_SIMD_INTRINSIC __v2di
1796 operator()(__v2di __a, __v2di __b) const noexcept
1797 { return __builtin_ia32_pandn128(__a, __b); }
1798
1799 _GLIBCXX_SIMD_INTRINSIC __v8sf
1800 operator()(__v8sf __a, __v8sf __b) const noexcept
1801 { return __builtin_ia32_andnps256(__a, __b); }
1802
1803 _GLIBCXX_SIMD_INTRINSIC __v4df
1804 operator()(__v4df __a, __v4df __b) const noexcept
1805 { return __builtin_ia32_andnpd256(__a, __b); }
1806
1807 _GLIBCXX_SIMD_INTRINSIC __v4di
1808 operator()(__v4di __a, __v4di __b) const noexcept
1809 {
1810 if constexpr (__have_avx2)
1811 return __builtin_ia32_andnotsi256(__a, __b);
1812 else
1813 return reinterpret_cast<__v4di>(
1814 __builtin_ia32_andnpd256(reinterpret_cast<__v4df>(__a),
1815 reinterpret_cast<__v4df>(__b)));
1816 }
1817
1818 _GLIBCXX_SIMD_INTRINSIC __v16sf
1819 operator()(__v16sf __a, __v16sf __b) const noexcept
1820 {
1821 if constexpr (__have_avx512dq)
1822 return _mm512_andnot_ps(__a, __b);
1823 else
1824 return reinterpret_cast<__v16sf>(
1825 _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
1826 reinterpret_cast<__v8di>(__b)));
1827 }
1828
1829 _GLIBCXX_SIMD_INTRINSIC __v8df
1830 operator()(__v8df __a, __v8df __b) const noexcept
1831 {
1832 if constexpr (__have_avx512dq)
1833 return _mm512_andnot_pd(__a, __b);
1834 else
1835 return reinterpret_cast<__v8df>(
1836 _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
1837 reinterpret_cast<__v8di>(__b)));
1838 }
1839
1840 _GLIBCXX_SIMD_INTRINSIC __v8di
1841 operator()(__v8di __a, __v8di __b) const noexcept
1842 { return _mm512_andnot_si512(__a, __b); }
1843} _S_x86_andnot;
1844#endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
1845
1846template <typename _TW>
1847 _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1848 __andnot(_TW __a, _TW __b) noexcept
1849 {
1850 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1851 {
1852 using _TVT = conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1853 _VectorTraitsImpl<_TW>>;
1854 using _Tp = typename _TVT::value_type;
1855#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1856 if constexpr (sizeof(_TW) >= 16)
1857 {
1858 const auto __ai = __to_intrin(__a);
1859 const auto __bi = __to_intrin(__b);
1860 if (!__builtin_is_constant_evaluated()
1861 && !(__builtin_constant_p(__ai) && __builtin_constant_p(__bi)))
1862 {
1863 const auto __r = _S_x86_andnot(__ai, __bi);
1864 if constexpr (is_convertible_v<decltype(__r), _TW>)
1865 return __r;
1866 else
1867 return reinterpret_cast<typename _TVT::type>(__r);
1868 }
1869 }
1870#endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
1871 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1872 return __vector_bitcast<_Tp>(~__vector_bitcast<_Ip>(__a)
1873 & __vector_bitcast<_Ip>(__b));
1874 }
1875 else
1876 return ~__a & __b;
1877 }
1878
1879// }}}
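// Note the operand order: __andnot computes ~__a & __b, matching the x86
// ANDNPS/ANDNPD/PANDN instructions, not __a & ~__b. A scalar analogue:
//   unsigned __andnot_scalar(unsigned __a, unsigned __b) { return ~__a & __b; }
// so __andnot(__mask, __data) keeps exactly the lanes where __mask is zero.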
1880// __not{{{
1881template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1882 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
1883 __not(_Tp __a) noexcept
1884 {
1885 if constexpr (is_floating_point_v<typename _TVT::value_type>)
1886 return reinterpret_cast<typename _TVT::type>(
1887 ~__vector_bitcast<unsigned>(__a));
1888 else
1889 return ~__a;
1890 }
1891
1892// }}}
1893// __concat{{{
1894template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
1895 typename _R = __vector_type_t<typename _TVT::value_type,
1896 _TVT::_S_full_size * 2>>
1897 constexpr _R
1898 __concat(_Tp a_, _Tp b_)
1899 {
1900#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
1901 using _W
1902 = conditional_t<is_floating_point_v<typename _TVT::value_type>, double,
1903 conditional_t<(sizeof(_Tp) >= 2 * sizeof(long long)),
1904 long long, typename _TVT::value_type>>;
1905 constexpr int input_width = sizeof(_Tp) / sizeof(_W);
1906 const auto __a = __vector_bitcast<_W>(a_);
1907 const auto __b = __vector_bitcast<_W>(b_);
1908 using _Up = __vector_type_t<_W, sizeof(_R) / sizeof(_W)>;
1909#else
1910 constexpr int input_width = _TVT::_S_full_size;
1911 const _Tp& __a = a_;
1912 const _Tp& __b = b_;
1913 using _Up = _R;
1914#endif
1915 if constexpr (input_width == 2)
1916 return reinterpret_cast<_R>(_Up{__a[0], __a[1], __b[0], __b[1]});
1917 else if constexpr (input_width == 4)
1918 return reinterpret_cast<_R>(
1919 _Up{__a[0], __a[1], __a[2], __a[3], __b[0], __b[1], __b[2], __b[3]});
1920 else if constexpr (input_width == 8)
1921 return reinterpret_cast<_R>(
1922 _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], __a[7],
1923 __b[0], __b[1], __b[2], __b[3], __b[4], __b[5], __b[6], __b[7]});
1924 else if constexpr (input_width == 16)
1925 return reinterpret_cast<_R>(
1926 _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
1927 __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
1928 __a[14], __a[15], __b[0], __b[1], __b[2], __b[3], __b[4],
1929 __b[5], __b[6], __b[7], __b[8], __b[9], __b[10], __b[11],
1930 __b[12], __b[13], __b[14], __b[15]});
1931 else if constexpr (input_width == 32)
1932 return reinterpret_cast<_R>(
1933 _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
1934 __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
1935 __a[14], __a[15], __a[16], __a[17], __a[18], __a[19], __a[20],
1936 __a[21], __a[22], __a[23], __a[24], __a[25], __a[26], __a[27],
1937 __a[28], __a[29], __a[30], __a[31], __b[0], __b[1], __b[2],
1938 __b[3], __b[4], __b[5], __b[6], __b[7], __b[8], __b[9],
1939 __b[10], __b[11], __b[12], __b[13], __b[14], __b[15], __b[16],
1940 __b[17], __b[18], __b[19], __b[20], __b[21], __b[22], __b[23],
1941 __b[24], __b[25], __b[26], __b[27], __b[28], __b[29], __b[30],
1942 __b[31]});
1943 }
1944
1945// }}}
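// Usage sketch (assuming __v4 is a __vector_type_t<int, 4> value):
//   auto __v8 = __concat(__v4, __v4); // __vector_type_t<int, 8>
// with the first argument in elements [0..3] and the second in [4..7].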
1946// __zero_extend {{{
1947template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1948 struct _ZeroExtendProxy
1949 {
1950 using value_type = typename _TVT::value_type;
1951 static constexpr size_t _Np = _TVT::_S_full_size;
1952 const _Tp __x;
1953
1954 template <typename _To, typename _ToVT = _VectorTraits<_To>,
1955 typename
1956 = enable_if_t<is_same_v<typename _ToVT::value_type, value_type>>>
1957 _GLIBCXX_SIMD_INTRINSIC operator _To() const
1958 {
1959 constexpr size_t _ToN = _ToVT::_S_full_size;
1960 if constexpr (_ToN == _Np)
1961 return __x;
1962 else if constexpr (_ToN == 2 * _Np)
1963 {
1964#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
1965 if constexpr (__have_avx && _TVT::template _S_is<float, 4>)
1966 return __vector_bitcast<value_type>(
1967 _mm256_insertf128_ps(__m256(), __x, 0));
1968 else if constexpr (__have_avx && _TVT::template _S_is<double, 2>)
1969 return __vector_bitcast<value_type>(
1970 _mm256_insertf128_pd(__m256d(), __x, 0));
1971 else if constexpr (__have_avx2 && _Np * sizeof(value_type) == 16)
1972 return __vector_bitcast<value_type>(
1973 _mm256_insertf128_si256(__m256i(), __to_intrin(__x), 0));
1974 else if constexpr (__have_avx512f && _TVT::template _S_is<float, 8>)
1975 {
1976 if constexpr (__have_avx512dq)
1977 return __vector_bitcast<value_type>(
1978 _mm512_insertf32x8(__m512(), __x, 0));
1979 else
1980 return reinterpret_cast<__m512>(
1981 _mm512_insertf64x4(__m512d(),
1982 reinterpret_cast<__m256d>(__x), 0));
1983 }
1984 else if constexpr (__have_avx512f
1985 && _TVT::template _S_is<double, 4>)
1986 return __vector_bitcast<value_type>(
1987 _mm512_insertf64x4(__m512d(), __x, 0));
1988 else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 32)
1989 return __vector_bitcast<value_type>(
1990 _mm512_inserti64x4(__m512i(), __to_intrin(__x), 0));
1991#endif
1992 return __concat(__x, _Tp());
1993 }
1994 else if constexpr (_ToN == 4 * _Np)
1995 {
1996#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
1997 if constexpr (__have_avx512dq && _TVT::template _S_is<double, 2>)
1998 {
1999 return __vector_bitcast<value_type>(
2000 _mm512_insertf64x2(__m512d(), __x, 0));
2001 }
2002 else if constexpr (__have_avx512f
2003 && is_floating_point_v<value_type>)
2004 {
2005 return __vector_bitcast<value_type>(
2006 _mm512_insertf32x4(__m512(), reinterpret_cast<__m128>(__x),
2007 0));
2008 }
2009 else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 16)
2010 {
2011 return __vector_bitcast<value_type>(
2012 _mm512_inserti32x4(__m512i(), __to_intrin(__x), 0));
2013 }
2014#endif
2015 return __concat(__concat(__x, _Tp()),
2016 __vector_type_t<value_type, _Np * 2>());
2017 }
2018 else if constexpr (_ToN == 8 * _Np)
2019 return __concat(operator __vector_type_t<value_type, _Np * 4>(),
2020 __vector_type_t<value_type, _Np * 4>());
2021 else if constexpr (_ToN == 16 * _Np)
2022 return __concat(operator __vector_type_t<value_type, _Np * 8>(),
2023 __vector_type_t<value_type, _Np * 8>());
2024 else
2025 __assert_unreachable<_Tp>();
2026 }
2027 };
2028
2029template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2030 _GLIBCXX_SIMD_INTRINSIC _ZeroExtendProxy<_Tp, _TVT>
2031 __zero_extend(_Tp __x)
2032 { return {__x}; }
2033
2034// }}}
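// The proxy defers the choice of destination width to the conversion target,
// so one call site can zero-extend to any wider vector type. Illustrative:
//   __vector_type_t<float, 8> __w = __zero_extend(__x); // __x: 4 floats;
//                                                       // high half zeroed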
2035// __extract<_Offset, _SplitBy>{{{
2036template <int _Offset,
2037 int _SplitBy,
2038 typename _Tp,
2039 typename _TVT = _VectorTraits<_Tp>,
2040 typename _R = __vector_type_t<typename _TVT::value_type,
2041 _TVT::_S_full_size / _SplitBy>>
2042 _GLIBCXX_SIMD_INTRINSIC constexpr _R
2043 __extract(_Tp __in)
2044 {
2045 using value_type = typename _TVT::value_type;
2046#if _GLIBCXX_SIMD_X86INTRIN // {{{
2047 if constexpr (sizeof(_Tp) == 64 && _SplitBy == 4 && _Offset > 0)
2048 {
2049 if constexpr (__have_avx512dq && is_same_v<double, value_type>)
2050 return _mm512_extractf64x2_pd(__to_intrin(__in), _Offset);
2051 else if constexpr (is_floating_point_v<value_type>)
2052 return __vector_bitcast<value_type>(
2053 _mm512_extractf32x4_ps(__intrin_bitcast<__m512>(__in), _Offset));
2054 else
2055 return reinterpret_cast<_R>(
2056 _mm512_extracti32x4_epi32(__intrin_bitcast<__m512i>(__in),
2057 _Offset));
2058 }
2059 else
2060#endif // _GLIBCXX_SIMD_X86INTRIN }}}
2061 {
2062#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2063 using _W = conditional_t<
2064 is_floating_point_v<value_type>, double,
2065 conditional_t<(sizeof(_R) >= 16), long long, value_type>>;
2066 static_assert(sizeof(_R) % sizeof(_W) == 0);
2067 constexpr int __return_width = sizeof(_R) / sizeof(_W);
2068 using _Up = __vector_type_t<_W, __return_width>;
2069 const auto __x = __vector_bitcast<_W>(__in);
2070#else
2071 constexpr int __return_width = _TVT::_S_full_size / _SplitBy;
2072 using _Up = _R;
2073 const __vector_type_t<value_type, _TVT::_S_full_size>& __x
2074 = __in; // only needed for _Tp = _SimdWrapper<value_type, _Np>
2075#endif
2076 constexpr int _O = _Offset * __return_width;
2077 return __call_with_subscripts<__return_width, _O>(
2078 __x, [](auto... __entries) {
2079 return reinterpret_cast<_R>(_Up{__entries...});
2080 });
2081 }
2082 }
2083
2084// }}}
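// Illustrative: for an 8-element vector __v8, __extract<1, 2>(__v8) returns
// elements [4..7] as a 4-element vector (part 1 of 2), and
// __extract<0, 4>(__v8) returns elements [0..1] (part 0 of 4).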
2085// __lo/__hi64[z]{{{
2086template <typename _Tp,
2087 typename _R
2088 = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2089 _GLIBCXX_SIMD_INTRINSIC constexpr _R
2090 __lo64(_Tp __x)
2091 {
2092 _R __r{};
2093 __builtin_memcpy(&__r, &__x, 8);
2094 return __r;
2095 }
2096
2097template <typename _Tp,
2098 typename _R
2099 = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2100 _GLIBCXX_SIMD_INTRINSIC constexpr _R
2101 __hi64(_Tp __x)
2102 {
2103 static_assert(sizeof(_Tp) == 16, "use __hi64z if you meant it");
2104 _R __r{};
2105 __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2106 return __r;
2107 }
2108
2109template <typename _Tp,
2110 typename _R
2111 = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2112 _GLIBCXX_SIMD_INTRINSIC constexpr _R
2113 __hi64z([[maybe_unused]] _Tp __x)
2114 {
2115 _R __r{};
2116 if constexpr (sizeof(_Tp) == 16)
2117 __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2118 return __r;
2119 }
2120
2121// }}}
2122// __lo/__hi128{{{
2123template <typename _Tp>
2124 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2125 __lo128(_Tp __x)
2126 { return __extract<0, sizeof(_Tp) / 16>(__x); }
2127
2128template <typename _Tp>
2129 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2130 __hi128(_Tp __x)
2131 {
2132 static_assert(sizeof(__x) == 32);
2133 return __extract<1, 2>(__x);
2134 }
2135
2136// }}}
2137// __lo/__hi256{{{
2138template <typename _Tp>
2139 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2140 __lo256(_Tp __x)
2141 {
2142 static_assert(sizeof(__x) == 64);
2143 return __extract<0, 2>(__x);
2144 }
2145
2146template <typename _Tp>
2147 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2148 __hi256(_Tp __x)
2149 {
2150 static_assert(sizeof(__x) == 64);
2151 return __extract<1, 2>(__x);
2152 }
2153
2154// }}}
2155// __auto_bitcast{{{
2156template <typename _Tp>
2157 struct _AutoCast
2158 {
2159 static_assert(__is_vector_type_v<_Tp>);
2160
2161 const _Tp __x;
2162
2163 template <typename _Up, typename _UVT = _VectorTraits<_Up>>
2164 _GLIBCXX_SIMD_INTRINSIC constexpr operator _Up() const
2165 { return __intrin_bitcast<typename _UVT::type>(__x); }
2166 };
2167
2168template <typename _Tp>
2169 _GLIBCXX_SIMD_INTRINSIC constexpr _AutoCast<_Tp>
2170 __auto_bitcast(const _Tp& __x)
2171 { return {__x}; }
2172
2173template <typename _Tp, size_t _Np>
2174 _GLIBCXX_SIMD_INTRINSIC constexpr
2175 _AutoCast<typename _SimdWrapper<_Tp, _Np>::_BuiltinType>
2176 __auto_bitcast(const _SimdWrapper<_Tp, _Np>& __x)
2177 { return {__x._M_data}; }
2178
2179// }}}
2180// ^^^ ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- ^^^
2181
2182#if _GLIBCXX_SIMD_HAVE_SSE_ABI
2183// __bool_storage_member_type{{{
2184#if _GLIBCXX_SIMD_HAVE_AVX512F && _GLIBCXX_SIMD_X86INTRIN
2185template <size_t _Size>
2186 struct __bool_storage_member_type
2187 {
2188 static_assert((_Size & (_Size - 1)) != 0,
2189 "This trait may only be used for non-power-of-2 sizes. "
2190 "Power-of-2 sizes must be specialized.");
2191 using type =
2192 typename __bool_storage_member_type<std::__bit_ceil(_Size)>::type;
2193 };
2194
2195template <>
2196 struct __bool_storage_member_type<1> { using type = bool; };
2197
2198template <>
2199 struct __bool_storage_member_type<2> { using type = __mmask8; };
2200
2201template <>
2202 struct __bool_storage_member_type<4> { using type = __mmask8; };
2203
2204template <>
2205 struct __bool_storage_member_type<8> { using type = __mmask8; };
2206
2207template <>
2208 struct __bool_storage_member_type<16> { using type = __mmask16; };
2209
2210template <>
2211 struct __bool_storage_member_type<32> { using type = __mmask32; };
2212
2213template <>
2214 struct __bool_storage_member_type<64> { using type = __mmask64; };
2215#endif // _GLIBCXX_SIMD_HAVE_AVX512F
2216
2217// }}}
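// Consequently a non-power-of-2 mask falls back to the next power of 2:
// e.g. __bool_storage_member_type<13>::type is __mmask16, since
// std::__bit_ceil(13) == 16.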
2218// __intrinsic_type (x86){{{
2219// the following excludes bool via __is_vectorizable
2220#if _GLIBCXX_SIMD_HAVE_SSE
2221template <typename _Tp, size_t _Bytes>
2222 struct __intrinsic_type<_Tp, _Bytes,
2223 enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 64>>
2224 {
2225 static_assert(!is_same_v<_Tp, long double>,
2226 "no __intrinsic_type support for long double on x86");
2227
2228 static constexpr size_t _S_VBytes = _Bytes <= 16 ? 16
2229 : _Bytes <= 32 ? 32
2230 : 64;
2231
2232 using type [[__gnu__::__vector_size__(_S_VBytes)]]
2233 = conditional_t<is_integral_v<_Tp>, long long int, _Tp>;
2234 };
2235#endif // _GLIBCXX_SIMD_HAVE_SSE
2236
2237// }}}
2238#endif // _GLIBCXX_SIMD_HAVE_SSE_ABI
2239// __intrinsic_type (ARM){{{
2240#if _GLIBCXX_SIMD_HAVE_NEON
2241template <>
2242 struct __intrinsic_type<float, 8, void>
2243 { using type = float32x2_t; };
2244
2245template <>
2246 struct __intrinsic_type<float, 16, void>
2247 { using type = float32x4_t; };
2248
2249#if _GLIBCXX_SIMD_HAVE_NEON_A64
2250template <>
2251 struct __intrinsic_type<double, 8, void>
2252 { using type = float64x1_t; };
2253
2254template <>
2255 struct __intrinsic_type<double, 16, void>
2256 { using type = float64x2_t; };
2257#endif
2258
2259#define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np) \
2260template <> \
2261 struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>, \
2262 _Np * _Bits / 8, void> \
2263 { using type = int##_Bits##x##_Np##_t; }; \
2264template <> \
2265 struct __intrinsic_type<make_unsigned_t<__int_with_sizeof_t<_Bits / 8>>, \
2266 _Np * _Bits / 8, void> \
2267 { using type = uint##_Bits##x##_Np##_t; }
2268_GLIBCXX_SIMD_ARM_INTRIN(8, 8);
2269_GLIBCXX_SIMD_ARM_INTRIN(8, 16);
2270_GLIBCXX_SIMD_ARM_INTRIN(16, 4);
2271_GLIBCXX_SIMD_ARM_INTRIN(16, 8);
2272_GLIBCXX_SIMD_ARM_INTRIN(32, 2);
2273_GLIBCXX_SIMD_ARM_INTRIN(32, 4);
2274_GLIBCXX_SIMD_ARM_INTRIN(64, 1);
2275_GLIBCXX_SIMD_ARM_INTRIN(64, 2);
2276#undef _GLIBCXX_SIMD_ARM_INTRIN
2277
2278template <typename _Tp, size_t _Bytes>
2279 struct __intrinsic_type<_Tp, _Bytes,
2280 enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2281 {
2282 static constexpr int _SVecBytes = _Bytes <= 8 ? 8 : 16;
2283 using _Ip = __int_for_sizeof_t<_Tp>;
2284 using _Up = conditional_t<
2285 is_floating_point_v<_Tp>, _Tp,
2286 conditional_t<is_unsigned_v<_Tp>, make_unsigned_t<_Ip>, _Ip>>;
2287 static_assert(!is_same_v<_Tp, _Up> || _SVecBytes != _Bytes,
2288 "should use explicit specialization above");
2289 using type = typename __intrinsic_type<_Up, _SVecBytes>::type;
2290 };
2291#endif // _GLIBCXX_SIMD_HAVE_NEON
2292
2293// }}}
2294// __intrinsic_type (PPC){{{
2295#ifdef __ALTIVEC__
2296template <typename _Tp>
2297 struct __intrinsic_type_impl;
2298
2299#define _GLIBCXX_SIMD_PPC_INTRIN(_Tp) \
2300 template <> \
2301 struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
2302_GLIBCXX_SIMD_PPC_INTRIN(float);
2303_GLIBCXX_SIMD_PPC_INTRIN(double);
2304_GLIBCXX_SIMD_PPC_INTRIN(signed char);
2305_GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
2306_GLIBCXX_SIMD_PPC_INTRIN(signed short);
2307_GLIBCXX_SIMD_PPC_INTRIN(unsigned short);
2308_GLIBCXX_SIMD_PPC_INTRIN(signed int);
2309_GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
2310_GLIBCXX_SIMD_PPC_INTRIN(signed long);
2311_GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
2312_GLIBCXX_SIMD_PPC_INTRIN(signed long long);
2313_GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
2314#undef _GLIBCXX_SIMD_PPC_INTRIN
2315
2316template <typename _Tp, size_t _Bytes>
2317 struct __intrinsic_type<_Tp, _Bytes,
2318 enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2319 {
2320 static constexpr bool _S_is_ldouble = is_same_v<_Tp, long double>;
2321 // allow _Tp == long double with -mlong-double-64
2322 static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)),
2323 "no __intrinsic_type support for long double on PPC");
2324#ifndef __VSX__
2325 static_assert(!is_same_v<_Tp, double>,
2326 "no __intrinsic_type support for double on PPC w/o VSX");
2327#endif
2328 using type =
2329 typename __intrinsic_type_impl<
2330 conditional_t<is_floating_point_v<_Tp>,
2331 conditional_t<_S_is_ldouble, double, _Tp>,
2332 __int_for_sizeof_t<_Tp>>>::type;
2333 };
2334#endif // __ALTIVEC__
2335
2336// }}}
2337// _SimdWrapper<bool>{{{1
2338template <size_t _Width>
2339 struct _SimdWrapper<bool, _Width,
2340 void_t<typename __bool_storage_member_type<_Width>::type>>
2341 {
2342 using _BuiltinType = typename __bool_storage_member_type<_Width>::type;
2343 using value_type = bool;
2344
2345 static constexpr size_t _S_full_size = sizeof(_BuiltinType) * __CHAR_BIT__;
2346
2347 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _S_full_size>
2348 __as_full_vector() const { return _M_data; }
2349
2350 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper() = default;
2351 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(_BuiltinType __k)
2352 : _M_data(__k) {}
2353
2354 _GLIBCXX_SIMD_INTRINSIC operator const _BuiltinType&() const
2355 { return _M_data; }
2356
2357 _GLIBCXX_SIMD_INTRINSIC operator _BuiltinType&()
2358 { return _M_data; }
2359
2360 _GLIBCXX_SIMD_INTRINSIC _BuiltinType __intrin() const
2361 { return _M_data; }
2362
2363 _GLIBCXX_SIMD_INTRINSIC constexpr value_type operator[](size_t __i) const
2364 { return _M_data & (_BuiltinType(1) << __i); }
2365
2366 template <size_t __i>
2367 _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2368 operator[](_SizeConstant<__i>) const
2369 { return _M_data & (_BuiltinType(1) << __i); }
2370
2371 _GLIBCXX_SIMD_INTRINSIC constexpr void _M_set(size_t __i, value_type __x)
2372 {
2373 if (__x)
2374 _M_data |= (_BuiltinType(1) << __i);
2375 else
2376 _M_data &= ~(_BuiltinType(1) << __i);
2377 }
2378
2379 _GLIBCXX_SIMD_INTRINSIC
2380 constexpr bool _M_is_constprop() const
2381 { return __builtin_constant_p(_M_data); }
2382
2383 _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_none_of() const
2384 {
2385 if (__builtin_constant_p(_M_data))
2386 {
2387 constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2388 constexpr _BuiltinType __active_mask
2389 = ~_BuiltinType() >> (__nbits - _Width);
2390 return (_M_data & __active_mask) == 0;
2391 }
2392 return false;
2393 }
2394
2395 _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_all_of() const
2396 {
2397 if (__builtin_constant_p(_M_data))
2398 {
2399 constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2400 constexpr _BuiltinType __active_mask
2401 = ~_BuiltinType() >> (__nbits - _Width);
2402 return (_M_data & __active_mask) == __active_mask;
2403 }
2404 return false;
2405 }
2406
2407 _BuiltinType _M_data;
2408 };
2409
2410// _SimdWrapperBase{{{1
2411template <bool _MustZeroInitPadding, typename _BuiltinType>
2412 struct _SimdWrapperBase;
2413
2414template <typename _BuiltinType>
2415 struct _SimdWrapperBase<false, _BuiltinType> // no padding or no SNaNs
2416 {
2417 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase() = default;
2418 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase(_BuiltinType __init)
2419 : _M_data(__init)
2420 {}
2421
2422 _BuiltinType _M_data;
2423 };
2424
2425template <typename _BuiltinType>
2426 struct _SimdWrapperBase<true, _BuiltinType> // with padding that needs to
2427 // never become SNaN
2428 {
2429 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase() : _M_data() {}
2430 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase(_BuiltinType __init)
2431 : _M_data(__init)
2432 {}
2433
2434 _BuiltinType _M_data;
2435 };
2436
2437// }}}
2438// _SimdWrapper{{{
2439template <typename _Tp, size_t _Width>
2440 struct _SimdWrapper<
2441 _Tp, _Width,
2442 void_t<__vector_type_t<_Tp, _Width>, __intrinsic_type_t<_Tp, _Width>>>
2443 : _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2444 && sizeof(_Tp) * _Width
2445 == sizeof(__vector_type_t<_Tp, _Width>),
2446 __vector_type_t<_Tp, _Width>>
2447 {
2448 using _Base
2449 = _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2450 && sizeof(_Tp) * _Width
2451 == sizeof(__vector_type_t<_Tp, _Width>),
2452 __vector_type_t<_Tp, _Width>>;
2453
2454 static_assert(__is_vectorizable_v<_Tp>);
2455 static_assert(_Width >= 2); // 1 doesn't make sense, use _Tp directly then
2456
2457 using _BuiltinType = __vector_type_t<_Tp, _Width>;
2458 using value_type = _Tp;
2459
2460 static inline constexpr size_t _S_full_size
2461 = sizeof(_BuiltinType) / sizeof(value_type);
2462 static inline constexpr int _S_size = _Width;
2463 static inline constexpr bool _S_is_partial = _S_full_size != _S_size;
2464
2465 using _Base::_M_data;
2466
2467 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _S_full_size>
2468 __as_full_vector() const
2469 { return _M_data; }
2470
2471 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(initializer_list<_Tp> __init)
2472 : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>(
2473 [&](auto __i) { return __init.begin()[__i.value]; })) {}
2474
2475 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper() = default;
2476 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(const _SimdWrapper&)
2477 = default;
2478 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(_SimdWrapper&&) = default;
2479
2480 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2481 operator=(const _SimdWrapper&) = default;
2482 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2483 operator=(_SimdWrapper&&) = default;
2484
2485 template <typename _V, typename = enable_if_t<disjunction_v<
2486 is_same<_V, __vector_type_t<_Tp, _Width>>,
2487 is_same<_V, __intrinsic_type_t<_Tp, _Width>>>>>
2488 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(_V __x)
2489 // __vector_bitcast can convert e.g. __m128 to __vector(2) float
2490 : _Base(__vector_bitcast<_Tp, _Width>(__x)) {}
2491
2492 template <typename... _As,
2493 typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...)
2494 && sizeof...(_As) <= _Width)>>
2495 _GLIBCXX_SIMD_INTRINSIC constexpr
2496 operator _SimdTuple<_Tp, _As...>() const
2497 {
2498 const auto& dd = _M_data; // workaround for GCC7 ICE
2499 return __generate_from_n_evaluations<sizeof...(_As),
2500 _SimdTuple<_Tp, _As...>>([&](
2501 auto __i) constexpr { return dd[int(__i)]; });
2502 }
2503
2504 _GLIBCXX_SIMD_INTRINSIC constexpr operator const _BuiltinType&() const
2505 { return _M_data; }
2506
2507 _GLIBCXX_SIMD_INTRINSIC constexpr operator _BuiltinType&()
2508 { return _M_data; }
2509
2510 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp operator[](size_t __i) const
2511 { return _M_data[__i]; }
2512
2513 template <size_t __i>
2514 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp operator[](_SizeConstant<__i>) const
2515 { return _M_data[__i]; }
2516
2517 _GLIBCXX_SIMD_INTRINSIC constexpr void _M_set(size_t __i, _Tp __x)
2518 { _M_data[__i] = __x; }
2519
2520 _GLIBCXX_SIMD_INTRINSIC
2521 constexpr bool _M_is_constprop() const
2522 { return __builtin_constant_p(_M_data); }
2523
2524 _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_none_of() const
2525 {
2526 if (__builtin_constant_p(_M_data))
2527 {
2528 bool __r = true;
2529 if constexpr (is_floating_point_v<_Tp>)
2530 {
2531 using _Ip = __int_for_sizeof_t<_Tp>;
2532 const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2533 __execute_n_times<_Width>(
2534 [&](auto __i) { __r &= __intdata[__i.value] == _Ip(); });
2535 }
2536 else
2537 __execute_n_times<_Width>(
2538 [&](auto __i) { __r &= _M_data[__i.value] == _Tp(); });
2539 return __r;
2540 }
2541 return false;
2542 }
2543
2544 _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_all_of() const
2545 {
2546 if (__builtin_constant_p(_M_data))
2547 {
2548 bool __r = true;
2549 if constexpr (is_floating_point_v<_Tp>)
2550 {
2551 using _Ip = __int_for_sizeof_t<_Tp>;
2552 const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2553 __execute_n_times<_Width>(
2554 [&](auto __i) { __r &= __intdata[__i.value] == ~_Ip(); });
2555 }
2556 else
2557 __execute_n_times<_Width>(
2558 [&](auto __i) { __r &= _M_data[__i.value] == ~_Tp(); });
2559 return __r;
2560 }
2561 return false;
2562 }
2563 };
2564
2565// }}}
2566
2567// __vectorized_sizeof {{{
2568template <typename _Tp>
2569 constexpr size_t
2570 __vectorized_sizeof()
2571 {
2572 if constexpr (!__is_vectorizable_v<_Tp>)
2573 return 0;
2574
2575 if constexpr (sizeof(_Tp) <= 8)
2576 {
2577 // X86:
2578 if constexpr (__have_avx512bw)
2579 return 64;
2580 if constexpr (__have_avx512f && sizeof(_Tp) >= 4)
2581 return 64;
2582 if constexpr (__have_avx2)
2583 return 32;
2584 if constexpr (__have_avx && is_floating_point_v<_Tp>)
2585 return 32;
2586 if constexpr (__have_sse2)
2587 return 16;
2588 if constexpr (__have_sse && is_same_v<_Tp, float>)
2589 return 16;
2590 /* The following is too much trouble because of mixed MMX and x87 code.
2591 * While nothing here explicitly uses MMX instructions or registers,
2592 * they are still emitted but no EMMS cleanup is done.
2593 if constexpr (__have_mmx && sizeof(_Tp) <= 4 && is_integral_v<_Tp>)
2594 return 8;
2595 */
2596
2597 // PowerPC:
2598 if constexpr (__have_power8vec
2599 || (__have_power_vmx && (sizeof(_Tp) < 8))
2600 || (__have_power_vsx && is_floating_point_v<_Tp>) )
2601 return 16;
2602
2603 // ARM:
2604 if constexpr (__have_neon_a64
2605 || (__have_neon_a32 && !is_same_v<_Tp, double>) )
2606 return 16;
2607 if constexpr (__have_neon
2608 && sizeof(_Tp) < 8
2609 // Only allow fp if the user allows non-IEC559 fp (e.g.
2610 // via -ffast-math). ARMv7 NEON fp does not conform to
2611 // IEC 559.
2612 && (__support_neon_float || !is_floating_point_v<_Tp>))
2613 return 16;
2614 }
2615
2616 return sizeof(_Tp);
2617 }
2618
2619// }}}
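// Example outcomes of the selection above (depending on the enabled ISA
// extensions): with AVX2 but no AVX-512, __vectorized_sizeof<float>() is 32;
// with only SSE2 it is 16; for non-vectorizable types it is 0; otherwise it
// falls back to sizeof(_Tp), i.e. scalar.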
2620namespace simd_abi {
2621// most of simd_abi is defined in simd_detail.h
2622template <typename _Tp>
2623 inline constexpr int max_fixed_size
2624 = (__have_avx512bw && sizeof(_Tp) == 1) ? 64 : 32;
2625
2626// compatible {{{
2627#if defined __x86_64__ || defined __aarch64__
2628template <typename _Tp>
2629 using compatible = conditional_t<(sizeof(_Tp) <= 8), _VecBuiltin<16>, scalar>;
2630#elif defined __ARM_NEON
2631// FIXME: not sure, probably needs to be scalar (or dependent on the hard-float
2632// ABI?)
2633template <typename _Tp>
2634 using compatible
2635 = conditional_t<(sizeof(_Tp) < 8
2636 && (__support_neon_float || !is_floating_point_v<_Tp>)),
2637 _VecBuiltin<16>, scalar>;
2638#else
2639template <typename>
2640 using compatible = scalar;
2641#endif
2642
2643// }}}
2644// native {{{
2645template <typename _Tp>
2646 constexpr auto
2647 __determine_native_abi()
2648 {
2649 constexpr size_t __bytes = __vectorized_sizeof<_Tp>();
2650 if constexpr (__bytes == sizeof(_Tp))
2651 return static_cast<scalar*>(nullptr);
2652 else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64))
2653 return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr);
2654 else
2655 return static_cast<_VecBuiltin<__bytes>*>(nullptr);
2656 }
2657
2658template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
2659 using native = remove_pointer_t<decltype(__determine_native_abi<_Tp>())>;
2660
2661// }}}
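// E.g. on an AVX2-only x86-64 target __determine_native_abi<float>() yields
// _VecBuiltin<32> (8 floats); with AVX-512F the 64-byte _VecBltnBtmsk<64>
// (bitmask-based) ABI is chosen instead.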
2662// __default_abi {{{
2663#if defined _GLIBCXX_SIMD_DEFAULT_ABI
2664template <typename _Tp>
2665 using __default_abi = _GLIBCXX_SIMD_DEFAULT_ABI<_Tp>;
2666#else
2667template <typename _Tp>
2668 using __default_abi = compatible<_Tp>;
2669#endif
2670
2671// }}}
2672} // namespace simd_abi
2673
2674// traits {{{1
2675// is_abi_tag {{{2
2676template <typename _Tp, typename = void_t<>>
2677 struct is_abi_tag : false_type {};
2678
2679template <typename _Tp>
2680 struct is_abi_tag<_Tp, void_t<typename _Tp::_IsValidAbiTag>>
2681 : public _Tp::_IsValidAbiTag {};
2682
2683template <typename _Tp>
2684 inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
2685
2686// is_simd(_mask) {{{2
2687template <typename _Tp>
2688 struct is_simd : public false_type {};
2689
2690template <typename _Tp>
2691 inline constexpr bool is_simd_v = is_simd<_Tp>::value;
2692
2693template <typename _Tp>
2694 struct is_simd_mask : public false_type {};
2695
2696template <typename _Tp>
2697 inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
2698
2699// simd_size {{{2
2700template <typename _Tp, typename _Abi, typename = void>
2701 struct __simd_size_impl {};
2702
2703template <typename _Tp, typename _Abi>
2704 struct __simd_size_impl<
2705 _Tp, _Abi,
2706 enable_if_t<conjunction_v<__is_vectorizable<_Tp>, is_abi_tag<_Abi>>>>
2707 : _SizeConstant<_Abi::template _S_size<_Tp>> {};
2708
2709template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2710 struct simd_size : __simd_size_impl<_Tp, _Abi> {};
2711
2712template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2713 inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
2714
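// Illustrative (public API): simd_size_v<float, simd_abi::fixed_size<7>> is 7,
// and simd_size_v<int> is the element count of the default ABI for int.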
2715// simd_abi::deduce {{{2
2716template <typename _Tp, size_t _Np, typename = void>
2717 struct __deduce_impl;
2718
2719namespace simd_abi {
2720/**
2721 * @tparam _Tp The requested `value_type` for the elements.
2722 * @tparam _Np The requested number of elements.
2723 * @tparam _Abis This parameter is ignored, since this implementation cannot
2724 * make any use of it. Either a good native ABI is matched and used as `type`
2725 * alias, or the `fixed_size<_Np>` ABI is used, which internally is built from
2726 * the best matching native ABIs.
2727 */
2728template <typename _Tp, size_t _Np, typename...>
2729 struct deduce : __deduce_impl<_Tp, _Np> {};
2730
2731template <typename _Tp, size_t _Np, typename... _Abis>
2732 using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
2733} // namespace simd_abi
2734
2735// }}}2
2736// rebind_simd {{{2
2737template <typename _Tp, typename _V, typename = void>
2738 struct rebind_simd;
2739
2740template <typename _Tp, typename _Up, typename _Abi>
2741 struct rebind_simd<
2742 _Tp, simd<_Up, _Abi>,
2743 void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
2744 {
2745 using type
2746 = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>;
2747 };
2748
2749template <typename _Tp, typename _Up, typename _Abi>
2750 struct rebind_simd<
2751 _Tp, simd_mask<_Up, _Abi>,
2752 void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
2753 {
2754 using type
2755 = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>;
2756 };
2757
2758template <typename _Tp, typename _V>
2759 using rebind_simd_t = typename rebind_simd<_Tp, _V>::type;
2760
2761// resize_simd {{{2
2762template <int _Np, typename _V, typename = void>
2763 struct resize_simd;
2764
2765template <int _Np, typename _Tp, typename _Abi>
2766 struct resize_simd<_Np, simd<_Tp, _Abi>,
2767 void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
2768 { using type = simd<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
2769
2770template <int _Np, typename _Tp, typename _Abi>
2771 struct resize_simd<_Np, simd_mask<_Tp, _Abi>,
2772 void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
2773 { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
2774
2775template <int _Np, typename _V>
2776 using resize_simd_t = typename resize_simd<_Np, _V>::type;
2777
2778// }}}2
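// Sketch of the two alias families above (the deduced ABIs depend on the
// target):
//   using _V = native_simd<float>;   // e.g. 8 floats with AVX
//   rebind_simd_t<double, _V>        // still 8 elements, ABI deduced anew
//   resize_simd_t<4, _V>             // 4 floats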
2779// memory_alignment {{{2
2780template <typename _Tp, typename _Up = typename _Tp::value_type>
2781 struct memory_alignment
2782 : public _SizeConstant<vector_aligned_tag::_S_alignment<_Tp, _Up>> {};
2783
2784template <typename _Tp, typename _Up = typename _Tp::value_type>
2785 inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value;
2786
2787// class template simd [simd] {{{1
2788template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2789 class simd;
2790
2791template <typename _Tp, typename _Abi>
2792 struct is_simd<simd<_Tp, _Abi>> : public true_type {};
2793
2794template <typename _Tp>
2795 using native_simd = simd<_Tp, simd_abi::native<_Tp>>;
2796
2797template <typename _Tp, int _Np>
2798 using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>;
2799
2800template <typename _Tp, size_t _Np>
2801 using __deduced_simd = simd<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
2802
2803// class template simd_mask [simd_mask] {{{1
2804template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2805 class simd_mask;
2806
2807template <typename _Tp, typename _Abi>
2808 struct is_simd_mask<simd_mask<_Tp, _Abi>> : public true_type {};
2809
2810template <typename _Tp>
2811 using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>;
2812
2813template <typename _Tp, int _Np>
2814 using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>;
2815
2816template <typename _Tp, size_t _Np>
2817 using __deduced_simd_mask = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
2818
2819// casts [simd.casts] {{{1
2820// static_simd_cast {{{2
2821template <typename _Tp, typename _Up, typename _Ap, bool = is_simd_v<_Tp>,
2822 typename = void>
2823 struct __static_simd_cast_return_type;
2824
2825template <typename _Tp, typename _A0, typename _Up, typename _Ap>
2826 struct __static_simd_cast_return_type<simd_mask<_Tp, _A0>, _Up, _Ap, false,
2827 void>
2828 : __static_simd_cast_return_type<simd<_Tp, _A0>, _Up, _Ap> {};
2829
2830template <typename _Tp, typename _Up, typename _Ap>
2831 struct __static_simd_cast_return_type<
2832 _Tp, _Up, _Ap, true, enable_if_t<_Tp::size() == simd_size_v<_Up, _Ap>>>
2833 { using type = _Tp; };
2834
2835template <typename _Tp, typename _Ap>
2836 struct __static_simd_cast_return_type<_Tp, _Tp, _Ap, false,
2837#ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
2838 enable_if_t<__is_vectorizable_v<_Tp>>
2839#else
2840 void
2841#endif
2842 >
2843 { using type = simd<_Tp, _Ap>; };
2844
2845template <typename _Tp, typename = void>
2846 struct __safe_make_signed { using type = _Tp; };
2847
2848template <typename _Tp>
2849 struct __safe_make_signed<_Tp, enable_if_t<is_integral_v<_Tp>>>
2850 {
2851 // the extra make_unsigned_t is because of PR85951
2852 using type = make_signed_t<make_unsigned_t<_Tp>>;
2853 };
2854
2855template <typename _Tp>
2856 using safe_make_signed_t = typename __safe_make_signed<_Tp>::type;
2857
2858template <typename _Tp, typename _Up, typename _Ap>
2859 struct __static_simd_cast_return_type<_Tp, _Up, _Ap, false,
2860#ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
2861 enable_if_t<__is_vectorizable_v<_Tp>>
2862#else
2863 void
2864#endif
2865 >
2866 {
2867 using type = conditional_t<
2868 (is_integral_v<_Up> && is_integral_v<_Tp> &&
2869#ifndef _GLIBCXX_SIMD_FIX_P2TS_ISSUE65
2870 is_signed_v<_Up> != is_signed_v<_Tp> &&
2871#endif
2872 is_same_v<safe_make_signed_t<_Up>, safe_make_signed_t<_Tp>>),
2873 simd<_Tp, _Ap>, fixed_size_simd<_Tp, simd_size_v<_Up, _Ap>>>;
2874 };
2875
2876template <typename _Tp, typename _Up, typename _Ap,
2877 typename _R
2878 = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
2879 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _R
2880 static_simd_cast(const simd<_Up, _Ap>& __x)
2881 {
2882 if constexpr (is_same<_R, simd<_Up, _Ap>>::value)
2883 return __x;
2884 else
2885 {
2886 _SimdConverter<_Up, _Ap, typename _R::value_type, typename _R::abi_type>
2887 __c;
2888 return _R(__private_init, __c(__data(__x)));
2889 }
2890 }
2891
2892namespace __proposed {
2893template <typename _Tp, typename _Up, typename _Ap,
2894 typename _R
2895 = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
2896 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR typename _R::mask_type
2897 static_simd_cast(const simd_mask<_Up, _Ap>& __x)
2898 {
2899 using _RM = typename _R::mask_type;
2900 return {__private_init, _RM::abi_type::_MaskImpl::template _S_convert<
2901 typename _RM::simd_type::value_type>(__x)};
2902 }
2903} // namespace __proposed
2904
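// Public-API sketch: static_simd_cast permits value-changing element
// conversions,
//   fixed_size_simd<int, 4> __i = ...;
//   auto __f = static_simd_cast<float>(__i); // fixed_size_simd<float, 4>
// whereas simd_cast (below) additionally requires the conversion to be
// value-preserving.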
2905// simd_cast {{{2
2906template <typename _Tp, typename _Up, typename _Ap,
2907 typename _To = __value_type_or_identity_t<_Tp>>
2908 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
2909 simd_cast(const simd<_ValuePreserving<_Up, _To>, _Ap>& __x)
2910 -> decltype(static_simd_cast<_Tp>(__x))
2911 { return static_simd_cast<_Tp>(__x); }
2912
2913namespace __proposed {
2914template <typename _Tp, typename _Up, typename _Ap,
2915 typename _To = __value_type_or_identity_t<_Tp>>
2916 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
2917 simd_cast(const simd_mask<_ValuePreserving<_Up, _To>, _Ap>& __x)
2918 -> decltype(static_simd_cast<_Tp>(__x))
2919 { return static_simd_cast<_Tp>(__x); }
2920} // namespace __proposed
2921
2922// }}}2
2923// resizing_simd_cast {{{
2924namespace __proposed {
2925/* Proposed spec:
2926
2927template <class T, class U, class Abi>
2928T resizing_simd_cast(const simd<U, Abi>& x)
2929
2930p1 Constraints:
2931 - is_simd_v<T> is true and
2932 - T::value_type is the same type as U
2933
2934p2 Returns:
2935 A simd object with the i^th element initialized to x[i] for all i in the
2936 range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
2937 than simd_size_v<U, Abi>, the remaining elements are value-initialized.
2938
2939template <class T, class U, class Abi>
2940T resizing_simd_cast(const simd_mask<U, Abi>& x)
2941
2942p1 Constraints: is_simd_mask_v<T> is true
2943
2944p2 Returns:
2945 A simd_mask object with the i^th element initialized to x[i] for all i in
2946 the range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
2947 than simd_size_v<U, Abi>, the remaining elements are initialized to false.
2948
2949 */
2950
2951template <typename _Tp, typename _Up, typename _Ap>
2952 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR enable_if_t<
2953 conjunction_v<is_simd<_Tp>, is_same<typename _Tp::value_type, _Up>>, _Tp>
2954 resizing_simd_cast(const simd<_Up, _Ap>& __x)
2955 {
2956 if constexpr (is_same_v<typename _Tp::abi_type, _Ap>)
2957 return __x;
2958 else if constexpr (simd_size_v<_Up, _Ap> == 1)
2959 {
2960 _Tp __r{};
2961 __r[0] = __x[0];
2962 return __r;
2963 }
2964 else if constexpr (_Tp::size() == 1)
2965 return __x[0];
2966 else if constexpr (sizeof(_Tp) == sizeof(__x)
2967 && !__is_fixed_size_abi_v<_Ap>)
2968 return {__private_init,
2969 __vector_bitcast<typename _Tp::value_type, _Tp::size()>(
2970 _Ap::_S_masked(__data(__x))._M_data)};
2971 else
2972 {
2973 _Tp __r{};
2974 __builtin_memcpy(&__data(__r), &__data(__x),
2975 sizeof(_Up)
2976 * std::min(_Tp::size(), simd_size_v<_Up, _Ap>));
2977 return __r;
2978 }
2979 }
2980
2981template <typename _Tp, typename _Up, typename _Ap>
2982 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
2983 enable_if_t<is_simd_mask_v<_Tp>, _Tp>
2984 resizing_simd_cast(const simd_mask<_Up, _Ap>& __x)
2985 {
2986 return {__private_init, _Tp::abi_type::_MaskImpl::template _S_convert<
2987 typename _Tp::simd_type::value_type>(__x)};
2988 }
2989} // namespace __proposed
2990
2991// }}}
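// Sketch of the proposed semantics: growing value-initializes the tail,
//   fixed_size_simd<int, 4> __a = ...;
//   auto __b = __proposed::resizing_simd_cast<fixed_size_simd<int, 8>>(__a);
//   // __b[0..3] == __a[0..3], __b[4..7] == 0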
2992// to_fixed_size {{{2
2993template <typename _Tp, int _Np>
2994 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, _Np>
2995 to_fixed_size(const fixed_size_simd<_Tp, _Np>& __x)
2996 { return __x; }
2997
2998template <typename _Tp, int _Np>
2999 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, _Np>
3000 to_fixed_size(const fixed_size_simd_mask<_Tp, _Np>& __x)
3001 { return __x; }
3002
3003template <typename _Tp, typename _Ap>
3004 _GLIBCXX_SIMD_INTRINSIC auto
3005 to_fixed_size(const simd<_Tp, _Ap>& __x)
3006 {
3007 return simd<_Tp, simd_abi::fixed_size<simd_size_v<_Tp, _Ap>>>([&__x](
3008 auto __i) constexpr { return __x[__i]; });
3009 }
3010
3011template <typename _Tp, typename _Ap>
3012 _GLIBCXX_SIMD_INTRINSIC auto
3013 to_fixed_size(const simd_mask<_Tp, _Ap>& __x)
3014 {
3015 constexpr int _Np = simd_mask<_Tp, _Ap>::size();
3016 fixed_size_simd_mask<_Tp, _Np> __r;
3017 __execute_n_times<_Np>([&](auto __i) constexpr { __r[__i] = __x[__i]; });
3018 return __r;
3019 }
3020
3021// to_native {{{2
3022template <typename _Tp, int _Np>
3023 _GLIBCXX_SIMD_INTRINSIC
3024 enable_if_t<(_Np == native_simd<_Tp>::size()), native_simd<_Tp>>
3025 to_native(const fixed_size_simd<_Tp, _Np>& __x)
3026 {
3027 alignas(memory_alignment_v<native_simd<_Tp>>) _Tp __mem[_Np];
3028 __x.copy_to(__mem, vector_aligned);
3029 return {__mem, vector_aligned};
3030 }
3031
3032template <typename _Tp, size_t _Np>
3033 _GLIBCXX_SIMD_INTRINSIC
3034 enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>>
3035 to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
3036 {
3037 return native_simd_mask<_Tp>([&](auto __i) constexpr { return __x[__i]; });
3038 }
3039
3040// to_compatible {{{2
3041template <typename _Tp, size_t _Np>
3042 _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>>
3043 to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
3044 {
3045 alignas(memory_alignment_v<simd<_Tp>>) _Tp __mem[_Np];
3046 __x.copy_to(__mem, vector_aligned);
3047 return {__mem, vector_aligned};
3048 }
3049
3050template <typename _Tp, size_t _Np>
3051 _GLIBCXX_SIMD_INTRINSIC
3052 enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>>
3053 to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x)
3054 { return simd_mask<_Tp>([&](auto __i) constexpr { return __x[__i]; }); }
3055
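// Round-trip sketch: to_fixed_size and to_native/to_compatible only change
// the ABI tag, never element values, e.g.
//   native_simd<float> __v = ...;
//   auto __f = to_fixed_size(__v); // fixed_size_simd<float, __v.size()>
//   auto __n = to_native(__f);     // back to native_simd<float>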
3056// masked assignment [simd_mask.where] {{{1
3057
3058// where_expression {{{1
3059// const_where_expression<M, T> {{{2
3060template <typename _M, typename _Tp>
3061 class const_where_expression
3062 {
3063 using _V = _Tp;
3064 static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3065
3066 struct _Wrapper { using value_type = _V; };
3067
3068 protected:
3069 using _Impl = typename _V::_Impl;
3070
3071 using value_type =
3072 typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3073
3074 _GLIBCXX_SIMD_INTRINSIC friend const _M&
3075 __get_mask(const const_where_expression& __x)
3076 { return __x._M_k; }
3077
3078 _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3079 __get_lvalue(const const_where_expression& __x)
3080 { return __x._M_value; }
3081
3082 const _M& _M_k;
3083 _Tp& _M_value;
3084
3085 public:
3086 const_where_expression(const const_where_expression&) = delete;
3087 const_where_expression& operator=(const const_where_expression&) = delete;
3088
3089 _GLIBCXX_SIMD_INTRINSIC const_where_expression(const _M& __kk, const _Tp& dd)
3090 : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
3091
3092 _GLIBCXX_SIMD_INTRINSIC _V
3093 operator-() const&&
3094 {
3095 return {__private_init,
3096 _Impl::template _S_masked_unary<negate>(__data(_M_k),
3097 __data(_M_value))};
3098 }
3099
3100 template <typename _Up, typename _Flags>
3101 [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _V
3102 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
3103 {
3104 return {__private_init,
3105 _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3106 _Flags::template _S_apply<_V>(__mem))};
3107 }
3108
3109 template <typename _Up, typename _Flags>
3110 _GLIBCXX_SIMD_INTRINSIC void
3111 copy_to(_LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
3112 {
3113 _Impl::_S_masked_store(__data(_M_value),
3114 _Flags::template _S_apply<_V>(__mem),
3115 __data(_M_k));
3116 }
3117 };
3118
3119// const_where_expression<bool, T> {{{2
3120template <typename _Tp>
3121 class const_where_expression<bool, _Tp>
3122 {
3123 using _M = bool;
3124 using _V = _Tp;
3125
3126 static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3127
3128 struct _Wrapper { using value_type = _V; };
3129
3130 protected:
3131 using value_type =
3132 typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3133
3134 _GLIBCXX_SIMD_INTRINSIC friend const _M&
3135 __get_mask(const const_where_expression& __x)
3136 { return __x._M_k; }
3137
3138 _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3139 __get_lvalue(const const_where_expression& __x)
3140 { return __x._M_value; }
3141
3142 const bool _M_k;
3143 _Tp& _M_value;
3144
3145 public:
3146 const_where_expression(const const_where_expression&) = delete;
3147 const_where_expression& operator=(const const_where_expression&) = delete;
3148
3149 _GLIBCXX_SIMD_INTRINSIC const_where_expression(const bool __kk, const _Tp& dd)
3150 : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
3151
3152 _GLIBCXX_SIMD_INTRINSIC _V operator-() const&&
3153 { return _M_k ? -_M_value : _M_value; }
3154
3155 template <typename _Up, typename _Flags>
3156 [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _V
3157 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
3158 { return _M_k ? static_cast<_V>(__mem[0]) : _M_value; }
3159
3160 template <typename _Up, typename _Flags>
3161 _GLIBCXX_SIMD_INTRINSIC void
3162 copy_to(_LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
3163 {
3164 if (_M_k)
3165 __mem[0] = _M_value;
3166 }
3167 };
3168
3169// where_expression<M, T> {{{2
3170template <typename _M, typename _Tp>
3171 class where_expression : public const_where_expression<_M, _Tp>
3172 {
3173 using _Impl = typename const_where_expression<_M, _Tp>::_Impl;
3174
3175 static_assert(!is_const<_Tp>::value,
3176 "where_expression may only be instantiated with __a non-const "
3177 "_Tp parameter");
3178
3179 using typename const_where_expression<_M, _Tp>::value_type;
3180 using const_where_expression<_M, _Tp>::_M_k;
3181 using const_where_expression<_M, _Tp>::_M_value;
3182
3183 static_assert(
3184 is_same<typename _M::abi_type, typename _Tp::abi_type>::value, "");
3185 static_assert(_M::size() == _Tp::size(), "");
3186
3187 _GLIBCXX_SIMD_INTRINSIC friend _Tp& __get_lvalue(where_expression& __x)
3188 { return __x._M_value; }
3189
3190 public:
3191 where_expression(const where_expression&) = delete;
3192 where_expression& operator=(const where_expression&) = delete;
3193
3194 _GLIBCXX_SIMD_INTRINSIC where_expression(const _M& __kk, _Tp& dd)
3195 : const_where_expression<_M, _Tp>(__kk, dd) {}
3196
3197 template <typename _Up>
3198 _GLIBCXX_SIMD_INTRINSIC void operator=(_Up&& __x) &&
3199 {
3200 _Impl::_S_masked_assign(__data(_M_k), __data(_M_value),
3201 __to_value_type_or_member_type<_Tp>(
3202 static_cast<_Up&&>(__x)));
3203 }
3204
3205#define _GLIBCXX_SIMD_OP_(__op, __name) \
3206 template <typename _Up> \
3207 _GLIBCXX_SIMD_INTRINSIC void operator __op##=(_Up&& __x)&& \
3208 { \
3209 _Impl::template _S_masked_cassign( \
3210 __data(_M_k), __data(_M_value), \
3211 __to_value_type_or_member_type<_Tp>(static_cast<_Up&&>(__x)), \
3212 [](auto __impl, auto __lhs, auto __rhs) constexpr { \
3213 return __impl.__name(__lhs, __rhs); \
3214 }); \
3215 } \
3216 static_assert(true)
3217 _GLIBCXX_SIMD_OP_(+, _S_plus);
3218 _GLIBCXX_SIMD_OP_(-, _S_minus);
3219 _GLIBCXX_SIMD_OP_(*, _S_multiplies);
3220 _GLIBCXX_SIMD_OP_(/, _S_divides);
3221 _GLIBCXX_SIMD_OP_(%, _S_modulus);
3222 _GLIBCXX_SIMD_OP_(&, _S_bit_and);
3223 _GLIBCXX_SIMD_OP_(|, _S_bit_or);
3224 _GLIBCXX_SIMD_OP_(^, _S_bit_xor);
3225 _GLIBCXX_SIMD_OP_(<<, _S_shift_left);
3226 _GLIBCXX_SIMD_OP_(>>, _S_shift_right);
3227#undef _GLIBCXX_SIMD_OP_
3228
3229 _GLIBCXX_SIMD_INTRINSIC void operator++() &&
3230 {
3231 __data(_M_value)
3232 = _Impl::template _S_masked_unary<__increment>(__data(_M_k),
3233 __data(_M_value));
3234 }
3235
3236 _GLIBCXX_SIMD_INTRINSIC void operator++(int) &&
3237 {
3238 __data(_M_value)
3239 = _Impl::template _S_masked_unary<__increment>(__data(_M_k),
3240 __data(_M_value));
3241 }
3242
3243 _GLIBCXX_SIMD_INTRINSIC void operator--() &&
3244 {
3245 __data(_M_value)
3246 = _Impl::template _S_masked_unary<__decrement>(__data(_M_k),
3247 __data(_M_value));
3248 }
3249
3250 _GLIBCXX_SIMD_INTRINSIC void operator--(int) &&
3251 {
3252 __data(_M_value)
3253 = _Impl::template _S_masked_unary<__decrement>(__data(_M_k),
3254 __data(_M_value));
3255 }
3256
3257 // intentionally hides const_where_expression::copy_from
3258 template <typename _Up, typename _Flags>
3259 _GLIBCXX_SIMD_INTRINSIC void
3260 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) &&
3261 {
3262 __data(_M_value)
3263 = _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3264 _Flags::template _S_apply<_Tp>(__mem));
3265 }
3266 };
3267
3268// where_expression<bool, T> {{{2
3269template <typename _Tp>
3270 class where_expression<bool, _Tp> : public const_where_expression<bool, _Tp>
3271 {
3272 using _M = bool;
3273 using typename const_where_expression<_M, _Tp>::value_type;
3274 using const_where_expression<_M, _Tp>::_M_k;
3275 using const_where_expression<_M, _Tp>::_M_value;
3276
3277 public:
3278 where_expression(const where_expression&) = delete;
3279 where_expression& operator=(const where_expression&) = delete;
3280
3281 _GLIBCXX_SIMD_INTRINSIC where_expression(const _M& __kk, _Tp& dd)
3282 : const_where_expression<_M, _Tp>(__kk, dd) {}
3283
3284#define _GLIBCXX_SIMD_OP_(__op) \
3285 template <typename _Up> \
3286 _GLIBCXX_SIMD_INTRINSIC void operator __op(_Up&& __x)&& \
3287 { if (_M_k) _M_value __op static_cast<_Up&&>(__x); }
3288
3289 _GLIBCXX_SIMD_OP_(=)
3290 _GLIBCXX_SIMD_OP_(+=)
3291 _GLIBCXX_SIMD_OP_(-=)
3292 _GLIBCXX_SIMD_OP_(*=)
3293 _GLIBCXX_SIMD_OP_(/=)
3294 _GLIBCXX_SIMD_OP_(%=)
3295 _GLIBCXX_SIMD_OP_(&=)
3296 _GLIBCXX_SIMD_OP_(|=)
3297 _GLIBCXX_SIMD_OP_(^=)
3298 _GLIBCXX_SIMD_OP_(<<=)
3299 _GLIBCXX_SIMD_OP_(>>=)
3300#undef _GLIBCXX_SIMD_OP_
3301
3302 _GLIBCXX_SIMD_INTRINSIC void operator++() &&
3303 { if (_M_k) ++_M_value; }
3304
3305 _GLIBCXX_SIMD_INTRINSIC void operator++(int) &&
3306 { if (_M_k) ++_M_value; }
3307
3308 _GLIBCXX_SIMD_INTRINSIC void operator--() &&
3309 { if (_M_k) --_M_value; }
3310
3311 _GLIBCXX_SIMD_INTRINSIC void operator--(int) &&
3312 { if (_M_k) --_M_value; }
3313
3314 // intentionally hides const_where_expression::copy_from
3315 template <typename _Up, typename _Flags>
3316 _GLIBCXX_SIMD_INTRINSIC void
3317 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) &&
3318 { if (_M_k) _M_value = __mem[0]; }
3319 };
3320
3321// where {{{1
3322template <typename _Tp, typename _Ap>
3323 _GLIBCXX_SIMD_INTRINSIC where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3324 where(const typename simd<_Tp, _Ap>::mask_type& __k, simd<_Tp, _Ap>& __value)
3325 { return {__k, __value}; }
3326
3327template <typename _Tp, typename _Ap>
3328 _GLIBCXX_SIMD_INTRINSIC
3329 const_where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3330 where(const typename simd<_Tp, _Ap>::mask_type& __k,
3331 const simd<_Tp, _Ap>& __value)
3332 { return {__k, __value}; }
3333
3334template <typename _Tp, typename _Ap>
3335 _GLIBCXX_SIMD_INTRINSIC
3336 where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3337 where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k,
3338 simd_mask<_Tp, _Ap>& __value)
3339 { return {__k, __value}; }
3340
3341template <typename _Tp, typename _Ap>
3342 _GLIBCXX_SIMD_INTRINSIC
3343 const_where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3344 where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k,
3345 const simd_mask<_Tp, _Ap>& __value)
3346 { return {__k, __value}; }
3347
3348template <typename _Tp>
3349 _GLIBCXX_SIMD_INTRINSIC where_expression<bool, _Tp>
3350 where(_ExactBool __k, _Tp& __value)
3351 { return {__k, __value}; }
3352
3353template <typename _Tp>
3354 _GLIBCXX_SIMD_INTRINSIC const_where_expression<bool, _Tp>
3355 where(_ExactBool __k, const _Tp& __value)
3356 { return {__k, __value}; }
3357
3358template <typename _Tp, typename _Ap>
3359 void where(bool __k, simd<_Tp, _Ap>& __value) = delete;
3360
3361template <typename _Tp, typename _Ap>
3362 void where(bool __k, const simd<_Tp, _Ap>& __value) = delete;
3363
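// Canonical use of where() (public API):
//   native_simd<float> __v = ...;
//   where(__v < 0.f, __v) = 0.f;   // assign only where the mask is true
//   where(__v > 1.f, __v) *= .5f;  // compound ops are masked the same way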
3364// proposed mask iterations {{{1
3365namespace __proposed {
3366template <size_t _Np>
3367 class where_range
3368 {
3369 const bitset<_Np> __bits;
3370
3371 public:
3372 where_range(bitset<_Np> __b) : __bits(__b) {}
3373
3374 class iterator
3375 {
3376 size_t __mask;
3377 size_t __bit;
3378
3379 _GLIBCXX_SIMD_INTRINSIC void __next_bit()
3380 { __bit = __builtin_ctzl(__mask); }
3381
3382 _GLIBCXX_SIMD_INTRINSIC void __reset_lsb()
3383 {
3384 // 01100100 - 1 = 01100011
3385 __mask &= (__mask - 1);
3386 // __asm__("btr %1,%0" : "+r"(__mask) : "r"(__bit));
3387 }
3388
3389 public:
3390 iterator(decltype(__mask) __m) : __mask(__m) { __next_bit(); }
3391 iterator(const iterator&) = default;
3392 iterator(iterator&&) = default;
3393
3394 _GLIBCXX_SIMD_ALWAYS_INLINE size_t operator->() const
3395 { return __bit; }
3396
3397 _GLIBCXX_SIMD_ALWAYS_INLINE size_t operator*() const
3398 { return __bit; }
3399
3400 _GLIBCXX_SIMD_ALWAYS_INLINE iterator& operator++()
3401 {
3402 __reset_lsb();
3403 __next_bit();
3404 return *this;
3405 }
3406
3407 _GLIBCXX_SIMD_ALWAYS_INLINE iterator operator++(int)
3408 {
3409 iterator __tmp = *this;
3410 __reset_lsb();
3411 __next_bit();
3412 return __tmp;
3413 }
3414
3415 _GLIBCXX_SIMD_ALWAYS_INLINE bool operator==(const iterator& __rhs) const
3416 { return __mask == __rhs.__mask; }
3417
3418 _GLIBCXX_SIMD_ALWAYS_INLINE bool operator!=(const iterator& __rhs) const
3419 { return __mask != __rhs.__mask; }
3420 };
3421
3422 iterator begin() const
3423 { return __bits.to_ullong(); }
3424
3425 iterator end() const
3426 { return 0; }
3427 };
3428
3429template <typename _Tp, typename _Ap>
3430 where_range<simd_size_v<_Tp, _Ap>>
3431 where(const simd_mask<_Tp, _Ap>& __k)
3432 { return __k.__to_bitset(); }
3433
3434} // namespace __proposed
3435
3436// }}}1
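// Iteration sketch for the proposed where_range: it visits the index of each
// set mask bit in ascending order (via ctz), e.g.
//   for (size_t __i : __proposed::where(__k)) // __k: a simd_mask
//     __use(__v[__i]);                        // __use, __v: hypothetical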
3437// reductions [simd.reductions] {{{1
3438template <typename _Tp, typename _Abi, typename _BinaryOperation = plus<>>
3439 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3440 reduce(const simd<_Tp, _Abi>& __v,
3441 _BinaryOperation __binary_op = _BinaryOperation())
3442 { return _Abi::_SimdImpl::_S_reduce(__v, __binary_op); }
3443
3444template <typename _M, typename _V, typename _BinaryOperation = plus<>>
3445 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3446 reduce(const const_where_expression<_M, _V>& __x,
3447 typename _V::value_type __identity_element,
3448 _BinaryOperation __binary_op)
3449 {
3450 if (__builtin_expect(none_of(__get_mask(__x)), false))
3451 return __identity_element;
3452
3453 _V __tmp = __identity_element;
3454 _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3455 __data(__get_lvalue(__x)));
3456 return reduce(__tmp, __binary_op);
3457 }
3458
3459template <typename _M, typename _V>
3460 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3461 reduce(const const_where_expression<_M, _V>& __x, plus<> __binary_op = {})
3462 { return reduce(__x, 0, __binary_op); }
3463
3464template <typename _M, typename _V>
3465 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3466 reduce(const const_where_expression<_M, _V>& __x, multiplies<> __binary_op)
3467 { return reduce(__x, 1, __binary_op); }
3468
3469template <typename _M, typename _V>
3470 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3471 reduce(const const_where_expression<_M, _V>& __x, bit_and<> __binary_op)
3472 { return reduce(__x, ~typename _V::value_type(), __binary_op); }
3473
3474template <typename _M, typename _V>
3475 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3476 reduce(const const_where_expression<_M, _V>& __x, bit_or<> __binary_op)
3477 { return reduce(__x, 0, __binary_op); }
3478
3479template <typename _M, typename _V>
3480 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3481 reduce(const const_where_expression<_M, _V>& __x, bit_xor<> __binary_op)
3482 { return reduce(__x, 0, __binary_op); }
3483
3484template <typename _Tp, typename _Abi>
3485 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3486 hmin(const simd<_Tp, _Abi>& __v) noexcept
3487 {
3488 return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Minimum());
3489 }
3490
3491template <typename _Tp, typename _Abi>
3492 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3493 hmax(const simd<_Tp, _Abi>& __v) noexcept
3494 {
3495 return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Maximum());
3496 }
3497
3498template <typename _M, typename _V>
3499 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3500 typename _V::value_type
3501 hmin(const const_where_expression<_M, _V>& __x) noexcept
3502 {
3503 using _Tp = typename _V::value_type;
3504 constexpr _Tp __id_elem =
3505#ifdef __FINITE_MATH_ONLY__
3506 __finite_max_v<_Tp>;
3507#else
3508 __value_or<__infinity, _Tp>(__finite_max_v<_Tp>);
3509#endif
3510 _V __tmp = __id_elem;
3511 _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3512 __data(__get_lvalue(__x)));
3513 return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Minimum());
3514 }
3515
3516template <typename _M, typename _V>
3517 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3518 typename _V::value_type
3519 hmax(const const_where_expression<_M, _V>& __x) noexcept
3520 {
3521 using _Tp = typename _V::value_type;
3522 constexpr _Tp __id_elem =
3523#ifdef __FINITE_MATH_ONLY__
3524 __finite_min_v<_Tp>;
3525#else
3526 [] {
3527 if constexpr (__value_exists_v<__infinity, _Tp>)
3528 return -__infinity_v<_Tp>;
3529 else
3530 return __finite_min_v<_Tp>;
3531 }();
3532#endif
3533 _V __tmp = __id_elem;
3534 _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3535 __data(__get_lvalue(__x)));
3536 return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Maximum());
3537 }
3538
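// Example (editorial sketch): the masked hmin/hmax variants above fill the
// inactive lanes with +-infinity (or the finite extremes under
// __FINITE_MATH_ONLY__) before reducing:
//
//   namespace stdx = std::experimental;
//   stdx::native_simd<float> v([](int i) { return 1.f + i; });
//   float m = stdx::hmin(stdx::where(v > 2.f, v)); // smallest element > 2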
3539// }}}1
3540// algorithms [simd.alg] {{{
3541template <typename _Tp, typename _Ap>
3542 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3543 min(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3544 { return {__private_init, _Ap::_SimdImpl::_S_min(__data(__a), __data(__b))}; }
3545
3546template <typename _Tp, typename _Ap>
3547 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3548 max(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3549 { return {__private_init, _Ap::_SimdImpl::_S_max(__data(__a), __data(__b))}; }
3550
3551template <typename _Tp, typename _Ap>
3552 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3553 pair<simd<_Tp, _Ap>, simd<_Tp, _Ap>>
3554 minmax(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3555 {
3556 const auto pair_of_members
3557 = _Ap::_SimdImpl::_S_minmax(__data(__a), __data(__b));
3558 return {simd<_Tp, _Ap>(__private_init, pair_of_members.first),
3559 simd<_Tp, _Ap>(__private_init, pair_of_members.second)};
3560 }
3561
3562template <typename _Tp, typename _Ap>
3563 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3564 clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo,
3565 const simd<_Tp, _Ap>& __hi)
3566 {
3567 using _Impl = typename _Ap::_SimdImpl;
3568 return {__private_init,
3569 _Impl::_S_min(__data(__hi),
3570 _Impl::_S_max(__data(__lo), __data(__v)))};
3571 }
3572
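// Example (editorial sketch): element-wise algorithms; note that clamp()
// computes min(hi, max(lo, v)) per lane, mirroring std::clamp:
//
//   namespace stdx = std::experimental;
//   stdx::native_simd<int> v([](int i) { return i * i; });
//   auto lo = stdx::native_simd<int>(0), hi = stdx::native_simd<int>(9);
//   auto c  = stdx::clamp(v, lo, hi); // every element now in [0, 9]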
3573// }}}
3574
3575template <size_t... _Sizes, typename _Tp, typename _Ap,
3576 typename = enable_if_t<((_Sizes + ...) == simd<_Tp, _Ap>::size())>>
3577 inline tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
3578 split(const simd<_Tp, _Ap>&);
3579
3580// __extract_part {{{
3581template <int _Index, int _Total, int _Combine = 1, typename _Tp, size_t _Np>
3582 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST
3583 _SimdWrapper<_Tp, _Np / _Total * _Combine>
3584 __extract_part(const _SimdWrapper<_Tp, _Np> __x);
3585
3586	template <int _Index, int _Parts, int _Combine = 1, typename _Tp, typename _A0,
3587 typename... _As>
3588 _GLIBCXX_SIMD_INTRINSIC auto
3589 __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x);
3590
3591// }}}
3592// _SizeList {{{
3593template <size_t _V0, size_t... _Values>
3594 struct _SizeList
3595 {
3596 template <size_t _I>
3597 static constexpr size_t _S_at(_SizeConstant<_I> = {})
3598 {
3599 if constexpr (_I == 0)
3600 return _V0;
3601 else
3602 return _SizeList<_Values...>::template _S_at<_I - 1>();
3603 }
3604
3605 template <size_t _I>
3606 static constexpr auto _S_before(_SizeConstant<_I> = {})
3607 {
3608 if constexpr (_I == 0)
3609 return _SizeConstant<0>();
3610 else
3611 return _SizeConstant<
3612 _V0 + _SizeList<_Values...>::template _S_before<_I - 1>()>();
3613 }
3614
3615 template <size_t _Np>
3616 static constexpr auto _S_pop_front(_SizeConstant<_Np> = {})
3617 {
3618 if constexpr (_Np == 0)
3619 return _SizeList();
3620 else
3621 return _SizeList<_Values...>::template _S_pop_front<_Np - 1>();
3622 }
3623 };
3624
3625// }}}
3626// __extract_center {{{
3627template <typename _Tp, size_t _Np>
3628 _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np / 2>
3629 __extract_center(_SimdWrapper<_Tp, _Np> __x)
3630 {
3631 static_assert(_Np >= 4);
3632 static_assert(_Np % 4 == 0); // x0 - x1 - x2 - x3 -> return {x1, x2}
3633#if _GLIBCXX_SIMD_X86INTRIN // {{{
3634 if constexpr (__have_avx512f && sizeof(_Tp) * _Np == 64)
3635 {
3636 const auto __intrin = __to_intrin(__x);
3637 if constexpr (is_integral_v<_Tp>)
3638 return __vector_bitcast<_Tp>(_mm512_castsi512_si256(
3639 _mm512_shuffle_i32x4(__intrin, __intrin,
3640 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3641 else if constexpr (sizeof(_Tp) == 4)
3642 return __vector_bitcast<_Tp>(_mm512_castps512_ps256(
3643 _mm512_shuffle_f32x4(__intrin, __intrin,
3644 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3645 else if constexpr (sizeof(_Tp) == 8)
3646 return __vector_bitcast<_Tp>(_mm512_castpd512_pd256(
3647 _mm512_shuffle_f64x2(__intrin, __intrin,
3648 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3649 else
3650 __assert_unreachable<_Tp>();
3651 }
3652 else if constexpr (sizeof(_Tp) * _Np == 32 && is_floating_point_v<_Tp>)
3653 return __vector_bitcast<_Tp>(
3654 _mm_shuffle_pd(__lo128(__vector_bitcast<double>(__x)),
3655 __hi128(__vector_bitcast<double>(__x)), 1));
3656 else if constexpr (sizeof(__x) == 32 && sizeof(_Tp) * _Np <= 32)
3657 return __vector_bitcast<_Tp>(
3658 _mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
3659 __lo128(__vector_bitcast<_LLong>(__x)),
3660 sizeof(_Tp) * _Np / 4));
3661 else
3662#endif // _GLIBCXX_SIMD_X86INTRIN }}}
3663 {
3664 __vector_type_t<_Tp, _Np / 2> __r;
3665 __builtin_memcpy(&__r,
3666 reinterpret_cast<const char*>(&__x)
3667 + sizeof(_Tp) * _Np / 4,
3668 sizeof(_Tp) * _Np / 2);
3669 return __r;
3670 }
3671 }
3672
3673template <typename _Tp, typename _A0, typename... _As>
3674 _GLIBCXX_SIMD_INTRINSIC
3675 _SimdWrapper<_Tp, _SimdTuple<_Tp, _A0, _As...>::_S_size() / 2>
3676 __extract_center(const _SimdTuple<_Tp, _A0, _As...>& __x)
3677 {
3678 if constexpr (sizeof...(_As) == 0)
3679 return __extract_center(__x.first);
3680 else
3681 return __extract_part<1, 4, 2>(__x);
3682 }
3683
3684// }}}
3685// __split_wrapper {{{
3686template <size_t... _Sizes, typename _Tp, typename... _As>
3687 auto
3688 __split_wrapper(_SizeList<_Sizes...>, const _SimdTuple<_Tp, _As...>& __x)
3689 {
3690 return split<_Sizes...>(
3691 fixed_size_simd<_Tp, _SimdTuple<_Tp, _As...>::_S_size()>(__private_init,
3692 __x));
3693 }
3694
3695// }}}
3696
3697// split<simd>(simd) {{{
3698template <typename _V, typename _Ap,
3699	          size_t _Parts = simd_size_v<typename _V::value_type, _Ap> / _V::size()>
3700	  enable_if_t<simd_size_v<typename _V::value_type, _Ap> == _Parts * _V::size()
3701	                && is_simd_v<_V>, array<_V, _Parts>>
3702 split(const simd<typename _V::value_type, _Ap>& __x)
3703 {
3704 using _Tp = typename _V::value_type;
3705	    if constexpr (_Parts == 1)
3706 {
3707 return {simd_cast<_V>(__x)};
3708 }
3709 else if (__x._M_is_constprop())
3710 {
3711	        return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3712 auto __i) constexpr {
3713 return _V([&](auto __j) constexpr {
3714 return __x[__i * _V::size() + __j];
3715 });
3716 });
3717 }
3718 else if constexpr (
3719 __is_fixed_size_abi_v<_Ap>
3720 && (is_same_v<typename _V::abi_type, simd_abi::scalar>
3721 || (__is_fixed_size_abi_v<typename _V::abi_type>
3722 && sizeof(_V) == sizeof(_Tp) * _V::size() // _V doesn't have padding
3723 )))
3724 {
3725 // fixed_size -> fixed_size (w/o padding) or scalar
3726#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
3727 const __may_alias<_Tp>* const __element_ptr
3728 = reinterpret_cast<const __may_alias<_Tp>*>(&__data(__x));
3729	        return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3730 auto __i) constexpr {
3731 return _V(__element_ptr + __i * _V::size(), vector_aligned);
3732 });
3733#else
3734 const auto& __xx = __data(__x);
3735	        return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3736 auto __i) constexpr {
3737 [[maybe_unused]] constexpr size_t __offset
3738 = decltype(__i)::value * _V::size();
3739 return _V([&](auto __j) constexpr {
3740 constexpr _SizeConstant<__j + __offset> __k;
3741 return __xx[__k];
3742 });
3743 });
3744#endif
3745 }
3746 else if constexpr (is_same_v<typename _V::abi_type, simd_abi::scalar>)
3747 {
3748 // normally memcpy should work here as well
3749	        return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3750 auto __i) constexpr { return __x[__i]; });
3751 }
3752 else
3753 {
3754	        return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3755 auto __i) constexpr {
3756 if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
3757 return _V([&](auto __j) constexpr {
3758 return __x[__i * _V::size() + __j];
3759 });
3760 else
3761 return _V(__private_init,
3762	                        __extract_part<decltype(__i)::value, _Parts>(__data(__x)));
3763 });
3764 }
3765 }
3766
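// Example (editorial sketch): splitting into equally sized parts; the part
// type _V is given explicitly and must evenly divide the source width:
//
//   namespace stdx = std::experimental;
//   using V8 = stdx::fixed_size_simd<float, 8>;
//   using V4 = stdx::fixed_size_simd<float, 4>;
//   V8 v([](int i) { return float(i); });
//   std::array<V4, 2> halves = stdx::split<V4>(v);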
3767// }}}
3768// split<simd_mask>(simd_mask) {{{
3769template <typename _V, typename _Ap,
3770 size_t _Parts
3771 = simd_size_v<typename _V::simd_type::value_type, _Ap> / _V::size()>
3772 enable_if_t<is_simd_mask_v<_V> && simd_size_v<typename
3773 _V::simd_type::value_type, _Ap> == _Parts * _V::size(), array<_V, _Parts>>
3774 split(const simd_mask<typename _V::simd_type::value_type, _Ap>& __x)
3775 {
3776 if constexpr (is_same_v<_Ap, typename _V::abi_type>)
3777 return {__x};
3778 else if constexpr (_Parts == 1)
3779 return {__proposed::static_simd_cast<_V>(__x)};
3780 else if constexpr (_Parts == 2 && __is_sse_abi<typename _V::abi_type>()
3781 && __is_avx_abi<_Ap>())
3782 return {_V(__private_init, __lo128(__data(__x))),
3783 _V(__private_init, __hi128(__data(__x)))};
3784 else if constexpr (_V::size() <= __CHAR_BIT__ * sizeof(_ULLong))
3785 {
3786 const bitset __bits = __x.__to_bitset();
3787 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3788 auto __i) constexpr {
3789 constexpr size_t __offset = __i * _V::size();
3790 return _V(__bitset_init, (__bits >> __offset).to_ullong());
3791 });
3792 }
3793 else
3794 {
3795 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3796 auto __i) constexpr {
3797 constexpr size_t __offset = __i * _V::size();
3798 return _V(
3799 __private_init, [&](auto __j) constexpr {
3800 return __x[__j + __offset];
3801 });
3802 });
3803 }
3804 }
3805
3806// }}}
3807// split<_Sizes...>(simd) {{{
3808template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
3809 _GLIBCXX_SIMD_ALWAYS_INLINE
3810 tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
3811 split(const simd<_Tp, _Ap>& __x)
3812 {
3813 using _SL = _SizeList<_Sizes...>;
3814 using _Tuple = tuple<__deduced_simd<_Tp, _Sizes>...>;
3815 constexpr size_t _Np = simd_size_v<_Tp, _Ap>;
3816 constexpr size_t _N0 = _SL::template _S_at<0>();
3817 using _V = __deduced_simd<_Tp, _N0>;
3818
3819 if (__x._M_is_constprop())
3820 return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>([&](
3821 auto __i) constexpr {
3822 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
3823 constexpr size_t __offset = _SL::_S_before(__i);
3824 return _Vi([&](auto __j) constexpr { return __x[__offset + __j]; });
3825 });
3826 else if constexpr (_Np == _N0)
3827 {
3828 static_assert(sizeof...(_Sizes) == 1);
3829 return {simd_cast<_V>(__x)};
3830 }
3831 else if constexpr // split from fixed_size, such that __x::first.size == _N0
3832 (__is_fixed_size_abi_v<
3833 _Ap> && __fixed_size_storage_t<_Tp, _Np>::_S_first_size == _N0)
3834 {
3835 static_assert(
3836 !__is_fixed_size_abi_v<typename _V::abi_type>,
3837	        "How can <_Tp, _Np> be a single _SimdTuple entry but a "
3838 "fixed_size_simd "
3839 "when deduced?");
3840 // extract first and recurse (__split_wrapper is needed to deduce a new
3841 // _Sizes pack)
3842 return tuple_cat(make_tuple(_V(__private_init, __data(__x).first)),
3843 __split_wrapper(_SL::template _S_pop_front<1>(),
3844 __data(__x).second));
3845 }
3846 else if constexpr ((!is_same_v<simd_abi::scalar,
3847 simd_abi::deduce_t<_Tp, _Sizes>> && ...)
3848 && (!__is_fixed_size_abi_v<
3849 simd_abi::deduce_t<_Tp, _Sizes>> && ...))
3850 {
3851 if constexpr (((_Sizes * 2 == _Np) && ...))
3852 return {{__private_init, __extract_part<0, 2>(__data(__x))},
3853 {__private_init, __extract_part<1, 2>(__data(__x))}};
3854 else if constexpr (is_same_v<_SizeList<_Sizes...>,
3855 _SizeList<_Np / 3, _Np / 3, _Np / 3>>)
3856 return {{__private_init, __extract_part<0, 3>(__data(__x))},
3857 {__private_init, __extract_part<1, 3>(__data(__x))},
3858 {__private_init, __extract_part<2, 3>(__data(__x))}};
3859 else if constexpr (is_same_v<_SizeList<_Sizes...>,
3860 _SizeList<2 * _Np / 3, _Np / 3>>)
3861 return {{__private_init, __extract_part<0, 3, 2>(__data(__x))},
3862 {__private_init, __extract_part<2, 3>(__data(__x))}};
3863 else if constexpr (is_same_v<_SizeList<_Sizes...>,
3864 _SizeList<_Np / 3, 2 * _Np / 3>>)
3865 return {{__private_init, __extract_part<0, 3>(__data(__x))},
3866 {__private_init, __extract_part<1, 3, 2>(__data(__x))}};
3867 else if constexpr (is_same_v<_SizeList<_Sizes...>,
3868 _SizeList<_Np / 2, _Np / 4, _Np / 4>>)
3869 return {{__private_init, __extract_part<0, 2>(__data(__x))},
3870 {__private_init, __extract_part<2, 4>(__data(__x))},
3871 {__private_init, __extract_part<3, 4>(__data(__x))}};
3872 else if constexpr (is_same_v<_SizeList<_Sizes...>,
3873 _SizeList<_Np / 4, _Np / 4, _Np / 2>>)
3874 return {{__private_init, __extract_part<0, 4>(__data(__x))},
3875 {__private_init, __extract_part<1, 4>(__data(__x))},
3876 {__private_init, __extract_part<1, 2>(__data(__x))}};
3877 else if constexpr (is_same_v<_SizeList<_Sizes...>,
3878 _SizeList<_Np / 4, _Np / 2, _Np / 4>>)
3879 return {{__private_init, __extract_part<0, 4>(__data(__x))},
3880 {__private_init, __extract_center(__data(__x))},
3881 {__private_init, __extract_part<3, 4>(__data(__x))}};
3882 else if constexpr (((_Sizes * 4 == _Np) && ...))
3883 return {{__private_init, __extract_part<0, 4>(__data(__x))},
3884 {__private_init, __extract_part<1, 4>(__data(__x))},
3885 {__private_init, __extract_part<2, 4>(__data(__x))},
3886 {__private_init, __extract_part<3, 4>(__data(__x))}};
3887 // else fall through
3888 }
3889#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
3890 const __may_alias<_Tp>* const __element_ptr
3891 = reinterpret_cast<const __may_alias<_Tp>*>(&__x);
3892 return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>([&](
3893 auto __i) constexpr {
3894 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
3895 constexpr size_t __offset = _SL::_S_before(__i);
3896 constexpr size_t __base_align = alignof(simd<_Tp, _Ap>);
3897 constexpr size_t __a
3898 = __base_align - ((__offset * sizeof(_Tp)) % __base_align);
3899 constexpr size_t __b = ((__a - 1) & __a) ^ __a;
3900 constexpr size_t __alignment = __b == 0 ? __a : __b;
3901 return _Vi(__element_ptr + __offset, overaligned<__alignment>);
3902 });
3903#else
3904 return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>([&](
3905 auto __i) constexpr {
3906 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
3907 const auto& __xx = __data(__x);
3908 using _Offset = decltype(_SL::_S_before(__i));
3909 return _Vi([&](auto __j) constexpr {
3910 constexpr _SizeConstant<_Offset::value + __j> __k;
3911 return __xx[__k];
3912 });
3913 });
3914#endif
3915 }
3916
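// Example (editorial sketch): splitting into unequal parts; the _Sizes pack
// must sum to the source width and each part gets a deduced ABI:
//
//   namespace stdx = std::experimental;
//   stdx::fixed_size_simd<int, 8> v([](int i) { return i; });
//   auto [a, b, c] = stdx::split<2, 2, 4>(v); // 2-, 2- and 4-element simds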
3917// }}}
3918
3919// __subscript_in_pack {{{
3920template <size_t _I, typename _Tp, typename _Ap, typename... _As>
3921 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
3922 __subscript_in_pack(const simd<_Tp, _Ap>& __x, const simd<_Tp, _As>&... __xs)
3923 {
3924 if constexpr (_I < simd_size_v<_Tp, _Ap>)
3925 return __x[_I];
3926 else
3927 return __subscript_in_pack<_I - simd_size_v<_Tp, _Ap>>(__xs...);
3928 }
3929
3930// }}}
3931// __store_pack_of_simd {{{
3932template <typename _Tp, typename _A0, typename... _As>
3933 _GLIBCXX_SIMD_INTRINSIC void
3934 __store_pack_of_simd(char* __mem, const simd<_Tp, _A0>& __x0,
3935 const simd<_Tp, _As>&... __xs)
3936 {
3937 constexpr size_t __n_bytes = sizeof(_Tp) * simd_size_v<_Tp, _A0>;
3938 __builtin_memcpy(__mem, &__data(__x0), __n_bytes);
3939 if constexpr (sizeof...(__xs) > 0)
3940 __store_pack_of_simd(__mem + __n_bytes, __xs...);
3941 }
3942
3943// }}}
3944// concat(simd...) {{{
3945template <typename _Tp, typename... _As>
3946 inline _GLIBCXX_SIMD_CONSTEXPR
3947 simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>
3948 concat(const simd<_Tp, _As>&... __xs)
3949 {
3950 using _Rp = __deduced_simd<_Tp, (simd_size_v<_Tp, _As> + ...)>;
3951 if constexpr (sizeof...(__xs) == 1)
3952 return simd_cast<_Rp>(__xs...);
3953 else if ((... && __xs._M_is_constprop()))
3954	      return _Rp([&](auto __i) constexpr {
3955	        return __subscript_in_pack<__i>(__xs...);
3956	      });
3957 else
3958 {
3959 _Rp __r{};
3960 __store_pack_of_simd(reinterpret_cast<char*>(&__data(__r)), __xs...);
3961 return __r;
3962 }
3963 }
3964
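// Example (editorial sketch): concat() is the inverse of split; continuing
// the split<2, 2, 4> example from above:
//
//   auto w = stdx::concat(a, b, c); // deduced 8-element simd,
//                                   // w[i] == v[i] for all i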
3965// }}}
3966// concat(array<simd>) {{{
3967template <typename _Tp, typename _Abi, size_t _Np>
3968 _GLIBCXX_SIMD_ALWAYS_INLINE
3969 _GLIBCXX_SIMD_CONSTEXPR __deduced_simd<_Tp, simd_size_v<_Tp, _Abi> * _Np>
3970 concat(const array<simd<_Tp, _Abi>, _Np>& __x)
3971 {
3972 return __call_with_subscripts<_Np>(__x, [](const auto&... __xs) {
3973 return concat(__xs...);
3974 });
3975 }
3976
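// Example (editorial sketch): the array overload round-trips with
// split<simd>(simd):
//
//   namespace stdx = std::experimental;
//   using V4 = stdx::fixed_size_simd<float, 4>;
//   stdx::fixed_size_simd<float, 8> v([](int i) { return float(i); });
//   std::array<V4, 2> parts = stdx::split<V4>(v);
//   auto w = stdx::concat(parts); // deduced 8-element simd equal to v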
3977// }}}
3978
3979// _SmartReference {{{
3980template <typename _Up, typename _Accessor = _Up,
3981 typename _ValueType = typename _Up::value_type>
3982 class _SmartReference
3983 {
3984 friend _Accessor;
3985 int _M_index;
3986 _Up& _M_obj;
3987
3988 _GLIBCXX_SIMD_INTRINSIC constexpr _ValueType _M_read() const noexcept
3989 {
3990 if constexpr (is_arithmetic_v<_Up>)
3991 return _M_obj;
3992 else
3993 return _M_obj[_M_index];
3994 }
3995
3996 template <typename _Tp>
3997 _GLIBCXX_SIMD_INTRINSIC constexpr void _M_write(_Tp&& __x) const
3998 { _Accessor::_S_set(_M_obj, _M_index, static_cast<_Tp&&>(__x)); }
3999
4000 public:
4001 _GLIBCXX_SIMD_INTRINSIC constexpr
4002 _SmartReference(_Up& __o, int __i) noexcept
4003 : _M_index(__i), _M_obj(__o) {}
4004
4005 using value_type = _ValueType;
4006
4007 _GLIBCXX_SIMD_INTRINSIC _SmartReference(const _SmartReference&) = delete;
4008
4009 _GLIBCXX_SIMD_INTRINSIC constexpr operator value_type() const noexcept
4010 { return _M_read(); }
4011
4012 template <typename _Tp,
4013 typename
4014 = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, value_type>>
4015 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference operator=(_Tp&& __x) &&
4016 {
4017 _M_write(static_cast<_Tp&&>(__x));
4018 return {_M_obj, _M_index};
4019 }
4020
4021#define _GLIBCXX_SIMD_OP_(__op) \
4022 template <typename _Tp, \
4023 typename _TT \
4024 = decltype(declval<value_type>() __op declval<_Tp>()), \
4025 typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, _TT>, \
4026 typename = _ValuePreservingOrInt<_TT, value_type>> \
4027 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference \
4028 operator __op##=(_Tp&& __x) && \
4029 { \
4030 const value_type& __lhs = _M_read(); \
4031 _M_write(__lhs __op __x); \
4032 return {_M_obj, _M_index}; \
4033 }
4034 _GLIBCXX_SIMD_ALL_ARITHMETICS(_GLIBCXX_SIMD_OP_);
4035 _GLIBCXX_SIMD_ALL_SHIFTS(_GLIBCXX_SIMD_OP_);
4036 _GLIBCXX_SIMD_ALL_BINARY(_GLIBCXX_SIMD_OP_);
4037#undef _GLIBCXX_SIMD_OP_
4038
4039 template <typename _Tp = void,
4040 typename
4041 = decltype(++declval<conditional_t<true, value_type, _Tp>&>())>
4042 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference operator++() &&
4043 {
4044 value_type __x = _M_read();
4045 _M_write(++__x);
4046 return {_M_obj, _M_index};
4047 }
4048
4049 template <typename _Tp = void,
4050 typename
4051 = decltype(declval<conditional_t<true, value_type, _Tp>&>()++)>
4052 _GLIBCXX_SIMD_INTRINSIC constexpr value_type operator++(int) &&
4053 {
4054 const value_type __r = _M_read();
4055 value_type __x = __r;
4056 _M_write(++__x);
4057 return __r;
4058 }
4059
4060 template <typename _Tp = void,
4061 typename
4062 = decltype(--declval<conditional_t<true, value_type, _Tp>&>())>
4063 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference operator--() &&
4064 {
4065 value_type __x = _M_read();
4066 _M_write(--__x);
4067 return {_M_obj, _M_index};
4068 }
4069
4070 template <typename _Tp = void,
4071 typename
4072 = decltype(declval<conditional_t<true, value_type, _Tp>&>()--)>
4073 _GLIBCXX_SIMD_INTRINSIC constexpr value_type operator--(int) &&
4074 {
4075 const value_type __r = _M_read();
4076 value_type __x = __r;
4077 _M_write(--__x);
4078 return __r;
4079 }
4080
4081 _GLIBCXX_SIMD_INTRINSIC friend void
4082 swap(_SmartReference&& __a, _SmartReference&& __b) noexcept(
4083 conjunction<
4084 is_nothrow_constructible<value_type, _SmartReference&&>,
4085 is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4086 {
4087 value_type __tmp = static_cast<_SmartReference&&>(__a);
4088 static_cast<_SmartReference&&>(__a) = static_cast<value_type>(__b);
4089 static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4090 }
4091
4092 _GLIBCXX_SIMD_INTRINSIC friend void
4093 swap(value_type& __a, _SmartReference&& __b) noexcept(
4094 conjunction<
4095 is_nothrow_constructible<value_type, value_type&&>,
4096 is_nothrow_assignable<value_type&, value_type&&>,
4097 is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4098 {
4099 value_type __tmp(std::move(__a));
4100 __a = static_cast<value_type>(__b);
4101 static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4102 }
4103
4104 _GLIBCXX_SIMD_INTRINSIC friend void
4105 swap(_SmartReference&& __a, value_type& __b) noexcept(
4106 conjunction<
4107 is_nothrow_constructible<value_type, _SmartReference&&>,
4108 is_nothrow_assignable<value_type&, value_type&&>,
4109 is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4110 {
4111 value_type __tmp(__a);
4112 static_cast<_SmartReference&&>(__a) = std::move(__b);
4113 __b = std::move(__tmp);
4114 }
4115 };
4116
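// Example (editorial sketch): _SmartReference is the proxy type behind
// simd::operator[] and simd_mask::operator[]; writes are routed through
// _Accessor::_S_set, and the rvalue-qualified operators make the proxy
// usable only as a temporary:
//
//   namespace stdx = std::experimental;
//   stdx::native_simd<int> v(7);
//   v[0] = 1;      // _SmartReference::operator= on the temporary proxy
//   v[1] += 2;     // compound assignment via _GLIBCXX_SIMD_OP_
//   int x = 5;
//   swap(x, v[2]); // friend swap above; afterwards x == 7, v[2] == 5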
4117// }}}
4118// __scalar_abi_wrapper {{{
4119template <int _Bytes>
4120 struct __scalar_abi_wrapper
4121 {
4122 template <typename _Tp> static constexpr size_t _S_full_size = 1;
4123 template <typename _Tp> static constexpr size_t _S_size = 1;
4124	    template <typename _Tp> static constexpr bool _S_is_partial = false;
4125
4126 template <typename _Tp, typename _Abi = simd_abi::scalar>
4127 static constexpr bool _S_is_valid_v
4128 = _Abi::template _IsValid<_Tp>::value && sizeof(_Tp) == _Bytes;
4129 };
4130
4131// }}}
4132// __decay_abi metafunction {{{
4133template <typename _Tp>
4134 struct __decay_abi { using type = _Tp; };
4135
4136template <int _Bytes>
4137 struct __decay_abi<__scalar_abi_wrapper<_Bytes>>
4138 { using type = simd_abi::scalar; };
4139
4140// }}}
4141// __find_next_valid_abi metafunction {{{1
4142	// Given an ABI tag A<N>, find an N2 < N such that A<N2>::_S_is_valid_v<_Tp>
4143	// == true, N2 is a power of two, and A<N2>::_S_is_partial<_Tp> is false.
4144	// The recursion breaks once the next tag would hold fewer than 2 elements;
4145	// in that case type::_S_is_valid_v<_Tp> may be false.
4146template <template <int> class _Abi, int _Bytes, typename _Tp>
4147 struct __find_next_valid_abi
4148 {
4149 static constexpr auto _S_choose()
4150 {
4151 constexpr int _NextBytes = std::__bit_ceil(_Bytes) / 2;
4152 using _NextAbi = _Abi<_NextBytes>;
4153 if constexpr (_NextBytes < sizeof(_Tp) * 2) // break recursion
4154 return _Abi<_Bytes>();
4155 else if constexpr (_NextAbi::template _S_is_partial<_Tp> == false
4156 && _NextAbi::template _S_is_valid_v<_Tp>)
4157 return _NextAbi();
4158 else
4159 return __find_next_valid_abi<_Abi, _NextBytes, _Tp>::_S_choose();
4160 }
4161
4162 using type = decltype(_S_choose());
4163 };
4164
4165template <int _Bytes, typename _Tp>
4166 struct __find_next_valid_abi<__scalar_abi_wrapper, _Bytes, _Tp>
4167 { using type = simd_abi::scalar; };
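// Worked example (editorial note): for _Tp = float and _Abi = _VecBuiltin
// with _Bytes = 24 (6 floats), _NextBytes = __bit_ceil(24) / 2 = 16, which
// still holds at least 2 floats; on a target where the 16-byte ABI is valid
// and not partial (e.g. SSE), the result is _VecBuiltin<16>.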
4168
4169// _AbiList {{{1
4170template <template <int> class...>
4171 struct _AbiList
4172 {
4173 template <typename, int> static constexpr bool _S_has_valid_abi = false;
4174 template <typename, int> using _FirstValidAbi = void;
4175 template <typename, int> using _BestAbi = void;
4176 };
4177
4178template <template <int> class _A0, template <int> class... _Rest>
4179 struct _AbiList<_A0, _Rest...>
4180 {
4181 template <typename _Tp, int _Np>
4182 static constexpr bool _S_has_valid_abi
4183 = _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<
4184 _Tp> || _AbiList<_Rest...>::template _S_has_valid_abi<_Tp, _Np>;
4185
4186 template <typename _Tp, int _Np>
4187 using _FirstValidAbi = conditional_t<
4188 _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>,
4189 typename __decay_abi<_A0<sizeof(_Tp) * _Np>>::type,
4190 typename _AbiList<_Rest...>::template _FirstValidAbi<_Tp, _Np>>;
4191
4192 template <typename _Tp, int _Np>
4193 static constexpr auto _S_determine_best_abi()
4194 {
4195 static_assert(_Np >= 1);
4196 constexpr int _Bytes = sizeof(_Tp) * _Np;
4197 if constexpr (_Np == 1)
4198 return __make_dependent_t<_Tp, simd_abi::scalar>{};
4199 else
4200 {
4201 constexpr int __fullsize = _A0<_Bytes>::template _S_full_size<_Tp>;
4202 // _A0<_Bytes> is good if:
4203 // 1. The ABI tag is valid for _Tp
4204 // 2. The storage overhead is no more than padding to fill the next
4205 // power-of-2 number of bytes
4206 if constexpr (_A0<_Bytes>::template _S_is_valid_v<
4207 _Tp> && __fullsize / 2 < _Np)
4208 return typename __decay_abi<_A0<_Bytes>>::type{};
4209 else
4210 {
4211 using _Bp =
4212 typename __find_next_valid_abi<_A0, _Bytes, _Tp>::type;
4213 if constexpr (_Bp::template _S_is_valid_v<
4214 _Tp> && _Bp::template _S_size<_Tp> <= _Np)
4215 return _Bp{};
4216 else
4217 return
4218 typename _AbiList<_Rest...>::template _BestAbi<_Tp, _Np>{};
4219 }
4220 }
4221 }
4222
4223 template <typename _Tp, int _Np>
4224 using _BestAbi = decltype(_S_determine_best_abi<_Tp, _Np>());
4225 };
4226
4227// }}}1
4228
4229	// The following lists all native ABIs, which makes them accessible to
4230	// simd_abi::deduce and select_best_vector_type_t (for fixed_size). Order
4231	// matters: whatever comes first has higher priority.
4232using _AllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin,
4233 __scalar_abi_wrapper>;
4234
4235// valid _SimdTraits specialization {{{1
4236template <typename _Tp, typename _Abi>
4237 struct _SimdTraits<_Tp, _Abi, void_t<typename _Abi::template _IsValid<_Tp>>>
4238 : _Abi::template __traits<_Tp> {};
4239
4240// __deduce_impl specializations {{{1
4241// try all native ABIs (including scalar) first
4242template <typename _Tp, size_t _Np>
4243 struct __deduce_impl<
4244 _Tp, _Np, enable_if_t<_AllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
4245 { using type = _AllNativeAbis::_FirstValidAbi<_Tp, _Np>; };
4246
4247// fall back to fixed_size only if scalar and native ABIs don't match
4248template <typename _Tp, size_t _Np, typename = void>
4249 struct __deduce_fixed_size_fallback {};
4250
4251template <typename _Tp, size_t _Np>
4252 struct __deduce_fixed_size_fallback<_Tp, _Np,
4253 enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>>
4254 { using type = simd_abi::fixed_size<_Np>; };
4255
4256template <typename _Tp, size_t _Np, typename>
4257 struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {};
4258
4259//}}}1
4260
4261// simd_mask {{{
4262template <typename _Tp, typename _Abi>
4263 class simd_mask : public _SimdTraits<_Tp, _Abi>::_MaskBase
4264 {
4265 // types, tags, and friends {{{
4266 using _Traits = _SimdTraits<_Tp, _Abi>;
4267 using _MemberType = typename _Traits::_MaskMember;
4268
4269 // We map all masks with equal element sizeof to a single integer type, the
4270 // one given by __int_for_sizeof_t<_Tp>. This is the approach
4271 // [[gnu::vector_size(N)]] types take as well and it reduces the number of
4272 // template specializations in the implementation classes.
4273 using _Ip = __int_for_sizeof_t<_Tp>;
4274 static constexpr _Ip* _S_type_tag = nullptr;
4275
4276 friend typename _Traits::_MaskBase;
4277 friend class simd<_Tp, _Abi>; // to construct masks on return
4278 friend typename _Traits::_SimdImpl; // to construct masks on return and
4279 // inspect data on masked operations
4280 public:
4281 using _Impl = typename _Traits::_MaskImpl;
4282 friend _Impl;
4283
4284 // }}}
4285 // member types {{{
4286 using value_type = bool;
4287 using reference = _SmartReference<_MemberType, _Impl, value_type>;
4288 using simd_type = simd<_Tp, _Abi>;
4289 using abi_type = _Abi;
4290
4291 // }}}
4292 static constexpr size_t size() // {{{
4293 { return __size_or_zero_v<_Tp, _Abi>; }
4294
4295 // }}}
4296 // constructors & assignment {{{
4297 simd_mask() = default;
4298 simd_mask(const simd_mask&) = default;
4299 simd_mask(simd_mask&&) = default;
4300 simd_mask& operator=(const simd_mask&) = default;
4301 simd_mask& operator=(simd_mask&&) = default;
4302
4303 // }}}
4304 // access to internal representation (optional feature) {{{
4305 _GLIBCXX_SIMD_ALWAYS_INLINE explicit
4306 simd_mask(typename _Traits::_MaskCastType __init)
4307 : _M_data{__init} {}
4308	    // conversions to the internal type are done in _MaskBase
4309
4310 // }}}
4311 // bitset interface (extension to be proposed) {{{
4312 // TS_FEEDBACK:
4313 // Conversion of simd_mask to and from bitset makes it much easier to
4314 // interface with other facilities. I suggest adding `static
4315 // simd_mask::from_bitset` and `simd_mask::to_bitset`.
4316 _GLIBCXX_SIMD_ALWAYS_INLINE static simd_mask
4317 __from_bitset(bitset<size()> bs)
4318 { return {__bitset_init, bs}; }
4319
4320 _GLIBCXX_SIMD_ALWAYS_INLINE bitset<size()>
4321 __to_bitset() const
4322 { return _Impl::_S_to_bits(_M_data)._M_to_bitset(); }
4323
4324 // }}}
4325 // explicit broadcast constructor {{{
4326 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4327 simd_mask(value_type __x)
4328 : _M_data(_Impl::template _S_broadcast<_Ip>(__x)) {}
4329
4330 // }}}
4331 // implicit type conversion constructor {{{
4332 #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4333 // proposed improvement
4334 template <typename _Up, typename _A2,
4335 typename = enable_if_t<simd_size_v<_Up, _A2> == size()>>
4336 _GLIBCXX_SIMD_ALWAYS_INLINE explicit(sizeof(_MemberType)
4337 != sizeof(typename _SimdTraits<_Up, _A2>::_MaskMember))
4338 simd_mask(const simd_mask<_Up, _A2>& __x)
4339 : simd_mask(__proposed::static_simd_cast<simd_mask>(__x)) {}
4340 #else
4341 // conforming to ISO/IEC 19570:2018
4342 template <typename _Up, typename = enable_if_t<conjunction<
4343 is_same<abi_type, simd_abi::fixed_size<size()>>,
4344 is_same<_Up, _Up>>::value>>
4345 _GLIBCXX_SIMD_ALWAYS_INLINE
4346 simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>& __x)
4347 : _M_data(_Impl::_S_from_bitmask(__data(__x), _S_type_tag)) {}
4348 #endif
4349
4350 // }}}
4351 // load constructor {{{
4352 template <typename _Flags>
4353 _GLIBCXX_SIMD_ALWAYS_INLINE
4354 simd_mask(const value_type* __mem, _Flags)
4355 : _M_data(_Impl::template _S_load<_Ip>(
4356 _Flags::template _S_apply<simd_mask>(__mem))) {}
4357
4358 template <typename _Flags>
4359 _GLIBCXX_SIMD_ALWAYS_INLINE
4360 simd_mask(const value_type* __mem, simd_mask __k, _Flags)
4361 : _M_data{}
4362 {
4363 _M_data
4364 = _Impl::_S_masked_load(_M_data, __k._M_data,
4365 _Flags::template _S_apply<simd_mask>(__mem));
4366 }
4367
4368 // }}}
4369 // loads [simd_mask.load] {{{
4370 template <typename _Flags>
4371 _GLIBCXX_SIMD_ALWAYS_INLINE void
4372 copy_from(const value_type* __mem, _Flags)
4373 {
4374 _M_data = _Impl::template _S_load<_Ip>(
4375 _Flags::template _S_apply<simd_mask>(__mem));
4376 }
4377
4378 // }}}
4379 // stores [simd_mask.store] {{{
4380 template <typename _Flags>
4381 _GLIBCXX_SIMD_ALWAYS_INLINE void
4382 copy_to(value_type* __mem, _Flags) const
4383 { _Impl::_S_store(_M_data, _Flags::template _S_apply<simd_mask>(__mem)); }
4384
4385 // }}}
4386 // scalar access {{{
4387 _GLIBCXX_SIMD_ALWAYS_INLINE reference
4388 operator[](size_t __i)
4389 {
4390 if (__i >= size())
4391	        __invoke_ub("Subscript %d is out of range [0, %d]", int(__i), int(size()) - 1);
4392 return {_M_data, int(__i)};
4393 }
4394
4395 _GLIBCXX_SIMD_ALWAYS_INLINE value_type
4396 operator[](size_t __i) const
4397 {
4398 if (__i >= size())
4399	        __invoke_ub("Subscript %d is out of range [0, %d]", int(__i), int(size()) - 1);
4400 if constexpr (__is_scalar_abi<_Abi>())
4401 return _M_data;
4402 else
4403 return static_cast<bool>(_M_data[__i]);
4404 }
4405
4406 // }}}
4407 // negation {{{
4408 _GLIBCXX_SIMD_ALWAYS_INLINE simd_mask
4409 operator!() const
4410 { return {__private_init, _Impl::_S_bit_not(_M_data)}; }
4411
4412 // }}}
4413 // simd_mask binary operators [simd_mask.binary] {{{
4414 #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4415 // simd_mask<int> && simd_mask<uint> needs disambiguation
4416 template <typename _Up, typename _A2,
4417 typename
4418 = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4419 _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4420 operator&&(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4421 {
4422 return {__private_init,
4423 _Impl::_S_logical_and(__x._M_data, simd_mask(__y)._M_data)};
4424 }
4425
4426 template <typename _Up, typename _A2,
4427 typename
4428 = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4429 _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4430 operator||(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4431 {
4432 return {__private_init,
4433 _Impl::_S_logical_or(__x._M_data, simd_mask(__y)._M_data)};
4434 }
4435 #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4436
4437 _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4438 operator&&(const simd_mask& __x, const simd_mask& __y)
4439 {
4440 return {__private_init, _Impl::_S_logical_and(__x._M_data, __y._M_data)};
4441 }
4442
4443 _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4444 operator||(const simd_mask& __x, const simd_mask& __y)
4445 {
4446 return {__private_init, _Impl::_S_logical_or(__x._M_data, __y._M_data)};
4447 }
4448
4449 _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4450 operator&(const simd_mask& __x, const simd_mask& __y)
4451 { return {__private_init, _Impl::_S_bit_and(__x._M_data, __y._M_data)}; }
4452
4453 _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4454 operator|(const simd_mask& __x, const simd_mask& __y)
4455 { return {__private_init, _Impl::_S_bit_or(__x._M_data, __y._M_data)}; }
4456
4457 _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4458 operator^(const simd_mask& __x, const simd_mask& __y)
4459 { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4460
4461 _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask&
4462 operator&=(simd_mask& __x, const simd_mask& __y)
4463 {
4464 __x._M_data = _Impl::_S_bit_and(__x._M_data, __y._M_data);
4465 return __x;
4466 }
4467
4468 _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask&
4469 operator|=(simd_mask& __x, const simd_mask& __y)
4470 {
4471 __x._M_data = _Impl::_S_bit_or(__x._M_data, __y._M_data);
4472 return __x;
4473 }
4474
4475 _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask&
4476 operator^=(simd_mask& __x, const simd_mask& __y)
4477 {
4478 __x._M_data = _Impl::_S_bit_xor(__x._M_data, __y._M_data);
4479 return __x;
4480 }
4481
4482 // }}}
4483 // simd_mask compares [simd_mask.comparison] {{{
4484 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4485 operator==(const simd_mask& __x, const simd_mask& __y)
4486 { return !operator!=(__x, __y); }
4487
4488 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4489 operator!=(const simd_mask& __x, const simd_mask& __y)
4490 { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4491
4492 // }}}
4493 // private_init ctor {{{
4494 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
4495 simd_mask(_PrivateInit, typename _Traits::_MaskMember __init)
4496 : _M_data(__init) {}
4497
4498 // }}}
4499 // private_init generator ctor {{{
4500 template <typename _Fp, typename = decltype(bool(declval<_Fp>()(size_t())))>
4501 _GLIBCXX_SIMD_INTRINSIC constexpr
4502 simd_mask(_PrivateInit, _Fp&& __gen)
4503 : _M_data()
4504 {
4505 __execute_n_times<size()>([&](auto __i) constexpr {
4506 _Impl::_S_set(_M_data, __i, __gen(__i));
4507 });
4508 }
4509
4510 // }}}
4511 // bitset_init ctor {{{
4512 _GLIBCXX_SIMD_INTRINSIC simd_mask(_BitsetInit, bitset<size()> __init)
4513 : _M_data(
4514 _Impl::_S_from_bitmask(_SanitizedBitMask<size()>(__init), _S_type_tag))
4515 {}
4516
4517 // }}}
4518 // __cvt {{{
4519 // TS_FEEDBACK:
4520 // The conversion operator this implements should be a ctor on simd_mask.
4521 // Once you call .__cvt() on a simd_mask it converts conveniently.
4522 // A useful variation: add `explicit(sizeof(_Tp) != sizeof(_Up))`
4523 struct _CvtProxy
4524 {
4525 template <typename _Up, typename _A2,
4526 typename
4527 = enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>>
4528 operator simd_mask<_Up, _A2>() &&
4529 {
4530 using namespace std::experimental::__proposed;
4531 return static_simd_cast<simd_mask<_Up, _A2>>(_M_data);
4532 }
4533
4534 const simd_mask<_Tp, _Abi>& _M_data;
4535 };
4536
4537 _GLIBCXX_SIMD_INTRINSIC _CvtProxy
4538 __cvt() const
4539 { return {*this}; }
4540
4541 // }}}
4542 // operator?: overloads (suggested extension) {{{
4543 #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
4544 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4545 operator?:(const simd_mask& __k, const simd_mask& __where_true,
4546 const simd_mask& __where_false)
4547 {
4548 auto __ret = __where_false;
4549 _Impl::_S_masked_assign(__k._M_data, __ret._M_data, __where_true._M_data);
4550 return __ret;
4551 }
4552
4553 template <typename _U1, typename _U2,
4554 typename _Rp = simd<common_type_t<_U1, _U2>, _Abi>,
4555 typename = enable_if_t<conjunction_v<
4556 is_convertible<_U1, _Rp>, is_convertible<_U2, _Rp>,
4557 is_convertible<simd_mask, typename _Rp::mask_type>>>>
4558 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _Rp
4559 operator?:(const simd_mask& __k, const _U1& __where_true,
4560 const _U2& __where_false)
4561 {
4562 _Rp __ret = __where_false;
4563 _Rp::_Impl::_S_masked_assign(
4564 __data(static_cast<typename _Rp::mask_type>(__k)), __data(__ret),
4565 __data(static_cast<_Rp>(__where_true)));
4566 return __ret;
4567 }
4568
4569 #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4570 template <typename _Kp, typename _Ak, typename _Up, typename _Au,
4571 typename = enable_if_t<
4572 conjunction_v<is_convertible<simd_mask<_Kp, _Ak>, simd_mask>,
4573 is_convertible<simd_mask<_Up, _Au>, simd_mask>>>>
4574 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4575 operator?:(const simd_mask<_Kp, _Ak>& __k, const simd_mask& __where_true,
4576 const simd_mask<_Up, _Au>& __where_false)
4577 {
4578 simd_mask __ret = __where_false;
4579 _Impl::_S_masked_assign(simd_mask(__k)._M_data, __ret._M_data,
4580 __where_true._M_data);
4581 return __ret;
4582 }
4583 #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4584 #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
4585
4586 // }}}
4587 // _M_is_constprop {{{
4588 _GLIBCXX_SIMD_INTRINSIC constexpr bool
4589 _M_is_constprop() const
4590 {
4591 if constexpr (__is_scalar_abi<_Abi>())
4592 return __builtin_constant_p(_M_data);
4593 else
4594 return _M_data._M_is_constprop();
4595 }
4596
4597 // }}}
4598
4599 private:
4600 friend const auto& __data<_Tp, abi_type>(const simd_mask&);
4601 friend auto& __data<_Tp, abi_type>(simd_mask&);
4602 alignas(_Traits::_S_mask_align) _MemberType _M_data;
4603 };
4604
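// Example (editorial sketch): constructing and combining masks with the
// operators defined above; __to_bitset() is the extension proposed in the
// TS_FEEDBACK comment:
//
//   namespace stdx = std::experimental;
//   stdx::native_simd<float> v([](int i) { return float(i); });
//   auto k = v > 1.f && v < 5.f; // element-wise logical and
//   auto bits = k.__to_bitset(); // std::bitset with one bit per element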
4605// }}}
4606
4607// __data(simd_mask) {{{
4608template <typename _Tp, typename _Ap>
4609 _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
4610 __data(const simd_mask<_Tp, _Ap>& __x)
4611 { return __x._M_data; }
4612
4613template <typename _Tp, typename _Ap>
4614 _GLIBCXX_SIMD_INTRINSIC constexpr auto&
4615 __data(simd_mask<_Tp, _Ap>& __x)
4616 { return __x._M_data; }
4617
4618// }}}
4619
4620// simd_mask reductions [simd_mask.reductions] {{{
4621template <typename _Tp, typename _Abi>
4622 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4623 all_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4624 {
4625 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4626 {
4627 for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4628 if (!__k[__i])
4629 return false;
4630 return true;
4631 }
4632 else
4633 return _Abi::_MaskImpl::_S_all_of(__k);
4634 }
4635
4636template <typename _Tp, typename _Abi>
4637 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4638 any_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4639 {
4640 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4641 {
4642 for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4643 if (__k[__i])
4644 return true;
4645 return false;
4646 }
4647 else
4648 return _Abi::_MaskImpl::_S_any_of(__k);
4649 }
4650
4651template <typename _Tp, typename _Abi>
4652 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4653 none_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4654 {
4655 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4656 {
4657 for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4658 if (__k[__i])
4659 return false;
4660 return true;
4661 }
4662 else
4663 return _Abi::_MaskImpl::_S_none_of(__k);
4664 }
4665
4666template <typename _Tp, typename _Abi>
4667 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4668 some_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4669 {
4670 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4671 {
4672 for (size_t __i = 1; __i < simd_size_v<_Tp, _Abi>; ++__i)
4673 if (__k[__i] != __k[__i - 1])
4674 return true;
4675 return false;
4676 }
4677 else
4678 return _Abi::_MaskImpl::_S_some_of(__k);
4679 }
4680
4681template <typename _Tp, typename _Abi>
4682 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4683 popcount(const simd_mask<_Tp, _Abi>& __k) noexcept
4684 {
4685 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4686 {
4687 const int __r = __call_with_subscripts<simd_size_v<_Tp, _Abi>>(
4688 __k, [](auto... __elements) { return ((__elements != 0) + ...); });
4689 if (__builtin_is_constant_evaluated() || __builtin_constant_p(__r))
4690 return __r;
4691 }
4692 return _Abi::_MaskImpl::_S_popcount(__k);
4693 }
4694
4695template <typename _Tp, typename _Abi>
4696 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4697 find_first_set(const simd_mask<_Tp, _Abi>& __k)
4698 {
4699 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4700 {
4701 constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
4702 const size_t _Idx = __call_with_n_evaluations<_Np>(
4703 [](auto... __indexes) { return std::min({__indexes...}); },
4704 [&](auto __i) { return __k[__i] ? +__i : _Np; });
4705 if (_Idx >= _Np)
4706 __invoke_ub("find_first_set(empty mask) is UB");
4707 if (__builtin_constant_p(_Idx))
4708 return _Idx;
4709 }
4710 return _Abi::_MaskImpl::_S_find_first_set(__k);
4711 }
4712
4713template <typename _Tp, typename _Abi>
4714 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4715 find_last_set(const simd_mask<_Tp, _Abi>& __k)
4716 {
4717 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4718 {
4719 constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
4720 const int _Idx = __call_with_n_evaluations<_Np>(
4721 [](auto... __indexes) { return std::max({__indexes...}); },
4722 [&](auto __i) { return __k[__i] ? int(__i) : -1; });
4723 if (_Idx < 0)
4724	        __invoke_ub("find_last_set(empty mask) is UB");
4725 if (__builtin_constant_p(_Idx))
4726 return _Idx;
4727 }
4728 return _Abi::_MaskImpl::_S_find_last_set(__k);
4729 }
4730
4731_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4732all_of(_ExactBool __x) noexcept
4733{ return __x; }
4734
4735_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4736any_of(_ExactBool __x) noexcept
4737{ return __x; }
4738
4739_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4740none_of(_ExactBool __x) noexcept
4741{ return !__x; }
4742
4743_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4744some_of(_ExactBool) noexcept
4745{ return false; }
4746
4747_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4748popcount(_ExactBool __x) noexcept
4749{ return __x; }
4750
4751_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4752find_first_set(_ExactBool)
4753{ return 0; }
4754
4755_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4756find_last_set(_ExactBool)
4757{ return 0; }
4758
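// Example (editorial sketch): the reductions above work on masks and, via
// the _ExactBool overloads, on plain bool:
//
//   namespace stdx = std::experimental;
//   stdx::native_simd<float> v([](int i) { return float(i); });
//   if (stdx::any_of(v > 2.f))
//     {
//       int n     = stdx::popcount(v > 2.f);       // how many elements > 2
//       int first = stdx::find_first_set(v > 2.f); // lowest such index
//     }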
4759// }}}
4760
4761// _SimdIntOperators{{{1
4762template <typename _V, typename _Impl, bool>
4763 class _SimdIntOperators {};
4764
4765template <typename _V, typename _Impl>
4766 class _SimdIntOperators<_V, _Impl, true>
4767 {
4768 _GLIBCXX_SIMD_INTRINSIC const _V& __derived() const
4769 { return *static_cast<const _V*>(this); }
4770
4771 template <typename _Tp>
4772 _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V
4773 _S_make_derived(_Tp&& __d)
4774 { return {__private_init, static_cast<_Tp&&>(__d)}; }
4775
4776 public:
4777 _GLIBCXX_SIMD_CONSTEXPR friend _V& operator%=(_V& __lhs, const _V& __x)
4778 { return __lhs = __lhs % __x; }
4779
4780 _GLIBCXX_SIMD_CONSTEXPR friend _V& operator&=(_V& __lhs, const _V& __x)
4781 { return __lhs = __lhs & __x; }
4782
4783 _GLIBCXX_SIMD_CONSTEXPR friend _V& operator|=(_V& __lhs, const _V& __x)
4784 { return __lhs = __lhs | __x; }
4785
4786 _GLIBCXX_SIMD_CONSTEXPR friend _V& operator^=(_V& __lhs, const _V& __x)
4787 { return __lhs = __lhs ^ __x; }
4788
4789 _GLIBCXX_SIMD_CONSTEXPR friend _V& operator<<=(_V& __lhs, const _V& __x)
4790 { return __lhs = __lhs << __x; }
4791
4792 _GLIBCXX_SIMD_CONSTEXPR friend _V& operator>>=(_V& __lhs, const _V& __x)
4793 { return __lhs = __lhs >> __x; }
4794
4795 _GLIBCXX_SIMD_CONSTEXPR friend _V& operator<<=(_V& __lhs, int __x)
4796 { return __lhs = __lhs << __x; }
4797
4798 _GLIBCXX_SIMD_CONSTEXPR friend _V& operator>>=(_V& __lhs, int __x)
4799 { return __lhs = __lhs >> __x; }
4800
4801 _GLIBCXX_SIMD_CONSTEXPR friend _V operator%(const _V& __x, const _V& __y)
4802 {
4803 return _SimdIntOperators::_S_make_derived(
4804 _Impl::_S_modulus(__data(__x), __data(__y)));
4805 }
4806
4807 _GLIBCXX_SIMD_CONSTEXPR friend _V operator&(const _V& __x, const _V& __y)
4808 {
4809 return _SimdIntOperators::_S_make_derived(
4810 _Impl::_S_bit_and(__data(__x), __data(__y)));
4811 }
4812
4813 _GLIBCXX_SIMD_CONSTEXPR friend _V operator|(const _V& __x, const _V& __y)
4814 {
4815 return _SimdIntOperators::_S_make_derived(
4816 _Impl::_S_bit_or(__data(__x), __data(__y)));
4817 }
4818
4819 _GLIBCXX_SIMD_CONSTEXPR friend _V operator^(const _V& __x, const _V& __y)
4820 {
4821 return _SimdIntOperators::_S_make_derived(
4822 _Impl::_S_bit_xor(__data(__x), __data(__y)));
4823 }
4824
4825 _GLIBCXX_SIMD_CONSTEXPR friend _V operator<<(const _V& __x, const _V& __y)
4826 {
4827 return _SimdIntOperators::_S_make_derived(
4828 _Impl::_S_bit_shift_left(__data(__x), __data(__y)));
4829 }
4830
4831 _GLIBCXX_SIMD_CONSTEXPR friend _V operator>>(const _V& __x, const _V& __y)
4832 {
4833 return _SimdIntOperators::_S_make_derived(
4834 _Impl::_S_bit_shift_right(__data(__x), __data(__y)));
4835 }
4836
4837 template <typename _VV = _V>
4838 _GLIBCXX_SIMD_CONSTEXPR friend _V operator<<(const _V& __x, int __y)
4839 {
4840 using _Tp = typename _VV::value_type;
4841 if (__y < 0)
4842 __invoke_ub("The behavior is undefined if the right operand of a "
4843 "shift operation is negative. [expr.shift]\nA shift by "
4844 "%d was requested",
4845 __y);
4846 if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
4847 __invoke_ub(
4848 "The behavior is undefined if the right operand of a "
4849 "shift operation is greater than or equal to the width of the "
4850 "promoted left operand. [expr.shift]\nA shift by %d was requested",
4851 __y);
4852 return _SimdIntOperators::_S_make_derived(
4853 _Impl::_S_bit_shift_left(__data(__x), __y));
4854 }
4855
4856 template <typename _VV = _V>
4857 _GLIBCXX_SIMD_CONSTEXPR friend _V operator>>(const _V& __x, int __y)
4858 {
4859 using _Tp = typename _VV::value_type;
4860 if (__y < 0)
4861 __invoke_ub(
4862 "The behavior is undefined if the right operand of a shift "
4863 "operation is negative. [expr.shift]\nA shift by %d was requested",
4864 __y);
4865	      if (size_t(__y) >= sizeof(declval<_Tp>() >> __y) * __CHAR_BIT__)
4866 __invoke_ub(
4867 "The behavior is undefined if the right operand of a shift "
4868 "operation is greater than or equal to the width of the promoted "
4869 "left operand. [expr.shift]\nA shift by %d was requested",
4870 __y);
4871 return _SimdIntOperators::_S_make_derived(
4872 _Impl::_S_bit_shift_right(__data(__x), __y));
4873 }
4874
4875 // unary operators (for integral _Tp)
4876 _GLIBCXX_SIMD_CONSTEXPR _V operator~() const
4877 { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; }
4878 };
4879
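// Example (editorial sketch): integral simd types inherit these operators;
// the scalar shift overloads diagnose negative or too-large shift counts
// when _GLIBCXX_DEBUG_UB is defined:
//
//   namespace stdx = std::experimental;
//   stdx::native_simd<unsigned> u(0x80000000u);
//   u >>= 4;         // operator>>=(simd&, int)
//   auto r = u % 3u; // operator% via _Impl::_S_modulus
//   auto c = ~u;     // element-wise complement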
4880//}}}1
4881
4882// simd {{{
4883template <typename _Tp, typename _Abi>
4884 class simd : public _SimdIntOperators<
4885 simd<_Tp, _Abi>, typename _SimdTraits<_Tp, _Abi>::_SimdImpl,
4886 conjunction<is_integral<_Tp>,
4887 typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>,
4888 public _SimdTraits<_Tp, _Abi>::_SimdBase
4889 {
4890 using _Traits = _SimdTraits<_Tp, _Abi>;
4891 using _MemberType = typename _Traits::_SimdMember;
4892 using _CastType = typename _Traits::_SimdCastType;
4893 static constexpr _Tp* _S_type_tag = nullptr;
4894 friend typename _Traits::_SimdBase;
4895
4896 public:
4897 using _Impl = typename _Traits::_SimdImpl;
4898 friend _Impl;
4899 friend _SimdIntOperators<simd, _Impl, true>;
4900
4901 using value_type = _Tp;
4902 using reference = _SmartReference<_MemberType, _Impl, value_type>;
4903 using mask_type = simd_mask<_Tp, _Abi>;
4904 using abi_type = _Abi;
4905
4906 static constexpr size_t size()
4907 { return __size_or_zero_v<_Tp, _Abi>; }
4908
4909 _GLIBCXX_SIMD_CONSTEXPR simd() = default;
4910 _GLIBCXX_SIMD_CONSTEXPR simd(const simd&) = default;
4911 _GLIBCXX_SIMD_CONSTEXPR simd(simd&&) noexcept = default;
4912 _GLIBCXX_SIMD_CONSTEXPR simd& operator=(const simd&) = default;
4913 _GLIBCXX_SIMD_CONSTEXPR simd& operator=(simd&&) noexcept = default;
4914
4915 // implicit broadcast constructor
4916 template <typename _Up,
4917 typename = enable_if_t<!is_same_v<__remove_cvref_t<_Up>, bool>>>
4918 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4919 simd(_ValuePreservingOrInt<_Up, value_type>&& __x)
4920 : _M_data(
4921 _Impl::_S_broadcast(static_cast<value_type>(static_cast<_Up&&>(__x))))
4922 {}
4923
4924 // implicit type conversion constructor (convert from fixed_size to
4925 // fixed_size)
4926 template <typename _Up>
4927 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4928 simd(const simd<_Up, simd_abi::fixed_size<size()>>& __x,
4929 enable_if_t<
4930 conjunction<
4931 is_same<simd_abi::fixed_size<size()>, abi_type>,
4932 negation<__is_narrowing_conversion<_Up, value_type>>,
4933 __converts_to_higher_integer_rank<_Up, value_type>>::value,
4934 void*> = nullptr)
4935 : simd{static_cast<array<_Up, size()>>(__x).data(), vector_aligned} {}
4936
4937 // explicit type conversion constructor
4938#ifdef _GLIBCXX_SIMD_ENABLE_STATIC_CAST
4939 template <typename _Up, typename _A2,
4940 typename = decltype(static_simd_cast<simd>(
4941 declval<const simd<_Up, _A2>&>()))>
4942 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4943 simd(const simd<_Up, _A2>& __x)
4944 : simd(static_simd_cast<simd>(__x)) {}
4945#endif // _GLIBCXX_SIMD_ENABLE_STATIC_CAST
4946
4947 // generator constructor
4948 template <typename _Fp>
4949 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4950 simd(_Fp&& __gen, _ValuePreservingOrInt<decltype(declval<_Fp>()(
4951 declval<_SizeConstant<0>&>())),
4952 value_type>* = nullptr)
4953 : _M_data(_Impl::_S_generator(static_cast<_Fp&&>(__gen), _S_type_tag)) {}
4954
4955 // load constructor
4956 template <typename _Up, typename _Flags>
4957 _GLIBCXX_SIMD_ALWAYS_INLINE
4958 simd(const _Up* __mem, _Flags)
4959 : _M_data(
4960 _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag))
4961 {}
4962
4963 // loads [simd.load]
4964 template <typename _Up, typename _Flags>
4965 _GLIBCXX_SIMD_ALWAYS_INLINE void
4966 copy_from(const _Vectorizable<_Up>* __mem, _Flags)
4967 {
4968 _M_data = static_cast<decltype(_M_data)>(
4969 _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag));
4970 }
4971
4972 // stores [simd.store]
4973 template <typename _Up, typename _Flags>
4974 _GLIBCXX_SIMD_ALWAYS_INLINE void
4975 copy_to(_Vectorizable<_Up>* __mem, _Flags) const
4976 {
4977 _Impl::_S_store(_M_data, _Flags::template _S_apply<simd>(__mem),
4978 _S_type_tag);
4979 }
4980
4981 // scalar access
4982 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
4983 operator[](size_t __i)
4984 { return {_M_data, int(__i)}; }
4985
4986 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
4987 operator[]([[maybe_unused]] size_t __i) const
4988 {
4989 if constexpr (__is_scalar_abi<_Abi>())
4990 {
4991 _GLIBCXX_DEBUG_ASSERT(__i == 0);
4992 return _M_data;
4993 }
4994 else
4995 return _M_data[__i];
4996 }
4997
4998 // increment and decrement:
4999 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5000 operator++()
5001 {
5002 _Impl::_S_increment(_M_data);
5003 return *this;
5004 }
5005
5006 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5007 operator++(int)
5008 {
5009 simd __r = *this;
5010 _Impl::_S_increment(_M_data);
5011 return __r;
5012 }
5013
5014 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5015 operator--()
5016 {
5017 _Impl::_S_decrement(_M_data);
5018 return *this;
5019 }
5020
5021 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5022 operator--(int)
5023 {
5024 simd __r = *this;
5025 _Impl::_S_decrement(_M_data);
5026 return __r;
5027 }
5028
5029 // unary operators (for any _Tp)
5030 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR mask_type
5031 operator!() const
5032 { return {__private_init, _Impl::_S_negate(_M_data)}; }
5033
5034 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5035 operator+() const
5036 { return *this; }
5037
5038 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5039 operator-() const
5040 { return {__private_init, _Impl::_S_unary_minus(_M_data)}; }
5041
5042 // access to internal representation (suggested extension)
5043 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5044 simd(_CastType __init) : _M_data(__init) {}
5045
5046 // compound assignment [simd.cassign]
5047 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5048 operator+=(simd& __lhs, const simd& __x)
5049 { return __lhs = __lhs + __x; }
5050
5051 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5052 operator-=(simd& __lhs, const simd& __x)
5053 { return __lhs = __lhs - __x; }
5054
5055 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5056 operator*=(simd& __lhs, const simd& __x)
5057 { return __lhs = __lhs * __x; }
5058
5059 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5060 operator/=(simd& __lhs, const simd& __x)
5061 { return __lhs = __lhs / __x; }
5062
5063 // binary operators [simd.binary]
5064 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5065 operator+(const simd& __x, const simd& __y)
5066 { return {__private_init, _Impl::_S_plus(__x._M_data, __y._M_data)}; }
5067
5068 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5069 operator-(const simd& __x, const simd& __y)
5070 { return {__private_init, _Impl::_S_minus(__x._M_data, __y._M_data)}; }
5071
5072 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5073 operator*(const simd& __x, const simd& __y)
5074 { return {__private_init, _Impl::_S_multiplies(__x._M_data, __y._M_data)}; }
5075
5076 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5077 operator/(const simd& __x, const simd& __y)
5078 { return {__private_init, _Impl::_S_divides(__x._M_data, __y._M_data)}; }
5079
5080 // compares [simd.comparison]
5081 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5082 operator==(const simd& __x, const simd& __y)
5083 { return simd::_S_make_mask(_Impl::_S_equal_to(__x._M_data, __y._M_data)); }
5084
5085 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5086 operator!=(const simd& __x, const simd& __y)
5087 {
5088 return simd::_S_make_mask(
5089 _Impl::_S_not_equal_to(__x._M_data, __y._M_data));
5090 }
5091
5092 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5093 operator<(const simd& __x, const simd& __y)
5094 { return simd::_S_make_mask(_Impl::_S_less(__x._M_data, __y._M_data)); }
5095
5096 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5097 operator<=(const simd& __x, const simd& __y)
5098 {
5099 return simd::_S_make_mask(_Impl::_S_less_equal(__x._M_data, __y._M_data));
5100 }
5101
5102 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5103 operator>(const simd& __x, const simd& __y)
5104 { return simd::_S_make_mask(_Impl::_S_less(__y._M_data, __x._M_data)); }
5105
5106 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5107 operator>=(const simd& __x, const simd& __y)
5108 {
5109 return simd::_S_make_mask(_Impl::_S_less_equal(__y._M_data, __x._M_data));
5110 }
5111
5112 // operator?: overloads (suggested extension) {{{
5113#ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
5114 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5115 operator?:(const mask_type& __k, const simd& __where_true,
5116 const simd& __where_false)
5117 {
5118 auto __ret = __where_false;
5119 _Impl::_S_masked_assign(__data(__k), __data(__ret), __data(__where_true));
5120 return __ret;
5121 }
5122
5123#endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
5124 // }}}
5125
5126	    // "private" because of the first argument's namespace
5127 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
5128 simd(_PrivateInit, const _MemberType& __init)
5129 : _M_data(__init) {}
5130
5131	    // "private" because of the first argument's namespace
5132 _GLIBCXX_SIMD_INTRINSIC
5133 simd(_BitsetInit, bitset<size()> __init) : _M_data()
5134 { where(mask_type(__bitset_init, __init), *this) = ~*this; }
5135
5136 _GLIBCXX_SIMD_INTRINSIC constexpr bool
5137 _M_is_constprop() const
5138 {
5139 if constexpr (__is_scalar_abi<_Abi>())
5140 return __builtin_constant_p(_M_data);
5141 else
5142 return _M_data._M_is_constprop();
5143 }
5144
5145 private:
5146 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR static mask_type
5147 _S_make_mask(typename mask_type::_MemberType __k)
5148 { return {__private_init, __k}; }
5149
5150 friend const auto& __data<value_type, abi_type>(const simd&);
5151 friend auto& __data<value_type, abi_type>(simd&);
5152 alignas(_Traits::_S_simd_align) _MemberType _M_data;
5153 };
5154
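// Example (editorial sketch): basic use of the constructors and operators
// defined above:
//
//   namespace stdx = std::experimental;
//   using V = stdx::native_simd<float>;
//   alignas(stdx::memory_alignment_v<V>) float buf[V::size()] = {};
//   V v(buf, stdx::vector_aligned); // load constructor
//   v += 1.f;                       // broadcast + compound assignment
//   v.copy_to(buf, stdx::vector_aligned);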
5155// }}}
5156// __data {{{
5157template <typename _Tp, typename _Ap>
5158 _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
5159 __data(const simd<_Tp, _Ap>& __x)
5160 { return __x._M_data; }
5161
5162template <typename _Tp, typename _Ap>
5163 _GLIBCXX_SIMD_INTRINSIC constexpr auto&
5164 __data(simd<_Tp, _Ap>& __x)
5165 { return __x._M_data; }
5166
5167// }}}
5168namespace __float_bitwise_operators { //{{{
5169template <typename _Tp, typename _Ap>
5170 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5171 operator^(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5172 {
5173 return {__private_init,
5174 _Ap::_SimdImpl::_S_bit_xor(__data(__a), __data(__b))};
5175 }
5176
5177template <typename _Tp, typename _Ap>
5178 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5179 operator|(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5180 {
5181 return {__private_init,
5182 _Ap::_SimdImpl::_S_bit_or(__data(__a), __data(__b))};
5183 }
5184
5185template <typename _Tp, typename _Ap>
5186 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5187 operator&(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5188 {
5189 return {__private_init,
5190 _Ap::_SimdImpl::_S_bit_and(__data(__a), __data(__b))};
5191 }
5192} // namespace __float_bitwise_operators }}}
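// Example (editorial sketch): the operators above permit bit manipulation
// of floating-point simd values, e.g. flipping the sign bit via xor:
//
//   namespace stdx = std::experimental;
//   using namespace stdx::__float_bitwise_operators;
//   stdx::native_simd<float> x(2.f);
//   auto negated = x ^ stdx::native_simd<float>(-0.f); // now all -2.f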
5193
5194_GLIBCXX_SIMD_END_NAMESPACE
5195
5196#endif // __cplusplus >= 201703L
5197#endif // _GLIBCXX_EXPERIMENTAL_SIMD_H
5198
5199// vim: foldmethod=marker foldmarker={{{,}}}