source: Daodan/MSYS2/mingw32/include/c++/11.2.0/experimental/bits/simd.h@ 1186

Last change on this file since 1186 was 1166, checked in by rossy, 3 years ago

Daodan: Replace MinGW build env with an up-to-date MSYS2 env

File size: 161.1 KB
// Definition of the public simd interfaces -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_H
#define _GLIBCXX_EXPERIMENTAL_SIMD_H

#if __cplusplus >= 201703L

#include "simd_detail.h"
#include "numeric_traits.h"
#include <bit>
#include <bitset>
#ifdef _GLIBCXX_DEBUG_UB
#include <cstdio> // for stderr
#endif
#include <cstring>
#include <functional>
#include <iosfwd>
#include <utility>

#if _GLIBCXX_SIMD_X86INTRIN
#include <x86intrin.h>
#elif _GLIBCXX_SIMD_HAVE_NEON
#include <arm_neon.h>
#endif

/* There are several closely related types, with the following naming
 * convention:
 * _Tp: vectorizable (arithmetic) type (or any type)
 * _TV: __vector_type_t<_Tp, _Np>
 * _TW: _SimdWrapper<_Tp, _Np>
 * _TI: __intrinsic_type_t<_Tp, _Np>
 * _TVT: _VectorTraits<_TV> or _VectorTraits<_TW>
 * If one additional type is needed use _U instead of _T.
 * Otherwise use _T\d, _TV\d, _TW\d, _TI\d, _TVT\d.
 *
 * More naming conventions:
 * _Ap or _Abi: An ABI tag from the simd_abi namespace
 * _Ip: often used for integer types with sizeof(_Ip) == sizeof(_Tp),
 *      _IV, _IW as for _TV, _TW
 * _Np: number of elements (not bytes)
 * _Bytes: number of bytes
 *
 * Variable names:
 * __k: mask object (vector- or bitmask)
 */
_GLIBCXX_SIMD_BEGIN_NAMESPACE

#if !_GLIBCXX_SIMD_X86INTRIN
using __m128 [[__gnu__::__vector_size__(16)]] = float;
using __m128d [[__gnu__::__vector_size__(16)]] = double;
using __m128i [[__gnu__::__vector_size__(16)]] = long long;
using __m256 [[__gnu__::__vector_size__(32)]] = float;
using __m256d [[__gnu__::__vector_size__(32)]] = double;
using __m256i [[__gnu__::__vector_size__(32)]] = long long;
using __m512 [[__gnu__::__vector_size__(64)]] = float;
using __m512d [[__gnu__::__vector_size__(64)]] = double;
using __m512i [[__gnu__::__vector_size__(64)]] = long long;
#endif

namespace simd_abi {
// simd_abi forward declarations {{{
// implementation details:
struct _Scalar;

template <int _Np>
  struct _Fixed;

// There are two major ABIs that appear on different architectures.
// Both have non-boolean values packed into an N Byte register
// -> #elements = N / sizeof(T)
// Masks differ:
// 1. Use value vector registers for masks (all 0 or all 1)
// 2. Use bitmasks (mask registers) with one bit per value in the corresponding
//    value vector
//
// Both can be partially used, masking off the rest when doing horizontal
// operations or operations that can trap (e.g. FP_INVALID or integer division
// by 0). This is encoded as the number of used bytes.
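// For example, with 64-byte registers and _Tp = float, both ABIs hold
// 64 / sizeof(float) = 16 values; ABI 1 represents the mask as 16 all-0 or
// all-1 float lanes, while ABI 2 (e.g. AVX-512) uses a 16-bit mask register.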
template <int _UsedBytes>
  struct _VecBuiltin;

template <int _UsedBytes>
  struct _VecBltnBtmsk;

template <typename _Tp, int _Np>
  using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>;

template <int _UsedBytes = 16>
  using _Sse = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = 32>
  using _Avx = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = 64>
  using _Avx512 = _VecBltnBtmsk<_UsedBytes>;

template <int _UsedBytes = 16>
  using _Neon = _VecBuiltin<_UsedBytes>;

// implementation-defined:
using __sse = _Sse<>;
using __avx = _Avx<>;
using __avx512 = _Avx512<>;
using __neon = _Neon<>;
using __neon128 = _Neon<16>;
using __neon64 = _Neon<8>;

// standard:
template <typename _Tp, size_t _Np, typename...>
  struct deduce;

template <int _Np>
  using fixed_size = _Fixed<_Np>;

using scalar = _Scalar;

// }}}
} // namespace simd_abi
// forward declarations is_simd(_mask), simd(_mask), simd_size {{{
template <typename _Tp>
  struct is_simd;

template <typename _Tp>
  struct is_simd_mask;

template <typename _Tp, typename _Abi>
  class simd;

template <typename _Tp, typename _Abi>
  class simd_mask;

template <typename _Tp, typename _Abi>
  struct simd_size;

// }}}
// load/store flags {{{
struct element_aligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment = alignof(_Up);

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return __ptr; }
};

struct vector_aligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment
      = std::__bit_ceil(sizeof(_Up) * _Tp::size());

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    {
      return static_cast<_Up*>(
        __builtin_assume_aligned(__ptr, _S_alignment<_Tp, _Up>));
    }
};

template <size_t _Np> struct overaligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment = _Np;

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _Np)); }
};

inline constexpr element_aligned_tag element_aligned = {};

inline constexpr vector_aligned_tag vector_aligned = {};

template <size_t _Np>
  inline constexpr overaligned_tag<_Np> overaligned = {};

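// For example (using simd and the memory_alignment_v trait declared later in
// this file), the tags select the alignment contract for loads and stores:
//   alignas(memory_alignment_v<simd<float>>) float __mem[2 * simd<float>::size()];
//   simd<float> __v(__mem, vector_aligned);      // may assume full alignment
//   simd<float> __w(__mem + 1, element_aligned); // only alignof(float) assumed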
// }}}
template <size_t _Xp>
  using _SizeConstant = integral_constant<size_t, _Xp>;

namespace __detail
{
  struct _Minimum
  {
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      _Tp
      operator()(_Tp __a, _Tp __b) const
      {
        using std::min;
        return min(__a, __b);
      }
  };

  struct _Maximum
  {
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      _Tp
      operator()(_Tp __a, _Tp __b) const
      {
        using std::max;
        return max(__a, __b);
      }
  };
} // namespace __detail

// unrolled/pack execution helpers
// __execute_n_times{{{
template <typename _Fp, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_on_index_sequence(_Fp&& __f, index_sequence<_I...>)
  { ((void)__f(_SizeConstant<_I>()), ...); }

template <typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_on_index_sequence(_Fp&&, index_sequence<>)
  { }

template <size_t _Np, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_n_times(_Fp&& __f)
  {
    __execute_on_index_sequence(static_cast<_Fp&&>(__f),
                                make_index_sequence<_Np>{});
  }

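// For example, __execute_n_times<3>(__f) evaluates
// __f(_SizeConstant<0>()), __f(_SizeConstant<1>()), __f(_SizeConstant<2>()),
// so e.g.:
//   int __acc = 0;
//   __execute_n_times<3>([&](auto __i) { __acc += __i; }); // __acc == 0+1+2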
// }}}
// __generate_from_n_evaluations{{{
template <typename _R, typename _Fp, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __execute_on_index_sequence_with_return(_Fp&& __f, index_sequence<_I...>)
  { return _R{__f(_SizeConstant<_I>())...}; }

template <size_t _Np, typename _R, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __generate_from_n_evaluations(_Fp&& __f)
  {
    return __execute_on_index_sequence_with_return<_R>(
      static_cast<_Fp&&>(__f), make_index_sequence<_Np>{});
  }

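// For example, building an array of squares:
//   auto __sq = __generate_from_n_evaluations<4, array<int, 4>>(
//     [](auto __i) { return __i * __i; }); // {0, 1, 4, 9}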
// }}}
// __call_with_n_evaluations{{{
template <size_t... _I, typename _F0, typename _FArgs>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_n_evaluations(index_sequence<_I...>, _F0&& __f0, _FArgs&& __fargs)
  { return __f0(__fargs(_SizeConstant<_I>())...); }

template <size_t _Np, typename _F0, typename _FArgs>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_n_evaluations(_F0&& __f0, _FArgs&& __fargs)
  {
    return __call_with_n_evaluations(make_index_sequence<_Np>{},
                                     static_cast<_F0&&>(__f0),
                                     static_cast<_FArgs&&>(__fargs));
  }

// }}}
// __call_with_subscripts{{{
template <size_t _First = 0, size_t... _It, typename _Tp, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_subscripts(_Tp&& __x, index_sequence<_It...>, _Fp&& __fun)
  { return __fun(__x[_First + _It]...); }

template <size_t _Np, size_t _First = 0, typename _Tp, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_subscripts(_Tp&& __x, _Fp&& __fun)
  {
    return __call_with_subscripts<_First>(static_cast<_Tp&&>(__x),
                                          make_index_sequence<_Np>(),
                                          static_cast<_Fp&&>(__fun));
  }

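// For example, __call_with_subscripts<2, 1>(__v, __fun) calls
// __fun(__v[1], __v[2]), which is handy for folding a subrange of elements.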
// }}}

// vvv ---- type traits ---- vvv
// integer type aliases{{{
using _UChar = unsigned char;
using _SChar = signed char;
using _UShort = unsigned short;
using _UInt = unsigned int;
using _ULong = unsigned long;
using _ULLong = unsigned long long;
using _LLong = long long;

//}}}
// __first_of_pack{{{
template <typename _T0, typename...>
  struct __first_of_pack
  { using type = _T0; };

template <typename... _Ts>
  using __first_of_pack_t = typename __first_of_pack<_Ts...>::type;

//}}}
// __value_type_or_identity_t {{{
template <typename _Tp>
  typename _Tp::value_type
  __value_type_or_identity_impl(int);

template <typename _Tp>
  _Tp
  __value_type_or_identity_impl(float);

template <typename _Tp>
  using __value_type_or_identity_t
    = decltype(__value_type_or_identity_impl<_Tp>(int()));

// }}}
// __is_vectorizable {{{
template <typename _Tp>
  struct __is_vectorizable : public is_arithmetic<_Tp> {};

template <>
  struct __is_vectorizable<bool> : public false_type {};

template <typename _Tp>
  inline constexpr bool __is_vectorizable_v = __is_vectorizable<_Tp>::value;

// Deduces to a vectorizable type
template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
  using _Vectorizable = _Tp;

// }}}
// _LoadStorePtr / __is_possible_loadstore_conversion {{{
template <typename _Ptr, typename _ValueType>
  struct __is_possible_loadstore_conversion
  : conjunction<__is_vectorizable<_Ptr>, __is_vectorizable<_ValueType>> {};

template <>
  struct __is_possible_loadstore_conversion<bool, bool> : true_type {};

// Deduces to a type allowed for load/store with the given value type.
template <typename _Ptr, typename _ValueType,
          typename = enable_if_t<
            __is_possible_loadstore_conversion<_Ptr, _ValueType>::value>>
  using _LoadStorePtr = _Ptr;

// }}}
// __is_bitmask{{{
template <typename _Tp, typename = void_t<>>
  struct __is_bitmask : false_type {};

template <typename _Tp>
  inline constexpr bool __is_bitmask_v = __is_bitmask<_Tp>::value;

// the __mmaskXX case:
template <typename _Tp>
  struct __is_bitmask<_Tp,
    void_t<decltype(declval<unsigned&>() = declval<_Tp>() & 1u)>>
  : true_type {};

// }}}
// __int_for_sizeof{{{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
template <size_t _Bytes>
  constexpr auto
  __int_for_sizeof()
  {
    if constexpr (_Bytes == sizeof(int))
      return int();
#ifdef __clang__
    else if constexpr (_Bytes == sizeof(char))
      return char();
#else
    else if constexpr (_Bytes == sizeof(_SChar))
      return _SChar();
#endif
    else if constexpr (_Bytes == sizeof(short))
      return short();
#ifndef __clang__
    else if constexpr (_Bytes == sizeof(long))
      return long();
#endif
    else if constexpr (_Bytes == sizeof(_LLong))
      return _LLong();
#ifdef __SIZEOF_INT128__
    else if constexpr (_Bytes == sizeof(__int128))
      return __int128();
#endif // __SIZEOF_INT128__
    else if constexpr (_Bytes % sizeof(int) == 0)
      {
        constexpr size_t _Np = _Bytes / sizeof(int);
        struct _Ip
        {
          int _M_data[_Np];

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator&(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
              [&](auto __i) { return __rhs._M_data[__i] & _M_data[__i]; });
          }

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator|(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
              [&](auto __i) { return __rhs._M_data[__i] | _M_data[__i]; });
          }

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator^(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
              [&](auto __i) { return __rhs._M_data[__i] ^ _M_data[__i]; });
          }

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator~() const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
              [&](auto __i) { return ~_M_data[__i]; });
          }
        };
        return _Ip{};
      }
    else
      static_assert(_Bytes != _Bytes, "this should be unreachable");
  }
#pragma GCC diagnostic pop

template <typename _Tp>
  using __int_for_sizeof_t = decltype(__int_for_sizeof<sizeof(_Tp)>());

template <size_t _Np>
  using __int_with_sizeof_t = decltype(__int_for_sizeof<_Np>());

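// For example, on targets where sizeof(int) == 4:
//   static_assert(is_same_v<__int_with_sizeof_t<4>, int>);
//   static_assert(sizeof(__int_for_sizeof_t<double>) == sizeof(double));
// For sizes without a matching integer type (e.g. 12), the array-backed
// struct _Ip defined above, with the bitwise operators, is returned instead.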
// }}}
// __is_fixed_size_abi{{{
template <typename _Tp>
  struct __is_fixed_size_abi : false_type {};

template <int _Np>
  struct __is_fixed_size_abi<simd_abi::fixed_size<_Np>> : true_type {};

template <typename _Tp>
  inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value;

// }}}
// constexpr feature detection{{{
constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX;
constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE;
constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2;
constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3;
constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3;
constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1;
constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2;
constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP;
constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX;
constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2;
constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1;
constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2;
constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT;
constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A;
constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA;
constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4;
constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C;
constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT;
constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F;
constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ;
constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL;
constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW;
constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl;
constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl;

constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON;
constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32;
constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64;
constexpr inline bool __support_neon_float =
#if defined __GCC_IEC_559
  __GCC_IEC_559 == 0;
#elif defined __FAST_MATH__
  true;
#else
  false;
#endif

#ifdef _ARCH_PWR10
constexpr inline bool __have_power10vec = true;
#else
constexpr inline bool __have_power10vec = false;
#endif
#ifdef __POWER9_VECTOR__
constexpr inline bool __have_power9vec = true;
#else
constexpr inline bool __have_power9vec = false;
#endif
#if defined __POWER8_VECTOR__
constexpr inline bool __have_power8vec = true;
#else
constexpr inline bool __have_power8vec = __have_power9vec;
#endif
#if defined __VSX__
constexpr inline bool __have_power_vsx = true;
#else
constexpr inline bool __have_power_vsx = __have_power8vec;
#endif
#if defined __ALTIVEC__
constexpr inline bool __have_power_vmx = true;
#else
constexpr inline bool __have_power_vmx = __have_power_vsx;
#endif

// }}}
// __is_scalar_abi {{{
template <typename _Abi>
  constexpr bool
  __is_scalar_abi()
  { return is_same_v<simd_abi::scalar, _Abi>; }

// }}}
// __abi_bytes_v {{{
template <template <int> class _Abi, int _Bytes>
  constexpr int
  __abi_bytes_impl(_Abi<_Bytes>*)
  { return _Bytes; }

template <typename _Tp>
  constexpr int
  __abi_bytes_impl(_Tp*)
  { return -1; }

template <typename _Abi>
  inline constexpr int __abi_bytes_v
    = __abi_bytes_impl(static_cast<_Abi*>(nullptr));

// }}}
// __is_builtin_bitmask_abi {{{
template <typename _Abi>
  constexpr bool
  __is_builtin_bitmask_abi()
  { return is_same_v<simd_abi::_VecBltnBtmsk<__abi_bytes_v<_Abi>>, _Abi>; }

// }}}
// __is_sse_abi {{{
template <typename _Abi>
  constexpr bool
  __is_sse_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx_abi {{{
template <typename _Abi>
  constexpr bool
  __is_avx_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes > 16 && _Bytes <= 32
           && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx512_abi {{{
template <typename _Abi>
  constexpr bool
  __is_avx512_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 64 && is_same_v<simd_abi::_Avx512<_Bytes>, _Abi>;
  }

// }}}
// __is_neon_abi {{{
template <typename _Abi>
  constexpr bool
  __is_neon_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __make_dependent_t {{{
template <typename, typename _Up>
  struct __make_dependent
  { using type = _Up; };

template <typename _Tp, typename _Up>
  using __make_dependent_t = typename __make_dependent<_Tp, _Up>::type;

// }}}
// ^^^ ---- type traits ---- ^^^

// __invoke_ub{{{
template <typename... _Args>
  [[noreturn]] _GLIBCXX_SIMD_ALWAYS_INLINE void
  __invoke_ub([[maybe_unused]] const char* __msg,
              [[maybe_unused]] const _Args&... __args)
  {
#ifdef _GLIBCXX_DEBUG_UB
    __builtin_fprintf(stderr, __msg, __args...);
    __builtin_trap();
#else
    __builtin_unreachable();
#endif
  }

// }}}
// __assert_unreachable{{{
template <typename _Tp>
  struct __assert_unreachable
  { static_assert(!is_same_v<_Tp, _Tp>, "this should be unreachable"); };

// }}}
// __size_or_zero_v {{{
template <typename _Tp, typename _Ap, size_t _Np = simd_size<_Tp, _Ap>::value>
  constexpr size_t
  __size_or_zero_dispatch(int)
  { return _Np; }

template <typename _Tp, typename _Ap>
  constexpr size_t
  __size_or_zero_dispatch(float)
  { return 0; }

template <typename _Tp, typename _Ap>
  inline constexpr size_t __size_or_zero_v
    = __size_or_zero_dispatch<_Tp, _Ap>(0);

// }}}
// __div_roundup {{{
inline constexpr size_t
__div_roundup(size_t __a, size_t __b)
{ return (__a + __b - 1) / __b; }

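// e.g. __div_roundup(17, 8) == 3: ceiling division for positive integers.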
// }}}
// _ExactBool{{{
class _ExactBool
{
  const bool _M_data;

public:
  _GLIBCXX_SIMD_INTRINSIC constexpr _ExactBool(bool __b) : _M_data(__b) {}

  _ExactBool(int) = delete;

  _GLIBCXX_SIMD_INTRINSIC constexpr operator bool() const { return _M_data; }
};

// }}}
// __may_alias{{{
/**@internal
 * Helper __may_alias<_Tp> that turns _Tp into the type to be used for an
 * aliasing pointer. This adds the __may_alias attribute to _Tp (with compilers
 * that support it).
 */
template <typename _Tp>
  using __may_alias [[__gnu__::__may_alias__]] = _Tp;

// }}}
// _UnsupportedBase {{{
// simd and simd_mask base for unsupported <_Tp, _Abi>
struct _UnsupportedBase
{
  _UnsupportedBase() = delete;
  _UnsupportedBase(const _UnsupportedBase&) = delete;
  _UnsupportedBase& operator=(const _UnsupportedBase&) = delete;
  ~_UnsupportedBase() = delete;
};

// }}}
// _InvalidTraits {{{
/**
 * @internal
 * Defines the implementation of a given <_Tp, _Abi>.
 *
 * Implementations must ensure that only valid <_Tp, _Abi> instantiations are
 * possible. Static assertions in the type definition do not suffice. It is
 * important that SFINAE works.
 */
struct _InvalidTraits
{
  using _IsValid = false_type;
  using _SimdBase = _UnsupportedBase;
  using _MaskBase = _UnsupportedBase;

  static constexpr size_t _S_full_size = 0;
  static constexpr bool _S_is_partial = false;

  static constexpr size_t _S_simd_align = 1;
  struct _SimdImpl;
  struct _SimdMember {};
  struct _SimdCastType;

  static constexpr size_t _S_mask_align = 1;
  struct _MaskImpl;
  struct _MaskMember {};
  struct _MaskCastType;
};

// }}}
// _SimdTraits {{{
template <typename _Tp, typename _Abi, typename = void_t<>>
  struct _SimdTraits : _InvalidTraits {};

// }}}
// __private_init, __bitset_init{{{
/**
 * @internal
 * Tag used for private init constructor of simd and simd_mask
 */
inline constexpr struct _PrivateInit {} __private_init = {};

inline constexpr struct _BitsetInit {} __bitset_init = {};

// }}}
// __is_narrowing_conversion<_From, _To>{{{
template <typename _From, typename _To, bool = is_arithmetic_v<_From>,
          bool = is_arithmetic_v<_To>>
  struct __is_narrowing_conversion;

// ignore "signed/unsigned mismatch" in the following trait.
// The implicit conversions will do the right thing here.
template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, true, true>
  : public __bool_constant<(
      __digits_v<_From> > __digits_v<_To>
      || __finite_max_v<_From> > __finite_max_v<_To>
      || __finite_min_v<_From> < __finite_min_v<_To>
      || (is_signed_v<_From> && is_unsigned_v<_To>))> {};

template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, bool, true, true>
  : public true_type {};

template <>
  struct __is_narrowing_conversion<bool, bool, true, true>
  : public false_type {};

template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, _Tp, true, true>
  : public false_type {};

template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, false, true>
  : public negation<is_convertible<_From, _To>> {};

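// For example, __is_narrowing_conversion<int, short>::value is true (the
// destination has fewer digits), while <short, int> and <_UInt, _ULLong>
// are not narrowing.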
// }}}
// __converts_to_higher_integer_rank{{{
template <typename _From, typename _To, bool = (sizeof(_From) < sizeof(_To))>
  struct __converts_to_higher_integer_rank : public true_type {};

// this may fail for char -> short if sizeof(char) == sizeof(short)
template <typename _From, typename _To>
  struct __converts_to_higher_integer_rank<_From, _To, false>
  : public is_same<decltype(declval<_From>() + declval<_To>()), _To> {};

// }}}
// __data(simd/simd_mask) {{{
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd_mask<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd_mask<_Tp, _Ap>& __x);

// }}}
// _SimdConverter {{{
template <typename _FromT, typename _FromA, typename _ToT, typename _ToA,
          typename = void>
  struct _SimdConverter;

template <typename _Tp, typename _Ap>
  struct _SimdConverter<_Tp, _Ap, _Tp, _Ap, void>
  {
    template <typename _Up>
      _GLIBCXX_SIMD_INTRINSIC const _Up&
      operator()(const _Up& __x)
      { return __x; }
  };

// }}}
// __to_value_type_or_member_type {{{
template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __to_value_type_or_member_type(const _V& __x) -> decltype(__data(__x))
  { return __data(__x); }

template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr const typename _V::value_type&
  __to_value_type_or_member_type(const typename _V::value_type& __x)
  { return __x; }

// }}}
// __bool_storage_member_type{{{
template <size_t _Size>
  struct __bool_storage_member_type;

template <size_t _Size>
  using __bool_storage_member_type_t =
    typename __bool_storage_member_type<_Size>::type;

// }}}
// _SimdTuple {{{
// why not tuple?
// 1. tuple gives no guarantee about the storage order, but I require
//    storage equivalent to array<_Tp, _Np>
// 2. direct access to the element type (first template argument)
// 3. enforces equal element type, only different _Abi types are allowed
template <typename _Tp, typename... _Abis>
  struct _SimdTuple;

//}}}
// __fixed_size_storage_t {{{
template <typename _Tp, int _Np>
  struct __fixed_size_storage;

template <typename _Tp, int _Np>
  using __fixed_size_storage_t = typename __fixed_size_storage<_Tp, _Np>::type;

// }}}
// _SimdWrapper fwd decl{{{
template <typename _Tp, size_t _Size, typename = void_t<>>
  struct _SimdWrapper;

template <typename _Tp>
  using _SimdWrapper8 = _SimdWrapper<_Tp, 8 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper16 = _SimdWrapper<_Tp, 16 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper32 = _SimdWrapper<_Tp, 32 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>;

// }}}
// __is_simd_wrapper {{{
template <typename _Tp>
  struct __is_simd_wrapper : false_type {};

template <typename _Tp, size_t _Np>
  struct __is_simd_wrapper<_SimdWrapper<_Tp, _Np>> : true_type {};

template <typename _Tp>
  inline constexpr bool __is_simd_wrapper_v = __is_simd_wrapper<_Tp>::value;

// }}}
// _BitOps {{{
struct _BitOps
{
  // _S_bit_iteration {{{
  template <typename _Tp, typename _Fp>
    static void
    _S_bit_iteration(_Tp __mask, _Fp&& __f)
    {
      static_assert(sizeof(_ULLong) >= sizeof(_Tp));
      conditional_t<sizeof(_Tp) <= sizeof(_UInt), _UInt, _ULLong> __k;
      if constexpr (is_convertible_v<_Tp, decltype(__k)>)
        __k = __mask;
      else
        __k = __mask.to_ullong();
      while (__k)
        {
          __f(std::__countr_zero(__k));
          __k &= (__k - 1);
        }
    }

  //}}}
};

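// For example, _S_bit_iteration(0b1010u, __f) invokes __f(1) and __f(3):
// __k &= __k - 1 clears the lowest set bit after each round.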
//}}}
// __increment, __decrement {{{
template <typename _Tp = void>
  struct __increment
  { constexpr _Tp operator()(_Tp __a) const { return ++__a; } };

template <>
  struct __increment<void>
  {
    template <typename _Tp>
      constexpr _Tp
      operator()(_Tp __a) const
      { return ++__a; }
  };

template <typename _Tp = void>
  struct __decrement
  { constexpr _Tp operator()(_Tp __a) const { return --__a; } };

template <>
  struct __decrement<void>
  {
    template <typename _Tp>
      constexpr _Tp
      operator()(_Tp __a) const
      { return --__a; }
  };

// }}}
// _ValuePreserving(OrInt) {{{
template <typename _From, typename _To,
          typename = enable_if_t<negation<
            __is_narrowing_conversion<__remove_cvref_t<_From>, _To>>::value>>
  using _ValuePreserving = _From;

template <typename _From, typename _To,
          typename _DecayedFrom = __remove_cvref_t<_From>,
          typename = enable_if_t<conjunction<
            is_convertible<_From, _To>,
            disjunction<
              is_same<_DecayedFrom, _To>, is_same<_DecayedFrom, int>,
              conjunction<is_same<_DecayedFrom, _UInt>, is_unsigned<_To>>,
              negation<__is_narrowing_conversion<_DecayedFrom, _To>>>>::value>>
  using _ValuePreservingOrInt = _From;

// }}}
// __intrinsic_type {{{
template <typename _Tp, size_t _Bytes, typename = void_t<>>
  struct __intrinsic_type;

template <typename _Tp, size_t _Size>
  using __intrinsic_type_t =
    typename __intrinsic_type<_Tp, _Size * sizeof(_Tp)>::type;

template <typename _Tp>
  using __intrinsic_type2_t = typename __intrinsic_type<_Tp, 2>::type;
template <typename _Tp>
  using __intrinsic_type4_t = typename __intrinsic_type<_Tp, 4>::type;
template <typename _Tp>
  using __intrinsic_type8_t = typename __intrinsic_type<_Tp, 8>::type;
template <typename _Tp>
  using __intrinsic_type16_t = typename __intrinsic_type<_Tp, 16>::type;
template <typename _Tp>
  using __intrinsic_type32_t = typename __intrinsic_type<_Tp, 32>::type;
template <typename _Tp>
  using __intrinsic_type64_t = typename __intrinsic_type<_Tp, 64>::type;

// }}}
// _BitMask {{{
template <size_t _Np, bool _Sanitized = false>
  struct _BitMask;

template <size_t _Np, bool _Sanitized>
  struct __is_bitmask<_BitMask<_Np, _Sanitized>, void> : true_type {};

template <size_t _Np>
  using _SanitizedBitMask = _BitMask<_Np, true>;

template <size_t _Np, bool _Sanitized>
  struct _BitMask
  {
    static_assert(_Np > 0);

    static constexpr size_t _NBytes = __div_roundup(_Np, __CHAR_BIT__);

    using _Tp = conditional_t<_Np == 1, bool,
                              make_unsigned_t<__int_with_sizeof_t<std::min(
                                sizeof(_ULLong), std::__bit_ceil(_NBytes))>>>;

    static constexpr int _S_array_size = __div_roundup(_NBytes, sizeof(_Tp));

    _Tp _M_bits[_S_array_size];

    static constexpr int _S_unused_bits
      = _Np == 1 ? 0 : _S_array_size * sizeof(_Tp) * __CHAR_BIT__ - _Np;

    static constexpr _Tp _S_bitmask = +_Tp(~_Tp()) >> _S_unused_bits;

    constexpr _BitMask() noexcept = default;

    constexpr _BitMask(unsigned long long __x) noexcept
    : _M_bits{static_cast<_Tp>(__x)} {}

    _BitMask(bitset<_Np> __x) noexcept : _BitMask(__x.to_ullong()) {}

    constexpr _BitMask(const _BitMask&) noexcept = default;

    template <bool _RhsSanitized, typename = enable_if_t<_RhsSanitized == false
                                                         && _Sanitized == true>>
      constexpr _BitMask(const _BitMask<_Np, _RhsSanitized>& __rhs) noexcept
      : _BitMask(__rhs._M_sanitized()) {}

    constexpr operator _SimdWrapper<bool, _Np>() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr _Tp
    _M_to_bits() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr unsigned long long
    to_ullong() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr unsigned long
    to_ulong() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    constexpr bitset<_Np>
    _M_to_bitset() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    constexpr decltype(auto)
    _M_sanitized() const noexcept
    {
      if constexpr (_Sanitized)
        return *this;
      else if constexpr (_Np == 1)
        return _SanitizedBitMask<_Np>(_M_bits[0]);
      else
        {
          _SanitizedBitMask<_Np> __r = {};
          for (int __i = 0; __i < _S_array_size; ++__i)
            __r._M_bits[__i] = _M_bits[__i];
          if constexpr (_S_unused_bits > 0)
            __r._M_bits[_S_array_size - 1] &= _S_bitmask;
          return __r;
        }
    }

    template <size_t _Mp, bool _LSanitized>
      constexpr _BitMask<_Np + _Mp, _Sanitized>
      _M_prepend(_BitMask<_Mp, _LSanitized> __lsb) const noexcept
      {
        constexpr size_t _RN = _Np + _Mp;
        using _Rp = _BitMask<_RN, _Sanitized>;
        if constexpr (_Rp::_S_array_size == 1)
          {
            _Rp __r{{_M_bits[0]}};
            __r._M_bits[0] <<= _Mp;
            __r._M_bits[0] |= __lsb._M_sanitized()._M_bits[0];
            return __r;
          }
        else
          __assert_unreachable<_Rp>();
      }

    // Return a new _BitMask with size _NewSize while dropping _DropLsb least
    // significant bits. If the operation implicitly produces a sanitized
    // bitmask, the result type will have _Sanitized set.
    template <size_t _DropLsb, size_t _NewSize = _Np - _DropLsb>
      constexpr auto
      _M_extract() const noexcept
      {
        static_assert(_Np > _DropLsb);
        static_assert(_DropLsb + _NewSize <= sizeof(_ULLong) * __CHAR_BIT__,
                      "not implemented for bitmasks larger than one ullong");
        if constexpr (_NewSize == 1)
          // must sanitize because the return _Tp is bool
          return _SanitizedBitMask<1>(_M_bits[0] & (_Tp(1) << _DropLsb));
        else
          return _BitMask<_NewSize,
                          ((_NewSize + _DropLsb == sizeof(_Tp) * __CHAR_BIT__
                            && _NewSize + _DropLsb <= _Np)
                           || ((_Sanitized || _Np == sizeof(_Tp) * __CHAR_BIT__)
                               && _NewSize + _DropLsb >= _Np))>(_M_bits[0]
                                                                >> _DropLsb);
      }

    // True if all bits are set. Implicitly sanitizes if _Sanitized == false.
    constexpr bool
    all() const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().all();
      else
        {
          constexpr _Tp __allbits = ~_Tp();
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            if (_M_bits[__i] != __allbits)
              return false;
          return _M_bits[_S_array_size - 1] == _S_bitmask;
        }
    }

    // True if at least one bit is set. Implicitly sanitizes if _Sanitized ==
    // false.
    constexpr bool
    any() const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().any();
      else
        {
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            if (_M_bits[__i] != 0)
              return true;
          return _M_bits[_S_array_size - 1] != 0;
        }
    }

    // True if no bit is set. Implicitly sanitizes if _Sanitized == false.
    constexpr bool
    none() const noexcept
    {
      if constexpr (_Np == 1)
        return !_M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().none();
      else
        {
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            if (_M_bits[__i] != 0)
              return false;
          return _M_bits[_S_array_size - 1] == 0;
        }
    }

    // Returns the number of set bits. Implicitly sanitizes if _Sanitized ==
    // false.
    constexpr int
    count() const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().count();
      else
        {
          int __result = __builtin_popcountll(_M_bits[0]);
          for (int __i = 1; __i < _S_array_size; ++__i)
            __result += __builtin_popcountll(_M_bits[__i]);
          return __result;
        }
    }

    // Returns the bit at offset __i as bool.
    constexpr bool
    operator[](size_t __i) const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (_S_array_size == 1)
        return (_M_bits[0] >> __i) & 1;
      else
        {
          const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
          const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
          return (_M_bits[__j] >> __shift) & 1;
        }
    }

    template <size_t __i>
      constexpr bool
      operator[](_SizeConstant<__i>) const noexcept
      {
        static_assert(__i < _Np);
        constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
        constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
        return static_cast<bool>(_M_bits[__j] & (_Tp(1) << __shift));
      }

    // Set the bit at offset __i to __x.
    constexpr void
    set(size_t __i, bool __x) noexcept
    {
      if constexpr (_Np == 1)
        _M_bits[0] = __x;
      else if constexpr (_S_array_size == 1)
        {
          _M_bits[0] &= ~_Tp(_Tp(1) << __i);
          _M_bits[0] |= _Tp(_Tp(__x) << __i);
        }
      else
        {
          const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
          const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
          _M_bits[__j] &= ~_Tp(_Tp(1) << __shift);
          _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
        }
    }

    template <size_t __i>
      constexpr void
      set(_SizeConstant<__i>, bool __x) noexcept
      {
        static_assert(__i < _Np);
        if constexpr (_Np == 1)
          _M_bits[0] = __x;
        else
          {
            constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
            constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
            constexpr _Tp __mask = ~_Tp(_Tp(1) << __shift);
            _M_bits[__j] &= __mask;
            _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
          }
      }

    // Inverts all bits. Sanitized input leads to sanitized output.
    constexpr _BitMask
    operator~() const noexcept
    {
      if constexpr (_Np == 1)
        return !_M_bits[0];
      else
        {
          _BitMask __result{};
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            __result._M_bits[__i] = ~_M_bits[__i];
          if constexpr (_Sanitized)
            __result._M_bits[_S_array_size - 1]
              = _M_bits[_S_array_size - 1] ^ _S_bitmask;
          else
            __result._M_bits[_S_array_size - 1] = ~_M_bits[_S_array_size - 1];
          return __result;
        }
    }

    constexpr _BitMask&
    operator^=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
        [&](auto __i) { _M_bits[__i] ^= __b._M_bits[__i]; });
      return *this;
    }

    constexpr _BitMask&
    operator|=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
        [&](auto __i) { _M_bits[__i] |= __b._M_bits[__i]; });
      return *this;
    }

    constexpr _BitMask&
    operator&=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
        [&](auto __i) { _M_bits[__i] &= __b._M_bits[__i]; });
      return *this;
    }

    friend constexpr _BitMask
    operator^(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r ^= __b;
      return __r;
    }

    friend constexpr _BitMask
    operator|(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r |= __b;
      return __r;
    }

    friend constexpr _BitMask
    operator&(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r &= __b;
      return __r;
    }

    _GLIBCXX_SIMD_INTRINSIC
    constexpr bool
    _M_is_constprop() const
    {
      if constexpr (_S_array_size == 0)
        return __builtin_constant_p(_M_bits[0]);
      else
        {
          for (int __i = 0; __i < _S_array_size; ++__i)
            if (!__builtin_constant_p(_M_bits[__i]))
              return false;
          return true;
        }
    }
  };

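// A small usage sketch: an 8-bit mask with bits 0 and 2 set.
//   _SanitizedBitMask<8> __k(0b101);
//   __k.count() == 2 and __k[_SizeConstant<2>()] == true;
//   __k._M_extract<1>() drops the LSB, yielding a sanitized 7-bit mask
//   with value 0b10.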
// }}}

// vvv ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- vvv
// __min_vector_size {{{
template <typename _Tp = void>
  static inline constexpr int __min_vector_size = 2 * sizeof(_Tp);

#if _GLIBCXX_SIMD_HAVE_NEON
template <>
  inline constexpr int __min_vector_size<void> = 8;
#else
template <>
  inline constexpr int __min_vector_size<void> = 16;
#endif

// }}}
// __vector_type {{{
template <typename _Tp, size_t _Np, typename = void>
  struct __vector_type_n {};

// substitution failure for 0-element case
template <typename _Tp>
  struct __vector_type_n<_Tp, 0, void> {};

// special case 1-element to be _Tp itself
template <typename _Tp>
  struct __vector_type_n<_Tp, 1, enable_if_t<__is_vectorizable_v<_Tp>>>
  { using type = _Tp; };

// else, use GNU-style builtin vector types
template <typename _Tp, size_t _Np>
  struct __vector_type_n<_Tp, _Np,
                         enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>>
  {
    static constexpr size_t _S_Np2 = std::__bit_ceil(_Np * sizeof(_Tp));

    static constexpr size_t _S_Bytes =
#ifdef __i386__
      // Using [[gnu::vector_size(8)]] would wreak havoc on the FPU because
      // those objects are passed via MMX registers and nothing ever calls EMMS.
      _S_Np2 == 8 ? 16 :
#endif
      _S_Np2 < __min_vector_size<_Tp> ? __min_vector_size<_Tp>
                                      : _S_Np2;

    using type [[__gnu__::__vector_size__(_S_Bytes)]] = _Tp;
  };

template <typename _Tp, size_t _Bytes, size_t = _Bytes % sizeof(_Tp)>
  struct __vector_type;

template <typename _Tp, size_t _Bytes>
  struct __vector_type<_Tp, _Bytes, 0>
  : __vector_type_n<_Tp, _Bytes / sizeof(_Tp)> {};

template <typename _Tp, size_t _Size>
  using __vector_type_t = typename __vector_type_n<_Tp, _Size>::type;

template <typename _Tp>
  using __vector_type2_t = typename __vector_type<_Tp, 2>::type;
template <typename _Tp>
  using __vector_type4_t = typename __vector_type<_Tp, 4>::type;
template <typename _Tp>
  using __vector_type8_t = typename __vector_type<_Tp, 8>::type;
template <typename _Tp>
  using __vector_type16_t = typename __vector_type<_Tp, 16>::type;
template <typename _Tp>
  using __vector_type32_t = typename __vector_type<_Tp, 32>::type;
template <typename _Tp>
  using __vector_type64_t = typename __vector_type<_Tp, 64>::type;

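// For example, __vector_type_t<float, 4> is a 16-byte
// [[__gnu__::__vector_size__(16)]] float vector; __vector_type_t<float, 3>
// rounds 12 bytes up to the next power of two and also yields 16 bytes.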
// }}}
// __is_vector_type {{{
template <typename _Tp, typename = void_t<>>
  struct __is_vector_type : false_type {};

template <typename _Tp>
  struct __is_vector_type<
    _Tp, void_t<typename __vector_type<
           remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
  : is_same<_Tp, typename __vector_type<
                   remove_reference_t<decltype(declval<_Tp>()[0])>,
                   sizeof(_Tp)>::type> {};

template <typename _Tp>
  inline constexpr bool __is_vector_type_v = __is_vector_type<_Tp>::value;

// }}}
// __is_intrinsic_type {{{
#if _GLIBCXX_SIMD_HAVE_SSE_ABI
template <typename _Tp>
  using __is_intrinsic_type = __is_vector_type<_Tp>;
#else // not SSE (x86)
template <typename _Tp, typename = void_t<>>
  struct __is_intrinsic_type : false_type {};

template <typename _Tp>
  struct __is_intrinsic_type<
    _Tp, void_t<typename __intrinsic_type<
           remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
  : is_same<_Tp, typename __intrinsic_type<
                   remove_reference_t<decltype(declval<_Tp>()[0])>,
                   sizeof(_Tp)>::type> {};
#endif

template <typename _Tp>
  inline constexpr bool __is_intrinsic_type_v = __is_intrinsic_type<_Tp>::value;

// }}}
// _VectorTraits{{{
template <typename _Tp, typename = void_t<>>
  struct _VectorTraitsImpl;

template <typename _Tp>
  struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp>
                                            || __is_intrinsic_type_v<_Tp>>>
  {
    using type = _Tp;
    using value_type = remove_reference_t<decltype(declval<_Tp>()[0])>;
    static constexpr int _S_full_size = sizeof(_Tp) / sizeof(value_type);
    using _Wrapper = _SimdWrapper<value_type, _S_full_size>;
    template <typename _Up, int _W = _S_full_size>
      static constexpr bool _S_is
        = is_same_v<value_type, _Up> && _W == _S_full_size;
  };

template <typename _Tp, size_t _Np>
  struct _VectorTraitsImpl<_SimdWrapper<_Tp, _Np>,
                           void_t<__vector_type_t<_Tp, _Np>>>
  {
    using type = __vector_type_t<_Tp, _Np>;
    using value_type = _Tp;
    static constexpr int _S_full_size = sizeof(type) / sizeof(value_type);
    using _Wrapper = _SimdWrapper<_Tp, _Np>;
    // partial means fewer used elements than the full vector provides
    static constexpr bool _S_is_partial = (_Np < _S_full_size);
    static constexpr int _S_partial_width = _Np;
    template <typename _Up, int _W = _S_full_size>
      static constexpr bool _S_is
        = is_same_v<value_type, _Up> && _W == _S_full_size;
  };

template <typename _Tp, typename = typename _VectorTraitsImpl<_Tp>::type>
  using _VectorTraits = _VectorTraitsImpl<_Tp>;

// }}}
// __as_vector{{{
template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __as_vector(_V __x)
  {
    if constexpr (__is_vector_type_v<_V>)
      return __x;
    else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
      return __data(__x)._M_data;
    else if constexpr (__is_vectorizable_v<_V>)
      return __vector_type_t<_V, 2>{__x};
    else
      return __x._M_data;
  }

// }}}
// __as_wrapper{{{
template <size_t _Np = 0, typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __as_wrapper(_V __x)
  {
    if constexpr (__is_vector_type_v<_V>)
      return _SimdWrapper<typename _VectorTraits<_V>::value_type,
                          (_Np > 0 ? _Np : _VectorTraits<_V>::_S_full_size)>(__x);
    else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
      {
        static_assert(_V::size() == _Np);
        return __data(__x);
      }
    else
      {
        static_assert(_V::_S_size == _Np);
        return __x;
      }
  }

// }}}
// __intrin_bitcast{{{
template <typename _To, typename _From>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __intrin_bitcast(_From __v)
  {
    static_assert((__is_vector_type_v<_From> || __is_intrinsic_type_v<_From>)
                  && (__is_vector_type_v<_To> || __is_intrinsic_type_v<_To>));
    if constexpr (sizeof(_To) == sizeof(_From))
      return reinterpret_cast<_To>(__v);
    else if constexpr (sizeof(_From) > sizeof(_To))
      if constexpr (sizeof(_To) >= 16)
        return reinterpret_cast<const __may_alias<_To>&>(__v);
      else
        {
          _To __r;
          __builtin_memcpy(&__r, &__v, sizeof(_To));
          return __r;
        }
#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
    else if constexpr (__have_avx && sizeof(_From) == 16 && sizeof(_To) == 32)
      return reinterpret_cast<_To>(__builtin_ia32_ps256_ps(
        reinterpret_cast<__vector_type_t<float, 4>>(__v)));
    else if constexpr (__have_avx512f && sizeof(_From) == 16
                       && sizeof(_To) == 64)
      return reinterpret_cast<_To>(__builtin_ia32_ps512_ps(
        reinterpret_cast<__vector_type_t<float, 4>>(__v)));
    else if constexpr (__have_avx512f && sizeof(_From) == 32
                       && sizeof(_To) == 64)
      return reinterpret_cast<_To>(__builtin_ia32_ps512_256ps(
        reinterpret_cast<__vector_type_t<float, 8>>(__v)));
#endif // _GLIBCXX_SIMD_X86INTRIN
    else if constexpr (sizeof(__v) <= 8)
      return reinterpret_cast<_To>(
        __vector_type_t<__int_for_sizeof_t<_From>, sizeof(_To) / sizeof(_From)>{
          reinterpret_cast<__int_for_sizeof_t<_From>>(__v)});
    else
      {
        static_assert(sizeof(_To) > sizeof(_From));
        _To __r = {};
        __builtin_memcpy(&__r, &__v, sizeof(_From));
        return __r;
      }
  }

// }}}
// __vector_bitcast{{{
template <typename _To, size_t _NN = 0, typename _From,
          typename _FromVT = _VectorTraits<_From>,
          size_t _Np = _NN == 0 ? sizeof(_From) / sizeof(_To) : _NN>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
  __vector_bitcast(_From __x)
  {
    using _R = __vector_type_t<_To, _Np>;
    return __intrin_bitcast<_R>(__x);
  }

template <typename _To, size_t _NN = 0, typename _Tp, size_t _Nx,
          size_t _Np
            = _NN == 0 ? sizeof(_SimdWrapper<_Tp, _Nx>) / sizeof(_To) : _NN>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
  __vector_bitcast(const _SimdWrapper<_Tp, _Nx>& __x)
  {
    static_assert(_Np > 1);
    return __intrin_bitcast<__vector_type_t<_To, _Np>>(__x._M_data);
  }

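// For example, __vector_bitcast<int>(__x) on a __vector_type_t<float, 4>
// reinterprets the same 16 bytes as __vector_type_t<int, 4>, preserving each
// lane's bit pattern.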
// }}}
// __convert_x86 declarations {{{
#ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp,
                    _Tp, _Tp, _Tp, _Tp);
#endif // _GLIBCXX_SIMD_WORKAROUND_PR85048

//}}}
// __bit_cast {{{
template <typename _To, typename _From>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __bit_cast(const _From __x)
  {
    // TODO: implement with / replace by __builtin_bit_cast ASAP
    static_assert(sizeof(_To) == sizeof(_From));
    constexpr bool __to_is_vectorizable
      = is_arithmetic_v<_To> || is_enum_v<_To>;
    constexpr bool __from_is_vectorizable
      = is_arithmetic_v<_From> || is_enum_v<_From>;
    if constexpr (__is_vector_type_v<_To> && __is_vector_type_v<_From>)
      return reinterpret_cast<_To>(__x);
    else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
      {
        using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
        return reinterpret_cast<_To>(_FV{__x});
      }
    else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
      {
        using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
        using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
        return reinterpret_cast<_TV>(_FV{__x})[0];
      }
    else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
      {
        using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
        return reinterpret_cast<_TV>(__x)[0];
      }
    else
      {
        _To __r;
        __builtin_memcpy(reinterpret_cast<char*>(&__r),
                         reinterpret_cast<const char*>(&__x), sizeof(_To));
        return __r;
      }
  }

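// For example, __bit_cast<_UInt>(1.f) == 0x3f800000u under IEC 559 float,
// the same value __builtin_bit_cast(unsigned, 1.f) produces (see TODO above).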
// }}}
// __to_intrin {{{
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
          typename _R
            = __intrinsic_type_t<typename _TVT::value_type, _TVT::_S_full_size>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __to_intrin(_Tp __x)
  {
    static_assert(sizeof(__x) <= sizeof(_R),
                  "__to_intrin may never drop values off the end");
    if constexpr (sizeof(__x) == sizeof(_R))
      return reinterpret_cast<_R>(__as_vector(__x));
    else
      {
        using _Up = __int_for_sizeof_t<_Tp>;
        return reinterpret_cast<_R>(
          __vector_type_t<_Up, sizeof(_R) / sizeof(_Up)>{__bit_cast<_Up>(__x)});
      }
  }

// }}}
// __make_vector{{{
template <typename _Tp, typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, sizeof...(_Args)>
  __make_vector(const _Args&... __args)
  {
    return __vector_type_t<_Tp, sizeof...(_Args)>{static_cast<_Tp>(__args)...};
  }

// }}}
// __vector_broadcast{{{
template <size_t _Np, typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  __vector_broadcast(_Tp __x)
  {
    return __call_with_n_evaluations<_Np>(
      [](auto... __xx) { return __vector_type_t<_Tp, _Np>{__xx...}; },
      [&__x](int) { return __x; });
  }

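// e.g. __make_vector<float>(1, 2) yields __vector_type_t<float, 2>{1, 2} and
// __vector_broadcast<4>(1.f) yields __vector_type_t<float, 4>{1, 1, 1, 1}.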
1674// }}}
1675// __generate_vector{{{
1676 template <typename _Tp, size_t _Np, typename _Gp, size_t... _I>
1677 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1678 __generate_vector_impl(_Gp&& __gen, index_sequence<_I...>)
1679 {
1680 return __vector_type_t<_Tp, _Np>{
1681 static_cast<_Tp>(__gen(_SizeConstant<_I>()))...};
1682 }
1683
1684template <typename _V, typename _VVT = _VectorTraits<_V>, typename _Gp>
1685 _GLIBCXX_SIMD_INTRINSIC constexpr _V
1686 __generate_vector(_Gp&& __gen)
1687 {
1688 if constexpr (__is_vector_type_v<_V>)
1689 return __generate_vector_impl<typename _VVT::value_type,
1690 _VVT::_S_full_size>(
1691 static_cast<_Gp&&>(__gen), make_index_sequence<_VVT::_S_full_size>());
1692 else
1693 return __generate_vector_impl<typename _VVT::value_type,
1694 _VVT::_S_partial_width>(
1695 static_cast<_Gp&&>(__gen),
1696 make_index_sequence<_VVT::_S_partial_width>());
1697 }
1698
1699template <typename _Tp, size_t _Np, typename _Gp>
1700 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1701 __generate_vector(_Gp&& __gen)
1702 {
1703 return __generate_vector_impl<_Tp, _Np>(static_cast<_Gp&&>(__gen),
1704 make_index_sequence<_Np>());
1705 }
1706
1707// }}}
1708// __xor{{{
1709template <typename _TW>
1710 _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1711 __xor(_TW __a, _TW __b) noexcept
1712 {
1713 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1714 {
1715 using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1716 _VectorTraitsImpl<_TW>>::value_type;
1717 if constexpr (is_floating_point_v<_Tp>)
1718 {
1719 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1720 return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1721 ^ __vector_bitcast<_Ip>(__b));
1722 }
1723 else if constexpr (__is_vector_type_v<_TW>)
1724 return __a ^ __b;
1725 else
1726 return __a._M_data ^ __b._M_data;
1727 }
1728 else
1729 return __a ^ __b;
1730 }
1731
1732// }}}
1733// __or{{{
1734template <typename _TW>
1735 _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1736 __or(_TW __a, _TW __b) noexcept
1737 {
1738 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1739 {
1740 using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1741 _VectorTraitsImpl<_TW>>::value_type;
1742 if constexpr (is_floating_point_v<_Tp>)
1743 {
1744 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1745 return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1746 | __vector_bitcast<_Ip>(__b));
1747 }
1748 else if constexpr (__is_vector_type_v<_TW>)
1749 return __a | __b;
1750 else
1751 return __a._M_data | __b._M_data;
1752 }
1753 else
1754 return __a | __b;
1755 }
1756
1757// }}}
1758// __and{{{
1759template <typename _TW>
1760 _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1761 __and(_TW __a, _TW __b) noexcept
1762 {
1763 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1764 {
1765 using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1766 _VectorTraitsImpl<_TW>>::value_type;
1767 if constexpr (is_floating_point_v<_Tp>)
1768 {
1769 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1770 return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1771 & __vector_bitcast<_Ip>(__b));
1772 }
1773 else if constexpr (__is_vector_type_v<_TW>)
1774 return __a & __b;
1775 else
1776 return __a._M_data & __b._M_data;
1777 }
1778 else
1779 return __a & __b;
1780 }
1781
1782// }}}
1783// __andnot{{{
1784#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1785static constexpr struct
1786{
1787 _GLIBCXX_SIMD_INTRINSIC __v4sf
1788 operator()(__v4sf __a, __v4sf __b) const noexcept
1789 { return __builtin_ia32_andnps(__a, __b); }
1790
1791 _GLIBCXX_SIMD_INTRINSIC __v2df
1792 operator()(__v2df __a, __v2df __b) const noexcept
1793 { return __builtin_ia32_andnpd(__a, __b); }
1794
1795 _GLIBCXX_SIMD_INTRINSIC __v2di
1796 operator()(__v2di __a, __v2di __b) const noexcept
1797 { return __builtin_ia32_pandn128(__a, __b); }
1798
1799 _GLIBCXX_SIMD_INTRINSIC __v8sf
1800 operator()(__v8sf __a, __v8sf __b) const noexcept
1801 { return __builtin_ia32_andnps256(__a, __b); }
1802
1803 _GLIBCXX_SIMD_INTRINSIC __v4df
1804 operator()(__v4df __a, __v4df __b) const noexcept
1805 { return __builtin_ia32_andnpd256(__a, __b); }
1806
1807 _GLIBCXX_SIMD_INTRINSIC __v4di
1808 operator()(__v4di __a, __v4di __b) const noexcept
1809 {
1810 if constexpr (__have_avx2)
1811 return __builtin_ia32_andnotsi256(__a, __b);
1812 else
1813 return reinterpret_cast<__v4di>(
1814 __builtin_ia32_andnpd256(reinterpret_cast<__v4df>(__a),
1815 reinterpret_cast<__v4df>(__b)));
1816 }
1817
1818 _GLIBCXX_SIMD_INTRINSIC __v16sf
1819 operator()(__v16sf __a, __v16sf __b) const noexcept
1820 {
1821 if constexpr (__have_avx512dq)
1822 return _mm512_andnot_ps(__a, __b);
1823 else
1824 return reinterpret_cast<__v16sf>(
1825 _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
1826 reinterpret_cast<__v8di>(__b)));
1827 }
1828
1829 _GLIBCXX_SIMD_INTRINSIC __v8df
1830 operator()(__v8df __a, __v8df __b) const noexcept
1831 {
1832 if constexpr (__have_avx512dq)
1833 return _mm512_andnot_pd(__a, __b);
1834 else
1835 return reinterpret_cast<__v8df>(
1836 _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
1837 reinterpret_cast<__v8di>(__b)));
1838 }
1839
1840 _GLIBCXX_SIMD_INTRINSIC __v8di
1841 operator()(__v8di __a, __v8di __b) const noexcept
1842 { return _mm512_andnot_si512(__a, __b); }
1843} _S_x86_andnot;
1844#endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
1845
1846template <typename _TW>
1847 _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1848 __andnot(_TW __a, _TW __b) noexcept
1849 {
1850 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1851 {
1852 using _TVT = conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1853 _VectorTraitsImpl<_TW>>;
1854 using _Tp = typename _TVT::value_type;
1855#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1856 if constexpr (sizeof(_TW) >= 16)
1857 {
1858 const auto __ai = __to_intrin(__a);
1859 const auto __bi = __to_intrin(__b);
1860 if (!__builtin_is_constant_evaluated()
1861 && !(__builtin_constant_p(__ai) && __builtin_constant_p(__bi)))
1862 {
1863 const auto __r = _S_x86_andnot(__ai, __bi);
1864 if constexpr (is_convertible_v<decltype(__r), _TW>)
1865 return __r;
1866 else
1867 return reinterpret_cast<typename _TVT::type>(__r);
1868 }
1869 }
1870#endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
1871 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1872 return __vector_bitcast<_Tp>(~__vector_bitcast<_Ip>(__a)
1873 & __vector_bitcast<_Ip>(__b));
1874 }
1875 else
1876 return ~__a & __b;
1877 }
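// Note the operand order, which follows the x86 ANDN convention: the *first*
// argument is complemented, i.e. __andnot(__k, __x) == __and(__not(__k), __x)
// per the fallback above. Illustrative use: keep the bits of __x only where
// the mask __k is zero.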
1878
1879// }}}
1880// __not{{{
1881template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1882 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
1883 __not(_Tp __a) noexcept
1884 {
1885 if constexpr (is_floating_point_v<typename _TVT::value_type>)
1886 return reinterpret_cast<typename _TVT::type>(
1887 ~__vector_bitcast<unsigned>(__a));
1888 else
1889 return ~__a;
1890 }
1891
1892// }}}
1893// __concat{{{
1894template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
1895 typename _R = __vector_type_t<typename _TVT::value_type,
1896 _TVT::_S_full_size * 2>>
1897 constexpr _R
1898 __concat(_Tp a_, _Tp b_)
1899 {
1900#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
1901 using _W
1902 = conditional_t<is_floating_point_v<typename _TVT::value_type>, double,
1903 conditional_t<(sizeof(_Tp) >= 2 * sizeof(long long)),
1904 long long, typename _TVT::value_type>>;
1905 constexpr int input_width = sizeof(_Tp) / sizeof(_W);
1906 const auto __a = __vector_bitcast<_W>(a_);
1907 const auto __b = __vector_bitcast<_W>(b_);
1908 using _Up = __vector_type_t<_W, sizeof(_R) / sizeof(_W)>;
1909#else
1910 constexpr int input_width = _TVT::_S_full_size;
1911 const _Tp& __a = a_;
1912 const _Tp& __b = b_;
1913 using _Up = _R;
1914#endif
1915 if constexpr (input_width == 2)
1916 return reinterpret_cast<_R>(_Up{__a[0], __a[1], __b[0], __b[1]});
1917 else if constexpr (input_width == 4)
1918 return reinterpret_cast<_R>(
1919 _Up{__a[0], __a[1], __a[2], __a[3], __b[0], __b[1], __b[2], __b[3]});
1920 else if constexpr (input_width == 8)
1921 return reinterpret_cast<_R>(
1922 _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], __a[7],
1923 __b[0], __b[1], __b[2], __b[3], __b[4], __b[5], __b[6], __b[7]});
1924 else if constexpr (input_width == 16)
1925 return reinterpret_cast<_R>(
1926 _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
1927 __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
1928 __a[14], __a[15], __b[0], __b[1], __b[2], __b[3], __b[4],
1929 __b[5], __b[6], __b[7], __b[8], __b[9], __b[10], __b[11],
1930 __b[12], __b[13], __b[14], __b[15]});
1931 else if constexpr (input_width == 32)
1932 return reinterpret_cast<_R>(
1933 _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
1934 __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
1935 __a[14], __a[15], __a[16], __a[17], __a[18], __a[19], __a[20],
1936 __a[21], __a[22], __a[23], __a[24], __a[25], __a[26], __a[27],
1937 __a[28], __a[29], __a[30], __a[31], __b[0], __b[1], __b[2],
1938 __b[3], __b[4], __b[5], __b[6], __b[7], __b[8], __b[9],
1939 __b[10], __b[11], __b[12], __b[13], __b[14], __b[15], __b[16],
1940 __b[17], __b[18], __b[19], __b[20], __b[21], __b[22], __b[23],
1941 __b[24], __b[25], __b[26], __b[27], __b[28], __b[29], __b[30],
1942 __b[31]});
1943 }
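// Illustrative sketch (hypothetical values): concatenation doubles the width,
//   __vector_type_t<int, 4> __a = {0, 1, 2, 3}, __b = {4, 5, 6, 7};
//   // __concat(__a, __b) == {0, 1, 2, 3, 4, 5, 6, 7}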
1944
1945// }}}
1946// __zero_extend {{{
1947template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1948 struct _ZeroExtendProxy
1949 {
1950 using value_type = typename _TVT::value_type;
1951 static constexpr size_t _Np = _TVT::_S_full_size;
1952 const _Tp __x;
1953
1954 template <typename _To, typename _ToVT = _VectorTraits<_To>,
1955 typename
1956 = enable_if_t<is_same_v<typename _ToVT::value_type, value_type>>>
1957 _GLIBCXX_SIMD_INTRINSIC operator _To() const
1958 {
1959 constexpr size_t _ToN = _ToVT::_S_full_size;
1960 if constexpr (_ToN == _Np)
1961 return __x;
1962 else if constexpr (_ToN == 2 * _Np)
1963 {
1964#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
1965 if constexpr (__have_avx && _TVT::template _S_is<float, 4>)
1966 return __vector_bitcast<value_type>(
1967 _mm256_insertf128_ps(__m256(), __x, 0));
1968 else if constexpr (__have_avx && _TVT::template _S_is<double, 2>)
1969 return __vector_bitcast<value_type>(
1970 _mm256_insertf128_pd(__m256d(), __x, 0));
1971 else if constexpr (__have_avx2 && _Np * sizeof(value_type) == 16)
1972 return __vector_bitcast<value_type>(
1973 _mm256_insertf128_si256(__m256i(), __to_intrin(__x), 0));
1974 else if constexpr (__have_avx512f && _TVT::template _S_is<float, 8>)
1975 {
1976 if constexpr (__have_avx512dq)
1977 return __vector_bitcast<value_type>(
1978 _mm512_insertf32x8(__m512(), __x, 0));
1979 else
1980 return reinterpret_cast<__m512>(
1981 _mm512_insertf64x4(__m512d(),
1982 reinterpret_cast<__m256d>(__x), 0));
1983 }
1984 else if constexpr (__have_avx512f
1985 && _TVT::template _S_is<double, 4>)
1986 return __vector_bitcast<value_type>(
1987 _mm512_insertf64x4(__m512d(), __x, 0));
1988 else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 32)
1989 return __vector_bitcast<value_type>(
1990 _mm512_inserti64x4(__m512i(), __to_intrin(__x), 0));
1991#endif
1992 return __concat(__x, _Tp());
1993 }
1994 else if constexpr (_ToN == 4 * _Np)
1995 {
1996#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
1997 if constexpr (__have_avx512dq && _TVT::template _S_is<double, 2>)
1998 {
1999 return __vector_bitcast<value_type>(
2000 _mm512_insertf64x2(__m512d(), __x, 0));
2001 }
2002 else if constexpr (__have_avx512f
2003 && is_floating_point_v<value_type>)
2004 {
2005 return __vector_bitcast<value_type>(
2006 _mm512_insertf32x4(__m512(), reinterpret_cast<__m128>(__x),
2007 0));
2008 }
2009 else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 16)
2010 {
2011 return __vector_bitcast<value_type>(
2012 _mm512_inserti32x4(__m512i(), __to_intrin(__x), 0));
2013 }
2014#endif
2015 return __concat(__concat(__x, _Tp()),
2016 __vector_type_t<value_type, _Np * 2>());
2017 }
2018 else if constexpr (_ToN == 8 * _Np)
2019 return __concat(operator __vector_type_t<value_type, _Np * 4>(),
2020 __vector_type_t<value_type, _Np * 4>());
2021 else if constexpr (_ToN == 16 * _Np)
2022 return __concat(operator __vector_type_t<value_type, _Np * 8>(),
2023 __vector_type_t<value_type, _Np * 8>());
2024 else
2025 __assert_unreachable<_Tp>();
2026 }
2027 };
2028
2029template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2030 _GLIBCXX_SIMD_INTRINSIC _ZeroExtendProxy<_Tp, _TVT>
2031 __zero_extend(_Tp __x)
2032 { return {__x}; }
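// The proxy defers the choice of the destination width to its conversion
// operator. Usage sketch (assuming both vector types exist on the target):
//   __vector_type_t<float, 4> __x = {1.f, 2.f, 3.f, 4.f};
//   __vector_type_t<float, 8> __y = __zero_extend(__x); // upper half zeroed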
2033
2034// }}}
2035// __extract<_Offset, _SplitBy>{{{
2036template <int _Offset,
2037 int _SplitBy,
2038 typename _Tp,
2039 typename _TVT = _VectorTraits<_Tp>,
2040 typename _R = __vector_type_t<typename _TVT::value_type,
2041 _TVT::_S_full_size / _SplitBy>>
2042 _GLIBCXX_SIMD_INTRINSIC constexpr _R
2043 __extract(_Tp __in)
2044 {
2045 using value_type = typename _TVT::value_type;
2046#if _GLIBCXX_SIMD_X86INTRIN // {{{
2047 if constexpr (sizeof(_Tp) == 64 && _SplitBy == 4 && _Offset > 0)
2048 {
2049 if constexpr (__have_avx512dq && is_same_v<double, value_type>)
2050 return _mm512_extractf64x2_pd(__to_intrin(__in), _Offset);
2051 else if constexpr (is_floating_point_v<value_type>)
2052 return __vector_bitcast<value_type>(
2053 _mm512_extractf32x4_ps(__intrin_bitcast<__m512>(__in), _Offset));
2054 else
2055 return reinterpret_cast<_R>(
2056 _mm512_extracti32x4_epi32(__intrin_bitcast<__m512i>(__in),
2057 _Offset));
2058 }
2059 else
2060#endif // _GLIBCXX_SIMD_X86INTRIN }}}
2061 {
2062#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2063 using _W = conditional_t<
2064 is_floating_point_v<value_type>, double,
2065 conditional_t<(sizeof(_R) >= 16), long long, value_type>>;
2066 static_assert(sizeof(_R) % sizeof(_W) == 0);
2067 constexpr int __return_width = sizeof(_R) / sizeof(_W);
2068 using _Up = __vector_type_t<_W, __return_width>;
2069 const auto __x = __vector_bitcast<_W>(__in);
2070#else
2071 constexpr int __return_width = _TVT::_S_full_size / _SplitBy;
2072 using _Up = _R;
2073 const __vector_type_t<value_type, _TVT::_S_full_size>& __x
2074 = __in; // only needed for _Tp = _SimdWrapper<value_type, _Np>
2075#endif
2076 constexpr int _O = _Offset * __return_width;
2077 return __call_with_subscripts<__return_width, _O>(
2078 __x, [](auto... __entries) {
2079 return reinterpret_cast<_R>(_Up{__entries...});
2080 });
2081 }
2082 }
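// Usage sketch: _SplitBy divides the input into equal parts and _Offset
// selects one of them, e.g. with a hypothetical 8-wide input:
//   __vector_type_t<int, 8> __v = {0, 1, 2, 3, 4, 5, 6, 7};
//   // __extract<1, 2>(__v) == {4, 5, 6, 7} (the high half)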
2083
2084// }}}
2085// __lo/__hi64[z]{{{
2086template <typename _Tp,
2087 typename _R
2088 = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2089 _GLIBCXX_SIMD_INTRINSIC constexpr _R
2090 __lo64(_Tp __x)
2091 {
2092 _R __r{};
2093 __builtin_memcpy(&__r, &__x, 8);
2094 return __r;
2095 }
2096
2097template <typename _Tp,
2098 typename _R
2099 = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2100 _GLIBCXX_SIMD_INTRINSIC constexpr _R
2101 __hi64(_Tp __x)
2102 {
2103 static_assert(sizeof(_Tp) == 16, "use __hi64z if you meant it");
2104 _R __r{};
2105 __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2106 return __r;
2107 }
2108
2109template <typename _Tp,
2110 typename _R
2111 = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2112 _GLIBCXX_SIMD_INTRINSIC constexpr _R
2113 __hi64z([[maybe_unused]] _Tp __x)
2114 {
2115 _R __r{};
2116 if constexpr (sizeof(_Tp) == 16)
2117 __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2118 return __r;
2119 }
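// Illustrative: for a 16-byte __v holding 4 floats, __lo64(__v) contains
// elements {0, 1} and __hi64(__v) elements {2, 3}, each as an 8-byte vector;
// __hi64z also accepts 8-byte inputs, for which it returns all zeros.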
2120
2121// }}}
2122// __lo/__hi128{{{
2123template <typename _Tp>
2124 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2125 __lo128(_Tp __x)
2126 { return __extract<0, sizeof(_Tp) / 16>(__x); }
2127
2128template <typename _Tp>
2129 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2130 __hi128(_Tp __x)
2131 {
2132 static_assert(sizeof(__x) == 32);
2133 return __extract<1, 2>(__x);
2134 }
2135
2136// }}}
2137// __lo/__hi256{{{
2138template <typename _Tp>
2139 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2140 __lo256(_Tp __x)
2141 {
2142 static_assert(sizeof(__x) == 64);
2143 return __extract<0, 2>(__x);
2144 }
2145
2146template <typename _Tp>
2147 _GLIBCXX_SIMD_INTRINSIC constexpr auto
2148 __hi256(_Tp __x)
2149 {
2150 static_assert(sizeof(__x) == 64);
2151 return __extract<1, 2>(__x);
2152 }
2153
2154// }}}
2155// __auto_bitcast{{{
2156template <typename _Tp>
2157 struct _AutoCast
2158 {
2159 static_assert(__is_vector_type_v<_Tp>);
2160
2161 const _Tp __x;
2162
2163 template <typename _Up, typename _UVT = _VectorTraits<_Up>>
2164 _GLIBCXX_SIMD_INTRINSIC constexpr operator _Up() const
2165 { return __intrin_bitcast<typename _UVT::type>(__x); }
2166 };
2167
2168template <typename _Tp>
2169 _GLIBCXX_SIMD_INTRINSIC constexpr _AutoCast<_Tp>
2170 __auto_bitcast(const _Tp& __x)
2171 { return {__x}; }
2172
2173template <typename _Tp, size_t _Np>
2174 _GLIBCXX_SIMD_INTRINSIC constexpr
2175 _AutoCast<typename _SimdWrapper<_Tp, _Np>::_BuiltinType>
2176 __auto_bitcast(const _SimdWrapper<_Tp, _Np>& __x)
2177 { return {__x._M_data}; }
2178
2179// }}}
2180// ^^^ ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- ^^^
2181
2182#if _GLIBCXX_SIMD_HAVE_SSE_ABI
2183// __bool_storage_member_type{{{
2184#if _GLIBCXX_SIMD_HAVE_AVX512F && _GLIBCXX_SIMD_X86INTRIN
2185template <size_t _Size>
2186 struct __bool_storage_member_type
2187 {
2188 static_assert((_Size & (_Size - 1)) != 0,
2189 "This trait may only be used for non-power-of-2 sizes. "
2190 "Power-of-2 sizes must be specialized.");
2191 using type =
2192 typename __bool_storage_member_type<std::__bit_ceil(_Size)>::type;
2193 };
2194
2195template <>
2196 struct __bool_storage_member_type<1> { using type = bool; };
2197
2198template <>
2199 struct __bool_storage_member_type<2> { using type = __mmask8; };
2200
2201template <>
2202 struct __bool_storage_member_type<4> { using type = __mmask8; };
2203
2204template <>
2205 struct __bool_storage_member_type<8> { using type = __mmask8; };
2206
2207template <>
2208 struct __bool_storage_member_type<16> { using type = __mmask16; };
2209
2210template <>
2211 struct __bool_storage_member_type<32> { using type = __mmask32; };
2212
2213template <>
2214 struct __bool_storage_member_type<64> { using type = __mmask64; };
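// Non-power-of-2 sizes resolve through the primary template's __bit_ceil
// recursion, e.g. __bool_storage_member_type<3>::type is
// __bool_storage_member_type<4>::type, i.e. __mmask8 (illustrative).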
2215#endif // _GLIBCXX_SIMD_HAVE_AVX512F
2216
2217// }}}
2218// __intrinsic_type (x86){{{
2219// the following excludes bool via __is_vectorizable
2220#if _GLIBCXX_SIMD_HAVE_SSE
2221template <typename _Tp, size_t _Bytes>
2222 struct __intrinsic_type<_Tp, _Bytes,
2223 enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 64>>
2224 {
2225 static_assert(!is_same_v<_Tp, long double>,
2226 "no __intrinsic_type support for long double on x86");
2227
2228 static constexpr size_t _S_VBytes = _Bytes <= 16 ? 16
2229 : _Bytes <= 32 ? 32
2230 : 64;
2231
2232 using type [[__gnu__::__vector_size__(_S_VBytes)]]
2233 = conditional_t<is_integral_v<_Tp>, long long int, _Tp>;
2234 };
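// Illustrative mapping: _Bytes rounds up to the next register size (16, 32,
// or 64 Bytes) and all integral element types share the long long based
// intrinsic vector, e.g. __intrinsic_type<float, 16>::type has __m128 layout
// while __intrinsic_type<short, 16>::type has __m128i layout.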
2235#endif // _GLIBCXX_SIMD_HAVE_SSE
2236
2237// }}}
2238#endif // _GLIBCXX_SIMD_HAVE_SSE_ABI
2239// __intrinsic_type (ARM){{{
2240#if _GLIBCXX_SIMD_HAVE_NEON
2241template <>
2242 struct __intrinsic_type<float, 8, void>
2243 { using type = float32x2_t; };
2244
2245template <>
2246 struct __intrinsic_type<float, 16, void>
2247 { using type = float32x4_t; };
2248
2249#if _GLIBCXX_SIMD_HAVE_NEON_A64
2250template <>
2251 struct __intrinsic_type<double, 8, void>
2252 { using type = float64x1_t; };
2253
2254template <>
2255 struct __intrinsic_type<double, 16, void>
2256 { using type = float64x2_t; };
2257#endif
2258
2259#define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np) \
2260template <> \
2261 struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>, \
2262 _Np * _Bits / 8, void> \
2263 { using type = int##_Bits##x##_Np##_t; }; \
2264template <> \
2265 struct __intrinsic_type<make_unsigned_t<__int_with_sizeof_t<_Bits / 8>>, \
2266 _Np * _Bits / 8, void> \
2267 { using type = uint##_Bits##x##_Np##_t; }
2268_GLIBCXX_SIMD_ARM_INTRIN(8, 8);
2269_GLIBCXX_SIMD_ARM_INTRIN(8, 16);
2270_GLIBCXX_SIMD_ARM_INTRIN(16, 4);
2271_GLIBCXX_SIMD_ARM_INTRIN(16, 8);
2272_GLIBCXX_SIMD_ARM_INTRIN(32, 2);
2273_GLIBCXX_SIMD_ARM_INTRIN(32, 4);
2274_GLIBCXX_SIMD_ARM_INTRIN(64, 1);
2275_GLIBCXX_SIMD_ARM_INTRIN(64, 2);
2276#undef _GLIBCXX_SIMD_ARM_INTRIN
2277
2278template <typename _Tp, size_t _Bytes>
2279 struct __intrinsic_type<_Tp, _Bytes,
2280 enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2281 {
2282 static constexpr int _SVecBytes = _Bytes <= 8 ? 8 : 16;
2283 using _Ip = __int_for_sizeof_t<_Tp>;
2284 using _Up = conditional_t<
2285 is_floating_point_v<_Tp>, _Tp,
2286 conditional_t<is_unsigned_v<_Tp>, make_unsigned_t<_Ip>, _Ip>>;
2287 static_assert(!is_same_v<_Tp, _Up> || _SVecBytes != _Bytes,
2288 "should use explicit specialization above");
2289 using type = typename __intrinsic_type<_Up, _SVecBytes>::type;
2290 };
2291#endif // _GLIBCXX_SIMD_HAVE_NEON
2292
2293// }}}
2294// __intrinsic_type (PPC){{{
2295#ifdef __ALTIVEC__
2296template <typename _Tp>
2297 struct __intrinsic_type_impl;
2298
2299#define _GLIBCXX_SIMD_PPC_INTRIN(_Tp) \
2300 template <> \
2301 struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
2302_GLIBCXX_SIMD_PPC_INTRIN(float);
2303_GLIBCXX_SIMD_PPC_INTRIN(double);
2304_GLIBCXX_SIMD_PPC_INTRIN(signed char);
2305_GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
2306_GLIBCXX_SIMD_PPC_INTRIN(signed short);
2307_GLIBCXX_SIMD_PPC_INTRIN(unsigned short);
2308_GLIBCXX_SIMD_PPC_INTRIN(signed int);
2309_GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
2310_GLIBCXX_SIMD_PPC_INTRIN(signed long);
2311_GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
2312_GLIBCXX_SIMD_PPC_INTRIN(signed long long);
2313_GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
2314#undef _GLIBCXX_SIMD_PPC_INTRIN
2315
2316template <typename _Tp, size_t _Bytes>
2317 struct __intrinsic_type<_Tp, _Bytes,
2318 enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2319 {
2320 static constexpr bool _S_is_ldouble = is_same_v<_Tp, long double>;
2321 // allow _Tp == long double with -mlong-double-64
2322 static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)),
2323 "no __intrinsic_type support for long double on PPC");
2324#ifndef __VSX__
2325 static_assert(!is_same_v<_Tp, double>,
2326 "no __intrinsic_type support for double on PPC w/o VSX");
2327#endif
2328 using type =
2329 typename __intrinsic_type_impl<
2330 conditional_t<is_floating_point_v<_Tp>,
2331 conditional_t<_S_is_ldouble, double, _Tp>,
2332 __int_for_sizeof_t<_Tp>>>::type;
2333 };
2334#endif // __ALTIVEC__
2335
2336// }}}
2337// _SimdWrapper<bool>{{{1
2338template <size_t _Width>
2339 struct _SimdWrapper<bool, _Width,
2340 void_t<typename __bool_storage_member_type<_Width>::type>>
2341 {
2342 using _BuiltinType = typename __bool_storage_member_type<_Width>::type;
2343 using value_type = bool;
2344
2345 static constexpr size_t _S_full_size = sizeof(_BuiltinType) * __CHAR_BIT__;
2346
2347 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _S_full_size>
2348 __as_full_vector() const { return _M_data; }
2349
2350 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper() = default;
2351 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(_BuiltinType __k)
2352 : _M_data(__k) {}
2353
2354 _GLIBCXX_SIMD_INTRINSIC operator const _BuiltinType&() const
2355 { return _M_data; }
2356
2357 _GLIBCXX_SIMD_INTRINSIC operator _BuiltinType&()
2358 { return _M_data; }
2359
2360 _GLIBCXX_SIMD_INTRINSIC _BuiltinType __intrin() const
2361 { return _M_data; }
2362
2363 _GLIBCXX_SIMD_INTRINSIC constexpr value_type operator[](size_t __i) const
2364 { return _M_data & (_BuiltinType(1) << __i); }
2365
2366 template <size_t __i>
2367 _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2368 operator[](_SizeConstant<__i>) const
2369 { return _M_data & (_BuiltinType(1) << __i); }
2370
2371 _GLIBCXX_SIMD_INTRINSIC constexpr void _M_set(size_t __i, value_type __x)
2372 {
2373 if (__x)
2374 _M_data |= (_BuiltinType(1) << __i);
2375 else
2376 _M_data &= ~(_BuiltinType(1) << __i);
2377 }
2378
2379 _GLIBCXX_SIMD_INTRINSIC
2380 constexpr bool _M_is_constprop() const
2381 { return __builtin_constant_p(_M_data); }
2382
2383 _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_none_of() const
2384 {
2385 if (__builtin_constant_p(_M_data))
2386 {
2387 constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2388 constexpr _BuiltinType __active_mask
2389 = ~_BuiltinType() >> (__nbits - _Width);
2390 return (_M_data & __active_mask) == 0;
2391 }
2392 return false;
2393 }
2394
2395 _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_all_of() const
2396 {
2397 if (__builtin_constant_p(_M_data))
2398 {
2399 constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2400 constexpr _BuiltinType __active_mask
2401 = ~_BuiltinType() >> (__nbits - _Width);
2402 return (_M_data & __active_mask) == __active_mask;
2403 }
2404 return false;
2405 }
2406
2407 _BuiltinType _M_data;
2408 };
2409
2410// _SimdWrapperBase{{{1
2411template <bool _MustZeroInitPadding, typename _BuiltinType>
2412 struct _SimdWrapperBase;
2413
2414template <typename _BuiltinType>
2415 struct _SimdWrapperBase<false, _BuiltinType> // no padding or no SNaNs
2416 {
2417 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase() = default;
2418 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase(_BuiltinType __init)
2419 : _M_data(__init)
2420 {}
2421
2422 _BuiltinType _M_data;
2423 };
2424
2425template <typename _BuiltinType>
2426 struct _SimdWrapperBase<true, _BuiltinType> // with padding that needs to
2427 // never become SNaN
2428 {
2429 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase() : _M_data() {}
2430 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase(_BuiltinType __init)
2431 : _M_data(__init)
2432 {}
2433
2434 _BuiltinType _M_data;
2435 };
2436
2437// }}}
2438// _SimdWrapper{{{
2439template <typename _Tp, size_t _Width>
2440 struct _SimdWrapper<
2441 _Tp, _Width,
2442 void_t<__vector_type_t<_Tp, _Width>, __intrinsic_type_t<_Tp, _Width>>>
2443 : _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2444 && sizeof(_Tp) * _Width
2445 == sizeof(__vector_type_t<_Tp, _Width>),
2446 __vector_type_t<_Tp, _Width>>
2447 {
2448 using _Base
2449 = _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2450 && sizeof(_Tp) * _Width
2451 == sizeof(__vector_type_t<_Tp, _Width>),
2452 __vector_type_t<_Tp, _Width>>;
2453
2454 static_assert(__is_vectorizable_v<_Tp>);
2455 static_assert(_Width >= 2); // width 1 makes no sense, use _Tp directly instead
2456
2457 using _BuiltinType = __vector_type_t<_Tp, _Width>;
2458 using value_type = _Tp;
2459
2460 static inline constexpr size_t _S_full_size
2461 = sizeof(_BuiltinType) / sizeof(value_type);
2462 static inline constexpr int _S_size = _Width;
2463 static inline constexpr bool _S_is_partial = _S_full_size != _S_size;
2464
2465 using _Base::_M_data;
2466
2467 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _S_full_size>
2468 __as_full_vector() const
2469 { return _M_data; }
2470
2471 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(initializer_list<_Tp> __init)
2472 : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>(
2473 [&](auto __i) { return __init.begin()[__i.value]; })) {}
2474
2475 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper() = default;
2476 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(const _SimdWrapper&)
2477 = default;
2478 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(_SimdWrapper&&) = default;
2479
2480 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2481 operator=(const _SimdWrapper&) = default;
2482 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2483 operator=(_SimdWrapper&&) = default;
2484
2485 template <typename _V, typename = enable_if_t<disjunction_v<
2486 is_same<_V, __vector_type_t<_Tp, _Width>>,
2487 is_same<_V, __intrinsic_type_t<_Tp, _Width>>>>>
2488 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(_V __x)
2489 // __vector_bitcast can convert e.g. __m128 to __vector(2) float
2490 : _Base(__vector_bitcast<_Tp, _Width>(__x)) {}
2491
2492 template <typename... _As,
2493 typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...)
2494 && sizeof...(_As) <= _Width)>>
2495 _GLIBCXX_SIMD_INTRINSIC constexpr
2496 operator _SimdTuple<_Tp, _As...>() const
2497 {
2498 const auto& dd = _M_data; // workaround for GCC7 ICE
2499 return __generate_from_n_evaluations<sizeof...(_As),
2500 _SimdTuple<_Tp, _As...>>([&](
2501 auto __i) constexpr { return dd[int(__i)]; });
2502 }
2503
2504 _GLIBCXX_SIMD_INTRINSIC constexpr operator const _BuiltinType&() const
2505 { return _M_data; }
2506
2507 _GLIBCXX_SIMD_INTRINSIC constexpr operator _BuiltinType&()
2508 { return _M_data; }
2509
2510 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp operator[](size_t __i) const
2511 { return _M_data[__i]; }
2512
2513 template <size_t __i>
2514 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp operator[](_SizeConstant<__i>) const
2515 { return _M_data[__i]; }
2516
2517 _GLIBCXX_SIMD_INTRINSIC constexpr void _M_set(size_t __i, _Tp __x)
2518 { _M_data[__i] = __x; }
2519
2520 _GLIBCXX_SIMD_INTRINSIC
2521 constexpr bool _M_is_constprop() const
2522 { return __builtin_constant_p(_M_data); }
2523
2524 _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_none_of() const
2525 {
2526 if (__builtin_constant_p(_M_data))
2527 {
2528 bool __r = true;
2529 if constexpr (is_floating_point_v<_Tp>)
2530 {
2531 using _Ip = __int_for_sizeof_t<_Tp>;
2532 const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2533 __execute_n_times<_Width>(
2534 [&](auto __i) { __r &= __intdata[__i.value] == _Ip(); });
2535 }
2536 else
2537 __execute_n_times<_Width>(
2538 [&](auto __i) { __r &= _M_data[__i.value] == _Tp(); });
2539 return __r;
2540 }
2541 return false;
2542 }
2543
2544 _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_all_of() const
2545 {
2546 if (__builtin_constant_p(_M_data))
2547 {
2548 bool __r = true;
2549 if constexpr (is_floating_point_v<_Tp>)
2550 {
2551 using _Ip = __int_for_sizeof_t<_Tp>;
2552 const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2553 __execute_n_times<_Width>(
2554 [&](auto __i) { __r &= __intdata[__i.value] == ~_Ip(); });
2555 }
2556 else
2557 __execute_n_times<_Width>(
2558 [&](auto __i) { __r &= _M_data[__i.value] == ~_Tp(); });
2559 return __r;
2560 }
2561 return false;
2562 }
2563 };
2564
2565// }}}
2566
2567// __vectorized_sizeof {{{
2568template <typename _Tp>
2569 constexpr size_t
2570 __vectorized_sizeof()
2571 {
2572 if constexpr (!__is_vectorizable_v<_Tp>)
2573 return 0;
2574
2575 if constexpr (sizeof(_Tp) <= 8)
2576 {
2577 // X86:
2578 if constexpr (__have_avx512bw)
2579 return 64;
2580 if constexpr (__have_avx512f && sizeof(_Tp) >= 4)
2581 return 64;
2582 if constexpr (__have_avx2)
2583 return 32;
2584 if constexpr (__have_avx && is_floating_point_v<_Tp>)
2585 return 32;
2586 if constexpr (__have_sse2)
2587 return 16;
2588 if constexpr (__have_sse && is_same_v<_Tp, float>)
2589 return 16;
2590 /* The following is too much trouble because of mixed MMX and x87 code.
2591 * While nothing here explicitly uses MMX instructions or registers,
2592 * they are still emitted but no EMMS cleanup is done.
2593 if constexpr (__have_mmx && sizeof(_Tp) <= 4 && is_integral_v<_Tp>)
2594 return 8;
2595 */
2596
2597 // PowerPC:
2598 if constexpr (__have_power8vec
2599 || (__have_power_vmx && (sizeof(_Tp) < 8))
2600 || (__have_power_vsx && is_floating_point_v<_Tp>) )
2601 return 16;
2602
2603 // ARM:
2604 if constexpr (__have_neon_a64
2605 || (__have_neon_a32 && !is_same_v<_Tp, double>) )
2606 return 16;
2607 if constexpr (__have_neon
2608 && sizeof(_Tp) < 8
2609 // Only allow fp if the user allows non-IEC559 fp (e.g.
2610 // via -ffast-math). ARMv7 NEON fp does not conform to
2611 // IEC559.
2612 && (__support_neon_float || !is_floating_point_v<_Tp>))
2613 return 16;
2614 }
2615
2616 return sizeof(_Tp);
2617 }
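// Worked example (target-dependent): on an AVX2 machine without AVX-512,
// __vectorized_sizeof<float>() returns 32, so the native simd<float> below
// holds 32 / sizeof(float) == 8 elements.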
2618
2619// }}}
2620namespace simd_abi {
2621// most of simd_abi is defined in simd_detail.h
2622template <typename _Tp>
2623 inline constexpr int max_fixed_size
2624 = (__have_avx512bw && sizeof(_Tp) == 1) ? 64 : 32;
2625
2626// compatible {{{
2627#if defined __x86_64__ || defined __aarch64__
2628template <typename _Tp>
2629 using compatible = conditional_t<(sizeof(_Tp) <= 8), _VecBuiltin<16>, scalar>;
2630#elif defined __ARM_NEON
2631// FIXME: not sure, probably needs to be scalar (or dependent on the hard-float
2632// ABI?)
2633template <typename _Tp>
2634 using compatible
2635 = conditional_t<(sizeof(_Tp) < 8
2636 && (__support_neon_float || !is_floating_point_v<_Tp>)),
2637 _VecBuiltin<16>, scalar>;
2638#else
2639template <typename>
2640 using compatible = scalar;
2641#endif
2642
2643// }}}
2644// native {{{
2645template <typename _Tp>
2646 constexpr auto
2647 __determine_native_abi()
2648 {
2649 constexpr size_t __bytes = __vectorized_sizeof<_Tp>();
2650 if constexpr (__bytes == sizeof(_Tp))
2651 return static_cast<scalar*>(nullptr);
2652 else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64))
2653 return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr);
2654 else
2655 return static_cast<_VecBuiltin<__bytes>*>(nullptr);
2656 }
2657
2658template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
2659 using native = remove_pointer_t<decltype(__determine_native_abi<_Tp>())>;
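// Illustrative consequences of __determine_native_abi: scalar when no SIMD
// register fits _Tp, _VecBltnBtmsk (the AVX-512 bitmask ABI) with AVX512VL
// or with AVX512F at full 64-Byte width, and _VecBuiltin otherwise, e.g.
// native<float> == _VecBuiltin<16> on a plain SSE2 target.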
2660
2661// }}}
2662// __default_abi {{{
2663#if defined _GLIBCXX_SIMD_DEFAULT_ABI
2664template <typename _Tp>
2665 using __default_abi = _GLIBCXX_SIMD_DEFAULT_ABI<_Tp>;
2666#else
2667template <typename _Tp>
2668 using __default_abi = compatible<_Tp>;
2669#endif
2670
2671// }}}
2672} // namespace simd_abi
2673
2674// traits {{{1
2675// is_abi_tag {{{2
2676template <typename _Tp, typename = void_t<>>
2677 struct is_abi_tag : false_type {};
2678
2679template <typename _Tp>
2680 struct is_abi_tag<_Tp, void_t<typename _Tp::_IsValidAbiTag>>
2681 : public _Tp::_IsValidAbiTag {};
2682
2683template <typename _Tp>
2684 inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
2685
2686// is_simd(_mask) {{{2
2687template <typename _Tp>
2688 struct is_simd : public false_type {};
2689
2690template <typename _Tp>
2691 inline constexpr bool is_simd_v = is_simd<_Tp>::value;
2692
2693template <typename _Tp>
2694 struct is_simd_mask : public false_type {};
2695
2696template <typename _Tp>
2697 inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
2698
2699// simd_size {{{2
2700template <typename _Tp, typename _Abi, typename = void>
2701 struct __simd_size_impl {};
2702
2703template <typename _Tp, typename _Abi>
2704 struct __simd_size_impl<
2705 _Tp, _Abi,
2706 enable_if_t<conjunction_v<__is_vectorizable<_Tp>, is_abi_tag<_Abi>>>>
2707 : _SizeConstant<_Abi::template _S_size<_Tp>> {};
2708
2709template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2710 struct simd_size : __simd_size_impl<_Tp, _Abi> {};
2711
2712template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2713 inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
2714
2715// simd_abi::deduce {{{2
2716template <typename _Tp, size_t _Np, typename = void>
2717 struct __deduce_impl;
2718
2719namespace simd_abi {
2720/**
2721 * @tparam _Tp The requested `value_type` for the elements.
2722 * @tparam _Np The requested number of elements.
2723 * @tparam _Abis This parameter is ignored, since this implementation cannot
2724 * make any use of it. Either a good native ABI is matched and used as `type`
2725 * alias, or the `fixed_size<_Np>` ABI is used, which internally is built from
2726 * the best matching native ABIs.
2727 */
2728template <typename _Tp, size_t _Np, typename...>
2729 struct deduce : __deduce_impl<_Tp, _Np> {};
2730
2731template <typename _Tp, size_t _Np, typename... _Abis>
2732 using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
2733} // namespace simd_abi
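// Usage sketch (target-dependent): simd_abi::deduce_t<float, 4> names a
// 16-Byte native ABI where one exists; a request no native ABI matches,
// e.g. deduce_t<float, 12>, falls back to fixed_size<12> as described above.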
2734
2735// }}}2
2736// rebind_simd {{{2
2737template <typename _Tp, typename _V, typename = void>
2738 struct rebind_simd;
2739
2740template <typename _Tp, typename _Up, typename _Abi>
2741 struct rebind_simd<
2742 _Tp, simd<_Up, _Abi>,
2743 void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
2744 {
2745 using type
2746 = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>;
2747 };
2748
2749template <typename _Tp, typename _Up, typename _Abi>
2750 struct rebind_simd<
2751 _Tp, simd_mask<_Up, _Abi>,
2752 void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
2753 {
2754 using type
2755 = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>;
2756 };
2757
2758template <typename _Tp, typename _V>
2759 using rebind_simd_t = typename rebind_simd<_Tp, _V>::type;
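// Usage sketch: rebind_simd_t<double, native_simd<float>> keeps the element
// count and deduces a (possibly wider) ABI for double, so its size() equals
// native_simd<float>::size().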
2760
2761// resize_simd {{{2
2762template <int _Np, typename _V, typename = void>
2763 struct resize_simd;
2764
2765template <int _Np, typename _Tp, typename _Abi>
2766 struct resize_simd<_Np, simd<_Tp, _Abi>,
2767 void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
2768 { using type = simd<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
2769
2770template <int _Np, typename _Tp, typename _Abi>
2771 struct resize_simd<_Np, simd_mask<_Tp, _Abi>,
2772 void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
2773 { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
2774
2775template <int _Np, typename _V>
2776 using resize_simd_t = typename resize_simd<_Np, _V>::type;
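// Usage sketch: resize_simd_t<4, native_simd<float>> keeps the value_type
// and re-deduces the ABI for exactly 4 elements, independent of the native
// register width.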
2777
2778// }}}2
2779// memory_alignment {{{2
2780template <typename _Tp, typename _Up = typename _Tp::value_type>
2781 struct memory_alignment
2782 : public _SizeConstant<vector_aligned_tag::_S_alignment<_Tp, _Up>> {};
2783
2784template <typename _Tp, typename _Up = typename _Tp::value_type>
2785 inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value;
2786
2787// class template simd [simd] {{{1
2788template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2789 class simd;
2790
2791template <typename _Tp, typename _Abi>
2792 struct is_simd<simd<_Tp, _Abi>> : public true_type {};
2793
2794template <typename _Tp>
2795 using native_simd = simd<_Tp, simd_abi::native<_Tp>>;
2796
2797template <typename _Tp, int _Np>
2798 using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>;
2799
2800template <typename _Tp, size_t _Np>
2801 using __deduced_simd = simd<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
2802
2803// class template simd_mask [simd_mask] {{{1
2804template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2805 class simd_mask;
2806
2807template <typename _Tp, typename _Abi>
2808 struct is_simd_mask<simd_mask<_Tp, _Abi>> : public true_type {};
2809
2810template <typename _Tp>
2811 using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>;
2812
2813template <typename _Tp, int _Np>
2814 using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>;
2815
2816template <typename _Tp, size_t _Np>
2817 using __deduced_simd_mask = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
2818
2819// casts [simd.casts] {{{1
2820// static_simd_cast {{{2
2821template <typename _Tp, typename _Up, typename _Ap, bool = is_simd_v<_Tp>,
2822 typename = void>
2823 struct __static_simd_cast_return_type;
2824
2825template <typename _Tp, typename _A0, typename _Up, typename _Ap>
2826 struct __static_simd_cast_return_type<simd_mask<_Tp, _A0>, _Up, _Ap, false,
2827 void>
2828 : __static_simd_cast_return_type<simd<_Tp, _A0>, _Up, _Ap> {};
2829
2830template <typename _Tp, typename _Up, typename _Ap>
2831 struct __static_simd_cast_return_type<
2832 _Tp, _Up, _Ap, true, enable_if_t<_Tp::size() == simd_size_v<_Up, _Ap>>>
2833 { using type = _Tp; };
2834
2835template <typename _Tp, typename _Ap>
2836 struct __static_simd_cast_return_type<_Tp, _Tp, _Ap, false,
2837#ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
2838 enable_if_t<__is_vectorizable_v<_Tp>>
2839#else
2840 void
2841#endif
2842 >
2843 { using type = simd<_Tp, _Ap>; };
2844
2845template <typename _Tp, typename = void>
2846 struct __safe_make_signed { using type = _Tp; };
2847
2848template <typename _Tp>
2849 struct __safe_make_signed<_Tp, enable_if_t<is_integral_v<_Tp>>>
2850 {
2851 // the extra make_unsigned_t is because of PR85951
2852 using type = make_signed_t<make_unsigned_t<_Tp>>;
2853 };
2854
2855template <typename _Tp>
2856 using safe_make_signed_t = typename __safe_make_signed<_Tp>::type;
2857
2858template <typename _Tp, typename _Up, typename _Ap>
2859 struct __static_simd_cast_return_type<_Tp, _Up, _Ap, false,
2860#ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
2861 enable_if_t<__is_vectorizable_v<_Tp>>
2862#else
2863 void
2864#endif
2865 >
2866 {
2867 using type = conditional_t<
2868 (is_integral_v<_Up> && is_integral_v<_Tp> &&
2869#ifndef _GLIBCXX_SIMD_FIX_P2TS_ISSUE65
2870 is_signed_v<_Up> != is_signed_v<_Tp> &&
2871#endif
2872 is_same_v<safe_make_signed_t<_Up>, safe_make_signed_t<_Tp>>),
2873 simd<_Tp, _Ap>, fixed_size_simd<_Tp, simd_size_v<_Up, _Ap>>>;
2874 };
2875
2876template <typename _Tp, typename _Up, typename _Ap,
2877 typename _R
2878 = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
2879 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _R
2880 static_simd_cast(const simd<_Up, _Ap>& __x)
2881 {
2882 if constexpr (is_same<_R, simd<_Up, _Ap>>::value)
2883 return __x;
2884 else
2885 {
2886 _SimdConverter<_Up, _Ap, typename _R::value_type, typename _R::abi_type>
2887 __c;
2888 return _R(__private_init, __c(__data(__x)));
2889 }
2890 }
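// Usage sketch (assuming namespace stdx = std::experimental): element-wise
// value conversion with unchanged size; the return type falls back to
// fixed_size when no common ABI applies:
//   stdx::fixed_size_simd<float, 4> __f = {};
//   auto __i = stdx::static_simd_cast<int>(__f); // fixed_size_simd<int, 4>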
2891
2892namespace __proposed {
2893template <typename _Tp, typename _Up, typename _Ap,
2894 typename _R
2895 = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
2896 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR typename _R::mask_type
2897 static_simd_cast(const simd_mask<_Up, _Ap>& __x)
2898 {
2899 using _RM = typename _R::mask_type;
2900 return {__private_init, _RM::abi_type::_MaskImpl::template _S_convert<
2901 typename _RM::simd_type::value_type>(__x)};
2902 }
2903} // namespace __proposed
2904
2905// simd_cast {{{2
2906template <typename _Tp, typename _Up, typename _Ap,
2907 typename _To = __value_type_or_identity_t<_Tp>>
2908 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
2909 simd_cast(const simd<_ValuePreserving<_Up, _To>, _Ap>& __x)
2910 -> decltype(static_simd_cast<_Tp>(__x))
2911 { return static_simd_cast<_Tp>(__x); }
2912
2913namespace __proposed {
2914template <typename _Tp, typename _Up, typename _Ap,
2915 typename _To = __value_type_or_identity_t<_Tp>>
2916 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
2917 simd_cast(const simd_mask<_ValuePreserving<_Up, _To>, _Ap>& __x)
2918 -> decltype(static_simd_cast<_Tp>(__x))
2919 { return static_simd_cast<_Tp>(__x); }
2920} // namespace __proposed
2921
2922// }}}2
2923// resizing_simd_cast {{{
2924namespace __proposed {
2925/* Proposed spec:
2926
2927template <class T, class U, class Abi>
2928T resizing_simd_cast(const simd<U, Abi>& x)
2929
2930p1 Constraints:
2931 - is_simd_v<T> is true and
2932 - T::value_type is the same type as U
2933
2934p2 Returns:
2935 A simd object with the i^th element initialized to x[i] for all i in the
2936 range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
2937 than simd_size_v<U, Abi>, the remaining elements are value-initialized.
2938
2939template <class T, class U, class Abi>
2940T resizing_simd_cast(const simd_mask<U, Abi>& x)
2941
2942p1 Constraints: is_simd_mask_v<T> is true
2943
2944p2 Returns:
2945 A simd_mask object with the i^th element initialized to x[i] for all i in
2946 the range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
2947 than simd_size_v<U, Abi>, the remaining elements are initialized to false.
2948
2949 */
2950
2951template <typename _Tp, typename _Up, typename _Ap>
2952 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR enable_if_t<
2953 conjunction_v<is_simd<_Tp>, is_same<typename _Tp::value_type, _Up>>, _Tp>
2954 resizing_simd_cast(const simd<_Up, _Ap>& __x)
2955 {
2956 if constexpr (is_same_v<typename _Tp::abi_type, _Ap>)
2957 return __x;
2958 else if constexpr (simd_size_v<_Up, _Ap> == 1)
2959 {
2960 _Tp __r{};
2961 __r[0] = __x[0];
2962 return __r;
2963 }
2964 else if constexpr (_Tp::size() == 1)
2965 return __x[0];
2966 else if constexpr (sizeof(_Tp) == sizeof(__x)
2967 && !__is_fixed_size_abi_v<_Ap>)
2968 return {__private_init,
2969 __vector_bitcast<typename _Tp::value_type, _Tp::size()>(
2970 _Ap::_S_masked(__data(__x))._M_data)};
2971 else
2972 {
2973 _Tp __r{};
2974 __builtin_memcpy(&__data(__r), &__data(__x),
2975 sizeof(_Up)
2976 * std::min(_Tp::size(), simd_size_v<_Up, _Ap>));
2977 return __r;
2978 }
2979 }
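// Usage sketch (stdx = std::experimental): growing copies the common prefix
// and value-initializes the remainder, per the proposed wording above:
//   stdx::fixed_size_simd<int, 4> __small = {};
//   auto __big = stdx::__proposed::resizing_simd_cast<
//     stdx::fixed_size_simd<int, 8>>(__small);
//   // __big[0..3] == __small[0..3], __big[4..7] == 0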
2980
2981template <typename _Tp, typename _Up, typename _Ap>
2982 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
2983 enable_if_t<is_simd_mask_v<_Tp>, _Tp>
2984 resizing_simd_cast(const simd_mask<_Up, _Ap>& __x)
2985 {
2986 return {__private_init, _Tp::abi_type::_MaskImpl::template _S_convert<
2987 typename _Tp::simd_type::value_type>(__x)};
2988 }
2989} // namespace __proposed
2990
2991// }}}
2992// to_fixed_size {{{2
2993template <typename _Tp, int _Np>
2994 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, _Np>
2995 to_fixed_size(const fixed_size_simd<_Tp, _Np>& __x)
2996 { return __x; }
2997
2998template <typename _Tp, int _Np>
2999 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, _Np>
3000 to_fixed_size(const fixed_size_simd_mask<_Tp, _Np>& __x)
3001 { return __x; }
3002
3003template <typename _Tp, typename _Ap>
3004 _GLIBCXX_SIMD_INTRINSIC auto
3005 to_fixed_size(const simd<_Tp, _Ap>& __x)
3006 {
3007 return simd<_Tp, simd_abi::fixed_size<simd_size_v<_Tp, _Ap>>>([&__x](
3008 auto __i) constexpr { return __x[__i]; });
3009 }
3010
3011template <typename _Tp, typename _Ap>
3012 _GLIBCXX_SIMD_INTRINSIC auto
3013 to_fixed_size(const simd_mask<_Tp, _Ap>& __x)
3014 {
3015 constexpr int _Np = simd_mask<_Tp, _Ap>::size();
3016 fixed_size_simd_mask<_Tp, _Np> __r;
3017 __execute_n_times<_Np>([&](auto __i) constexpr { __r[__i] = __x[__i]; });
3018 return __r;
3019 }
3020
3021// to_native {{{2
3022template <typename _Tp, int _Np>
3023 _GLIBCXX_SIMD_INTRINSIC
3024 enable_if_t<(_Np == native_simd<_Tp>::size()), native_simd<_Tp>>
3025 to_native(const fixed_size_simd<_Tp, _Np>& __x)
3026 {
3027 alignas(memory_alignment_v<native_simd<_Tp>>) _Tp __mem[_Np];
3028 __x.copy_to(__mem, vector_aligned);
3029 return {__mem, vector_aligned};
3030 }
3031
3032template <typename _Tp, size_t _Np>
3033 _GLIBCXX_SIMD_INTRINSIC
3034 enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>>
3035 to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
3036 {
3037 return native_simd_mask<_Tp>([&](auto __i) constexpr { return __x[__i]; });
3038 }
3039
3040// to_compatible {{{2
3041template <typename _Tp, size_t _Np>
3042 _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>>
3043 to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
3044 {
3045 alignas(memory_alignment_v<simd<_Tp>>) _Tp __mem[_Np];
3046 __x.copy_to(__mem, vector_aligned);
3047 return {__mem, vector_aligned};
3048 }
3049
3050template <typename _Tp, size_t _Np>
3051 _GLIBCXX_SIMD_INTRINSIC
3052 enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>>
3053 to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x)
3054 { return simd_mask<_Tp>([&](auto __i) constexpr { return __x[__i]; }); }
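// Round-trip sketch (stdx = std::experimental; only valid when the sizes
// agree, as the enable_if constraints above enforce):
//   stdx::native_simd<int> __x = {};
//   auto __fixed = stdx::to_fixed_size(__x); // fixed_size ABI, same values
//   auto __nat = stdx::to_native(__fixed);   // back to the native ABI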
3055
3056// masked assignment [simd_mask.where] {{{1
3057
3058// where_expression {{{1
3059// const_where_expression<M, T> {{{2
3060template <typename _M, typename _Tp>
3061 class const_where_expression
3062 {
3063 using _V = _Tp;
3064 static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3065
3066 struct _Wrapper { using value_type = _V; };
3067
3068 protected:
3069 using _Impl = typename _V::_Impl;
3070
3071 using value_type =
3072 typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3073
3074 _GLIBCXX_SIMD_INTRINSIC friend const _M&
3075 __get_mask(const const_where_expression& __x)
3076 { return __x._M_k; }
3077
3078 _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3079 __get_lvalue(const const_where_expression& __x)
3080 { return __x._M_value; }
3081
3082 const _M& _M_k;
3083 _Tp& _M_value;
3084
3085 public:
3086 const_where_expression(const const_where_expression&) = delete;
3087 const_where_expression& operator=(const const_where_expression&) = delete;
3088
3089 _GLIBCXX_SIMD_INTRINSIC const_where_expression(const _M& __kk, const _Tp& dd)
3090 : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
3091
3092 _GLIBCXX_SIMD_INTRINSIC _V
3093 operator-() const&&
3094 {
3095 return {__private_init,
3096 _Impl::template _S_masked_unary<negate>(__data(_M_k),
3097 __data(_M_value))};
3098 }
3099
3100 template <typename _Up, typename _Flags>
3101 [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _V
3102 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
3103 {
3104 return {__private_init,
3105 _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3106 _Flags::template _S_apply<_V>(__mem))};
3107 }
3108
3109 template <typename _Up, typename _Flags>
3110 _GLIBCXX_SIMD_INTRINSIC void
3111 copy_to(_LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
3112 {
3113 _Impl::_S_masked_store(__data(_M_value),
3114 _Flags::template _S_apply<_V>(__mem),
3115 __data(_M_k));
3116 }
3117 };
3118
3119// const_where_expression<bool, T> {{{2
3120template <typename _Tp>
3121 class const_where_expression<bool, _Tp>
3122 {
3123 using _M = bool;
3124 using _V = _Tp;
3125
3126 static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3127
3128 struct _Wrapper { using value_type = _V; };
3129
3130 protected:
3131 using value_type =
3132 typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3133
3134 _GLIBCXX_SIMD_INTRINSIC friend const _M&
3135 __get_mask(const const_where_expression& __x)
3136 { return __x._M_k; }
3137
3138 _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3139 __get_lvalue(const const_where_expression& __x)
3140 { return __x._M_value; }
3141
3142 const bool _M_k;
3143 _Tp& _M_value;
3144
3145 public:
3146 const_where_expression(const const_where_expression&) = delete;
3147 const_where_expression& operator=(const const_where_expression&) = delete;
3148
3149 _GLIBCXX_SIMD_INTRINSIC const_where_expression(const bool __kk, const _Tp& dd)
3150 : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
3151
3152 _GLIBCXX_SIMD_INTRINSIC _V operator-() const&&
3153 { return _M_k ? -_M_value : _M_value; }
3154
3155 template <typename _Up, typename _Flags>
3156 [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _V
3157 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
3158 { return _M_k ? static_cast<_V>(__mem[0]) : _M_value; }
3159
3160 template <typename _Up, typename _Flags>
3161 _GLIBCXX_SIMD_INTRINSIC void
3162 copy_to(_LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
3163 {
3164 if (_M_k)
3165 __mem[0] = _M_value;
3166 }
3167 };
3168
3169// where_expression<M, T> {{{2
3170template <typename _M, typename _Tp>
3171 class where_expression : public const_where_expression<_M, _Tp>
3172 {
3173 using _Impl = typename const_where_expression<_M, _Tp>::_Impl;
3174
3175 static_assert(!is_const<_Tp>::value,
3176 "where_expression may only be instantiated with a non-const "
3177 "_Tp parameter");
3178
3179 using typename const_where_expression<_M, _Tp>::value_type;
3180 using const_where_expression<_M, _Tp>::_M_k;
3181 using const_where_expression<_M, _Tp>::_M_value;
3182
3183 static_assert(
3184 is_same<typename _M::abi_type, typename _Tp::abi_type>::value, "");
3185 static_assert(_M::size() == _Tp::size(), "");
3186
3187 _GLIBCXX_SIMD_INTRINSIC friend _Tp& __get_lvalue(where_expression& __x)
3188 { return __x._M_value; }
3189
3190 public:
3191 where_expression(const where_expression&) = delete;
3192 where_expression& operator=(const where_expression&) = delete;
3193
3194 _GLIBCXX_SIMD_INTRINSIC where_expression(const _M& __kk, _Tp& dd)
3195 : const_where_expression<_M, _Tp>(__kk, dd) {}
3196
3197 template <typename _Up>
3198 _GLIBCXX_SIMD_INTRINSIC void operator=(_Up&& __x) &&
3199 {
3200 _Impl::_S_masked_assign(__data(_M_k), __data(_M_value),
3201 __to_value_type_or_member_type<_Tp>(
3202 static_cast<_Up&&>(__x)));
3203 }
3204
3205#define _GLIBCXX_SIMD_OP_(__op, __name) \
3206 template <typename _Up> \
3207 _GLIBCXX_SIMD_INTRINSIC void operator __op##=(_Up&& __x)&& \
3208 { \
3209 _Impl::template _S_masked_cassign( \
3210 __data(_M_k), __data(_M_value), \
3211 __to_value_type_or_member_type<_Tp>(static_cast<_Up&&>(__x)), \
3212 [](auto __impl, auto __lhs, auto __rhs) constexpr { \
3213 return __impl.__name(__lhs, __rhs); \
3214 }); \
3215 } \
3216 static_assert(true)
3217 _GLIBCXX_SIMD_OP_(+, _S_plus);
3218 _GLIBCXX_SIMD_OP_(-, _S_minus);
3219 _GLIBCXX_SIMD_OP_(*, _S_multiplies);
3220 _GLIBCXX_SIMD_OP_(/, _S_divides);
3221 _GLIBCXX_SIMD_OP_(%, _S_modulus);
3222 _GLIBCXX_SIMD_OP_(&, _S_bit_and);
3223 _GLIBCXX_SIMD_OP_(|, _S_bit_or);
3224 _GLIBCXX_SIMD_OP_(^, _S_bit_xor);
3225 _GLIBCXX_SIMD_OP_(<<, _S_shift_left);
3226 _GLIBCXX_SIMD_OP_(>>, _S_shift_right);
3227#undef _GLIBCXX_SIMD_OP_
3228
3229 _GLIBCXX_SIMD_INTRINSIC void operator++() &&
3230 {
3231 __data(_M_value)
3232 = _Impl::template _S_masked_unary<__increment>(__data(_M_k),
3233 __data(_M_value));
3234 }
3235
3236 _GLIBCXX_SIMD_INTRINSIC void operator++(int) &&
3237 {
3238 __data(_M_value)
3239 = _Impl::template _S_masked_unary<__increment>(__data(_M_k),
3240 __data(_M_value));
3241 }
3242
3243 _GLIBCXX_SIMD_INTRINSIC void operator--() &&
3244 {
3245 __data(_M_value)
3246 = _Impl::template _S_masked_unary<__decrement>(__data(_M_k),
3247 __data(_M_value));
3248 }
3249
3250 _GLIBCXX_SIMD_INTRINSIC void operator--(int) &&
3251 {
3252 __data(_M_value)
3253 = _Impl::template _S_masked_unary<__decrement>(__data(_M_k),
3254 __data(_M_value));
3255 }
3256
3257 // intentionally hides const_where_expression::copy_from
3258 template <typename _Up, typename _Flags>
3259 _GLIBCXX_SIMD_INTRINSIC void
3260 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) &&
3261 {
3262 __data(_M_value)
3263 = _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3264 _Flags::template _S_apply<_Tp>(__mem));
3265 }
3266 };
3267
3268// where_expression<bool, T> {{{2
3269template <typename _Tp>
3270 class where_expression<bool, _Tp> : public const_where_expression<bool, _Tp>
3271 {
3272 using _M = bool;
3273 using typename const_where_expression<_M, _Tp>::value_type;
3274 using const_where_expression<_M, _Tp>::_M_k;
3275 using const_where_expression<_M, _Tp>::_M_value;
3276
3277 public:
3278 where_expression(const where_expression&) = delete;
3279 where_expression& operator=(const where_expression&) = delete;
3280
3281 _GLIBCXX_SIMD_INTRINSIC where_expression(const _M& __kk, _Tp& dd)
3282 : const_where_expression<_M, _Tp>(__kk, dd) {}
3283
3284#define _GLIBCXX_SIMD_OP_(__op) \
3285 template <typename _Up> \
3286 _GLIBCXX_SIMD_INTRINSIC void operator __op(_Up&& __x)&& \
3287 { if (_M_k) _M_value __op static_cast<_Up&&>(__x); }
3288
3289 _GLIBCXX_SIMD_OP_(=)
3290 _GLIBCXX_SIMD_OP_(+=)
3291 _GLIBCXX_SIMD_OP_(-=)
3292 _GLIBCXX_SIMD_OP_(*=)
3293 _GLIBCXX_SIMD_OP_(/=)
3294 _GLIBCXX_SIMD_OP_(%=)
3295 _GLIBCXX_SIMD_OP_(&=)
3296 _GLIBCXX_SIMD_OP_(|=)
3297 _GLIBCXX_SIMD_OP_(^=)
3298 _GLIBCXX_SIMD_OP_(<<=)
3299 _GLIBCXX_SIMD_OP_(>>=)
3300 #undef _GLIBCXX_SIMD_OP_
3301
3302 _GLIBCXX_SIMD_INTRINSIC void operator++() &&
3303 { if (_M_k) ++_M_value; }
3304
3305 _GLIBCXX_SIMD_INTRINSIC void operator++(int) &&
3306 { if (_M_k) ++_M_value; }
3307
3308 _GLIBCXX_SIMD_INTRINSIC void operator--() &&
3309 { if (_M_k) --_M_value; }
3310
3311 _GLIBCXX_SIMD_INTRINSIC void operator--(int) &&
3312 { if (_M_k) --_M_value; }
3313
3314 // intentionally hides const_where_expression::copy_from
3315 template <typename _Up, typename _Flags>
3316 _GLIBCXX_SIMD_INTRINSIC void
3317 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) &&
3318 { if (_M_k) _M_value = __mem[0]; }
3319 };
3320
3321// where {{{1
3322template <typename _Tp, typename _Ap>
3323 _GLIBCXX_SIMD_INTRINSIC where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3324 where(const typename simd<_Tp, _Ap>::mask_type& __k, simd<_Tp, _Ap>& __value)
3325 { return {__k, __value}; }
3326
3327template <typename _Tp, typename _Ap>
3328 _GLIBCXX_SIMD_INTRINSIC
3329 const_where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3330 where(const typename simd<_Tp, _Ap>::mask_type& __k,
3331 const simd<_Tp, _Ap>& __value)
3332 { return {__k, __value}; }
3333
3334template <typename _Tp, typename _Ap>
3335 _GLIBCXX_SIMD_INTRINSIC
3336 where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3337 where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k,
3338 simd_mask<_Tp, _Ap>& __value)
3339 { return {__k, __value}; }
3340
3341template <typename _Tp, typename _Ap>
3342 _GLIBCXX_SIMD_INTRINSIC
3343 const_where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3344 where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k,
3345 const simd_mask<_Tp, _Ap>& __value)
3346 { return {__k, __value}; }
3347
3348template <typename _Tp>
3349 _GLIBCXX_SIMD_INTRINSIC where_expression<bool, _Tp>
3350 where(_ExactBool __k, _Tp& __value)
3351 { return {__k, __value}; }
3352
3353template <typename _Tp>
3354 _GLIBCXX_SIMD_INTRINSIC const_where_expression<bool, _Tp>
3355 where(_ExactBool __k, const _Tp& __value)
3356 { return {__k, __value}; }
3357
3358 template <typename _Tp, typename _Ap>
3359 void where(bool __k, simd<_Tp, _Ap>& __value) = delete;
3360
3361 template <typename _Tp, typename _Ap>
3362 void where(bool __k, const simd<_Tp, _Ap>& __value) = delete;
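// Usage sketch (stdx = std::experimental): masked assignment via where,
//   stdx::native_simd<float> __v = {};
//   where(__v < 0, __v) = -__v; // element-wise absolute value
// The deleted bool overloads above keep a plain bool from silently selecting
// all or none of the elements of a simd.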
3363
3364// proposed mask iterations {{{1
3365namespace __proposed {
3366template <size_t _Np>
3367 class where_range
3368 {
3369 const bitset<_Np> __bits;
3370
3371 public:
3372 where_range(bitset<_Np> __b) : __bits(__b) {}
3373
3374 class iterator
3375 {
3376 size_t __mask;
3377 size_t __bit;
3378
3379 _GLIBCXX_SIMD_INTRINSIC void __next_bit()
3380 { __bit = __builtin_ctzl(__mask); }
3381
3382 _GLIBCXX_SIMD_INTRINSIC void __reset_lsb()
3383 {
3384 // clears the lowest set bit: 01100100 & (01100100 - 1) = 01100000
3385 __mask &= (__mask - 1);
3386 // __asm__("btr %1,%0" : "+r"(__mask) : "r"(__bit));
3387 }
3388
3389 public:
3390 iterator(decltype(__mask) __m) : __mask(__m) { __next_bit(); }
3391 iterator(const iterator&) = default;
3392 iterator(iterator&&) = default;
3393
3394 _GLIBCXX_SIMD_ALWAYS_INLINE size_t operator->() const
3395 { return __bit; }
3396
3397 _GLIBCXX_SIMD_ALWAYS_INLINE size_t operator*() const
3398 { return __bit; }
3399
3400 _GLIBCXX_SIMD_ALWAYS_INLINE iterator& operator++()
3401 {
3402 __reset_lsb();
3403 __next_bit();
3404 return *this;
3405 }
3406
3407 _GLIBCXX_SIMD_ALWAYS_INLINE iterator operator++(int)
3408 {
3409 iterator __tmp = *this;
3410 __reset_lsb();
3411 __next_bit();
3412 return __tmp;
3413 }
3414
3415 _GLIBCXX_SIMD_ALWAYS_INLINE bool operator==(const iterator& __rhs) const
3416 { return __mask == __rhs.__mask; }
3417
3418 _GLIBCXX_SIMD_ALWAYS_INLINE bool operator!=(const iterator& __rhs) const
3419 { return __mask != __rhs.__mask; }
3420 };
3421
3422 iterator begin() const
3423 { return __bits.to_ullong(); }
3424
3425 iterator end() const
3426 { return 0; }
3427 };
3428
3429template <typename _Tp, typename _Ap>
3430 where_range<simd_size_v<_Tp, _Ap>>
3431 where(const simd_mask<_Tp, _Ap>& __k)
3432 { return __k.__to_bitset(); }
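// Usage sketch: visit the index of each set mask element, lowest first
// (assuming __k is a simd_mask with at least one element set):
//   for (size_t __i : __proposed::where(__k))
//     __visit(__i); // __visit: hypothetical per-index callback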
3433
3434} // namespace __proposed
3435
3436// }}}1
3437// reductions [simd.reductions] {{{1
3438template <typename _Tp, typename _Abi, typename _BinaryOperation = plus<>>
3439 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3440 reduce(const simd<_Tp, _Abi>& __v,
3441 _BinaryOperation __binary_op = _BinaryOperation())
3442 { return _Abi::_SimdImpl::_S_reduce(__v, __binary_op); }
3443
3444template <typename _M, typename _V, typename _BinaryOperation = plus<>>
3445 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3446 reduce(const const_where_expression<_M, _V>& __x,
3447 typename _V::value_type __identity_element,
3448 _BinaryOperation __binary_op)
3449 {
3450 if (__builtin_expect(none_of(__get_mask(__x)), false))
3451 return __identity_element;
3452
3453 _V __tmp = __identity_element;
3454 _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3455 __data(__get_lvalue(__x)));
3456 return reduce(__tmp, __binary_op);
3457 }
3458
3459template <typename _M, typename _V>
3460 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3461 reduce(const const_where_expression<_M, _V>& __x, plus<> __binary_op = {})
3462 { return reduce(__x, 0, __binary_op); }
3463
3464template <typename _M, typename _V>
3465 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3466 reduce(const const_where_expression<_M, _V>& __x, multiplies<> __binary_op)
3467 { return reduce(__x, 1, __binary_op); }
3468
3469template <typename _M, typename _V>
3470 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3471 reduce(const const_where_expression<_M, _V>& __x, bit_and<> __binary_op)
3472 { return reduce(__x, ~typename _V::value_type(), __binary_op); }
3473
3474template <typename _M, typename _V>
3475 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3476 reduce(const const_where_expression<_M, _V>& __x, bit_or<> __binary_op)
3477 { return reduce(__x, 0, __binary_op); }
3478
3479template <typename _M, typename _V>
3480 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3481 reduce(const const_where_expression<_M, _V>& __x, bit_xor<> __binary_op)
3482 { return reduce(__x, 0, __binary_op); }
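// The identity elements above (0, 1, ~0, ...) are chosen so that masked-off
// elements cannot change the result. Usage sketch (stdx = std::experimental):
//   stdx::native_simd<int> __v = {};
//   int __sum = stdx::reduce(stdx::where(__v > 0, __v)); // sum of lanes > 0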
3483
3484template <typename _Tp, typename _Abi>
3485 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3486 hmin(const simd<_Tp, _Abi>& __v) noexcept
3487 {
3488 return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Minimum());
3489 }
3490
3491template <typename _Tp, typename _Abi>
3492 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3493 hmax(const simd<_Tp, _Abi>& __v) noexcept
3494 {
3495 return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Maximum());
3496 }
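// Usage sketch: hmin/hmax reduce over all elements, e.g. a simd<int> holding
// {3, 1, 2, 5} (illustrative values) gives hmin == 1 and hmax == 5.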
3497
3498template <typename _M, typename _V>
3499 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3500 typename _V::value_type
3501 hmin(const const_where_expression<_M, _V>& __x) noexcept
3502 {
3503 using _Tp = typename _V::value_type;
3504 constexpr _Tp __id_elem =
3505#ifdef __FINITE_MATH_ONLY__
3506 __finite_max_v<_Tp>;
3507#else
3508 __value_or<__infinity, _Tp>(__finite_max_v<_Tp>);
3509#endif
3510 _V __tmp = __id_elem;
3511 _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3512 __data(__get_lvalue(__x)));
3513 return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Minimum());
3514 }
3515
3516template <typename _M, typename _V>
3517 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3518 typename _V::value_type
3519 hmax(const const_where_expression<_M, _V>& __x) noexcept
3520 {
3521 using _Tp = typename _V::value_type;
3522 constexpr _Tp __id_elem =
3523#ifdef __FINITE_MATH_ONLY__
3524 __finite_min_v<_Tp>;
3525#else
3526 [] {
3527 if constexpr (__value_exists_v<__infinity, _Tp>)
3528 return -__infinity_v<_Tp>;
3529 else
3530 return __finite_min_v<_Tp>;
3531 }();
3532#endif
3533 _V __tmp = __id_elem;
3534 _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3535 __data(__get_lvalue(__x)));
3536 return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Maximum());
3537 }
3538
3539// }}}1
3540// algorithms [simd.alg] {{{
3541template <typename _Tp, typename _Ap>
3542 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3543 min(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3544 { return {__private_init, _Ap::_SimdImpl::_S_min(__data(__a), __data(__b))}; }
3545
3546template <typename _Tp, typename _Ap>
3547 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3548 max(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3549 { return {__private_init, _Ap::_SimdImpl::_S_max(__data(__a), __data(__b))}; }
3550
3551template <typename _Tp, typename _Ap>
3552 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3553 pair<simd<_Tp, _Ap>, simd<_Tp, _Ap>>
3554 minmax(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3555 {
3556     const auto __pair_of_members
3557       = _Ap::_SimdImpl::_S_minmax(__data(__a), __data(__b));
3558     return {simd<_Tp, _Ap>(__private_init, __pair_of_members.first),
3559             simd<_Tp, _Ap>(__private_init, __pair_of_members.second)};
3560 }
3561
3562template <typename _Tp, typename _Ap>
3563 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3564 clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo,
3565 const simd<_Tp, _Ap>& __hi)
3566 {
3567 using _Impl = typename _Ap::_SimdImpl;
3568 return {__private_init,
3569 _Impl::_S_min(__data(__hi),
3570 _Impl::_S_max(__data(__lo), __data(__v)))};
3571 }
3572
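// Example (illustrative, element-wise): with __v == {-1, 5, 2, 9} and the
// broadcasts __lo == 0, __hi == 4:
//   clamp(__v, __lo, __hi) == {0, 4, 2, 4}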
3573// }}}
3574
3575template <size_t... _Sizes, typename _Tp, typename _Ap,
3576 typename = enable_if_t<((_Sizes + ...) == simd<_Tp, _Ap>::size())>>
3577 inline tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
3578 split(const simd<_Tp, _Ap>&);
3579
3580// __extract_part {{{
3581template <int _Index, int _Total, int _Combine = 1, typename _Tp, size_t _Np>
3582 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST
3583 _SimdWrapper<_Tp, _Np / _Total * _Combine>
3584 __extract_part(const _SimdWrapper<_Tp, _Np> __x);
3585
3586 template <int _Index, int _Parts, int _Combine = 1, typename _Tp, typename _A0,
3587 typename... _As>
3588 _GLIBCXX_SIMD_INTRINSIC auto
3589 __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x);
3590
3591// }}}
3592// _SizeList {{{
3593template <size_t _V0, size_t... _Values>
3594 struct _SizeList
3595 {
3596 template <size_t _I>
3597 static constexpr size_t _S_at(_SizeConstant<_I> = {})
3598 {
3599 if constexpr (_I == 0)
3600 return _V0;
3601 else
3602 return _SizeList<_Values...>::template _S_at<_I - 1>();
3603 }
3604
3605 template <size_t _I>
3606 static constexpr auto _S_before(_SizeConstant<_I> = {})
3607 {
3608 if constexpr (_I == 0)
3609 return _SizeConstant<0>();
3610 else
3611 return _SizeConstant<
3612 _V0 + _SizeList<_Values...>::template _S_before<_I - 1>()>();
3613 }
3614
3615 template <size_t _Np>
3616 static constexpr auto _S_pop_front(_SizeConstant<_Np> = {})
3617 {
3618 if constexpr (_Np == 0)
3619 return _SizeList();
3620 else
3621 return _SizeList<_Values...>::template _S_pop_front<_Np - 1>();
3622 }
3623 };
3624
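// Worked example (illustrative) for _SizeList<2, 4, 2>:
//   _S_at<1>()        == 4                (size at index 1)
//   _S_before<2>()    == 6                (2 + 4: sum of sizes before index 2)
//   _S_pop_front<1>() -> _SizeList<4, 2>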
3625// }}}
3626// __extract_center {{{
3627template <typename _Tp, size_t _Np>
3628 _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np / 2>
3629 __extract_center(_SimdWrapper<_Tp, _Np> __x)
3630 {
3631 static_assert(_Np >= 4);
3632 static_assert(_Np % 4 == 0); // x0 - x1 - x2 - x3 -> return {x1, x2}
3633#if _GLIBCXX_SIMD_X86INTRIN // {{{
3634 if constexpr (__have_avx512f && sizeof(_Tp) * _Np == 64)
3635 {
3636 const auto __intrin = __to_intrin(__x);
3637 if constexpr (is_integral_v<_Tp>)
3638 return __vector_bitcast<_Tp>(_mm512_castsi512_si256(
3639 _mm512_shuffle_i32x4(__intrin, __intrin,
3640 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3641 else if constexpr (sizeof(_Tp) == 4)
3642 return __vector_bitcast<_Tp>(_mm512_castps512_ps256(
3643 _mm512_shuffle_f32x4(__intrin, __intrin,
3644 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3645 else if constexpr (sizeof(_Tp) == 8)
3646 return __vector_bitcast<_Tp>(_mm512_castpd512_pd256(
3647 _mm512_shuffle_f64x2(__intrin, __intrin,
3648 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3649 else
3650 __assert_unreachable<_Tp>();
3651 }
3652 else if constexpr (sizeof(_Tp) * _Np == 32 && is_floating_point_v<_Tp>)
3653 return __vector_bitcast<_Tp>(
3654 _mm_shuffle_pd(__lo128(__vector_bitcast<double>(__x)),
3655 __hi128(__vector_bitcast<double>(__x)), 1));
3656 else if constexpr (sizeof(__x) == 32 && sizeof(_Tp) * _Np <= 32)
3657 return __vector_bitcast<_Tp>(
3658 _mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
3659 __lo128(__vector_bitcast<_LLong>(__x)),
3660 sizeof(_Tp) * _Np / 4));
3661 else
3662#endif // _GLIBCXX_SIMD_X86INTRIN }}}
3663 {
3664 __vector_type_t<_Tp, _Np / 2> __r;
3665 __builtin_memcpy(&__r,
3666 reinterpret_cast<const char*>(&__x)
3667 + sizeof(_Tp) * _Np / 4,
3668 sizeof(_Tp) * _Np / 2);
3669 return __r;
3670 }
3671 }
3672
3673template <typename _Tp, typename _A0, typename... _As>
3674 _GLIBCXX_SIMD_INTRINSIC
3675 _SimdWrapper<_Tp, _SimdTuple<_Tp, _A0, _As...>::_S_size() / 2>
3676 __extract_center(const _SimdTuple<_Tp, _A0, _As...>& __x)
3677 {
3678 if constexpr (sizeof...(_As) == 0)
3679 return __extract_center(__x.first);
3680 else
3681 return __extract_part<1, 4, 2>(__x);
3682 }
3683
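// E.g. (illustrative) for _Np == 8: __extract_center({x0, ..., x7}) yields
// the middle half {x2, x3, x4, x5}.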
3684// }}}
3685// __split_wrapper {{{
3686template <size_t... _Sizes, typename _Tp, typename... _As>
3687 auto
3688 __split_wrapper(_SizeList<_Sizes...>, const _SimdTuple<_Tp, _As...>& __x)
3689 {
3690 return split<_Sizes...>(
3691 fixed_size_simd<_Tp, _SimdTuple<_Tp, _As...>::_S_size()>(__private_init,
3692 __x));
3693 }
3694
3695// }}}
3696
3697// split<simd>(simd) {{{
3698template <typename _V, typename _Ap,
3699           size_t _Parts = simd_size_v<typename _V::value_type, _Ap> / _V::size()>
3700   enable_if_t<simd_size_v<typename _V::value_type, _Ap> == _Parts * _V::size()
3701               && is_simd_v<_V>, array<_V, _Parts>>
3702 split(const simd<typename _V::value_type, _Ap>& __x)
3703 {
3704 using _Tp = typename _V::value_type;
3705     if constexpr (_Parts == 1)
3706 {
3707 return {simd_cast<_V>(__x)};
3708 }
3709 else if (__x._M_is_constprop())
3710 {
3711         return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3712 auto __i) constexpr {
3713 return _V([&](auto __j) constexpr {
3714 return __x[__i * _V::size() + __j];
3715 });
3716 });
3717 }
3718 else if constexpr (
3719 __is_fixed_size_abi_v<_Ap>
3720 && (is_same_v<typename _V::abi_type, simd_abi::scalar>
3721 || (__is_fixed_size_abi_v<typename _V::abi_type>
3722 && sizeof(_V) == sizeof(_Tp) * _V::size() // _V doesn't have padding
3723 )))
3724 {
3725 // fixed_size -> fixed_size (w/o padding) or scalar
3726#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
3727 const __may_alias<_Tp>* const __element_ptr
3728 = reinterpret_cast<const __may_alias<_Tp>*>(&__data(__x));
3729         return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3730 auto __i) constexpr {
3731 return _V(__element_ptr + __i * _V::size(), vector_aligned);
3732 });
3733#else
3734 const auto& __xx = __data(__x);
3735         return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3736 auto __i) constexpr {
3737 [[maybe_unused]] constexpr size_t __offset
3738 = decltype(__i)::value * _V::size();
3739 return _V([&](auto __j) constexpr {
3740 constexpr _SizeConstant<__j + __offset> __k;
3741 return __xx[__k];
3742 });
3743 });
3744#endif
3745 }
3746 else if constexpr (is_same_v<typename _V::abi_type, simd_abi::scalar>)
3747 {
3748 // normally memcpy should work here as well
3749         return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3750 auto __i) constexpr { return __x[__i]; });
3751 }
3752 else
3753 {
3754         return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3755 auto __i) constexpr {
3756 if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
3757 return _V([&](auto __j) constexpr {
3758 return __x[__i * _V::size() + __j];
3759 });
3760 else
3761 return _V(__private_init,
3762                       __extract_part<decltype(__i)::value, _Parts>(__data(__x)));
3763 });
3764 }
3765 }
3766
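// Example (illustrative, assuming native_simd<float>::size() == 4):
//   fixed_size_simd<float, 8> __x([](int __i) { return __i; });
//   auto __halves = split<native_simd<float>>(__x);
//   // array<native_simd<float>, 2>: {0, 1, 2, 3} and {4, 5, 6, 7}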
3767// }}}
3768// split<simd_mask>(simd_mask) {{{
3769template <typename _V, typename _Ap,
3770 size_t _Parts
3771 = simd_size_v<typename _V::simd_type::value_type, _Ap> / _V::size()>
3772 enable_if_t<is_simd_mask_v<_V> && simd_size_v<typename
3773 _V::simd_type::value_type, _Ap> == _Parts * _V::size(), array<_V, _Parts>>
3774 split(const simd_mask<typename _V::simd_type::value_type, _Ap>& __x)
3775 {
3776 if constexpr (is_same_v<_Ap, typename _V::abi_type>)
3777 return {__x};
3778 else if constexpr (_Parts == 1)
3779 return {__proposed::static_simd_cast<_V>(__x)};
3780 else if constexpr (_Parts == 2 && __is_sse_abi<typename _V::abi_type>()
3781 && __is_avx_abi<_Ap>())
3782 return {_V(__private_init, __lo128(__data(__x))),
3783 _V(__private_init, __hi128(__data(__x)))};
3784 else if constexpr (_V::size() <= __CHAR_BIT__ * sizeof(_ULLong))
3785 {
3786 const bitset __bits = __x.__to_bitset();
3787 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3788 auto __i) constexpr {
3789 constexpr size_t __offset = __i * _V::size();
3790 return _V(__bitset_init, (__bits >> __offset).to_ullong());
3791 });
3792 }
3793 else
3794 {
3795 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3796 auto __i) constexpr {
3797 constexpr size_t __offset = __i * _V::size();
3798 return _V(
3799 __private_init, [&](auto __j) constexpr {
3800 return __x[__j + __offset];
3801 });
3802 });
3803 }
3804 }
3805
3806// }}}
3807// split<_Sizes...>(simd) {{{
3808template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
3809 _GLIBCXX_SIMD_ALWAYS_INLINE
3810 tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
3811 split(const simd<_Tp, _Ap>& __x)
3812 {
3813 using _SL = _SizeList<_Sizes...>;
3814 using _Tuple = tuple<__deduced_simd<_Tp, _Sizes>...>;
3815 constexpr size_t _Np = simd_size_v<_Tp, _Ap>;
3816 constexpr size_t _N0 = _SL::template _S_at<0>();
3817 using _V = __deduced_simd<_Tp, _N0>;
3818
3819 if (__x._M_is_constprop())
3820 return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>([&](
3821 auto __i) constexpr {
3822 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
3823 constexpr size_t __offset = _SL::_S_before(__i);
3824 return _Vi([&](auto __j) constexpr { return __x[__offset + __j]; });
3825 });
3826 else if constexpr (_Np == _N0)
3827 {
3828 static_assert(sizeof...(_Sizes) == 1);
3829 return {simd_cast<_V>(__x)};
3830 }
3831 else if constexpr // split from fixed_size, such that __x::first.size == _N0
3832 (__is_fixed_size_abi_v<
3833 _Ap> && __fixed_size_storage_t<_Tp, _Np>::_S_first_size == _N0)
3834 {
3835 static_assert(
3836 !__is_fixed_size_abi_v<typename _V::abi_type>,
3837 "How can <_Tp, _Np> be __a single _SimdTuple entry but __a "
3838 "fixed_size_simd "
3839 "when deduced?");
3840 // extract first and recurse (__split_wrapper is needed to deduce a new
3841 // _Sizes pack)
3842 return tuple_cat(make_tuple(_V(__private_init, __data(__x).first)),
3843 __split_wrapper(_SL::template _S_pop_front<1>(),
3844 __data(__x).second));
3845 }
3846 else if constexpr ((!is_same_v<simd_abi::scalar,
3847 simd_abi::deduce_t<_Tp, _Sizes>> && ...)
3848 && (!__is_fixed_size_abi_v<
3849 simd_abi::deduce_t<_Tp, _Sizes>> && ...))
3850 {
3851 if constexpr (((_Sizes * 2 == _Np) && ...))
3852 return {{__private_init, __extract_part<0, 2>(__data(__x))},
3853 {__private_init, __extract_part<1, 2>(__data(__x))}};
3854 else if constexpr (is_same_v<_SizeList<_Sizes...>,
3855 _SizeList<_Np / 3, _Np / 3, _Np / 3>>)
3856 return {{__private_init, __extract_part<0, 3>(__data(__x))},
3857 {__private_init, __extract_part<1, 3>(__data(__x))},
3858 {__private_init, __extract_part<2, 3>(__data(__x))}};
3859 else if constexpr (is_same_v<_SizeList<_Sizes...>,
3860 _SizeList<2 * _Np / 3, _Np / 3>>)
3861 return {{__private_init, __extract_part<0, 3, 2>(__data(__x))},
3862 {__private_init, __extract_part<2, 3>(__data(__x))}};
3863 else if constexpr (is_same_v<_SizeList<_Sizes...>,
3864 _SizeList<_Np / 3, 2 * _Np / 3>>)
3865 return {{__private_init, __extract_part<0, 3>(__data(__x))},
3866 {__private_init, __extract_part<1, 3, 2>(__data(__x))}};
3867 else if constexpr (is_same_v<_SizeList<_Sizes...>,
3868 _SizeList<_Np / 2, _Np / 4, _Np / 4>>)
3869 return {{__private_init, __extract_part<0, 2>(__data(__x))},
3870 {__private_init, __extract_part<2, 4>(__data(__x))},
3871 {__private_init, __extract_part<3, 4>(__data(__x))}};
3872 else if constexpr (is_same_v<_SizeList<_Sizes...>,
3873 _SizeList<_Np / 4, _Np / 4, _Np / 2>>)
3874 return {{__private_init, __extract_part<0, 4>(__data(__x))},
3875 {__private_init, __extract_part<1, 4>(__data(__x))},
3876 {__private_init, __extract_part<1, 2>(__data(__x))}};
3877 else if constexpr (is_same_v<_SizeList<_Sizes...>,
3878 _SizeList<_Np / 4, _Np / 2, _Np / 4>>)
3879 return {{__private_init, __extract_part<0, 4>(__data(__x))},
3880 {__private_init, __extract_center(__data(__x))},
3881 {__private_init, __extract_part<3, 4>(__data(__x))}};
3882 else if constexpr (((_Sizes * 4 == _Np) && ...))
3883 return {{__private_init, __extract_part<0, 4>(__data(__x))},
3884 {__private_init, __extract_part<1, 4>(__data(__x))},
3885 {__private_init, __extract_part<2, 4>(__data(__x))},
3886 {__private_init, __extract_part<3, 4>(__data(__x))}};
3887 // else fall through
3888 }
3889#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
3890 const __may_alias<_Tp>* const __element_ptr
3891 = reinterpret_cast<const __may_alias<_Tp>*>(&__x);
3892 return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>([&](
3893 auto __i) constexpr {
3894 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
3895 constexpr size_t __offset = _SL::_S_before(__i);
3896 constexpr size_t __base_align = alignof(simd<_Tp, _Ap>);
3897 constexpr size_t __a
3898 = __base_align - ((__offset * sizeof(_Tp)) % __base_align);
3899 constexpr size_t __b = ((__a - 1) & __a) ^ __a;
3900 constexpr size_t __alignment = __b == 0 ? __a : __b;
3901 return _Vi(__element_ptr + __offset, overaligned<__alignment>);
3902 });
3903#else
3904 return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>([&](
3905 auto __i) constexpr {
3906 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
3907 const auto& __xx = __data(__x);
3908 using _Offset = decltype(_SL::_S_before(__i));
3909 return _Vi([&](auto __j) constexpr {
3910 constexpr _SizeConstant<_Offset::value + __j> __k;
3911 return __xx[__k];
3912 });
3913 });
3914#endif
3915 }
3916
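// Example (illustrative): splitting an 8-element simd into pieces of sizes
// 2, 2 and 4 (the sizes must sum to the source size):
//   auto [__a, __b, __c] = split<2, 2, 4>(__x);
//   // __a: elements 0-1, __b: elements 2-3, __c: elements 4-7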
3917// }}}
3918
3919// __subscript_in_pack {{{
3920template <size_t _I, typename _Tp, typename _Ap, typename... _As>
3921 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
3922 __subscript_in_pack(const simd<_Tp, _Ap>& __x, const simd<_Tp, _As>&... __xs)
3923 {
3924 if constexpr (_I < simd_size_v<_Tp, _Ap>)
3925 return __x[_I];
3926 else
3927 return __subscript_in_pack<_I - simd_size_v<_Tp, _Ap>>(__xs...);
3928 }
3929
3930// }}}
3931// __store_pack_of_simd {{{
3932template <typename _Tp, typename _A0, typename... _As>
3933 _GLIBCXX_SIMD_INTRINSIC void
3934 __store_pack_of_simd(char* __mem, const simd<_Tp, _A0>& __x0,
3935 const simd<_Tp, _As>&... __xs)
3936 {
3937 constexpr size_t __n_bytes = sizeof(_Tp) * simd_size_v<_Tp, _A0>;
3938 __builtin_memcpy(__mem, &__data(__x0), __n_bytes);
3939 if constexpr (sizeof...(__xs) > 0)
3940 __store_pack_of_simd(__mem + __n_bytes, __xs...);
3941 }
3942
3943// }}}
3944// concat(simd...) {{{
3945template <typename _Tp, typename... _As>
3946 inline _GLIBCXX_SIMD_CONSTEXPR
3947 simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>
3948 concat(const simd<_Tp, _As>&... __xs)
3949 {
3950 using _Rp = __deduced_simd<_Tp, (simd_size_v<_Tp, _As> + ...)>;
3951 if constexpr (sizeof...(__xs) == 1)
3952 return simd_cast<_Rp>(__xs...);
3953 else if ((... && __xs._M_is_constprop()))
3954 return simd<_Tp,
3955 simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>([&](
3956 auto __i) constexpr { return __subscript_in_pack<__i>(__xs...); });
3957 else
3958 {
3959 _Rp __r{};
3960 __store_pack_of_simd(reinterpret_cast<char*>(&__data(__r)), __xs...);
3961 return __r;
3962 }
3963 }
3964
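// Example (illustrative): concat is the inverse of split; for 4-element
// simds __a and __b:
//   auto __ab = concat(__a, __b); // 8-element simd {a0, ..., a3, b0, ..., b3}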
3965// }}}
3966// concat(array<simd>) {{{
3967template <typename _Tp, typename _Abi, size_t _Np>
3968 _GLIBCXX_SIMD_ALWAYS_INLINE
3969 _GLIBCXX_SIMD_CONSTEXPR __deduced_simd<_Tp, simd_size_v<_Tp, _Abi> * _Np>
3970 concat(const array<simd<_Tp, _Abi>, _Np>& __x)
3971 {
3972 return __call_with_subscripts<_Np>(__x, [](const auto&... __xs) {
3973 return concat(__xs...);
3974 });
3975 }
3976
3977// }}}
3978
3979// _SmartReference {{{
3980template <typename _Up, typename _Accessor = _Up,
3981 typename _ValueType = typename _Up::value_type>
3982 class _SmartReference
3983 {
3984 friend _Accessor;
3985 int _M_index;
3986 _Up& _M_obj;
3987
3988 _GLIBCXX_SIMD_INTRINSIC constexpr _ValueType _M_read() const noexcept
3989 {
3990 if constexpr (is_arithmetic_v<_Up>)
3991 return _M_obj;
3992 else
3993 return _M_obj[_M_index];
3994 }
3995
3996 template <typename _Tp>
3997 _GLIBCXX_SIMD_INTRINSIC constexpr void _M_write(_Tp&& __x) const
3998 { _Accessor::_S_set(_M_obj, _M_index, static_cast<_Tp&&>(__x)); }
3999
4000 public:
4001 _GLIBCXX_SIMD_INTRINSIC constexpr
4002 _SmartReference(_Up& __o, int __i) noexcept
4003 : _M_index(__i), _M_obj(__o) {}
4004
4005 using value_type = _ValueType;
4006
4007 _GLIBCXX_SIMD_INTRINSIC _SmartReference(const _SmartReference&) = delete;
4008
4009 _GLIBCXX_SIMD_INTRINSIC constexpr operator value_type() const noexcept
4010 { return _M_read(); }
4011
4012 template <typename _Tp,
4013 typename
4014 = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, value_type>>
4015 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference operator=(_Tp&& __x) &&
4016 {
4017 _M_write(static_cast<_Tp&&>(__x));
4018 return {_M_obj, _M_index};
4019 }
4020
4021#define _GLIBCXX_SIMD_OP_(__op) \
4022 template <typename _Tp, \
4023 typename _TT \
4024 = decltype(declval<value_type>() __op declval<_Tp>()), \
4025 typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, _TT>, \
4026 typename = _ValuePreservingOrInt<_TT, value_type>> \
4027 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference \
4028 operator __op##=(_Tp&& __x) && \
4029 { \
4030 const value_type& __lhs = _M_read(); \
4031 _M_write(__lhs __op __x); \
4032 return {_M_obj, _M_index}; \
4033 }
4034 _GLIBCXX_SIMD_ALL_ARITHMETICS(_GLIBCXX_SIMD_OP_);
4035 _GLIBCXX_SIMD_ALL_SHIFTS(_GLIBCXX_SIMD_OP_);
4036 _GLIBCXX_SIMD_ALL_BINARY(_GLIBCXX_SIMD_OP_);
4037#undef _GLIBCXX_SIMD_OP_
4038
4039 template <typename _Tp = void,
4040 typename
4041 = decltype(++declval<conditional_t<true, value_type, _Tp>&>())>
4042 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference operator++() &&
4043 {
4044 value_type __x = _M_read();
4045 _M_write(++__x);
4046 return {_M_obj, _M_index};
4047 }
4048
4049 template <typename _Tp = void,
4050 typename
4051 = decltype(declval<conditional_t<true, value_type, _Tp>&>()++)>
4052 _GLIBCXX_SIMD_INTRINSIC constexpr value_type operator++(int) &&
4053 {
4054 const value_type __r = _M_read();
4055 value_type __x = __r;
4056 _M_write(++__x);
4057 return __r;
4058 }
4059
4060 template <typename _Tp = void,
4061 typename
4062 = decltype(--declval<conditional_t<true, value_type, _Tp>&>())>
4063 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference operator--() &&
4064 {
4065 value_type __x = _M_read();
4066 _M_write(--__x);
4067 return {_M_obj, _M_index};
4068 }
4069
4070 template <typename _Tp = void,
4071 typename
4072 = decltype(declval<conditional_t<true, value_type, _Tp>&>()--)>
4073 _GLIBCXX_SIMD_INTRINSIC constexpr value_type operator--(int) &&
4074 {
4075 const value_type __r = _M_read();
4076 value_type __x = __r;
4077 _M_write(--__x);
4078 return __r;
4079 }
4080
4081 _GLIBCXX_SIMD_INTRINSIC friend void
4082 swap(_SmartReference&& __a, _SmartReference&& __b) noexcept(
4083 conjunction<
4084 is_nothrow_constructible<value_type, _SmartReference&&>,
4085 is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4086 {
4087 value_type __tmp = static_cast<_SmartReference&&>(__a);
4088 static_cast<_SmartReference&&>(__a) = static_cast<value_type>(__b);
4089 static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4090 }
4091
4092 _GLIBCXX_SIMD_INTRINSIC friend void
4093 swap(value_type& __a, _SmartReference&& __b) noexcept(
4094 conjunction<
4095 is_nothrow_constructible<value_type, value_type&&>,
4096 is_nothrow_assignable<value_type&, value_type&&>,
4097 is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4098 {
4099 value_type __tmp(std::move(__a));
4100 __a = static_cast<value_type>(__b);
4101 static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4102 }
4103
4104 _GLIBCXX_SIMD_INTRINSIC friend void
4105 swap(_SmartReference&& __a, value_type& __b) noexcept(
4106 conjunction<
4107 is_nothrow_constructible<value_type, _SmartReference&&>,
4108 is_nothrow_assignable<value_type&, value_type&&>,
4109 is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4110 {
4111 value_type __tmp(__a);
4112 static_cast<_SmartReference&&>(__a) = std::move(__b);
4113 __b = std::move(__tmp);
4114 }
4115 };
4116
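// _SmartReference is the proxy type behind simd::operator[] and
// simd_mask::operator[]. Illustrative sketch:
//   simd<float> __v = 1.f;
//   __v[0] = 2.f;       // rvalue operator= writes through _M_write
//   __v[1] += 3.f;      // compound ops read, modify, and write one element
//   float __x = __v[2]; // reads via the conversion to value_type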
4117// }}}
4118// __scalar_abi_wrapper {{{
4119template <int _Bytes>
4120 struct __scalar_abi_wrapper
4121 {
4122 template <typename _Tp> static constexpr size_t _S_full_size = 1;
4123 template <typename _Tp> static constexpr size_t _S_size = 1;
4124     template <typename _Tp> static constexpr bool _S_is_partial = false;
4125
4126 template <typename _Tp, typename _Abi = simd_abi::scalar>
4127 static constexpr bool _S_is_valid_v
4128 = _Abi::template _IsValid<_Tp>::value && sizeof(_Tp) == _Bytes;
4129 };
4130
4131// }}}
4132// __decay_abi metafunction {{{
4133template <typename _Tp>
4134 struct __decay_abi { using type = _Tp; };
4135
4136template <int _Bytes>
4137 struct __decay_abi<__scalar_abi_wrapper<_Bytes>>
4138 { using type = simd_abi::scalar; };
4139
4140// }}}
4141// __find_next_valid_abi metafunction {{{1
4142// Given an ABI tag A<N>, find an N2 < N such that A<N2>::_S_is_valid_v<_Tp> ==
4143// true, N2 is a power-of-2, and A<N2>::_S_is_partial<_Tp> is false. Break
4144// recursion at 2 elements in the resulting ABI tag. In this case
4145// type::_S_is_valid_v<_Tp> may be false.
4146template <template <int> class _Abi, int _Bytes, typename _Tp>
4147 struct __find_next_valid_abi
4148 {
4149 static constexpr auto _S_choose()
4150 {
4151 constexpr int _NextBytes = std::__bit_ceil(_Bytes) / 2;
4152 using _NextAbi = _Abi<_NextBytes>;
4153 if constexpr (_NextBytes < sizeof(_Tp) * 2) // break recursion
4154 return _Abi<_Bytes>();
4155 else if constexpr (_NextAbi::template _S_is_partial<_Tp> == false
4156 && _NextAbi::template _S_is_valid_v<_Tp>)
4157 return _NextAbi();
4158 else
4159 return __find_next_valid_abi<_Abi, _NextBytes, _Tp>::_S_choose();
4160 }
4161
4162 using type = decltype(_S_choose());
4163 };
4164
4165template <int _Bytes, typename _Tp>
4166 struct __find_next_valid_abi<__scalar_abi_wrapper, _Bytes, _Tp>
4167 { using type = simd_abi::scalar; };
4168
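// Worked example (illustrative): for _Abi = simd_abi::_VecBuiltin,
// _Bytes = 24, _Tp = int, the first candidate is _VecBuiltin<16>
// (__bit_ceil(24) / 2); if that tag is valid and not partial for int it is
// the result, otherwise the search continues with 8 bytes before breaking
// at fewer than 2 elements.
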
4169// _AbiList {{{1
4170template <template <int> class...>
4171 struct _AbiList
4172 {
4173 template <typename, int> static constexpr bool _S_has_valid_abi = false;
4174 template <typename, int> using _FirstValidAbi = void;
4175 template <typename, int> using _BestAbi = void;
4176 };
4177
4178template <template <int> class _A0, template <int> class... _Rest>
4179 struct _AbiList<_A0, _Rest...>
4180 {
4181 template <typename _Tp, int _Np>
4182 static constexpr bool _S_has_valid_abi
4183 = _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<
4184 _Tp> || _AbiList<_Rest...>::template _S_has_valid_abi<_Tp, _Np>;
4185
4186 template <typename _Tp, int _Np>
4187 using _FirstValidAbi = conditional_t<
4188 _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>,
4189 typename __decay_abi<_A0<sizeof(_Tp) * _Np>>::type,
4190 typename _AbiList<_Rest...>::template _FirstValidAbi<_Tp, _Np>>;
4191
4192 template <typename _Tp, int _Np>
4193 static constexpr auto _S_determine_best_abi()
4194 {
4195 static_assert(_Np >= 1);
4196 constexpr int _Bytes = sizeof(_Tp) * _Np;
4197 if constexpr (_Np == 1)
4198 return __make_dependent_t<_Tp, simd_abi::scalar>{};
4199 else
4200 {
4201 constexpr int __fullsize = _A0<_Bytes>::template _S_full_size<_Tp>;
4202 // _A0<_Bytes> is good if:
4203 // 1. The ABI tag is valid for _Tp
4204 // 2. The storage overhead is no more than padding to fill the next
4205 // power-of-2 number of bytes
4206 if constexpr (_A0<_Bytes>::template _S_is_valid_v<
4207 _Tp> && __fullsize / 2 < _Np)
4208 return typename __decay_abi<_A0<_Bytes>>::type{};
4209 else
4210 {
4211 using _Bp =
4212 typename __find_next_valid_abi<_A0, _Bytes, _Tp>::type;
4213 if constexpr (_Bp::template _S_is_valid_v<
4214 _Tp> && _Bp::template _S_size<_Tp> <= _Np)
4215 return _Bp{};
4216 else
4217 return
4218 typename _AbiList<_Rest...>::template _BestAbi<_Tp, _Np>{};
4219 }
4220 }
4221 }
4222
4223 template <typename _Tp, int _Np>
4224 using _BestAbi = decltype(_S_determine_best_abi<_Tp, _Np>());
4225 };
4226
4227// }}}1
4228
4229 // The following lists all native ABIs, which makes them accessible to
4230 // simd_abi::deduce and select_best_vector_type_t (for fixed_size). Order
4231 // matters: whatever comes first has higher priority.
4232using _AllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin,
4233 __scalar_abi_wrapper>;
4234
4235// valid _SimdTraits specialization {{{1
4236template <typename _Tp, typename _Abi>
4237 struct _SimdTraits<_Tp, _Abi, void_t<typename _Abi::template _IsValid<_Tp>>>
4238 : _Abi::template __traits<_Tp> {};
4239
4240// __deduce_impl specializations {{{1
4241// try all native ABIs (including scalar) first
4242template <typename _Tp, size_t _Np>
4243 struct __deduce_impl<
4244 _Tp, _Np, enable_if_t<_AllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
4245 { using type = _AllNativeAbis::_FirstValidAbi<_Tp, _Np>; };
4246
4247// fall back to fixed_size only if scalar and native ABIs don't match
4248template <typename _Tp, size_t _Np, typename = void>
4249 struct __deduce_fixed_size_fallback {};
4250
4251template <typename _Tp, size_t _Np>
4252 struct __deduce_fixed_size_fallback<_Tp, _Np,
4253 enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>>
4254 { using type = simd_abi::fixed_size<_Np>; };
4255
4256template <typename _Tp, size_t _Np, typename>
4257 struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {};
4258
4259//}}}1
4260
4261// simd_mask {{{
4262template <typename _Tp, typename _Abi>
4263 class simd_mask : public _SimdTraits<_Tp, _Abi>::_MaskBase
4264 {
4265 // types, tags, and friends {{{
4266 using _Traits = _SimdTraits<_Tp, _Abi>;
4267 using _MemberType = typename _Traits::_MaskMember;
4268
4269 // We map all masks with equal element sizeof to a single integer type, the
4270 // one given by __int_for_sizeof_t<_Tp>. This is the approach
4271 // [[gnu::vector_size(N)]] types take as well and it reduces the number of
4272 // template specializations in the implementation classes.
4273 using _Ip = __int_for_sizeof_t<_Tp>;
4274 static constexpr _Ip* _S_type_tag = nullptr;
4275
4276 friend typename _Traits::_MaskBase;
4277 friend class simd<_Tp, _Abi>; // to construct masks on return
4278 friend typename _Traits::_SimdImpl; // to construct masks on return and
4279 // inspect data on masked operations
4280 public:
4281 using _Impl = typename _Traits::_MaskImpl;
4282 friend _Impl;
4283
4284 // }}}
4285 // member types {{{
4286 using value_type = bool;
4287 using reference = _SmartReference<_MemberType, _Impl, value_type>;
4288 using simd_type = simd<_Tp, _Abi>;
4289 using abi_type = _Abi;
4290
4291 // }}}
4292 static constexpr size_t size() // {{{
4293 { return __size_or_zero_v<_Tp, _Abi>; }
4294
4295 // }}}
4296 // constructors & assignment {{{
4297 simd_mask() = default;
4298 simd_mask(const simd_mask&) = default;
4299 simd_mask(simd_mask&&) = default;
4300 simd_mask& operator=(const simd_mask&) = default;
4301 simd_mask& operator=(simd_mask&&) = default;
4302
4303 // }}}
4304 // access to internal representation (optional feature) {{{
4305 _GLIBCXX_SIMD_ALWAYS_INLINE explicit
4306 simd_mask(typename _Traits::_MaskCastType __init)
4307 : _M_data{__init} {}
4308     // conversion to the internal type is done in _MaskBase
4309
4310 // }}}
4311 // bitset interface (extension to be proposed) {{{
4312 // TS_FEEDBACK:
4313 // Conversion of simd_mask to and from bitset makes it much easier to
4314 // interface with other facilities. I suggest adding `static
4315 // simd_mask::from_bitset` and `simd_mask::to_bitset`.
4316 _GLIBCXX_SIMD_ALWAYS_INLINE static simd_mask
4317     __from_bitset(bitset<size()> __bs)
4318     { return {__bitset_init, __bs}; }
4319
4320 _GLIBCXX_SIMD_ALWAYS_INLINE bitset<size()>
4321 __to_bitset() const
4322 { return _Impl::_S_to_bits(_M_data)._M_to_bitset(); }
4323
4324 // }}}
4325 // explicit broadcast constructor {{{
4326 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4327 simd_mask(value_type __x)
4328 : _M_data(_Impl::template _S_broadcast<_Ip>(__x)) {}
4329
4330 // }}}
4331 // implicit type conversion constructor {{{
4332 #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4333 // proposed improvement
4334 template <typename _Up, typename _A2,
4335 typename = enable_if_t<simd_size_v<_Up, _A2> == size()>>
4336 _GLIBCXX_SIMD_ALWAYS_INLINE explicit(sizeof(_MemberType)
4337 != sizeof(typename _SimdTraits<_Up, _A2>::_MaskMember))
4338 simd_mask(const simd_mask<_Up, _A2>& __x)
4339 : simd_mask(__proposed::static_simd_cast<simd_mask>(__x)) {}
4340 #else
4341 // conforming to ISO/IEC 19570:2018
4342 template <typename _Up, typename = enable_if_t<conjunction<
4343 is_same<abi_type, simd_abi::fixed_size<size()>>,
4344 is_same<_Up, _Up>>::value>>
4345 _GLIBCXX_SIMD_ALWAYS_INLINE
4346 simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>& __x)
4347 : _M_data(_Impl::_S_from_bitmask(__data(__x), _S_type_tag)) {}
4348 #endif
4349
4350 // }}}
4351 // load constructor {{{
4352 template <typename _Flags>
4353 _GLIBCXX_SIMD_ALWAYS_INLINE
4354 simd_mask(const value_type* __mem, _Flags)
4355 : _M_data(_Impl::template _S_load<_Ip>(
4356 _Flags::template _S_apply<simd_mask>(__mem))) {}
4357
4358 template <typename _Flags>
4359 _GLIBCXX_SIMD_ALWAYS_INLINE
4360 simd_mask(const value_type* __mem, simd_mask __k, _Flags)
4361 : _M_data{}
4362 {
4363 _M_data
4364 = _Impl::_S_masked_load(_M_data, __k._M_data,
4365 _Flags::template _S_apply<simd_mask>(__mem));
4366 }
4367
4368 // }}}
4369 // loads [simd_mask.load] {{{
4370 template <typename _Flags>
4371 _GLIBCXX_SIMD_ALWAYS_INLINE void
4372 copy_from(const value_type* __mem, _Flags)
4373 {
4374 _M_data = _Impl::template _S_load<_Ip>(
4375 _Flags::template _S_apply<simd_mask>(__mem));
4376 }
4377
4378 // }}}
4379 // stores [simd_mask.store] {{{
4380 template <typename _Flags>
4381 _GLIBCXX_SIMD_ALWAYS_INLINE void
4382 copy_to(value_type* __mem, _Flags) const
4383 { _Impl::_S_store(_M_data, _Flags::template _S_apply<simd_mask>(__mem)); }
4384
4385 // }}}
4386 // scalar access {{{
4387 _GLIBCXX_SIMD_ALWAYS_INLINE reference
4388 operator[](size_t __i)
4389 {
4390 if (__i >= size())
4391         __invoke_ub("Subscript %d is out of range [0, %d]", int(__i), int(size() - 1));
4392 return {_M_data, int(__i)};
4393 }
4394
4395 _GLIBCXX_SIMD_ALWAYS_INLINE value_type
4396 operator[](size_t __i) const
4397 {
4398 if (__i >= size())
4399         __invoke_ub("Subscript %d is out of range [0, %d]", int(__i), int(size() - 1));
4400 if constexpr (__is_scalar_abi<_Abi>())
4401 return _M_data;
4402 else
4403 return static_cast<bool>(_M_data[__i]);
4404 }
4405
4406 // }}}
4407 // negation {{{
4408 _GLIBCXX_SIMD_ALWAYS_INLINE simd_mask
4409 operator!() const
4410 { return {__private_init, _Impl::_S_bit_not(_M_data)}; }
4411
4412 // }}}
4413 // simd_mask binary operators [simd_mask.binary] {{{
4414 #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4415 // simd_mask<int> && simd_mask<uint> needs disambiguation
4416 template <typename _Up, typename _A2,
4417 typename
4418 = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4419 _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4420 operator&&(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4421 {
4422 return {__private_init,
4423 _Impl::_S_logical_and(__x._M_data, simd_mask(__y)._M_data)};
4424 }
4425
4426 template <typename _Up, typename _A2,
4427 typename
4428 = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4429 _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4430 operator||(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4431 {
4432 return {__private_init,
4433 _Impl::_S_logical_or(__x._M_data, simd_mask(__y)._M_data)};
4434 }
4435 #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4436
4437 _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4438 operator&&(const simd_mask& __x, const simd_mask& __y)
4439 {
4440 return {__private_init, _Impl::_S_logical_and(__x._M_data, __y._M_data)};
4441 }
4442
4443 _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4444 operator||(const simd_mask& __x, const simd_mask& __y)
4445 {
4446 return {__private_init, _Impl::_S_logical_or(__x._M_data, __y._M_data)};
4447 }
4448
4449 _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4450 operator&(const simd_mask& __x, const simd_mask& __y)
4451 { return {__private_init, _Impl::_S_bit_and(__x._M_data, __y._M_data)}; }
4452
4453 _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4454 operator|(const simd_mask& __x, const simd_mask& __y)
4455 { return {__private_init, _Impl::_S_bit_or(__x._M_data, __y._M_data)}; }
4456
4457 _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4458 operator^(const simd_mask& __x, const simd_mask& __y)
4459 { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4460
4461 _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask&
4462 operator&=(simd_mask& __x, const simd_mask& __y)
4463 {
4464 __x._M_data = _Impl::_S_bit_and(__x._M_data, __y._M_data);
4465 return __x;
4466 }
4467
4468 _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask&
4469 operator|=(simd_mask& __x, const simd_mask& __y)
4470 {
4471 __x._M_data = _Impl::_S_bit_or(__x._M_data, __y._M_data);
4472 return __x;
4473 }
4474
4475 _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask&
4476 operator^=(simd_mask& __x, const simd_mask& __y)
4477 {
4478 __x._M_data = _Impl::_S_bit_xor(__x._M_data, __y._M_data);
4479 return __x;
4480 }
4481
4482 // }}}
4483 // simd_mask compares [simd_mask.comparison] {{{
4484 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4485 operator==(const simd_mask& __x, const simd_mask& __y)
4486 { return !operator!=(__x, __y); }
4487
4488 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4489 operator!=(const simd_mask& __x, const simd_mask& __y)
4490 { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4491
4492 // }}}
4493 // private_init ctor {{{
4494 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
4495 simd_mask(_PrivateInit, typename _Traits::_MaskMember __init)
4496 : _M_data(__init) {}
4497
4498 // }}}
4499 // private_init generator ctor {{{
4500 template <typename _Fp, typename = decltype(bool(declval<_Fp>()(size_t())))>
4501 _GLIBCXX_SIMD_INTRINSIC constexpr
4502 simd_mask(_PrivateInit, _Fp&& __gen)
4503 : _M_data()
4504 {
4505 __execute_n_times<size()>([&](auto __i) constexpr {
4506 _Impl::_S_set(_M_data, __i, __gen(__i));
4507 });
4508 }
4509
4510 // }}}
4511 // bitset_init ctor {{{
4512 _GLIBCXX_SIMD_INTRINSIC simd_mask(_BitsetInit, bitset<size()> __init)
4513 : _M_data(
4514 _Impl::_S_from_bitmask(_SanitizedBitMask<size()>(__init), _S_type_tag))
4515 {}
4516
4517 // }}}
4518 // __cvt {{{
4519 // TS_FEEDBACK:
4520 // The conversion operator this implements should be a ctor on simd_mask.
4521     // Calling .__cvt() on a simd_mask yields a proxy that converts to any equal-size simd_mask.
4522 // A useful variation: add `explicit(sizeof(_Tp) != sizeof(_Up))`
4523 struct _CvtProxy
4524 {
4525 template <typename _Up, typename _A2,
4526 typename
4527 = enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>>
4528 operator simd_mask<_Up, _A2>() &&
4529 {
4530 using namespace std::experimental::__proposed;
4531 return static_simd_cast<simd_mask<_Up, _A2>>(_M_data);
4532 }
4533
4534 const simd_mask<_Tp, _Abi>& _M_data;
4535 };
4536
4537 _GLIBCXX_SIMD_INTRINSIC _CvtProxy
4538 __cvt() const
4539 { return {*this}; }
4540
4541 // }}}
4542 // operator?: overloads (suggested extension) {{{
4543 #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
4544 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4545 operator?:(const simd_mask& __k, const simd_mask& __where_true,
4546 const simd_mask& __where_false)
4547 {
4548 auto __ret = __where_false;
4549 _Impl::_S_masked_assign(__k._M_data, __ret._M_data, __where_true._M_data);
4550 return __ret;
4551 }
4552
4553 template <typename _U1, typename _U2,
4554 typename _Rp = simd<common_type_t<_U1, _U2>, _Abi>,
4555 typename = enable_if_t<conjunction_v<
4556 is_convertible<_U1, _Rp>, is_convertible<_U2, _Rp>,
4557 is_convertible<simd_mask, typename _Rp::mask_type>>>>
4558 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _Rp
4559 operator?:(const simd_mask& __k, const _U1& __where_true,
4560 const _U2& __where_false)
4561 {
4562 _Rp __ret = __where_false;
4563 _Rp::_Impl::_S_masked_assign(
4564 __data(static_cast<typename _Rp::mask_type>(__k)), __data(__ret),
4565 __data(static_cast<_Rp>(__where_true)));
4566 return __ret;
4567 }
4568
4569 #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4570 template <typename _Kp, typename _Ak, typename _Up, typename _Au,
4571 typename = enable_if_t<
4572 conjunction_v<is_convertible<simd_mask<_Kp, _Ak>, simd_mask>,
4573 is_convertible<simd_mask<_Up, _Au>, simd_mask>>>>
4574 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4575 operator?:(const simd_mask<_Kp, _Ak>& __k, const simd_mask& __where_true,
4576 const simd_mask<_Up, _Au>& __where_false)
4577 {
4578 simd_mask __ret = __where_false;
4579 _Impl::_S_masked_assign(simd_mask(__k)._M_data, __ret._M_data,
4580 __where_true._M_data);
4581 return __ret;
4582 }
4583 #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4584 #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
4585
4586 // }}}
4587 // _M_is_constprop {{{
4588 _GLIBCXX_SIMD_INTRINSIC constexpr bool
4589 _M_is_constprop() const
4590 {
4591 if constexpr (__is_scalar_abi<_Abi>())
4592 return __builtin_constant_p(_M_data);
4593 else
4594 return _M_data._M_is_constprop();
4595 }
4596
4597 // }}}
4598
4599 private:
4600 friend const auto& __data<_Tp, abi_type>(const simd_mask&);
4601 friend auto& __data<_Tp, abi_type>(simd_mask&);
4602 alignas(_Traits::_S_mask_align) _MemberType _M_data;
4603 };
4604
4605// }}}
4606
4607// __data(simd_mask) {{{
4608template <typename _Tp, typename _Ap>
4609 _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
4610 __data(const simd_mask<_Tp, _Ap>& __x)
4611 { return __x._M_data; }
4612
4613template <typename _Tp, typename _Ap>
4614 _GLIBCXX_SIMD_INTRINSIC constexpr auto&
4615 __data(simd_mask<_Tp, _Ap>& __x)
4616 { return __x._M_data; }
4617
4618// }}}
4619
4620// simd_mask reductions [simd_mask.reductions] {{{
4621template <typename _Tp, typename _Abi>
4622 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4623 all_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4624 {
4625 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4626 {
4627 for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4628 if (!__k[__i])
4629 return false;
4630 return true;
4631 }
4632 else
4633 return _Abi::_MaskImpl::_S_all_of(__k);
4634 }
4635
4636template <typename _Tp, typename _Abi>
4637 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4638 any_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4639 {
4640 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4641 {
4642 for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4643 if (__k[__i])
4644 return true;
4645 return false;
4646 }
4647 else
4648 return _Abi::_MaskImpl::_S_any_of(__k);
4649 }
4650
4651template <typename _Tp, typename _Abi>
4652 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4653 none_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4654 {
4655 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4656 {
4657 for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4658 if (__k[__i])
4659 return false;
4660 return true;
4661 }
4662 else
4663 return _Abi::_MaskImpl::_S_none_of(__k);
4664 }
4665
4666template <typename _Tp, typename _Abi>
4667 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4668 some_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4669 {
4670 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4671 {
4672 for (size_t __i = 1; __i < simd_size_v<_Tp, _Abi>; ++__i)
4673 if (__k[__i] != __k[__i - 1])
4674 return true;
4675 return false;
4676 }
4677 else
4678 return _Abi::_MaskImpl::_S_some_of(__k);
4679 }
4680
4681template <typename _Tp, typename _Abi>
4682 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4683 popcount(const simd_mask<_Tp, _Abi>& __k) noexcept
4684 {
4685 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4686 {
4687 const int __r = __call_with_subscripts<simd_size_v<_Tp, _Abi>>(
4688 __k, [](auto... __elements) { return ((__elements != 0) + ...); });
4689 if (__builtin_is_constant_evaluated() || __builtin_constant_p(__r))
4690 return __r;
4691 }
4692 return _Abi::_MaskImpl::_S_popcount(__k);
4693 }
4694
4695template <typename _Tp, typename _Abi>
4696 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4697 find_first_set(const simd_mask<_Tp, _Abi>& __k)
4698 {
4699 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4700 {
4701 constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
4702 const size_t _Idx = __call_with_n_evaluations<_Np>(
4703 [](auto... __indexes) { return std::min({__indexes...}); },
4704 [&](auto __i) { return __k[__i] ? +__i : _Np; });
4705 if (_Idx >= _Np)
4706 __invoke_ub("find_first_set(empty mask) is UB");
4707 if (__builtin_constant_p(_Idx))
4708 return _Idx;
4709 }
4710 return _Abi::_MaskImpl::_S_find_first_set(__k);
4711 }
4712
4713template <typename _Tp, typename _Abi>
4714 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4715 find_last_set(const simd_mask<_Tp, _Abi>& __k)
4716 {
4717 if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4718 {
4719 constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
4720 const int _Idx = __call_with_n_evaluations<_Np>(
4721 [](auto... __indexes) { return std::max({__indexes...}); },
4722 [&](auto __i) { return __k[__i] ? int(__i) : -1; });
4723 if (_Idx < 0)
4724 __invoke_ub("find_first_set(empty mask) is UB");
4725 if (__builtin_constant_p(_Idx))
4726 return _Idx;
4727 }
4728 return _Abi::_MaskImpl::_S_find_last_set(__k);
4729 }
4730
4731_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4732all_of(_ExactBool __x) noexcept
4733{ return __x; }
4734
4735_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4736any_of(_ExactBool __x) noexcept
4737{ return __x; }
4738
4739_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4740none_of(_ExactBool __x) noexcept
4741{ return !__x; }
4742
4743_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4744some_of(_ExactBool) noexcept
4745{ return false; }
4746
4747_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4748popcount(_ExactBool __x) noexcept
4749{ return __x; }
4750
4751_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4752find_first_set(_ExactBool)
4753{ return 0; }
4754
4755_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4756find_last_set(_ExactBool)
4757{ return 0; }
4758
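// Example (illustrative, 4-element mask): with __v == {0, 1, 2, 3} and
// __k == (__v > 1), i.e. {false, false, true, true}:
//   all_of(__k) == false, any_of(__k) == true, some_of(__k) == true,
//   popcount(__k) == 2, find_first_set(__k) == 2, find_last_set(__k) == 3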
4759// }}}
4760
4761// _SimdIntOperators{{{1
4762template <typename _V, typename _Impl, bool>
4763 class _SimdIntOperators {};
4764
4765template <typename _V, typename _Impl>
4766 class _SimdIntOperators<_V, _Impl, true>
4767 {
4768 _GLIBCXX_SIMD_INTRINSIC const _V& __derived() const
4769 { return *static_cast<const _V*>(this); }
4770
4771 template <typename _Tp>
4772 _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V
4773 _S_make_derived(_Tp&& __d)
4774 { return {__private_init, static_cast<_Tp&&>(__d)}; }
4775
4776 public:
4777 _GLIBCXX_SIMD_CONSTEXPR friend _V& operator%=(_V& __lhs, const _V& __x)
4778 { return __lhs = __lhs % __x; }
4779
4780 _GLIBCXX_SIMD_CONSTEXPR friend _V& operator&=(_V& __lhs, const _V& __x)
4781 { return __lhs = __lhs & __x; }
4782
4783 _GLIBCXX_SIMD_CONSTEXPR friend _V& operator|=(_V& __lhs, const _V& __x)
4784 { return __lhs = __lhs | __x; }
4785
4786 _GLIBCXX_SIMD_CONSTEXPR friend _V& operator^=(_V& __lhs, const _V& __x)
4787 { return __lhs = __lhs ^ __x; }
4788
4789 _GLIBCXX_SIMD_CONSTEXPR friend _V& operator<<=(_V& __lhs, const _V& __x)
4790 { return __lhs = __lhs << __x; }
4791
4792 _GLIBCXX_SIMD_CONSTEXPR friend _V& operator>>=(_V& __lhs, const _V& __x)
4793 { return __lhs = __lhs >> __x; }
4794
4795 _GLIBCXX_SIMD_CONSTEXPR friend _V& operator<<=(_V& __lhs, int __x)
4796 { return __lhs = __lhs << __x; }
4797
4798 _GLIBCXX_SIMD_CONSTEXPR friend _V& operator>>=(_V& __lhs, int __x)
4799 { return __lhs = __lhs >> __x; }
4800
4801 _GLIBCXX_SIMD_CONSTEXPR friend _V operator%(const _V& __x, const _V& __y)
4802 {
4803 return _SimdIntOperators::_S_make_derived(
4804 _Impl::_S_modulus(__data(__x), __data(__y)));
4805 }
4806
4807 _GLIBCXX_SIMD_CONSTEXPR friend _V operator&(const _V& __x, const _V& __y)
4808 {
4809 return _SimdIntOperators::_S_make_derived(
4810 _Impl::_S_bit_and(__data(__x), __data(__y)));
4811 }
4812
4813 _GLIBCXX_SIMD_CONSTEXPR friend _V operator|(const _V& __x, const _V& __y)
4814 {
4815 return _SimdIntOperators::_S_make_derived(
4816 _Impl::_S_bit_or(__data(__x), __data(__y)));
4817 }
4818
4819 _GLIBCXX_SIMD_CONSTEXPR friend _V operator^(const _V& __x, const _V& __y)
4820 {
4821 return _SimdIntOperators::_S_make_derived(
4822 _Impl::_S_bit_xor(__data(__x), __data(__y)));
4823 }
4824
4825 _GLIBCXX_SIMD_CONSTEXPR friend _V operator<<(const _V& __x, const _V& __y)
4826 {
4827 return _SimdIntOperators::_S_make_derived(
4828 _Impl::_S_bit_shift_left(__data(__x), __data(__y)));
4829 }
4830
4831 _GLIBCXX_SIMD_CONSTEXPR friend _V operator>>(const _V& __x, const _V& __y)
4832 {
4833 return _SimdIntOperators::_S_make_derived(
4834 _Impl::_S_bit_shift_right(__data(__x), __data(__y)));
4835 }
4836
4837 template <typename _VV = _V>
4838 _GLIBCXX_SIMD_CONSTEXPR friend _V operator<<(const _V& __x, int __y)
4839 {
4840 using _Tp = typename _VV::value_type;
4841 if (__y < 0)
4842 __invoke_ub("The behavior is undefined if the right operand of a "
4843 "shift operation is negative. [expr.shift]\nA shift by "
4844 "%d was requested",
4845 __y);
4846 if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
4847 __invoke_ub(
4848 "The behavior is undefined if the right operand of a "
4849 "shift operation is greater than or equal to the width of the "
4850 "promoted left operand. [expr.shift]\nA shift by %d was requested",
4851 __y);
4852 return _SimdIntOperators::_S_make_derived(
4853 _Impl::_S_bit_shift_left(__data(__x), __y));
4854 }
4855
4856 template <typename _VV = _V>
4857 _GLIBCXX_SIMD_CONSTEXPR friend _V operator>>(const _V& __x, int __y)
4858 {
4859 using _Tp = typename _VV::value_type;
4860 if (__y < 0)
4861 __invoke_ub(
4862 "The behavior is undefined if the right operand of a shift "
4863 "operation is negative. [expr.shift]\nA shift by %d was requested",
4864 __y);
4865 if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
4866 __invoke_ub(
4867 "The behavior is undefined if the right operand of a shift "
4868 "operation is greater than or equal to the width of the promoted "
4869 "left operand. [expr.shift]\nA shift by %d was requested",
4870 __y);
4871 return _SimdIntOperators::_S_make_derived(
4872 _Impl::_S_bit_shift_right(__data(__x), __y));
4873 }
4874
4875 // unary operators (for integral _Tp)
4876 _GLIBCXX_SIMD_CONSTEXPR _V operator~() const
4877 { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; }
4878 };
4879
4880//}}}1
4881
4882// simd {{{
4883template <typename _Tp, typename _Abi>
4884 class simd : public _SimdIntOperators<
4885 simd<_Tp, _Abi>, typename _SimdTraits<_Tp, _Abi>::_SimdImpl,
4886 conjunction<is_integral<_Tp>,
4887 typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>,
4888 public _SimdTraits<_Tp, _Abi>::_SimdBase
4889 {
4890 using _Traits = _SimdTraits<_Tp, _Abi>;
4891 using _MemberType = typename _Traits::_SimdMember;
4892 using _CastType = typename _Traits::_SimdCastType;
4893 static constexpr _Tp* _S_type_tag = nullptr;
4894 friend typename _Traits::_SimdBase;
4895
4896 public:
4897 using _Impl = typename _Traits::_SimdImpl;
4898 friend _Impl;
4899 friend _SimdIntOperators<simd, _Impl, true>;
4900
4901 using value_type = _Tp;
4902 using reference = _SmartReference<_MemberType, _Impl, value_type>;
4903 using mask_type = simd_mask<_Tp, _Abi>;
4904 using abi_type = _Abi;
4905
4906 static constexpr size_t size()
4907 { return __size_or_zero_v<_Tp, _Abi>; }
4908
4909 _GLIBCXX_SIMD_CONSTEXPR simd() = default;
4910 _GLIBCXX_SIMD_CONSTEXPR simd(const simd&) = default;
4911 _GLIBCXX_SIMD_CONSTEXPR simd(simd&&) noexcept = default;
4912 _GLIBCXX_SIMD_CONSTEXPR simd& operator=(const simd&) = default;
4913 _GLIBCXX_SIMD_CONSTEXPR simd& operator=(simd&&) noexcept = default;
4914
4915 // implicit broadcast constructor
4916 template <typename _Up,
4917 typename = enable_if_t<!is_same_v<__remove_cvref_t<_Up>, bool>>>
4918 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4919 simd(_ValuePreservingOrInt<_Up, value_type>&& __x)
4920 : _M_data(
4921 _Impl::_S_broadcast(static_cast<value_type>(static_cast<_Up&&>(__x))))
4922 {}
4923
4924 // implicit type conversion constructor (convert from fixed_size to
4925 // fixed_size)
4926 template <typename _Up>
4927 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4928 simd(const simd<_Up, simd_abi::fixed_size<size()>>& __x,
4929 enable_if_t<
4930 conjunction<
4931 is_same<simd_abi::fixed_size<size()>, abi_type>,
4932 negation<__is_narrowing_conversion<_Up, value_type>>,
4933 __converts_to_higher_integer_rank<_Up, value_type>>::value,
4934 void*> = nullptr)
4935 : simd{static_cast<array<_Up, size()>>(__x).data(), vector_aligned} {}
4936
4937 // explicit type conversion constructor
4938#ifdef _GLIBCXX_SIMD_ENABLE_STATIC_CAST
4939 template <typename _Up, typename _A2,
4940 typename = decltype(static_simd_cast<simd>(
4941 declval<const simd<_Up, _A2>&>()))>
4942 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4943 simd(const simd<_Up, _A2>& __x)
4944 : simd(static_simd_cast<simd>(__x)) {}
4945#endif // _GLIBCXX_SIMD_ENABLE_STATIC_CAST
4946
4947 // generator constructor
4948 template <typename _Fp>
4949 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4950 simd(_Fp&& __gen, _ValuePreservingOrInt<decltype(declval<_Fp>()(
4951 declval<_SizeConstant<0>&>())),
4952 value_type>* = nullptr)
4953 : _M_data(_Impl::_S_generator(static_cast<_Fp&&>(__gen), _S_type_tag)) {}
4954
4955 // load constructor
4956 template <typename _Up, typename _Flags>
4957 _GLIBCXX_SIMD_ALWAYS_INLINE
4958 simd(const _Up* __mem, _Flags)
4959 : _M_data(
4960 _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag))
4961 {}
4962
4963 // loads [simd.load]
4964 template <typename _Up, typename _Flags>
4965 _GLIBCXX_SIMD_ALWAYS_INLINE void
4966 copy_from(const _Vectorizable<_Up>* __mem, _Flags)
4967 {
4968 _M_data = static_cast<decltype(_M_data)>(
4969 _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag));
4970 }
4971
4972 // stores [simd.store]
4973 template <typename _Up, typename _Flags>
4974 _GLIBCXX_SIMD_ALWAYS_INLINE void
4975 copy_to(_Vectorizable<_Up>* __mem, _Flags) const
4976 {
4977 _Impl::_S_store(_M_data, _Flags::template _S_apply<simd>(__mem),
4978 _S_type_tag);
4979 }
4980
4981 // scalar access
4982 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
4983 operator[](size_t __i)
4984 { return {_M_data, int(__i)}; }
4985
4986 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
4987 operator[]([[maybe_unused]] size_t __i) const
4988 {
4989 if constexpr (__is_scalar_abi<_Abi>())
4990 {
4991 _GLIBCXX_DEBUG_ASSERT(__i == 0);
4992 return _M_data;
4993 }
4994 else
4995 return _M_data[__i];
4996 }
4997
4998 // increment and decrement:
4999 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5000 operator++()
5001 {
5002 _Impl::_S_increment(_M_data);
5003 return *this;
5004 }
5005
5006 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5007 operator++(int)
5008 {
5009 simd __r = *this;
5010 _Impl::_S_increment(_M_data);
5011 return __r;
5012 }
5013
5014 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5015 operator--()
5016 {
5017 _Impl::_S_decrement(_M_data);
5018 return *this;
5019 }
5020
5021 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5022 operator--(int)
5023 {
5024 simd __r = *this;
5025 _Impl::_S_decrement(_M_data);
5026 return __r;
5027 }
5028
5029 // unary operators (for any _Tp)
5030 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR mask_type
5031 operator!() const
5032 { return {__private_init, _Impl::_S_negate(_M_data)}; }
5033
5034 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5035 operator+() const
5036 { return *this; }
5037
5038 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5039 operator-() const
5040 { return {__private_init, _Impl::_S_unary_minus(_M_data)}; }
5041
5042 // access to internal representation (suggested extension)
5043 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5044 simd(_CastType __init) : _M_data(__init) {}
5045
5046 // compound assignment [simd.cassign]
5047 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5048 operator+=(simd& __lhs, const simd& __x)
5049 { return __lhs = __lhs + __x; }
5050
5051 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5052 operator-=(simd& __lhs, const simd& __x)
5053 { return __lhs = __lhs - __x; }
5054
5055 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5056 operator*=(simd& __lhs, const simd& __x)
5057 { return __lhs = __lhs * __x; }
5058
5059 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5060 operator/=(simd& __lhs, const simd& __x)
5061 { return __lhs = __lhs / __x; }
5062
5063 // binary operators [simd.binary]
5064 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5065 operator+(const simd& __x, const simd& __y)
5066 { return {__private_init, _Impl::_S_plus(__x._M_data, __y._M_data)}; }
5067
5068 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5069 operator-(const simd& __x, const simd& __y)
5070 { return {__private_init, _Impl::_S_minus(__x._M_data, __y._M_data)}; }
5071
5072 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5073 operator*(const simd& __x, const simd& __y)
5074 { return {__private_init, _Impl::_S_multiplies(__x._M_data, __y._M_data)}; }
5075
5076 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5077 operator/(const simd& __x, const simd& __y)
5078 { return {__private_init, _Impl::_S_divides(__x._M_data, __y._M_data)}; }
5079
5080 // compares [simd.comparison]
5081 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5082 operator==(const simd& __x, const simd& __y)
5083 { return simd::_S_make_mask(_Impl::_S_equal_to(__x._M_data, __y._M_data)); }
5084
5085 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5086 operator!=(const simd& __x, const simd& __y)
5087 {
5088 return simd::_S_make_mask(
5089 _Impl::_S_not_equal_to(__x._M_data, __y._M_data));
5090 }
5091
5092 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5093 operator<(const simd& __x, const simd& __y)
5094 { return simd::_S_make_mask(_Impl::_S_less(__x._M_data, __y._M_data)); }
5095
5096 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5097 operator<=(const simd& __x, const simd& __y)
5098 {
5099 return simd::_S_make_mask(_Impl::_S_less_equal(__x._M_data, __y._M_data));
5100 }
5101
5102 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5103 operator>(const simd& __x, const simd& __y)
5104 { return simd::_S_make_mask(_Impl::_S_less(__y._M_data, __x._M_data)); }
5105
5106 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5107 operator>=(const simd& __x, const simd& __y)
5108 {
5109 return simd::_S_make_mask(_Impl::_S_less_equal(__y._M_data, __x._M_data));
5110 }
5111
5112 // operator?: overloads (suggested extension) {{{
5113#ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
5114 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5115 operator?:(const mask_type& __k, const simd& __where_true,
5116 const simd& __where_false)
5117 {
5118 auto __ret = __where_false;
5119 _Impl::_S_masked_assign(__data(__k), __data(__ret), __data(__where_true));
5120 return __ret;
5121 }
5122
5123#endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
5124 // }}}
5125
5126 // "private" because of the first arguments's namespace
5127 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
5128 simd(_PrivateInit, const _MemberType& __init)
5129 : _M_data(__init) {}
5130
5131 // "private" because of the first arguments's namespace
5132 _GLIBCXX_SIMD_INTRINSIC
5133 simd(_BitsetInit, bitset<size()> __init) : _M_data()
5134 { where(mask_type(__bitset_init, __init), *this) = ~*this; }
5135
5136 _GLIBCXX_SIMD_INTRINSIC constexpr bool
5137 _M_is_constprop() const
5138 {
5139 if constexpr (__is_scalar_abi<_Abi>())
5140 return __builtin_constant_p(_M_data);
5141 else
5142 return _M_data._M_is_constprop();
5143 }
5144
5145 private:
5146 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR static mask_type
5147 _S_make_mask(typename mask_type::_MemberType __k)
5148 { return {__private_init, __k}; }
5149
5150 friend const auto& __data<value_type, abi_type>(const simd&);
5151 friend auto& __data<value_type, abi_type>(simd&);
5152 alignas(_Traits::_S_simd_align) _MemberType _M_data;
5153 };
5154
5155// }}}
5156// __data {{{
5157template <typename _Tp, typename _Ap>
5158 _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
5159 __data(const simd<_Tp, _Ap>& __x)
5160 { return __x._M_data; }
5161
5162template <typename _Tp, typename _Ap>
5163 _GLIBCXX_SIMD_INTRINSIC constexpr auto&
5164 __data(simd<_Tp, _Ap>& __x)
5165 { return __x._M_data; }
5166
5167// }}}
5168namespace __float_bitwise_operators { //{{{
5169template <typename _Tp, typename _Ap>
5170 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5171 operator^(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5172 {
5173 return {__private_init,
5174 _Ap::_SimdImpl::_S_bit_xor(__data(__a), __data(__b))};
5175 }
5176
5177template <typename _Tp, typename _Ap>
5178 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5179 operator|(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5180 {
5181 return {__private_init,
5182 _Ap::_SimdImpl::_S_bit_or(__data(__a), __data(__b))};
5183 }
5184
5185template <typename _Tp, typename _Ap>
5186 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5187 operator&(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5188 {
5189 return {__private_init,
5190 _Ap::_SimdImpl::_S_bit_and(__data(__a), __data(__b))};
5191 }
5192} // namespace __float_bitwise_operators }}}
5193
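// These overloads permit bit manipulation of floating-point simd objects,
// e.g. forcing the sign bit on (illustrative, inside a scope with
// using namespace __float_bitwise_operators):
//   simd<float> __negative = __x | simd<float>(-0.f); // sets the sign bit
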
5194_GLIBCXX_SIMD_END_NAMESPACE
5195
5196#endif // __cplusplus >= 201703L
5197#endif // _GLIBCXX_EXPERIMENTAL_SIMD_H
5198
5199// vim: foldmethod=marker foldmarker={{{,}}}