[1166] | 1 | // Simd PowerPC specific implementations -*- C++ -*-
|
---|
| 2 |
|
---|
| 3 | // Copyright (C) 2020-2021 Free Software Foundation, Inc.
|
---|
| 4 | //
|
---|
| 5 | // This file is part of the GNU ISO C++ Library. This library is free
|
---|
| 6 | // software; you can redistribute it and/or modify it under the
|
---|
| 7 | // terms of the GNU General Public License as published by the
|
---|
| 8 | // Free Software Foundation; either version 3, or (at your option)
|
---|
| 9 | // any later version.
|
---|
| 10 |
|
---|
| 11 | // This library is distributed in the hope that it will be useful,
|
---|
| 12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 14 | // GNU General Public License for more details.
|
---|
| 15 |
|
---|
| 16 | // Under Section 7 of GPL version 3, you are granted additional
|
---|
| 17 | // permissions described in the GCC Runtime Library Exception, version
|
---|
| 18 | // 3.1, as published by the Free Software Foundation.
|
---|
| 19 |
|
---|
| 20 | // You should have received a copy of the GNU General Public License and
|
---|
| 21 | // a copy of the GCC Runtime Library Exception along with this program;
|
---|
| 22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
---|
| 23 | // <http://www.gnu.org/licenses/>.
|
---|
| 24 |
|
---|
| 25 | #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_PPC_H_
|
---|
| 26 | #define _GLIBCXX_EXPERIMENTAL_SIMD_PPC_H_
|
---|
| 27 |
|
---|
| 28 | #if __cplusplus >= 201703L
|
---|
| 29 |
|
---|
| 30 | #ifndef __ALTIVEC__
|
---|
| 31 | #error "simd_ppc.h may only be included when AltiVec/VMX is available"
|
---|
| 32 | #endif
|
---|
| 33 | #include <altivec.h>
|
---|
| 34 |
|
---|
| 35 | _GLIBCXX_SIMD_BEGIN_NAMESPACE
|
---|
| 36 |
|
---|
| 37 | // _SimdImplPpc {{{
|
---|
| 38 | template <typename _Abi>
|
---|
| 39 | struct _SimdImplPpc : _SimdImplBuiltin<_Abi>
|
---|
| 40 | {
|
---|
| 41 | using _Base = _SimdImplBuiltin<_Abi>;
|
---|
| 42 |
|
---|
| 43 | // Byte and halfword shift instructions on PPC only consider the low 3 or 4
|
---|
| 44 | // bits of the RHS. Consequently, shifting by sizeof(_Tp)*CHAR_BIT (or more)
|
---|
| 45 | // is UB without extra measures. To match scalar behavior, byte and halfword
|
---|
| 46 | // shifts need an extra fixup step.
|
---|
| 47 |
|
---|
| 48 | // _S_bit_shift_left {{{
|
---|
| 49 | template <typename _Tp, size_t _Np>
|
---|
| 50 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
|
---|
| 51 | _S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
|
---|
| 52 | {
|
---|
| 53 | __x = _Base::_S_bit_shift_left(__x, __y);
|
---|
| 54 | if constexpr (sizeof(_Tp) < sizeof(int))
|
---|
| 55 | __x._M_data
|
---|
| 56 | = (__y._M_data < sizeof(_Tp) * __CHAR_BIT__) & __x._M_data;
|
---|
| 57 | return __x;
|
---|
| 58 | }
|
---|
| 59 |
|
---|
| 60 | template <typename _Tp, size_t _Np>
|
---|
| 61 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
|
---|
| 62 | _S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, int __y)
|
---|
| 63 | {
|
---|
| 64 | __x = _Base::_S_bit_shift_left(__x, __y);
|
---|
| 65 | if constexpr (sizeof(_Tp) < sizeof(int))
|
---|
| 66 | {
|
---|
| 67 | if (__y >= sizeof(_Tp) * __CHAR_BIT__)
|
---|
| 68 | return {};
|
---|
| 69 | }
|
---|
| 70 | return __x;
|
---|
| 71 | }
|
---|
| 72 |
|
---|
| 73 | // }}}
|
---|
| 74 | // _S_bit_shift_right {{{
|
---|
| 75 | template <typename _Tp, size_t _Np>
|
---|
| 76 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
|
---|
| 77 | _S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
|
---|
| 78 | {
|
---|
| 79 | if constexpr (sizeof(_Tp) < sizeof(int))
|
---|
| 80 | {
|
---|
| 81 | constexpr int __nbits = sizeof(_Tp) * __CHAR_BIT__;
|
---|
| 82 | if constexpr (is_unsigned_v<_Tp>)
|
---|
| 83 | return (__y._M_data < __nbits)
|
---|
| 84 | & _Base::_S_bit_shift_right(__x, __y)._M_data;
|
---|
| 85 | else
|
---|
| 86 | {
|
---|
| 87 | _Base::_S_masked_assign(_SimdWrapper<_Tp, _Np>(__y._M_data
|
---|
| 88 | >= __nbits),
|
---|
| 89 | __y, __nbits - 1);
|
---|
| 90 | return _Base::_S_bit_shift_right(__x, __y);
|
---|
| 91 | }
|
---|
| 92 | }
|
---|
| 93 | else
|
---|
| 94 | return _Base::_S_bit_shift_right(__x, __y);
|
---|
| 95 | }
|
---|
| 96 |
|
---|
| 97 | template <typename _Tp, size_t _Np>
|
---|
| 98 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
|
---|
| 99 | _S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, int __y)
|
---|
| 100 | {
|
---|
| 101 | if constexpr (sizeof(_Tp) < sizeof(int))
|
---|
| 102 | {
|
---|
| 103 | constexpr int __nbits = sizeof(_Tp) * __CHAR_BIT__;
|
---|
| 104 | if (__y >= __nbits)
|
---|
| 105 | {
|
---|
| 106 | if constexpr (is_unsigned_v<_Tp>)
|
---|
| 107 | return {};
|
---|
| 108 | else
|
---|
| 109 | return _Base::_S_bit_shift_right(__x, __nbits - 1);
|
---|
| 110 | }
|
---|
| 111 | }
|
---|
| 112 | return _Base::_S_bit_shift_right(__x, __y);
|
---|
| 113 | }
|
---|
| 114 |
|
---|
| 115 | // }}}
|
---|
| 116 | };
|
---|
| 117 |
|
---|
| 118 | // }}}
|
---|
| 119 | // _MaskImplPpc {{{
|
---|
| 120 | template <typename _Abi>
|
---|
| 121 | struct _MaskImplPpc : _MaskImplBuiltin<_Abi>
|
---|
| 122 | {
|
---|
| 123 | using _Base = _MaskImplBuiltin<_Abi>;
|
---|
| 124 |
|
---|
| 125 | // _S_popcount {{{
|
---|
| 126 | template <typename _Tp>
|
---|
| 127 | _GLIBCXX_SIMD_INTRINSIC static int _S_popcount(simd_mask<_Tp, _Abi> __k)
|
---|
| 128 | {
|
---|
| 129 | const auto __kv = __as_vector(__k);
|
---|
| 130 | if constexpr (__have_power10vec)
|
---|
| 131 | {
|
---|
| 132 | return vec_cntm(__to_intrin(__kv), 1);
|
---|
| 133 | }
|
---|
| 134 | else if constexpr (sizeof(_Tp) >= sizeof(int))
|
---|
| 135 | {
|
---|
| 136 | using _Intrin = __intrinsic_type16_t<int>;
|
---|
| 137 | const int __sum = -vec_sums(__intrin_bitcast<_Intrin>(__kv), _Intrin())[3];
|
---|
| 138 | return __sum / (sizeof(_Tp) / sizeof(int));
|
---|
| 139 | }
|
---|
| 140 | else
|
---|
| 141 | {
|
---|
| 142 | const auto __summed_to_int = vec_sum4s(__to_intrin(__kv), __intrinsic_type16_t<int>());
|
---|
| 143 | return -vec_sums(__summed_to_int, __intrinsic_type16_t<int>())[3];
|
---|
| 144 | }
|
---|
| 145 | }
|
---|
| 146 |
|
---|
| 147 | // }}}
|
---|
| 148 | };
|
---|
| 149 |
|
---|
| 150 | // }}}
|
---|
| 151 |
|
---|
| 152 | _GLIBCXX_SIMD_END_NAMESPACE
|
---|
| 153 | #endif // __cplusplus >= 201703L
|
---|
| 154 | #endif // _GLIBCXX_EXPERIMENTAL_SIMD_PPC_H_
|
---|
| 155 |
|
---|
| 156 | // vim: foldmethod=marker foldmarker={{{,}}} sw=2 noet ts=8 sts=2 tw=100
|
---|