// Internal macros for the simd implementation -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.
24 |
|
---|
25 | #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
|
---|
26 | #define _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
|
---|
27 |
|
---|
28 | #if __cplusplus >= 201703L
|
---|
29 |
|
---|
30 | #include <cstddef>
|
---|
31 | #include <cstdint>
|
---|
32 |
|
---|
33 |
|
---|
// Opens, in order: namespace std (annotated with _GLIBCXX_VISIBILITY(default)),
// _GLIBCXX_BEGIN_NAMESPACE_VERSION, namespace experimental, and the inline
// namespace parallelism_v2. Leaves all of them open; pair every use with
// _GLIBCXX_SIMD_END_NAMESPACE.
#define _GLIBCXX_SIMD_BEGIN_NAMESPACE                                          \
  namespace std _GLIBCXX_VISIBILITY(default) {                                 \
  _GLIBCXX_BEGIN_NAMESPACE_VERSION                                             \
  namespace experimental { inline namespace parallelism_v2 {

// Closes, innermost first, what _GLIBCXX_SIMD_BEGIN_NAMESPACE opened.
#define _GLIBCXX_SIMD_END_NAMESPACE                                            \
  } /* inline namespace parallelism_v2 */                                      \
  } /* namespace experimental */                                               \
  _GLIBCXX_END_NAMESPACE_VERSION                                               \
  } /* namespace std */
// ISA extension detection. The following defines all the
// _GLIBCXX_SIMD_HAVE_XXX macros; each one expands to either 1 or 0.
// ARM{{{
#ifdef __ARM_NEON
#  define _GLIBCXX_SIMD_HAVE_NEON 1
// NEON_A32 additionally requires __ARM_ARCH >= 8 or an __aarch64__ target.
#  if __ARM_ARCH >= 8 || defined(__aarch64__)
#    define _GLIBCXX_SIMD_HAVE_NEON_A32 1
#  else
#    define _GLIBCXX_SIMD_HAVE_NEON_A32 0
#  endif
// NEON_A64 additionally requires an __aarch64__ target.
#  ifdef __aarch64__
#    define _GLIBCXX_SIMD_HAVE_NEON_A64 1
#  else
#    define _GLIBCXX_SIMD_HAVE_NEON_A64 0
#  endif
#else
// Without __ARM_NEON every NEON flag is 0.
#  define _GLIBCXX_SIMD_HAVE_NEON 0
#  define _GLIBCXX_SIMD_HAVE_NEON_A32 0
#  define _GLIBCXX_SIMD_HAVE_NEON_A64 0
#endif
//}}}
// x86{{{
// Each _GLIBCXX_SIMD_HAVE_<ISA> macro below is 1 when the compiler's
// matching predefined macro is present and 0 otherwise.
#if defined(__MMX__)
#  define _GLIBCXX_SIMD_HAVE_MMX 1
#else
#  define _GLIBCXX_SIMD_HAVE_MMX 0
#endif
// SSE and SSE2 are also reported as present on every __x86_64__ target.
#if defined(__x86_64__) || defined(__SSE__)
#  define _GLIBCXX_SIMD_HAVE_SSE 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSE 0
#endif
#if defined(__x86_64__) || defined(__SSE2__)
#  define _GLIBCXX_SIMD_HAVE_SSE2 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSE2 0
#endif
#if defined(__SSE3__)
#  define _GLIBCXX_SIMD_HAVE_SSE3 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSE3 0
#endif
#if defined(__SSSE3__)
#  define _GLIBCXX_SIMD_HAVE_SSSE3 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSSE3 0
#endif
#if defined(__SSE4_1__)
#  define _GLIBCXX_SIMD_HAVE_SSE4_1 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSE4_1 0
#endif
#if defined(__SSE4_2__)
#  define _GLIBCXX_SIMD_HAVE_SSE4_2 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSE4_2 0
#endif
#if defined(__XOP__)
#  define _GLIBCXX_SIMD_HAVE_XOP 1
#else
#  define _GLIBCXX_SIMD_HAVE_XOP 0
#endif
#if defined(__AVX__)
#  define _GLIBCXX_SIMD_HAVE_AVX 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX 0
#endif
#if defined(__AVX2__)
#  define _GLIBCXX_SIMD_HAVE_AVX2 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX2 0
#endif
// Note the naming: HAVE_BMI1 is keyed off __BMI__ (there is no __BMI1__ here).
#if defined(__BMI__)
#  define _GLIBCXX_SIMD_HAVE_BMI1 1
#else
#  define _GLIBCXX_SIMD_HAVE_BMI1 0
#endif
#if defined(__BMI2__)
#  define _GLIBCXX_SIMD_HAVE_BMI2 1
#else
#  define _GLIBCXX_SIMD_HAVE_BMI2 0
#endif
#if defined(__LZCNT__)
#  define _GLIBCXX_SIMD_HAVE_LZCNT 1
#else
#  define _GLIBCXX_SIMD_HAVE_LZCNT 0
#endif
#if defined(__SSE4A__)
#  define _GLIBCXX_SIMD_HAVE_SSE4A 1
#else
#  define _GLIBCXX_SIMD_HAVE_SSE4A 0
#endif
#if defined(__FMA__)
#  define _GLIBCXX_SIMD_HAVE_FMA 1
#else
#  define _GLIBCXX_SIMD_HAVE_FMA 0
#endif
#if defined(__FMA4__)
#  define _GLIBCXX_SIMD_HAVE_FMA4 1
#else
#  define _GLIBCXX_SIMD_HAVE_FMA4 0
#endif
#if defined(__F16C__)
#  define _GLIBCXX_SIMD_HAVE_F16C 1
#else
#  define _GLIBCXX_SIMD_HAVE_F16C 0
#endif
#if defined(__POPCNT__)
#  define _GLIBCXX_SIMD_HAVE_POPCNT 1
#else
#  define _GLIBCXX_SIMD_HAVE_POPCNT 0
#endif
#if defined(__AVX512F__)
#  define _GLIBCXX_SIMD_HAVE_AVX512F 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX512F 0
#endif
#if defined(__AVX512DQ__)
#  define _GLIBCXX_SIMD_HAVE_AVX512DQ 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX512DQ 0
#endif
#if defined(__AVX512VL__)
#  define _GLIBCXX_SIMD_HAVE_AVX512VL 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX512VL 0
#endif
#if defined(__AVX512BW__)
#  define _GLIBCXX_SIMD_HAVE_AVX512BW 1
#else
#  define _GLIBCXX_SIMD_HAVE_AVX512BW 0
#endif

// ABI flags, each mirroring one ISA flag defined above:
//   SSE_ABI    <- SSE        FULL_SSE_ABI    <- SSE2
//   AVX_ABI    <- AVX        FULL_AVX_ABI    <- AVX2
//   AVX512_ABI <- AVX512F    FULL_AVX512_ABI <- AVX512BW
#if !_GLIBCXX_SIMD_HAVE_SSE
#  define _GLIBCXX_SIMD_HAVE_SSE_ABI 0
#else
#  define _GLIBCXX_SIMD_HAVE_SSE_ABI 1
#endif
#if !_GLIBCXX_SIMD_HAVE_SSE2
#  define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 0
#else
#  define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 1
#endif

#if !_GLIBCXX_SIMD_HAVE_AVX
#  define _GLIBCXX_SIMD_HAVE_AVX_ABI 0
#else
#  define _GLIBCXX_SIMD_HAVE_AVX_ABI 1
#endif
#if !_GLIBCXX_SIMD_HAVE_AVX2
#  define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 0
#else
#  define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 1
#endif

#if !_GLIBCXX_SIMD_HAVE_AVX512F
#  define _GLIBCXX_SIMD_HAVE_AVX512_ABI 0
#else
#  define _GLIBCXX_SIMD_HAVE_AVX512_ABI 1
#endif
#if !_GLIBCXX_SIMD_HAVE_AVX512BW
#  define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 0
#else
#  define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 1
#endif

// Sanity check: reject an __x86_64__ build for which SSE2 was not detected.
#if !_GLIBCXX_SIMD_HAVE_SSE2 && defined(__x86_64__)
#  error "Use of SSE2 is required on AMD64"
#endif
//}}}
// Function annotation and branch-hint helpers.

// _GLIBCXX_SIMD_NORMAL_MATH: on non-clang compilers, attaches the GNU
// optimize attribute "finite-math-only,no-signed-zeros" to a function;
// expands to nothing when __clang__ is defined.
#ifdef __clang__
#define _GLIBCXX_SIMD_NORMAL_MATH
#else
#define _GLIBCXX_SIMD_NORMAL_MATH                                              \
  [[__gnu__::__optimize__("finite-math-only,no-signed-zeros")]]
#endif

// _GLIBCXX_SIMD_NEVER_INLINE: GNU noinline attribute.
#define _GLIBCXX_SIMD_NEVER_INLINE [[__gnu__::__noinline__]]

// _GLIBCXX_SIMD_INTRINSIC: inline + GNU always_inline and artificial.
#define _GLIBCXX_SIMD_INTRINSIC                                                \
  [[__gnu__::__always_inline__, __gnu__::__artificial__]] inline

// _GLIBCXX_SIMD_ALWAYS_INLINE: inline + GNU always_inline.
#define _GLIBCXX_SIMD_ALWAYS_INLINE [[__gnu__::__always_inline__]] inline

// Branch prediction hints for use as a condition. The argument is first
// normalised to 0/1 with `!!`, because __builtin_expect compares its (long)
// first operand against the expected value: without the normalisation a
// truthy value other than 1 would contradict the "expect 1" hint, and
// operands that do not convert to long (e.g. pointers) would not compile.
// The result is 1 when __x is truthy and 0 otherwise.
#define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(!!(__x), 0)
#define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(!!(__x), 1)
// constexpr selection.
// Unless __STRICT_ANSI__ is defined to a non-zero value, both macros expand
// to `constexpr`. In strict mode _GLIBCXX_SIMD_CONSTEXPR expands to nothing
// and _GLIBCXX_SIMD_USE_CONSTEXPR_API falls back to `const`.
#if !defined(__STRICT_ANSI__) || !__STRICT_ANSI__
#  define _GLIBCXX_SIMD_CONSTEXPR constexpr
#  define _GLIBCXX_SIMD_USE_CONSTEXPR_API constexpr
#else
#  define _GLIBCXX_SIMD_CONSTEXPR
#  define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
#endif

// _GLIBCXX_SIMD_USE_CONSTEXPR is `constexpr`, except under clang where it is
// `const`.
#ifndef __clang__
#  define _GLIBCXX_SIMD_USE_CONSTEXPR constexpr
#else
#  define _GLIBCXX_SIMD_USE_CONSTEXPR const
#endif
// Operator lists: each _GLIBCXX_SIMD_LIST_* macro invokes the macro passed
// as __m once per operator token, in the order written below.
#define _GLIBCXX_SIMD_LIST_BINARY(__m)                                         \
  __m(|)                                                                       \
  __m(&)                                                                       \
  __m(^)
#define _GLIBCXX_SIMD_LIST_SHIFTS(__m)                                         \
  __m(<<)                                                                      \
  __m(>>)
#define _GLIBCXX_SIMD_LIST_ARITHMETICS(__m)                                    \
  __m(+)                                                                       \
  __m(-)                                                                       \
  __m(*)                                                                       \
  __m(/)                                                                       \
  __m(%)

// The _GLIBCXX_SIMD_ALL_* forms append `static_assert(true)` after the list
// (presumably so that the use site can be terminated with a semicolon).
#define _GLIBCXX_SIMD_ALL_BINARY(__m)                                          \
  _GLIBCXX_SIMD_LIST_BINARY(__m)                                               \
  static_assert(true)
#define _GLIBCXX_SIMD_ALL_SHIFTS(__m)                                          \
  _GLIBCXX_SIMD_LIST_SHIFTS(__m)                                               \
  static_assert(true)
#define _GLIBCXX_SIMD_ALL_ARITHMETICS(__m)                                     \
  _GLIBCXX_SIMD_LIST_ARITHMETICS(__m)                                          \
  static_assert(true)
// Opt-out switch: when _GLIBCXX_SIMD_NO_ALWAYS_INLINE is defined, both
// inlining macros are redefined as plain `inline`, dropping their attributes.
#if defined(_GLIBCXX_SIMD_NO_ALWAYS_INLINE)
#  undef _GLIBCXX_SIMD_INTRINSIC
#  undef _GLIBCXX_SIMD_ALWAYS_INLINE
#  define _GLIBCXX_SIMD_INTRINSIC inline
#  define _GLIBCXX_SIMD_ALWAYS_INLINE inline
#endif
// _GLIBCXX_SIMD_X86INTRIN is 1 when at least one of _GLIBCXX_SIMD_HAVE_SSE and
// _GLIBCXX_SIMD_HAVE_MMX is non-zero, and 0 when both are zero.
#if !_GLIBCXX_SIMD_HAVE_SSE && !_GLIBCXX_SIMD_HAVE_MMX
#  define _GLIBCXX_SIMD_X86INTRIN 0
#else
#  define _GLIBCXX_SIMD_X86INTRIN 1
#endif
// workaround macros {{{
// Unconditional switches first, then the ones that are only defined when
// _GLIBCXX_SIMD_X86INTRIN is non-zero.

// Use aliasing loads to help GCC understand the data accesses better.
// This also seems to hide a miscompilation on swap(x[i], x[i + 1]) with
// fixed_size_simd<float, 16> x.
#define _GLIBCXX_SIMD_USE_ALIASING_LOADS 1

// Integer division not optimized.
#define _GLIBCXX_SIMD_WORKAROUND_PR90993 1

// Bad codegen for 8 Byte memcpy to __vector_type_t<char, 16>.
#define _GLIBCXX_SIMD_WORKAROUND_PR90424 1

// https://github.com/cplusplus/parallelism-ts/issues/65 (incorrect return type
// of static_simd_cast)
#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 1

// https://github.com/cplusplus/parallelism-ts/issues/66 (incorrect SFINAE
// constraint on (static)_simd_cast)
#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 1

#if _GLIBCXX_SIMD_X86INTRIN
// Vector conversions on x86 not optimized.
#  define _GLIBCXX_SIMD_WORKAROUND_PR85048 1

// Very bad codegen for extraction and concatenation of 128/256 "subregisters"
// with sizeof(element type) < 8: https://godbolt.org/g/mqUsgM
#  define _GLIBCXX_SIMD_WORKAROUND_XXX_1 1

// Bad codegen for zero-extend using simple concat(__x, 0).
#  define _GLIBCXX_SIMD_WORKAROUND_XXX_3 1
#endif
// }}}
303 | #endif // __cplusplus >= 201703L
|
---|
304 | #endif // _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
|
---|
305 |
|
---|
306 | // vim: foldmethod=marker
|
---|