source: Daodan/MSYS2/mingw32/include/c++/11.2.0/experimental/bits/simd_detail.h@ 1177

Last change on this file since 1177 was 1166, checked in by rossy, 3 years ago

Daodan: Replace MinGW build env with an up-to-date MSYS2 env

File size: 8.7 KB
Line 
1// Internal macros for the simd implementation -*- C++ -*-
2
3// Copyright (C) 2020-2021 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
26#define _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
27
28#if __cplusplus >= 201703L
29
30#include <cstddef>
31#include <cstdint>
32
33
// Opens the nested namespaces that the simd implementation lives in:
// namespace std (declared with _GLIBCXX_VISIBILITY(default)), then whatever
// _GLIBCXX_BEGIN_NAMESPACE_VERSION expands to, then namespace experimental,
// then inline namespace parallelism_v2.
// NOTE(review): _GLIBCXX_VISIBILITY and the *_NAMESPACE_VERSION macros are not
// defined in this file; presumably they come from the libstdc++ configuration
// header - confirm that it is included before these macros are expanded.
34#define _GLIBCXX_SIMD_BEGIN_NAMESPACE \
35 namespace std _GLIBCXX_VISIBILITY(default) \
36 { \
37 _GLIBCXX_BEGIN_NAMESPACE_VERSION \
38 namespace experimental { \
39 inline namespace parallelism_v2 {
// Closes everything opened by _GLIBCXX_SIMD_BEGIN_NAMESPACE, in reverse order:
// parallelism_v2, experimental, the version namespace, and finally std.
40#define _GLIBCXX_SIMD_END_NAMESPACE \
41 } \
42 } \
43 _GLIBCXX_END_NAMESPACE_VERSION \
44 }
45
46// ISA extension detection. The following defines every _GLIBCXX_SIMD_HAVE_XXX
47// macro to either 1 or 0, so each one can be used directly in #if. ARM{{{
// HAVE_NEON: 1 whenever the compiler predefines __ARM_NEON.
48#if defined __ARM_NEON
49#define _GLIBCXX_SIMD_HAVE_NEON 1
50#else
51#define _GLIBCXX_SIMD_HAVE_NEON 0
52#endif
// HAVE_NEON_A32: NEON plus either __ARM_ARCH >= 8 or __aarch64__. Note that
// this flag is therefore also 1 on every AArch64 target with NEON.
53#if defined __ARM_NEON && (__ARM_ARCH >= 8 || defined __aarch64__)
54#define _GLIBCXX_SIMD_HAVE_NEON_A32 1
55#else
56#define _GLIBCXX_SIMD_HAVE_NEON_A32 0
57#endif
// HAVE_NEON_A64: NEON on a target where __aarch64__ is defined.
58#if defined __ARM_NEON && defined __aarch64__
59#define _GLIBCXX_SIMD_HAVE_NEON_A64 1
60#else
61#define _GLIBCXX_SIMD_HAVE_NEON_A64 0
62#endif
63//}}}
64// x86{{{
// HAVE_MMX mirrors the compiler's __MMX__ predefine.
65#ifdef __MMX__
66#define _GLIBCXX_SIMD_HAVE_MMX 1
67#else
68#define _GLIBCXX_SIMD_HAVE_MMX 0
69#endif
// HAVE_SSE: 1 if __SSE__ is defined, and unconditionally 1 when __x86_64__ is
// defined (no __SSE__ predefine is required on that target).
70#if defined __SSE__ || defined __x86_64__
71#define _GLIBCXX_SIMD_HAVE_SSE 1
72#else
73#define _GLIBCXX_SIMD_HAVE_SSE 0
74#endif
// HAVE_SSE2: same rule as HAVE_SSE - __SSE2__ or any __x86_64__ target.
75#if defined __SSE2__ || defined __x86_64__
76#define _GLIBCXX_SIMD_HAVE_SSE2 1
77#else
78#define _GLIBCXX_SIMD_HAVE_SSE2 0
79#endif
// Each flag below is 1 exactly when the correspondingly named compiler
// predefine (__SSE3__, __SSSE3__, __SSE4_1__, __SSE4_2__, __XOP__) is defined,
// and 0 otherwise. Unlike SSE/SSE2 there is no __x86_64__ fallback here.
80#ifdef __SSE3__
81#define _GLIBCXX_SIMD_HAVE_SSE3 1
82#else
83#define _GLIBCXX_SIMD_HAVE_SSE3 0
84#endif
85#ifdef __SSSE3__
86#define _GLIBCXX_SIMD_HAVE_SSSE3 1
87#else
88#define _GLIBCXX_SIMD_HAVE_SSSE3 0
89#endif
90#ifdef __SSE4_1__
91#define _GLIBCXX_SIMD_HAVE_SSE4_1 1
92#else
93#define _GLIBCXX_SIMD_HAVE_SSE4_1 0
94#endif
95#ifdef __SSE4_2__
96#define _GLIBCXX_SIMD_HAVE_SSE4_2 1
97#else
98#define _GLIBCXX_SIMD_HAVE_SSE4_2 0
99#endif
100#ifdef __XOP__
101#define _GLIBCXX_SIMD_HAVE_XOP 1
102#else
103#define _GLIBCXX_SIMD_HAVE_XOP 0
104#endif
// HAVE_AVX / HAVE_AVX2 mirror the __AVX__ / __AVX2__ predefines. These two
// flags are what the AVX ABI availability macros later in this file key off.
105#ifdef __AVX__
106#define _GLIBCXX_SIMD_HAVE_AVX 1
107#else
108#define _GLIBCXX_SIMD_HAVE_AVX 0
109#endif
110#ifdef __AVX2__
111#define _GLIBCXX_SIMD_HAVE_AVX2 1
112#else
113#define _GLIBCXX_SIMD_HAVE_AVX2 0
114#endif
// Miscellaneous x86 extensions. Each flag is 1 exactly when the matching
// compiler predefine is defined and 0 otherwise.
// Naming quirk: the library flag is spelled HAVE_BMI1 but it is driven by the
// predefine __BMI__ (there is no "1" in the compiler macro).
115#ifdef __BMI__
116#define _GLIBCXX_SIMD_HAVE_BMI1 1
117#else
118#define _GLIBCXX_SIMD_HAVE_BMI1 0
119#endif
120#ifdef __BMI2__
121#define _GLIBCXX_SIMD_HAVE_BMI2 1
122#else
123#define _GLIBCXX_SIMD_HAVE_BMI2 0
124#endif
125#ifdef __LZCNT__
126#define _GLIBCXX_SIMD_HAVE_LZCNT 1
127#else
128#define _GLIBCXX_SIMD_HAVE_LZCNT 0
129#endif
130#ifdef __SSE4A__
131#define _GLIBCXX_SIMD_HAVE_SSE4A 1
132#else
133#define _GLIBCXX_SIMD_HAVE_SSE4A 0
134#endif
135#ifdef __FMA__
136#define _GLIBCXX_SIMD_HAVE_FMA 1
137#else
138#define _GLIBCXX_SIMD_HAVE_FMA 0
139#endif
140#ifdef __FMA4__
141#define _GLIBCXX_SIMD_HAVE_FMA4 1
142#else
143#define _GLIBCXX_SIMD_HAVE_FMA4 0
144#endif
145#ifdef __F16C__
146#define _GLIBCXX_SIMD_HAVE_F16C 1
147#else
148#define _GLIBCXX_SIMD_HAVE_F16C 0
149#endif
150#ifdef __POPCNT__
151#define _GLIBCXX_SIMD_HAVE_POPCNT 1
152#else
153#define _GLIBCXX_SIMD_HAVE_POPCNT 0
154#endif
// AVX-512 subsets. Each flag mirrors its predefine (__AVX512F__, __AVX512DQ__,
// __AVX512VL__, __AVX512BW__) independently; no flag here implies another.
155#ifdef __AVX512F__
156#define _GLIBCXX_SIMD_HAVE_AVX512F 1
157#else
158#define _GLIBCXX_SIMD_HAVE_AVX512F 0
159#endif
160#ifdef __AVX512DQ__
161#define _GLIBCXX_SIMD_HAVE_AVX512DQ 1
162#else
163#define _GLIBCXX_SIMD_HAVE_AVX512DQ 0
164#endif
165#ifdef __AVX512VL__
166#define _GLIBCXX_SIMD_HAVE_AVX512VL 1
167#else
168#define _GLIBCXX_SIMD_HAVE_AVX512VL 0
169#endif
170#ifdef __AVX512BW__
171#define _GLIBCXX_SIMD_HAVE_AVX512BW 1
172#else
173#define _GLIBCXX_SIMD_HAVE_AVX512BW 0
174#endif
175
// ABI availability flags, derived purely from the HAVE_XXX flags above.
// Each register-width family gets a basic flag and a "FULL" flag:
//   SSE_ABI    <- HAVE_SSE        FULL_SSE_ABI    <- HAVE_SSE2
//   AVX_ABI    <- HAVE_AVX        FULL_AVX_ABI    <- HAVE_AVX2
//   AVX512_ABI <- HAVE_AVX512F    FULL_AVX512_ABI <- HAVE_AVX512BW
// NOTE(review): what "FULL" adds over the basic ABI is not visible in this
// file; presumably it means all element types are supported - confirm against
// the headers that consume these flags.
176#if _GLIBCXX_SIMD_HAVE_SSE
177#define _GLIBCXX_SIMD_HAVE_SSE_ABI 1
178#else
179#define _GLIBCXX_SIMD_HAVE_SSE_ABI 0
180#endif
181#if _GLIBCXX_SIMD_HAVE_SSE2
182#define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 1
183#else
184#define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 0
185#endif
186
187#if _GLIBCXX_SIMD_HAVE_AVX
188#define _GLIBCXX_SIMD_HAVE_AVX_ABI 1
189#else
190#define _GLIBCXX_SIMD_HAVE_AVX_ABI 0
191#endif
192#if _GLIBCXX_SIMD_HAVE_AVX2
193#define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 1
194#else
195#define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 0
196#endif
197
198#if _GLIBCXX_SIMD_HAVE_AVX512F
199#define _GLIBCXX_SIMD_HAVE_AVX512_ABI 1
200#else
201#define _GLIBCXX_SIMD_HAVE_AVX512_ABI 0
202#endif
203#if _GLIBCXX_SIMD_HAVE_AVX512BW
204#define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 1
205#else
206#define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 0
207#endif
208
// Sanity check: refuse to compile for x86-64 without SSE2.
// With the definition of _GLIBCXX_SIMD_HAVE_SSE2 earlier in this file (which is
// 1 whenever __x86_64__ is defined) this condition cannot currently be true;
// it guards against that detection logic being changed later.
209#if defined __x86_64__ && !_GLIBCXX_SIMD_HAVE_SSE2
210#error "Use of SSE2 is required on AMD64"
211#endif
212//}}}
213
// Function-attribute shorthands used throughout the simd implementation.
//
// _GLIBCXX_SIMD_NORMAL_MATH: on non-Clang compilers, applies the GNU optimize
// attribute with "finite-math-only,no-signed-zeros" to the annotated function.
// Under Clang it expands to nothing, so the function is compiled with whatever
// floating-point settings the translation unit uses.
214#ifdef __clang__
215#define _GLIBCXX_SIMD_NORMAL_MATH
216#else
217#define _GLIBCXX_SIMD_NORMAL_MATH \
218 [[__gnu__::__optimize__("finite-math-only,no-signed-zeros")]]
219#endif
// _GLIBCXX_SIMD_NEVER_INLINE: GNU noinline attribute.
220#define _GLIBCXX_SIMD_NEVER_INLINE [[__gnu__::__noinline__]]
// _GLIBCXX_SIMD_INTRINSIC: always_inline + artificial attributes, plus the
// `inline` keyword. _GLIBCXX_SIMD_ALWAYS_INLINE is the same without artificial.
// Both may be redefined to plain `inline` further down in this file when
// _GLIBCXX_SIMD_NO_ALWAYS_INLINE is defined.
221#define _GLIBCXX_SIMD_INTRINSIC \
222 [[__gnu__::__always_inline__, __gnu__::__artificial__]] inline
223#define _GLIBCXX_SIMD_ALWAYS_INLINE [[__gnu__::__always_inline__]] inline
// Branch-prediction hints: thin wrappers over __builtin_expect with an expected
// value of 0 (unlikely) or 1 (likely). The argument is forwarded as written and
// the expansion yields the argument's own value, not a normalized bool.
// NOTE(review): because IS_LIKELY expects exactly 1, callers should presumably
// pass a bool-valued expression - confirm at the use sites.
224#define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
225#define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)
226
// constexpr selection.
// When __STRICT_ANSI__ is defined and non-zero, _GLIBCXX_SIMD_CONSTEXPR expands
// to nothing and _GLIBCXX_SIMD_USE_CONSTEXPR_API to `const`; otherwise both
// expand to `constexpr`.
// NOTE(review): the rationale is not stated here; presumably constexpr on the
// public API is treated as an extension beyond the TS - confirm.
227#if defined __STRICT_ANSI__ && __STRICT_ANSI__
228#define _GLIBCXX_SIMD_CONSTEXPR
229#define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
230#else
231#define _GLIBCXX_SIMD_CONSTEXPR constexpr
232#define _GLIBCXX_SIMD_USE_CONSTEXPR_API constexpr
233#endif
234
// _GLIBCXX_SIMD_USE_CONSTEXPR: `const` under Clang, `constexpr` everywhere
// else. This choice is independent of __STRICT_ANSI__.
235#if defined __clang__
236#define _GLIBCXX_SIMD_USE_CONSTEXPR const
237#else
238#define _GLIBCXX_SIMD_USE_CONSTEXPR constexpr
239#endif
240
// X-macro style operator lists: each LIST_* macro invokes the supplied
// function-like macro once per operator token, in the order written:
//   LIST_BINARY      -> |  &  ^
//   LIST_SHIFTS      -> << >>
//   LIST_ARITHMETICS -> +  -  *  /  %
241#define _GLIBCXX_SIMD_LIST_BINARY(__macro) __macro(|) __macro(&) __macro(^)
242#define _GLIBCXX_SIMD_LIST_SHIFTS(__macro) __macro(<<) __macro(>>)
243#define _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) \
244 __macro(+) __macro(-) __macro(*) __macro(/) __macro(%)
245
// ALL_* variants: the matching LIST_* expansion followed by a no-op
// `static_assert(true)` without a trailing semicolon.
// NOTE(review): presumably this lets the use site be written with a closing
// ';' like an ordinary declaration - confirm at the use sites.
246#define _GLIBCXX_SIMD_ALL_BINARY(__macro) \
247 _GLIBCXX_SIMD_LIST_BINARY(__macro) static_assert(true)
248#define _GLIBCXX_SIMD_ALL_SHIFTS(__macro) \
249 _GLIBCXX_SIMD_LIST_SHIFTS(__macro) static_assert(true)
250#define _GLIBCXX_SIMD_ALL_ARITHMETICS(__macro) \
251 _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) static_assert(true)
252
// Opt-out switch: if _GLIBCXX_SIMD_NO_ALWAYS_INLINE is defined before this
// header is processed, the forced-inlining attributes are dropped and both
// _GLIBCXX_SIMD_ALWAYS_INLINE and _GLIBCXX_SIMD_INTRINSIC become plain
// `inline` (the artificial attribute on INTRINSIC is removed as well).
253#ifdef _GLIBCXX_SIMD_NO_ALWAYS_INLINE
254#undef _GLIBCXX_SIMD_ALWAYS_INLINE
255#define _GLIBCXX_SIMD_ALWAYS_INLINE inline
256#undef _GLIBCXX_SIMD_INTRINSIC
257#define _GLIBCXX_SIMD_INTRINSIC inline
258#endif
259
// _GLIBCXX_SIMD_X86INTRIN: 1 when either SSE or MMX was detected above, else 0.
// It gates the x86-only workaround macros defined below.
260#if _GLIBCXX_SIMD_HAVE_SSE || _GLIBCXX_SIMD_HAVE_MMX
261#define _GLIBCXX_SIMD_X86INTRIN 1
262#else
263#define _GLIBCXX_SIMD_X86INTRIN 0
264#endif
265
266// workaround macros {{{
// Convention in this section: unconditional workarounds are defined to 1.
// The x86-only ones (PR85048, XXX_1, XXX_3) are defined to 1 only when
// _GLIBCXX_SIMD_X86INTRIN is non-zero and are left UNDEFINED otherwise - they
// are never defined to 0, unlike the HAVE_XXX flags.
// NOTE(review): the PRnnnnn suffixes presumably refer to GCC bug-tracker
// entries - confirm.
267// use aliasing loads to help GCC understand the data accesses better
268// This also seems to hide a miscompilation on swap(x[i], x[i + 1]) with
269// fixed_size_simd<float, 16> x.
270#define _GLIBCXX_SIMD_USE_ALIASING_LOADS 1
271
272// vector conversions on x86 not optimized:
273#if _GLIBCXX_SIMD_X86INTRIN
274#define _GLIBCXX_SIMD_WORKAROUND_PR85048 1
275#endif
276
277// integer division not optimized
278#define _GLIBCXX_SIMD_WORKAROUND_PR90993 1
279
280// very bad codegen for extraction and concatenation of 128/256 "subregisters"
281// with sizeof(element type) < 8: https://godbolt.org/g/mqUsgM
282#if _GLIBCXX_SIMD_X86INTRIN
283#define _GLIBCXX_SIMD_WORKAROUND_XXX_1 1
284#endif
285
286// bad codegen for 8 Byte memcpy to __vector_type_t<char, 16>
287#define _GLIBCXX_SIMD_WORKAROUND_PR90424 1
288
289// bad codegen for zero-extend using simple concat(__x, 0)
290#if _GLIBCXX_SIMD_X86INTRIN
291#define _GLIBCXX_SIMD_WORKAROUND_XXX_3 1
292#endif
293
294// https://github.com/cplusplus/parallelism-ts/issues/65 (incorrect return type
295// of static_simd_cast)
296#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 1
297
298// https://github.com/cplusplus/parallelism-ts/issues/66 (incorrect SFINAE
299// constraint on (static)_simd_cast)
300#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 1
301// }}}
302
303#endif // __cplusplus >= 201703L
304#endif // _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
305
306// vim: foldmethod=marker
Note: See TracBrowser for help on using the repository browser.