source: Daodan/MSYS2/mingw32/lib/gcc/i686-w64-mingw32/11.2.0/include/avx512vbmi2vlintrin.h@ 1174

Last change on this file since 1174 was 1166, checked in by rossy, 3 years ago

Daodan: Replace MinGW build env with an up-to-date MSYS2 env

File size: 36.2 KB
RevLine 
[1166]1/* Copyright (C) 2013-2021 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512vbmi2vlintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512VBMI2VLINTRIN_H_INCLUDED
29#define _AVX512VBMI2VLINTRIN_H_INCLUDED
30
31#if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__)
32#pragma GCC push_options
33#pragma GCC target("avx512vbmi2,avx512vl")
34#define __DISABLE_AVX512VBMI2VL__
35#endif /* __AVX512VBMI2VL__ */
36
37extern __inline __m128i
38__attribute__((__gnu_inline__, __always_inline__, __artificial__))
39_mm_mask_compress_epi8 (__m128i __A, __mmask16 __B, __m128i __C)
40{
41 return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi)__C,
42 (__v16qi)__A, (__mmask16)__B);
43}
44
45extern __inline __m128i
46__attribute__((__gnu_inline__, __always_inline__, __artificial__))
47_mm_maskz_compress_epi8 (__mmask16 __A, __m128i __B)
48{
49 return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __B,
50 (__v16qi) _mm_setzero_si128 (), (__mmask16) __A);
51}
52
53
54extern __inline void
55__attribute__((__gnu_inline__, __always_inline__, __artificial__))
56_mm256_mask_compressstoreu_epi16 (void * __A, __mmask16 __B, __m256i __C)
57{
58 __builtin_ia32_compressstoreuhi256_mask ((__v16hi *) __A, (__v16hi) __C,
59 (__mmask16) __B);
60}
61
62extern __inline __m128i
63__attribute__((__gnu_inline__, __always_inline__, __artificial__))
64_mm_mask_compress_epi16 (__m128i __A, __mmask8 __B, __m128i __C)
65{
66 return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi)__C, (__v8hi)__A,
67 (__mmask8)__B);
68}
69
70extern __inline __m128i
71__attribute__((__gnu_inline__, __always_inline__, __artificial__))
72_mm_maskz_compress_epi16 (__mmask8 __A, __m128i __B)
73{
74 return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __B,
75 (__v8hi) _mm_setzero_si128 (), (__mmask8) __A);
76}
77
78extern __inline __m256i
79__attribute__((__gnu_inline__, __always_inline__, __artificial__))
80_mm256_mask_compress_epi16 (__m256i __A, __mmask16 __B, __m256i __C)
81{
82 return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi)__C,
83 (__v16hi)__A, (__mmask16)__B);
84}
85
86extern __inline __m256i
87__attribute__((__gnu_inline__, __always_inline__, __artificial__))
88_mm256_maskz_compress_epi16 (__mmask16 __A, __m256i __B)
89{
90 return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __B,
91 (__v16hi) _mm256_setzero_si256 (), (__mmask16) __A);
92}
93
94extern __inline void
95__attribute__((__gnu_inline__, __always_inline__, __artificial__))
96_mm_mask_compressstoreu_epi8 (void * __A, __mmask16 __B, __m128i __C)
97{
98 __builtin_ia32_compressstoreuqi128_mask ((__v16qi *) __A, (__v16qi) __C,
99 (__mmask16) __B);
100}
101
102extern __inline void
103__attribute__((__gnu_inline__, __always_inline__, __artificial__))
104_mm_mask_compressstoreu_epi16 (void * __A, __mmask8 __B, __m128i __C)
105{
106 __builtin_ia32_compressstoreuhi128_mask ((__v8hi *) __A, (__v8hi) __C,
107 (__mmask8) __B);
108}
109
110extern __inline __m128i
111__attribute__((__gnu_inline__, __always_inline__, __artificial__))
112_mm_mask_expand_epi8 (__m128i __A, __mmask16 __B, __m128i __C)
113{
114 return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __C,
115 (__v16qi) __A,
116 (__mmask16) __B);
117}
118
119extern __inline __m128i
120__attribute__((__gnu_inline__, __always_inline__, __artificial__))
121_mm_maskz_expand_epi8 (__mmask16 __A, __m128i __B)
122{
123 return (__m128i) __builtin_ia32_expandqi128_maskz ((__v16qi) __B,
124 (__v16qi) _mm_setzero_si128 (), (__mmask16) __A);
125}
126
127extern __inline __m128i
128__attribute__((__gnu_inline__, __always_inline__, __artificial__))
129_mm_mask_expandloadu_epi8 (__m128i __A, __mmask16 __B, const void * __C)
130{
131 return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *) __C,
132 (__v16qi) __A, (__mmask16) __B);
133}
134
135extern __inline __m128i
136__attribute__((__gnu_inline__, __always_inline__, __artificial__))
137_mm_maskz_expandloadu_epi8 (__mmask16 __A, const void * __B)
138{
139 return (__m128i) __builtin_ia32_expandloadqi128_maskz ((const __v16qi *) __B,
140 (__v16qi) _mm_setzero_si128 (), (__mmask16) __A);
141}
142
143extern __inline __m128i
144__attribute__((__gnu_inline__, __always_inline__, __artificial__))
145_mm_mask_expand_epi16 (__m128i __A, __mmask8 __B, __m128i __C)
146{
147 return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __C,
148 (__v8hi) __A,
149 (__mmask8) __B);
150}
151
152extern __inline __m128i
153__attribute__((__gnu_inline__, __always_inline__, __artificial__))
154_mm_maskz_expand_epi16 (__mmask8 __A, __m128i __B)
155{
156 return (__m128i) __builtin_ia32_expandhi128_maskz ((__v8hi) __B,
157 (__v8hi) _mm_setzero_si128 (), (__mmask8) __A);
158}
159
160extern __inline __m128i
161__attribute__((__gnu_inline__, __always_inline__, __artificial__))
162_mm_mask_expandloadu_epi16 (__m128i __A, __mmask8 __B, const void * __C)
163{
164 return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *) __C,
165 (__v8hi) __A, (__mmask8) __B);
166}
167
168extern __inline __m128i
169__attribute__((__gnu_inline__, __always_inline__, __artificial__))
170_mm_maskz_expandloadu_epi16 (__mmask8 __A, const void * __B)
171{
172 return (__m128i) __builtin_ia32_expandloadhi128_maskz ((const __v8hi *) __B,
173 (__v8hi) _mm_setzero_si128 (), (__mmask8) __A);
174}
175extern __inline __m256i
176__attribute__((__gnu_inline__, __always_inline__, __artificial__))
177_mm256_mask_expand_epi16 (__m256i __A, __mmask16 __B, __m256i __C)
178{
179 return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __C,
180 (__v16hi) __A,
181 (__mmask16) __B);
182}
183
184extern __inline __m256i
185__attribute__((__gnu_inline__, __always_inline__, __artificial__))
186_mm256_maskz_expand_epi16 (__mmask16 __A, __m256i __B)
187{
188 return (__m256i) __builtin_ia32_expandhi256_maskz ((__v16hi) __B,
189 (__v16hi) _mm256_setzero_si256 (), (__mmask16) __A);
190}
191
192extern __inline __m256i
193__attribute__((__gnu_inline__, __always_inline__, __artificial__))
194_mm256_mask_expandloadu_epi16 (__m256i __A, __mmask16 __B, const void * __C)
195{
196 return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *) __C,
197 (__v16hi) __A, (__mmask16) __B);
198}
199
200extern __inline __m256i
201__attribute__((__gnu_inline__, __always_inline__, __artificial__))
202_mm256_maskz_expandloadu_epi16 (__mmask16 __A, const void * __B)
203{
204 return (__m256i) __builtin_ia32_expandloadhi256_maskz ((const __v16hi *) __B,
205 (__v16hi) _mm256_setzero_si256 (), (__mmask16) __A);
206}
207
208#ifdef __OPTIMIZE__
209extern __inline __m256i
210__attribute__((__gnu_inline__, __always_inline__, __artificial__))
211_mm256_shrdi_epi16 (__m256i __A, __m256i __B, int __C)
212{
213 return (__m256i) __builtin_ia32_vpshrd_v16hi ((__v16hi)__A, (__v16hi) __B,
214 __C);
215}
216
217extern __inline __m256i
218__attribute__((__gnu_inline__, __always_inline__, __artificial__))
219_mm256_mask_shrdi_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D,
220 int __E)
221{
222 return (__m256i)__builtin_ia32_vpshrd_v16hi_mask ((__v16hi)__C,
223 (__v16hi) __D, __E, (__v16hi) __A, (__mmask16)__B);
224}
225
226extern __inline __m256i
227__attribute__((__gnu_inline__, __always_inline__, __artificial__))
228_mm256_maskz_shrdi_epi16 (__mmask16 __A, __m256i __B, __m256i __C, int __D)
229{
230 return (__m256i)__builtin_ia32_vpshrd_v16hi_mask ((__v16hi)__B,
231 (__v16hi) __C, __D, (__v16hi) _mm256_setzero_si256 (), (__mmask16)__A);
232}
233
234extern __inline __m256i
235__attribute__((__gnu_inline__, __always_inline__, __artificial__))
236_mm256_mask_shrdi_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
237 int __E)
238{
239 return (__m256i)__builtin_ia32_vpshrd_v8si_mask ((__v8si)__C, (__v8si) __D,
240 __E, (__v8si) __A, (__mmask8)__B);
241}
242
243extern __inline __m256i
244__attribute__((__gnu_inline__, __always_inline__, __artificial__))
245_mm256_maskz_shrdi_epi32 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
246{
247 return (__m256i)__builtin_ia32_vpshrd_v8si_mask ((__v8si)__B, (__v8si) __C,
248 __D, (__v8si) _mm256_setzero_si256 (), (__mmask8)__A);
249}
250
251extern __inline __m256i
252__attribute__((__gnu_inline__, __always_inline__, __artificial__))
253_mm256_shrdi_epi32 (__m256i __A, __m256i __B, int __C)
254{
255 return (__m256i) __builtin_ia32_vpshrd_v8si ((__v8si)__A, (__v8si) __B, __C);
256}
257
258extern __inline __m256i
259__attribute__((__gnu_inline__, __always_inline__, __artificial__))
260_mm256_mask_shrdi_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
261 int __E)
262{
263 return (__m256i)__builtin_ia32_vpshrd_v4di_mask ((__v4di)__C, (__v4di) __D,
264 __E, (__v4di) __A, (__mmask8)__B);
265}
266
267extern __inline __m256i
268__attribute__((__gnu_inline__, __always_inline__, __artificial__))
269_mm256_maskz_shrdi_epi64 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
270{
271 return (__m256i)__builtin_ia32_vpshrd_v4di_mask ((__v4di)__B, (__v4di) __C,
272 __D, (__v4di) _mm256_setzero_si256 (), (__mmask8)__A);
273}
274
275extern __inline __m256i
276__attribute__((__gnu_inline__, __always_inline__, __artificial__))
277_mm256_shrdi_epi64 (__m256i __A, __m256i __B, int __C)
278{
279 return (__m256i) __builtin_ia32_vpshrd_v4di ((__v4di)__A, (__v4di) __B, __C);
280}
281
282extern __inline __m128i
283__attribute__((__gnu_inline__, __always_inline__, __artificial__))
284_mm_mask_shrdi_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
285 int __E)
286{
287 return (__m128i)__builtin_ia32_vpshrd_v8hi_mask ((__v8hi)__C, (__v8hi) __D,
288 __E, (__v8hi) __A, (__mmask8)__B);
289}
290
291extern __inline __m128i
292__attribute__((__gnu_inline__, __always_inline__, __artificial__))
293_mm_maskz_shrdi_epi16 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
294{
295 return (__m128i)__builtin_ia32_vpshrd_v8hi_mask ((__v8hi)__B, (__v8hi) __C,
296 __D, (__v8hi) _mm_setzero_si128 (), (__mmask8)__A);
297}
298
299extern __inline __m128i
300__attribute__((__gnu_inline__, __always_inline__, __artificial__))
301_mm_shrdi_epi16 (__m128i __A, __m128i __B, int __C)
302{
303 return (__m128i) __builtin_ia32_vpshrd_v8hi ((__v8hi)__A, (__v8hi) __B, __C);
304}
305
306extern __inline __m128i
307__attribute__((__gnu_inline__, __always_inline__, __artificial__))
308_mm_mask_shrdi_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
309 int __E)
310{
311 return (__m128i)__builtin_ia32_vpshrd_v4si_mask ((__v4si)__C, (__v4si) __D,
312 __E, (__v4si) __A, (__mmask8)__B);
313}
314
315extern __inline __m128i
316__attribute__((__gnu_inline__, __always_inline__, __artificial__))
317_mm_maskz_shrdi_epi32 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
318{
319 return (__m128i)__builtin_ia32_vpshrd_v4si_mask ((__v4si)__B, (__v4si) __C,
320 __D, (__v4si) _mm_setzero_si128 (), (__mmask8)__A);
321}
322
323extern __inline __m128i
324__attribute__((__gnu_inline__, __always_inline__, __artificial__))
325_mm_shrdi_epi32 (__m128i __A, __m128i __B, int __C)
326{
327 return (__m128i) __builtin_ia32_vpshrd_v4si ((__v4si)__A, (__v4si) __B, __C);
328}
329
330extern __inline __m128i
331__attribute__((__gnu_inline__, __always_inline__, __artificial__))
332_mm_mask_shrdi_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
333 int __E)
334{
335 return (__m128i)__builtin_ia32_vpshrd_v2di_mask ((__v2di)__C, (__v2di) __D,
336 __E, (__v2di) __A, (__mmask8)__B);
337}
338
339extern __inline __m128i
340__attribute__((__gnu_inline__, __always_inline__, __artificial__))
341_mm_maskz_shrdi_epi64 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
342{
343 return (__m128i)__builtin_ia32_vpshrd_v2di_mask ((__v2di)__B, (__v2di) __C,
344 __D, (__v2di) _mm_setzero_si128 (), (__mmask8)__A);
345}
346
347extern __inline __m128i
348__attribute__((__gnu_inline__, __always_inline__, __artificial__))
349_mm_shrdi_epi64 (__m128i __A, __m128i __B, int __C)
350{
351 return (__m128i) __builtin_ia32_vpshrd_v2di ((__v2di)__A, (__v2di) __B, __C);
352}
353
354extern __inline __m256i
355__attribute__((__gnu_inline__, __always_inline__, __artificial__))
356_mm256_shldi_epi16 (__m256i __A, __m256i __B, int __C)
357{
358 return (__m256i) __builtin_ia32_vpshld_v16hi ((__v16hi)__A, (__v16hi) __B,
359 __C);
360}
361
362extern __inline __m256i
363__attribute__((__gnu_inline__, __always_inline__, __artificial__))
364_mm256_mask_shldi_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D,
365 int __E)
366{
367 return (__m256i)__builtin_ia32_vpshld_v16hi_mask ((__v16hi)__C,
368 (__v16hi) __D, __E, (__v16hi) __A, (__mmask16)__B);
369}
370
371extern __inline __m256i
372__attribute__((__gnu_inline__, __always_inline__, __artificial__))
373_mm256_maskz_shldi_epi16 (__mmask16 __A, __m256i __B, __m256i __C, int __D)
374{
375 return (__m256i)__builtin_ia32_vpshld_v16hi_mask ((__v16hi)__B,
376 (__v16hi) __C, __D, (__v16hi) _mm256_setzero_si256 (), (__mmask16)__A);
377}
378
379extern __inline __m256i
380__attribute__((__gnu_inline__, __always_inline__, __artificial__))
381_mm256_mask_shldi_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
382 int __E)
383{
384 return (__m256i)__builtin_ia32_vpshld_v8si_mask ((__v8si)__C, (__v8si) __D,
385 __E, (__v8si) __A, (__mmask8)__B);
386}
387
388extern __inline __m256i
389__attribute__((__gnu_inline__, __always_inline__, __artificial__))
390_mm256_maskz_shldi_epi32 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
391{
392 return (__m256i)__builtin_ia32_vpshld_v8si_mask ((__v8si)__B, (__v8si) __C,
393 __D, (__v8si) _mm256_setzero_si256 (), (__mmask8)__A);
394}
395
396extern __inline __m256i
397__attribute__((__gnu_inline__, __always_inline__, __artificial__))
398_mm256_shldi_epi32 (__m256i __A, __m256i __B, int __C)
399{
400 return (__m256i) __builtin_ia32_vpshld_v8si ((__v8si)__A, (__v8si) __B, __C);
401}
402
403extern __inline __m256i
404__attribute__((__gnu_inline__, __always_inline__, __artificial__))
405_mm256_mask_shldi_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
406 int __E)
407{
408 return (__m256i)__builtin_ia32_vpshld_v4di_mask ((__v4di)__C, (__v4di) __D,
409 __E, (__v4di) __A, (__mmask8)__B);
410}
411
412extern __inline __m256i
413__attribute__((__gnu_inline__, __always_inline__, __artificial__))
414_mm256_maskz_shldi_epi64 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
415{
416 return (__m256i)__builtin_ia32_vpshld_v4di_mask ((__v4di)__B, (__v4di) __C,
417 __D, (__v4di) _mm256_setzero_si256 (), (__mmask8)__A);
418}
419
420extern __inline __m256i
421__attribute__((__gnu_inline__, __always_inline__, __artificial__))
422_mm256_shldi_epi64 (__m256i __A, __m256i __B, int __C)
423{
424 return (__m256i) __builtin_ia32_vpshld_v4di ((__v4di)__A, (__v4di) __B, __C);
425}
426
427extern __inline __m128i
428__attribute__((__gnu_inline__, __always_inline__, __artificial__))
429_mm_mask_shldi_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
430 int __E)
431{
432 return (__m128i)__builtin_ia32_vpshld_v8hi_mask ((__v8hi)__C, (__v8hi) __D,
433 __E, (__v8hi) __A, (__mmask8)__B);
434}
435
436extern __inline __m128i
437__attribute__((__gnu_inline__, __always_inline__, __artificial__))
438_mm_maskz_shldi_epi16 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
439{
440 return (__m128i)__builtin_ia32_vpshld_v8hi_mask ((__v8hi)__B, (__v8hi) __C,
441 __D, (__v8hi) _mm_setzero_si128 (), (__mmask8)__A);
442}
443
444extern __inline __m128i
445__attribute__((__gnu_inline__, __always_inline__, __artificial__))
446_mm_shldi_epi16 (__m128i __A, __m128i __B, int __C)
447{
448 return (__m128i) __builtin_ia32_vpshld_v8hi ((__v8hi)__A, (__v8hi) __B, __C);
449}
450
451extern __inline __m128i
452__attribute__((__gnu_inline__, __always_inline__, __artificial__))
453_mm_mask_shldi_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
454 int __E)
455{
456 return (__m128i)__builtin_ia32_vpshld_v4si_mask ((__v4si)__C, (__v4si) __D,
457 __E, (__v4si) __A, (__mmask8)__B);
458}
459
460extern __inline __m128i
461__attribute__((__gnu_inline__, __always_inline__, __artificial__))
462_mm_maskz_shldi_epi32 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
463{
464 return (__m128i)__builtin_ia32_vpshld_v4si_mask ((__v4si)__B, (__v4si) __C,
465 __D, (__v4si) _mm_setzero_si128 (), (__mmask8)__A);
466}
467
468extern __inline __m128i
469__attribute__((__gnu_inline__, __always_inline__, __artificial__))
470_mm_shldi_epi32 (__m128i __A, __m128i __B, int __C)
471{
472 return (__m128i) __builtin_ia32_vpshld_v4si ((__v4si)__A, (__v4si) __B, __C);
473}
474
475extern __inline __m128i
476__attribute__((__gnu_inline__, __always_inline__, __artificial__))
477_mm_mask_shldi_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
478 int __E)
479{
480 return (__m128i)__builtin_ia32_vpshld_v2di_mask ((__v2di)__C, (__v2di) __D,
481 __E, (__v2di) __A, (__mmask8)__B);
482}
483
484extern __inline __m128i
485__attribute__((__gnu_inline__, __always_inline__, __artificial__))
486_mm_maskz_shldi_epi64 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
487{
488 return (__m128i)__builtin_ia32_vpshld_v2di_mask ((__v2di)__B, (__v2di) __C,
489 __D, (__v2di) _mm_setzero_si128 (), (__mmask8)__A);
490}
491
492extern __inline __m128i
493__attribute__((__gnu_inline__, __always_inline__, __artificial__))
494_mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
495{
496 return (__m128i) __builtin_ia32_vpshld_v2di ((__v2di)__A, (__v2di) __B, __C);
497}
498#else
/* Macro form used when __OPTIMIZE__ is not defined: expands straight to
   __builtin_ia32_vpshrd_v16hi with A, B cast to __v16hi and C to int.  */
#define _mm256_shrdi_epi16(A, B, C) \
  ((__m256i) __builtin_ia32_vpshrd_v16hi ( \
     (__v16hi)(__m256i)(A), \
     (__v16hi)(__m256i)(B), \
     (int)(C)))
/* Macro form used when __OPTIMIZE__ is not defined: C and D are the
   shift operands, E the int count, A the fourth vector operand and B the
   mask of __builtin_ia32_vpshrd_v16hi_mask.  */
#define _mm256_mask_shrdi_epi16(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshrd_v16hi_mask ( \
     (__v16hi)(__m256i)(C), \
     (__v16hi)(__m256i)(D), \
     (int)(E), \
     (__v16hi)(__m256i)(A), \
     (__mmask16)(B)))
/* Macro form used when __OPTIMIZE__ is not defined: B and C are the
   shift operands, D the int count, an all-zero vector the fourth operand
   and A the mask of __builtin_ia32_vpshrd_v16hi_mask.  */
#define _mm256_maskz_shrdi_epi16(A, B, C, D) \
  ((__m256i) __builtin_ia32_vpshrd_v16hi_mask ( \
     (__v16hi)(__m256i)(B), \
     (__v16hi)(__m256i)(C), \
     (int)(D), \
     (__v16hi) _mm256_setzero_si256 (), \
     (__mmask16)(A)))
/* Macro form used when __OPTIMIZE__ is not defined: expands straight to
   __builtin_ia32_vpshrd_v8si with A, B cast to __v8si and C to int.  */
#define _mm256_shrdi_epi32(A, B, C) \
  ((__m256i) __builtin_ia32_vpshrd_v8si ( \
     (__v8si)(__m256i)(A), \
     (__v8si)(__m256i)(B), \
     (int)(C)))
/* Macro form used when __OPTIMIZE__ is not defined: C and D are the
   shift operands, E the int count, A the fourth vector operand and B the
   mask of __builtin_ia32_vpshrd_v8si_mask.  */
#define _mm256_mask_shrdi_epi32(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshrd_v8si_mask ( \
     (__v8si)(__m256i)(C), \
     (__v8si)(__m256i)(D), \
     (int)(E), \
     (__v8si)(__m256i)(A), \
     (__mmask8)(B)))
/* Macro form used when __OPTIMIZE__ is not defined: B and C are the
   shift operands, D the int count, an all-zero vector the fourth operand
   and A the mask of __builtin_ia32_vpshrd_v8si_mask.  */
#define _mm256_maskz_shrdi_epi32(A, B, C, D) \
  ((__m256i) __builtin_ia32_vpshrd_v8si_mask ( \
     (__v8si)(__m256i)(B), \
     (__v8si)(__m256i)(C), \
     (int)(D), \
     (__v8si) _mm256_setzero_si256 (), \
     (__mmask8)(A)))
/* Macro form used when __OPTIMIZE__ is not defined: expands straight to
   __builtin_ia32_vpshrd_v4di with A, B cast to __v4di and C to int.  */
#define _mm256_shrdi_epi64(A, B, C) \
  ((__m256i) __builtin_ia32_vpshrd_v4di ( \
     (__v4di)(__m256i)(A), \
     (__v4di)(__m256i)(B), \
     (int)(C)))
/* Macro form used when __OPTIMIZE__ is not defined: C and D are the
   shift operands, E the int count, A the fourth vector operand and B the
   mask of __builtin_ia32_vpshrd_v4di_mask.  */
#define _mm256_mask_shrdi_epi64(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshrd_v4di_mask ( \
     (__v4di)(__m256i)(C), \
     (__v4di)(__m256i)(D), \
     (int)(E), \
     (__v4di)(__m256i)(A), \
     (__mmask8)(B)))
/* Macro form used when __OPTIMIZE__ is not defined: B and C are the
   shift operands, D the int count, an all-zero vector the fourth operand
   and A the mask of __builtin_ia32_vpshrd_v4di_mask.  */
#define _mm256_maskz_shrdi_epi64(A, B, C, D) \
  ((__m256i) __builtin_ia32_vpshrd_v4di_mask ( \
     (__v4di)(__m256i)(B), \
     (__v4di)(__m256i)(C), \
     (int)(D), \
     (__v4di) _mm256_setzero_si256 (), \
     (__mmask8)(A)))
/* Macro form used when __OPTIMIZE__ is not defined: expands straight to
   __builtin_ia32_vpshrd_v8hi with A, B cast to __v8hi and C to int.  */
#define _mm_shrdi_epi16(A, B, C) \
  ((__m128i) __builtin_ia32_vpshrd_v8hi ( \
     (__v8hi)(__m128i)(A), \
     (__v8hi)(__m128i)(B), \
     (int)(C)))
/* Macro form used when __OPTIMIZE__ is not defined: C and D are the
   shift operands, E the int count, A the fourth vector operand and B the
   mask of __builtin_ia32_vpshrd_v8hi_mask.  */
#define _mm_mask_shrdi_epi16(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshrd_v8hi_mask ( \
     (__v8hi)(__m128i)(C), \
     (__v8hi)(__m128i)(D), \
     (int)(E), \
     (__v8hi)(__m128i)(A), \
     (__mmask8)(B)))
/* Macro form used when __OPTIMIZE__ is not defined: B and C are the
   shift operands, D the int count, an all-zero vector the fourth operand
   and A the mask of __builtin_ia32_vpshrd_v8hi_mask.  */
#define _mm_maskz_shrdi_epi16(A, B, C, D) \
  ((__m128i) __builtin_ia32_vpshrd_v8hi_mask ( \
     (__v8hi)(__m128i)(B), \
     (__v8hi)(__m128i)(C), \
     (int)(D), \
     (__v8hi) _mm_setzero_si128 (), \
     (__mmask8)(A)))
/* Macro form used when __OPTIMIZE__ is not defined: expands straight to
   __builtin_ia32_vpshrd_v4si with A, B cast to __v4si and C to int.  */
#define _mm_shrdi_epi32(A, B, C) \
  ((__m128i) __builtin_ia32_vpshrd_v4si ( \
     (__v4si)(__m128i)(A), \
     (__v4si)(__m128i)(B), \
     (int)(C)))
/* Macro form used when __OPTIMIZE__ is not defined: C and D are the
   shift operands, E the int count, A the fourth vector operand and B the
   mask of __builtin_ia32_vpshrd_v4si_mask.  */
#define _mm_mask_shrdi_epi32(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshrd_v4si_mask ( \
     (__v4si)(__m128i)(C), \
     (__v4si)(__m128i)(D), \
     (int)(E), \
     (__v4si)(__m128i)(A), \
     (__mmask8)(B)))
/* Macro form used when __OPTIMIZE__ is not defined: B and C are the
   shift operands, D the int count, an all-zero vector the fourth operand
   and A the mask of __builtin_ia32_vpshrd_v4si_mask.  */
#define _mm_maskz_shrdi_epi32(A, B, C, D) \
  ((__m128i) __builtin_ia32_vpshrd_v4si_mask ( \
     (__v4si)(__m128i)(B), \
     (__v4si)(__m128i)(C), \
     (int)(D), \
     (__v4si) _mm_setzero_si128 (), \
     (__mmask8)(A)))
/* Macro form used when __OPTIMIZE__ is not defined: expands straight to
   __builtin_ia32_vpshrd_v2di with A, B cast to __v2di and C to int.  */
#define _mm_shrdi_epi64(A, B, C) \
  ((__m128i) __builtin_ia32_vpshrd_v2di ( \
     (__v2di)(__m128i)(A), \
     (__v2di)(__m128i)(B), \
     (int)(C)))
/* Macro form used when __OPTIMIZE__ is not defined: C and D are the
   shift operands, E the int count, A the fourth vector operand and B the
   mask of __builtin_ia32_vpshrd_v2di_mask.  */
#define _mm_mask_shrdi_epi64(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshrd_v2di_mask ( \
     (__v2di)(__m128i)(C), \
     (__v2di)(__m128i)(D), \
     (int)(E), \
     (__v2di)(__m128i)(A), \
     (__mmask8)(B)))
/* Macro form used when __OPTIMIZE__ is not defined: B and C are the
   shift operands, D the int count, an all-zero vector the fourth operand
   and A the mask of __builtin_ia32_vpshrd_v2di_mask.  */
#define _mm_maskz_shrdi_epi64(A, B, C, D) \
  ((__m128i) __builtin_ia32_vpshrd_v2di_mask ( \
     (__v2di)(__m128i)(B), \
     (__v2di)(__m128i)(C), \
     (int)(D), \
     (__v2di) _mm_setzero_si128 (), \
     (__mmask8)(A)))
/* Macro form used when __OPTIMIZE__ is not defined: expands straight to
   __builtin_ia32_vpshld_v16hi with A, B cast to __v16hi and C to int.  */
#define _mm256_shldi_epi16(A, B, C) \
  ((__m256i) __builtin_ia32_vpshld_v16hi ( \
     (__v16hi)(__m256i)(A), \
     (__v16hi)(__m256i)(B), \
     (int)(C)))
/* Macro form used when __OPTIMIZE__ is not defined: C and D are the
   shift operands, E the int count, A the fourth vector operand and B the
   mask of __builtin_ia32_vpshld_v16hi_mask.  */
#define _mm256_mask_shldi_epi16(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshld_v16hi_mask ( \
     (__v16hi)(__m256i)(C), \
     (__v16hi)(__m256i)(D), \
     (int)(E), \
     (__v16hi)(__m256i)(A), \
     (__mmask16)(B)))
/* Macro form used when __OPTIMIZE__ is not defined: B and C are the
   shift operands, D the int count, an all-zero vector the fourth operand
   and A the mask of __builtin_ia32_vpshld_v16hi_mask.  */
#define _mm256_maskz_shldi_epi16(A, B, C, D) \
  ((__m256i) __builtin_ia32_vpshld_v16hi_mask ( \
     (__v16hi)(__m256i)(B), \
     (__v16hi)(__m256i)(C), \
     (int)(D), \
     (__v16hi) _mm256_setzero_si256 (), \
     (__mmask16)(A)))
/* Macro form used when __OPTIMIZE__ is not defined: expands straight to
   __builtin_ia32_vpshld_v8si with A, B cast to __v8si and C to int.  */
#define _mm256_shldi_epi32(A, B, C) \
  ((__m256i) __builtin_ia32_vpshld_v8si ( \
     (__v8si)(__m256i)(A), \
     (__v8si)(__m256i)(B), \
     (int)(C)))
/* Macro form used when __OPTIMIZE__ is not defined: C and D are the
   shift operands, E the int count, A the fourth vector operand and B the
   mask of __builtin_ia32_vpshld_v8si_mask.  */
#define _mm256_mask_shldi_epi32(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshld_v8si_mask ( \
     (__v8si)(__m256i)(C), \
     (__v8si)(__m256i)(D), \
     (int)(E), \
     (__v8si)(__m256i)(A), \
     (__mmask8)(B)))
/* Macro form used when __OPTIMIZE__ is not defined: B and C are the
   shift operands, D the int count, an all-zero vector the fourth operand
   and A the mask of __builtin_ia32_vpshld_v8si_mask.  */
#define _mm256_maskz_shldi_epi32(A, B, C, D) \
  ((__m256i) __builtin_ia32_vpshld_v8si_mask ( \
     (__v8si)(__m256i)(B), \
     (__v8si)(__m256i)(C), \
     (int)(D), \
     (__v8si) _mm256_setzero_si256 (), \
     (__mmask8)(A)))
/* Macro form used when __OPTIMIZE__ is not defined: expands straight to
   __builtin_ia32_vpshld_v4di with A, B cast to __v4di and C to int.  */
#define _mm256_shldi_epi64(A, B, C) \
  ((__m256i) __builtin_ia32_vpshld_v4di ( \
     (__v4di)(__m256i)(A), \
     (__v4di)(__m256i)(B), \
     (int)(C)))
/* Macro form used when __OPTIMIZE__ is not defined: C and D are the
   shift operands, E the int count, A the fourth vector operand and B the
   mask of __builtin_ia32_vpshld_v4di_mask.  */
#define _mm256_mask_shldi_epi64(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshld_v4di_mask ( \
     (__v4di)(__m256i)(C), \
     (__v4di)(__m256i)(D), \
     (int)(E), \
     (__v4di)(__m256i)(A), \
     (__mmask8)(B)))
/* Macro form used when __OPTIMIZE__ is not defined: B and C are the
   shift operands, D the int count, an all-zero vector the fourth operand
   and A the mask of __builtin_ia32_vpshld_v4di_mask.  */
#define _mm256_maskz_shldi_epi64(A, B, C, D) \
  ((__m256i) __builtin_ia32_vpshld_v4di_mask ( \
     (__v4di)(__m256i)(B), \
     (__v4di)(__m256i)(C), \
     (int)(D), \
     (__v4di) _mm256_setzero_si256 (), \
     (__mmask8)(A)))
/* Macro form used when __OPTIMIZE__ is not defined: expands straight to
   __builtin_ia32_vpshld_v8hi with A, B cast to __v8hi and C to int.  */
#define _mm_shldi_epi16(A, B, C) \
  ((__m128i) __builtin_ia32_vpshld_v8hi ( \
     (__v8hi)(__m128i)(A), \
     (__v8hi)(__m128i)(B), \
     (int)(C)))
/* Macro form used when __OPTIMIZE__ is not defined: C and D are the
   shift operands, E the int count, A the fourth vector operand and B the
   mask of __builtin_ia32_vpshld_v8hi_mask.  */
#define _mm_mask_shldi_epi16(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshld_v8hi_mask ( \
     (__v8hi)(__m128i)(C), \
     (__v8hi)(__m128i)(D), \
     (int)(E), \
     (__v8hi)(__m128i)(A), \
     (__mmask8)(B)))
/* Macro form used when __OPTIMIZE__ is not defined: B and C are the
   shift operands, D the int count, an all-zero vector the fourth operand
   and A the mask of __builtin_ia32_vpshld_v8hi_mask.  */
#define _mm_maskz_shldi_epi16(A, B, C, D) \
  ((__m128i) __builtin_ia32_vpshld_v8hi_mask ( \
     (__v8hi)(__m128i)(B), \
     (__v8hi)(__m128i)(C), \
     (int)(D), \
     (__v8hi) _mm_setzero_si128 (), \
     (__mmask8)(A)))
/* Macro form used when __OPTIMIZE__ is not defined: expands straight to
   __builtin_ia32_vpshld_v4si with A, B cast to __v4si and C to int.  */
#define _mm_shldi_epi32(A, B, C) \
  ((__m128i) __builtin_ia32_vpshld_v4si ( \
     (__v4si)(__m128i)(A), \
     (__v4si)(__m128i)(B), \
     (int)(C)))
/* Macro form used when __OPTIMIZE__ is not defined: C and D are the
   shift operands, E the int count, A the fourth vector operand and B the
   mask of __builtin_ia32_vpshld_v4si_mask.  */
#define _mm_mask_shldi_epi32(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshld_v4si_mask ( \
     (__v4si)(__m128i)(C), \
     (__v4si)(__m128i)(D), \
     (int)(E), \
     (__v4si)(__m128i)(A), \
     (__mmask8)(B)))
/* Macro form used when __OPTIMIZE__ is not defined: B and C are the
   shift operands, D the int count, an all-zero vector the fourth operand
   and A the mask of __builtin_ia32_vpshld_v4si_mask.  */
#define _mm_maskz_shldi_epi32(A, B, C, D) \
  ((__m128i) __builtin_ia32_vpshld_v4si_mask ( \
     (__v4si)(__m128i)(B), \
     (__v4si)(__m128i)(C), \
     (int)(D), \
     (__v4si) _mm_setzero_si128 (), \
     (__mmask8)(A)))
/* Macro form used when __OPTIMIZE__ is not defined: expands straight to
   __builtin_ia32_vpshld_v2di with A, B cast to __v2di and C to int.  */
#define _mm_shldi_epi64(A, B, C) \
  ((__m128i) __builtin_ia32_vpshld_v2di ( \
     (__v2di)(__m128i)(A), \
     (__v2di)(__m128i)(B), \
     (int)(C)))
/* Macro form used when __OPTIMIZE__ is not defined: C and D are the
   shift operands, E the int count, A the fourth vector operand and B the
   mask of __builtin_ia32_vpshld_v2di_mask.  */
#define _mm_mask_shldi_epi64(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshld_v2di_mask ( \
     (__v2di)(__m128i)(C), \
     (__v2di)(__m128i)(D), \
     (int)(E), \
     (__v2di)(__m128i)(A), \
     (__mmask8)(B)))
/* Macro form used when __OPTIMIZE__ is not defined: B and C are the
   shift operands, D the int count, an all-zero vector the fourth operand
   and A the mask of __builtin_ia32_vpshld_v2di_mask.  */
#define _mm_maskz_shldi_epi64(A, B, C, D) \
  ((__m128i) __builtin_ia32_vpshld_v2di_mask ( \
     (__v2di)(__m128i)(B), \
     (__v2di)(__m128i)(C), \
     (int)(D), \
     (__v2di) _mm_setzero_si128 (), \
     (__mmask8)(A)))
670#endif
671
672extern __inline __m256i
673__attribute__((__gnu_inline__, __always_inline__, __artificial__))
674_mm256_shrdv_epi16 (__m256i __A, __m256i __B, __m256i __C)
675{
676 return (__m256i) __builtin_ia32_vpshrdv_v16hi ((__v16hi)__A, (__v16hi) __B,
677 (__v16hi) __C);
678}
679
680extern __inline __m256i
681__attribute__((__gnu_inline__, __always_inline__, __artificial__))
682_mm256_mask_shrdv_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D)
683{
684 return (__m256i)__builtin_ia32_vpshrdv_v16hi_mask ((__v16hi)__A,
685 (__v16hi) __C, (__v16hi) __D, (__mmask16)__B);
686}
687
688extern __inline __m256i
689__attribute__((__gnu_inline__, __always_inline__, __artificial__))
690_mm256_maskz_shrdv_epi16 (__mmask16 __A, __m256i __B, __m256i __C, __m256i __D)
691{
692 return (__m256i)__builtin_ia32_vpshrdv_v16hi_maskz ((__v16hi)__B,
693 (__v16hi) __C, (__v16hi) __D, (__mmask16)__A);
694}
695
696extern __inline __m256i
697__attribute__((__gnu_inline__, __always_inline__, __artificial__))
698_mm256_shrdv_epi32 (__m256i __A, __m256i __B, __m256i __C)
699{
700 return (__m256i) __builtin_ia32_vpshrdv_v8si ((__v8si)__A, (__v8si) __B,
701 (__v8si) __C);
702}
703
704extern __inline __m256i
705__attribute__((__gnu_inline__, __always_inline__, __artificial__))
706_mm256_mask_shrdv_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
707{
708 return (__m256i)__builtin_ia32_vpshrdv_v8si_mask ((__v8si)__A, (__v8si) __C,
709 (__v8si) __D, (__mmask8)__B);
710}
711
712extern __inline __m256i
713__attribute__((__gnu_inline__, __always_inline__, __artificial__))
714_mm256_maskz_shrdv_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
715{
716 return (__m256i)__builtin_ia32_vpshrdv_v8si_maskz ((__v8si)__B, (__v8si) __C,
717 (__v8si) __D, (__mmask8)__A);
718}
719
720extern __inline __m256i
721__attribute__((__gnu_inline__, __always_inline__, __artificial__))
722_mm256_shrdv_epi64 (__m256i __A, __m256i __B, __m256i __C)
723{
724 return (__m256i) __builtin_ia32_vpshrdv_v4di ((__v4di)__A, (__v4di) __B,
725 (__v4di) __C);
726}
727
728extern __inline __m256i
729__attribute__((__gnu_inline__, __always_inline__, __artificial__))
730_mm256_mask_shrdv_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
731{
732 return (__m256i)__builtin_ia32_vpshrdv_v4di_mask ((__v4di)__A, (__v4di) __C,
733 (__v4di) __D, (__mmask8)__B);
734}
735
736extern __inline __m256i
737__attribute__((__gnu_inline__, __always_inline__, __artificial__))
738_mm256_maskz_shrdv_epi64 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
739{
740 return (__m256i)__builtin_ia32_vpshrdv_v4di_maskz ((__v4di)__B, (__v4di) __C,
741 (__v4di) __D, (__mmask8)__A);
742}
743
744extern __inline __m128i
745__attribute__((__gnu_inline__, __always_inline__, __artificial__))
746_mm_shrdv_epi16 (__m128i __A, __m128i __B, __m128i __C)
747{
748 return (__m128i) __builtin_ia32_vpshrdv_v8hi ((__v8hi)__A, (__v8hi) __B,
749 (__v8hi) __C);
750}
751
752extern __inline __m128i
753__attribute__((__gnu_inline__, __always_inline__, __artificial__))
754_mm_mask_shrdv_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
755{
756 return (__m128i)__builtin_ia32_vpshrdv_v8hi_mask ((__v8hi)__A, (__v8hi) __C,
757 (__v8hi) __D, (__mmask8)__B);
758}
759
760extern __inline __m128i
761__attribute__((__gnu_inline__, __always_inline__, __artificial__))
762_mm_maskz_shrdv_epi16 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
763{
764 return (__m128i)__builtin_ia32_vpshrdv_v8hi_maskz ((__v8hi)__B, (__v8hi) __C,
765 (__v8hi) __D, (__mmask8)__A);
766}
767
768extern __inline __m128i
769__attribute__((__gnu_inline__, __always_inline__, __artificial__))
770_mm_shrdv_epi32 (__m128i __A, __m128i __B, __m128i __C)
771{
772 return (__m128i) __builtin_ia32_vpshrdv_v4si ((__v4si)__A, (__v4si) __B,
773 (__v4si) __C);
774}
775
776extern __inline __m128i
777__attribute__((__gnu_inline__, __always_inline__, __artificial__))
778_mm_mask_shrdv_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
779{
780 return (__m128i)__builtin_ia32_vpshrdv_v4si_mask ((__v4si)__A, (__v4si) __C,
781 (__v4si) __D, (__mmask8)__B);
782}
783
784extern __inline __m128i
785__attribute__((__gnu_inline__, __always_inline__, __artificial__))
786_mm_maskz_shrdv_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
787{
788 return (__m128i)__builtin_ia32_vpshrdv_v4si_maskz ((__v4si)__B, (__v4si) __C,
789 (__v4si) __D, (__mmask8)__A);
790}
791
792extern __inline __m128i
793__attribute__((__gnu_inline__, __always_inline__, __artificial__))
794_mm_shrdv_epi64 (__m128i __A, __m128i __B, __m128i __C)
795{
796 return (__m128i) __builtin_ia32_vpshrdv_v2di ((__v2di)__A, (__v2di) __B,
797 (__v2di) __C);
798}
799
800extern __inline __m128i
801__attribute__((__gnu_inline__, __always_inline__, __artificial__))
802_mm_mask_shrdv_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
803{
804 return (__m128i)__builtin_ia32_vpshrdv_v2di_mask ((__v2di)__A, (__v2di) __C,
805 (__v2di) __D, (__mmask8)__B);
806}
807
808extern __inline __m128i
809__attribute__((__gnu_inline__, __always_inline__, __artificial__))
810_mm_maskz_shrdv_epi64 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
811{
812 return (__m128i)__builtin_ia32_vpshrdv_v2di_maskz ((__v2di)__B, (__v2di) __C,
813 (__v2di) __D, (__mmask8)__A);
814}
815
816extern __inline __m256i
817__attribute__((__gnu_inline__, __always_inline__, __artificial__))
818_mm256_shldv_epi16 (__m256i __A, __m256i __B, __m256i __C)
819{
820 return (__m256i) __builtin_ia32_vpshldv_v16hi ((__v16hi)__A, (__v16hi) __B,
821 (__v16hi) __C);
822}
823
824extern __inline __m256i
825__attribute__((__gnu_inline__, __always_inline__, __artificial__))
826_mm256_mask_shldv_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D)
827{
828 return (__m256i)__builtin_ia32_vpshldv_v16hi_mask ((__v16hi)__A,
829 (__v16hi) __C, (__v16hi) __D, (__mmask16)__B);
830}
831
832extern __inline __m256i
833__attribute__((__gnu_inline__, __always_inline__, __artificial__))
834_mm256_maskz_shldv_epi16 (__mmask16 __A, __m256i __B, __m256i __C, __m256i __D)
835{
836 return (__m256i)__builtin_ia32_vpshldv_v16hi_maskz ((__v16hi)__B,
837 (__v16hi) __C, (__v16hi) __D, (__mmask16)__A);
838}
839
840extern __inline __m256i
841__attribute__((__gnu_inline__, __always_inline__, __artificial__))
842_mm256_shldv_epi32 (__m256i __A, __m256i __B, __m256i __C)
843{
844 return (__m256i) __builtin_ia32_vpshldv_v8si ((__v8si)__A, (__v8si) __B,
845 (__v8si) __C);
846}
847
848extern __inline __m256i
849__attribute__((__gnu_inline__, __always_inline__, __artificial__))
850_mm256_mask_shldv_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
851{
852 return (__m256i)__builtin_ia32_vpshldv_v8si_mask ((__v8si)__A, (__v8si) __C,
853 (__v8si) __D, (__mmask8)__B) ;
854}
855
856extern __inline __m256i
857__attribute__((__gnu_inline__, __always_inline__, __artificial__))
858_mm256_maskz_shldv_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
859{
860 return (__m256i)__builtin_ia32_vpshldv_v8si_maskz ((__v8si)__B, (__v8si) __C,
861 (__v8si) __D, (__mmask8)__A);
862}
863
864extern __inline __m256i
865__attribute__((__gnu_inline__, __always_inline__, __artificial__))
866_mm256_shldv_epi64 (__m256i __A, __m256i __B, __m256i __C)
867{
868 return (__m256i) __builtin_ia32_vpshldv_v4di ((__v4di)__A, (__v4di) __B,
869 (__v4di) __C);
870}
871
872extern __inline __m256i
873__attribute__((__gnu_inline__, __always_inline__, __artificial__))
874_mm256_mask_shldv_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
875{
876 return (__m256i)__builtin_ia32_vpshldv_v4di_mask ((__v4di)__A, (__v4di) __C,
877 (__v4di) __D, (__mmask8)__B);
878}
879
880extern __inline __m256i
881__attribute__((__gnu_inline__, __always_inline__, __artificial__))
882_mm256_maskz_shldv_epi64 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
883{
884 return (__m256i)__builtin_ia32_vpshldv_v4di_maskz ((__v4di)__B, (__v4di) __C,
885 (__v4di) __D, (__mmask8)__A);
886}
887
888extern __inline __m128i
889__attribute__((__gnu_inline__, __always_inline__, __artificial__))
890_mm_shldv_epi16 (__m128i __A, __m128i __B, __m128i __C)
891{
892 return (__m128i) __builtin_ia32_vpshldv_v8hi ((__v8hi)__A, (__v8hi) __B,
893 (__v8hi) __C);
894}
895
896extern __inline __m128i
897__attribute__((__gnu_inline__, __always_inline__, __artificial__))
898_mm_mask_shldv_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
899{
900 return (__m128i)__builtin_ia32_vpshldv_v8hi_mask ((__v8hi)__A, (__v8hi) __C,
901 (__v8hi) __D, (__mmask8)__B);
902}
903
904extern __inline __m128i
905__attribute__((__gnu_inline__, __always_inline__, __artificial__))
906_mm_maskz_shldv_epi16 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
907{
908 return (__m128i)__builtin_ia32_vpshldv_v8hi_maskz ((__v8hi)__B, (__v8hi) __C,
909 (__v8hi) __D, (__mmask8)__A);
910}
911
912extern __inline __m128i
913__attribute__((__gnu_inline__, __always_inline__, __artificial__))
914_mm_shldv_epi32 (__m128i __A, __m128i __B, __m128i __C)
915{
916 return (__m128i) __builtin_ia32_vpshldv_v4si ((__v4si)__A, (__v4si) __B,
917 (__v4si) __C);
918}
919
920extern __inline __m128i
921__attribute__((__gnu_inline__, __always_inline__, __artificial__))
922_mm_mask_shldv_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
923{
924 return (__m128i)__builtin_ia32_vpshldv_v4si_mask ((__v4si)__A, (__v4si) __C,
925 (__v4si) __D, (__mmask8)__B);
926}
927
928extern __inline __m128i
929__attribute__((__gnu_inline__, __always_inline__, __artificial__))
930_mm_maskz_shldv_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
931{
932 return (__m128i)__builtin_ia32_vpshldv_v4si_maskz ((__v4si)__B, (__v4si) __C,
933 (__v4si) __D, (__mmask8)__A);
934}
935
936extern __inline __m128i
937__attribute__((__gnu_inline__, __always_inline__, __artificial__))
938_mm_shldv_epi64 (__m128i __A, __m128i __B, __m128i __C)
939{
940 return (__m128i) __builtin_ia32_vpshldv_v2di ((__v2di)__A, (__v2di) __B,
941 (__v2di) __C);
942}
943
944extern __inline __m128i
945__attribute__((__gnu_inline__, __always_inline__, __artificial__))
946_mm_mask_shldv_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
947{
948 return (__m128i)__builtin_ia32_vpshldv_v2di_mask ((__v2di)__A, (__v2di) __C,
949 (__v2di) __D, (__mmask8)__B);
950}
951
952extern __inline __m128i
953__attribute__((__gnu_inline__, __always_inline__, __artificial__))
954_mm_maskz_shldv_epi64 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
955{
956 return (__m128i)__builtin_ia32_vpshldv_v2di_maskz ((__v2di)__B, (__v2di) __C,
957 (__v2di) __D, (__mmask8)__A);
958}
959
960
961
962
/* __DISABLE_AVX512VBMI2VL__ is only defined when this header had to push
   its own target options near the top of the file; in that case restore
   the caller's options and drop the marker.  */
#if defined (__DISABLE_AVX512VBMI2VL__)
#pragma GCC pop_options
#undef __DISABLE_AVX512VBMI2VL__
#endif /* __DISABLE_AVX512VBMI2VL__ */
967
/* Unless AVX512VL, AVX512VBMI2 and AVX512BW are all already enabled, save
   the current target options, enable the three features for the
   definitions that follow, and define __DISABLE_AVX512VBMI2VLBW__ so the
   options can be popped again afterwards.  */
#if !(defined (__AVX512VL__) && defined (__AVX512VBMI2__) \
      && defined (__AVX512BW__))
#pragma GCC push_options
#pragma GCC target("avx512vbmi2,avx512vl,avx512bw")
#define __DISABLE_AVX512VBMI2VLBW__
#endif /* !__AVX512VL__ || !__AVX512VBMI2__ || !__AVX512BW__ */
974
975extern __inline __m256i
976__attribute__((__gnu_inline__, __always_inline__, __artificial__))
977_mm256_mask_compress_epi8 (__m256i __A, __mmask32 __B, __m256i __C)
978{
979 return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi)__C,
980 (__v32qi)__A, (__mmask32)__B);
981}
982
983extern __inline __m256i
984__attribute__((__gnu_inline__, __always_inline__, __artificial__))
985_mm256_maskz_compress_epi8 (__mmask32 __A, __m256i __B)
986{
987 return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __B,
988 (__v32qi) _mm256_setzero_si256 (), (__mmask32) __A);
989}
990
991extern __inline void
992__attribute__((__gnu_inline__, __always_inline__, __artificial__))
993_mm256_mask_compressstoreu_epi8 (void * __A, __mmask32 __B, __m256i __C)
994{
995 __builtin_ia32_compressstoreuqi256_mask ((__v32qi *) __A, (__v32qi) __C,
996 (__mmask32) __B);
997}
998
999extern __inline __m256i
1000__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1001_mm256_mask_expand_epi8 (__m256i __A, __mmask32 __B, __m256i __C)
1002{
1003 return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __C,
1004 (__v32qi) __A,
1005 (__mmask32) __B);
1006}
1007
1008extern __inline __m256i
1009__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1010_mm256_maskz_expand_epi8 (__mmask32 __A, __m256i __B)
1011{
1012 return (__m256i) __builtin_ia32_expandqi256_maskz ((__v32qi) __B,
1013 (__v32qi) _mm256_setzero_si256 (), (__mmask32) __A);
1014}
1015
1016extern __inline __m256i
1017__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1018_mm256_mask_expandloadu_epi8 (__m256i __A, __mmask32 __B, const void * __C)
1019{
1020 return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *) __C,
1021 (__v32qi) __A, (__mmask32) __B);
1022}
1023
1024extern __inline __m256i
1025__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1026_mm256_maskz_expandloadu_epi8 (__mmask32 __A, const void * __B)
1027{
1028 return (__m256i) __builtin_ia32_expandloadqi256_maskz ((const __v32qi *) __B,
1029 (__v32qi) _mm256_setzero_si256 (), (__mmask32) __A);
1030}
1031
/* __DISABLE_AVX512VBMI2VLBW__ is only defined when this header pushed the
   avx512vbmi2/avx512vl/avx512bw target options itself; in that case
   restore the caller's options and drop the marker.  */
#if defined (__DISABLE_AVX512VBMI2VLBW__)
#pragma GCC pop_options
#undef __DISABLE_AVX512VBMI2VLBW__
#endif /* __DISABLE_AVX512VBMI2VLBW__ */
1036
#endif /* _AVX512VBMI2VLINTRIN_H_INCLUDED */
Note: See TracBrowser for help on using the repository browser.