source: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx2intrin.h@ 1088

Last change on this file since 1088 was 1046, checked in by alloc, 8 years ago

Daodan: Added Windows MinGW and build batch file

File size: 56.0 KB
Line 
1/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25# error "Never use <avx2intrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX2INTRIN_H_INCLUDED
29#define _AVX2INTRIN_H_INCLUDED
30
31#ifndef __AVX2__
32#pragma GCC push_options
33#pragma GCC target("avx2")
34#define __DISABLE_AVX2__
35#endif /* __AVX2__ */
36
/* Sums absolute differences of 8-bit integers over adjacent groups of
   4 bytes taken from the first two operands.  The third (mask) operand
   selects the starting offsets within the operands.  */
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mpsadbw_epu8 (__m256i __X, __m256i __Y, const int __M)
{
  __v32qi __lhs = (__v32qi) __X;
  __v32qi __rhs = (__v32qi) __Y;

  /* __M is forwarded to the builtin unchanged.  */
  return (__m256i) __builtin_ia32_mpsadbw256 (__lhs, __rhs, __M);
}
#else
/* Non-optimizing builds get a macro with the same expansion.  */
#define _mm256_mpsadbw_epu8(X, Y, M) \
  ((__m256i) __builtin_ia32_mpsadbw256 ((__v32qi)(__m256i)(X), \
                                        (__v32qi)(__m256i)(Y), \
                                        (int)(M)))
#endif
53
54extern __inline __m256i
55__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
56_mm256_abs_epi8 (__m256i __A)
57{
58 return (__m256i)__builtin_ia32_pabsb256 ((__v32qi)__A);
59}
60
61extern __inline __m256i
62__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
63_mm256_abs_epi16 (__m256i __A)
64{
65 return (__m256i)__builtin_ia32_pabsw256 ((__v16hi)__A);
66}
67
68extern __inline __m256i
69__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
70_mm256_abs_epi32 (__m256i __A)
71{
72 return (__m256i)__builtin_ia32_pabsd256 ((__v8si)__A);
73}
74
75extern __inline __m256i
76__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
77_mm256_packs_epi32 (__m256i __A, __m256i __B)
78{
79 return (__m256i)__builtin_ia32_packssdw256 ((__v8si)__A, (__v8si)__B);
80}
81
82extern __inline __m256i
83__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
84_mm256_packs_epi16 (__m256i __A, __m256i __B)
85{
86 return (__m256i)__builtin_ia32_packsswb256 ((__v16hi)__A, (__v16hi)__B);
87}
88
89extern __inline __m256i
90__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
91_mm256_packus_epi32 (__m256i __A, __m256i __B)
92{
93 return (__m256i)__builtin_ia32_packusdw256 ((__v8si)__A, (__v8si)__B);
94}
95
96extern __inline __m256i
97__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
98_mm256_packus_epi16 (__m256i __A, __m256i __B)
99{
100 return (__m256i)__builtin_ia32_packuswb256 ((__v16hi)__A, (__v16hi)__B);
101}
102
103extern __inline __m256i
104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
105_mm256_add_epi8 (__m256i __A, __m256i __B)
106{
107 return (__m256i) ((__v32qu)__A + (__v32qu)__B);
108}
109
110extern __inline __m256i
111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
112_mm256_add_epi16 (__m256i __A, __m256i __B)
113{
114 return (__m256i) ((__v16hu)__A + (__v16hu)__B);
115}
116
117extern __inline __m256i
118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
119_mm256_add_epi32 (__m256i __A, __m256i __B)
120{
121 return (__m256i) ((__v8su)__A + (__v8su)__B);
122}
123
124extern __inline __m256i
125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
126_mm256_add_epi64 (__m256i __A, __m256i __B)
127{
128 return (__m256i) ((__v4du)__A + (__v4du)__B);
129}
130
131extern __inline __m256i
132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
133_mm256_adds_epi8 (__m256i __A, __m256i __B)
134{
135 return (__m256i)__builtin_ia32_paddsb256 ((__v32qi)__A, (__v32qi)__B);
136}
137
138extern __inline __m256i
139__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
140_mm256_adds_epi16 (__m256i __A, __m256i __B)
141{
142 return (__m256i)__builtin_ia32_paddsw256 ((__v16hi)__A, (__v16hi)__B);
143}
144
145extern __inline __m256i
146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
147_mm256_adds_epu8 (__m256i __A, __m256i __B)
148{
149 return (__m256i)__builtin_ia32_paddusb256 ((__v32qi)__A, (__v32qi)__B);
150}
151
152extern __inline __m256i
153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
154_mm256_adds_epu16 (__m256i __A, __m256i __B)
155{
156 return (__m256i)__builtin_ia32_paddusw256 ((__v16hi)__A, (__v16hi)__B);
157}
158
#ifdef __OPTIMIZE__
/* Wraps __builtin_ia32_palignr256 on the __v4di views of __A and __B;
   the count __N is multiplied by 8 before it reaches the builtin.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_alignr_epi8 (__m256i __A, __m256i __B, const int __N)
{
  __v4di __first = (__v4di) __A;
  __v4di __second = (__v4di) __B;

  return (__m256i) __builtin_ia32_palignr256 (__first, __second, __N * 8);
}
#else
/* Without optimization (__N * 8) would live in a virtual register and
   the instruction would not be matched, so a macro is used instead.  */
#define _mm256_alignr_epi8(A, B, N) \
  ((__m256i) __builtin_ia32_palignr256 ((__v4di)(__m256i)(A), \
                                        (__v4di)(__m256i)(B), \
                                        (int)(N) * 8))
#endif
176
177extern __inline __m256i
178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
179_mm256_and_si256 (__m256i __A, __m256i __B)
180{
181 return (__m256i) ((__v4du)__A & (__v4du)__B);
182}
183
184extern __inline __m256i
185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
186_mm256_andnot_si256 (__m256i __A, __m256i __B)
187{
188 return (__m256i) __builtin_ia32_andnotsi256 ((__v4di)__A, (__v4di)__B);
189}
190
191extern __inline __m256i
192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
193_mm256_avg_epu8 (__m256i __A, __m256i __B)
194{
195 return (__m256i)__builtin_ia32_pavgb256 ((__v32qi)__A, (__v32qi)__B);
196}
197
198extern __inline __m256i
199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
200_mm256_avg_epu16 (__m256i __A, __m256i __B)
201{
202 return (__m256i)__builtin_ia32_pavgw256 ((__v16hi)__A, (__v16hi)__B);
203}
204
205extern __inline __m256i
206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
207_mm256_blendv_epi8 (__m256i __X, __m256i __Y, __m256i __M)
208{
209 return (__m256i) __builtin_ia32_pblendvb256 ((__v32qi)__X,
210 (__v32qi)__Y,
211 (__v32qi)__M);
212}
213
#ifdef __OPTIMIZE__
/* Wraps __builtin_ia32_pblendw256 on the __v16hi views of __X and __Y;
   __M is forwarded to the builtin unchanged.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blend_epi16 (__m256i __X, __m256i __Y, const int __M)
{
  __v16hi __first = (__v16hi) __X;
  __v16hi __second = (__v16hi) __Y;

  return (__m256i) __builtin_ia32_pblendw256 (__first, __second, __M);
}
#else
/* Non-optimizing builds get a macro with the same expansion.  */
#define _mm256_blend_epi16(X, Y, M) \
  ((__m256i) __builtin_ia32_pblendw256 ((__v16hi)(__m256i)(X), \
                                        (__v16hi)(__m256i)(Y), \
                                        (int)(M)))
#endif
228
229extern __inline __m256i
230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
231_mm256_cmpeq_epi8 (__m256i __A, __m256i __B)
232{
233 return (__m256i) ((__v32qi)__A == (__v32qi)__B);
234}
235
236extern __inline __m256i
237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
238_mm256_cmpeq_epi16 (__m256i __A, __m256i __B)
239{
240 return (__m256i) ((__v16hi)__A == (__v16hi)__B);
241}
242
243extern __inline __m256i
244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
245_mm256_cmpeq_epi32 (__m256i __A, __m256i __B)
246{
247 return (__m256i) ((__v8si)__A == (__v8si)__B);
248}
249
250extern __inline __m256i
251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
252_mm256_cmpeq_epi64 (__m256i __A, __m256i __B)
253{
254 return (__m256i) ((__v4di)__A == (__v4di)__B);
255}
256
257extern __inline __m256i
258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
259_mm256_cmpgt_epi8 (__m256i __A, __m256i __B)
260{
261 return (__m256i) ((__v32qi)__A > (__v32qi)__B);
262}
263
264extern __inline __m256i
265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
266_mm256_cmpgt_epi16 (__m256i __A, __m256i __B)
267{
268 return (__m256i) ((__v16hi)__A > (__v16hi)__B);
269}
270
271extern __inline __m256i
272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
273_mm256_cmpgt_epi32 (__m256i __A, __m256i __B)
274{
275 return (__m256i) ((__v8si)__A > (__v8si)__B);
276}
277
278extern __inline __m256i
279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
280_mm256_cmpgt_epi64 (__m256i __A, __m256i __B)
281{
282 return (__m256i) ((__v4di)__A > (__v4di)__B);
283}
284
285extern __inline __m256i
286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
287_mm256_hadd_epi16 (__m256i __X, __m256i __Y)
288{
289 return (__m256i) __builtin_ia32_phaddw256 ((__v16hi)__X,
290 (__v16hi)__Y);
291}
292
293extern __inline __m256i
294__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
295_mm256_hadd_epi32 (__m256i __X, __m256i __Y)
296{
297 return (__m256i) __builtin_ia32_phaddd256 ((__v8si)__X, (__v8si)__Y);
298}
299
300extern __inline __m256i
301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
302_mm256_hadds_epi16 (__m256i __X, __m256i __Y)
303{
304 return (__m256i) __builtin_ia32_phaddsw256 ((__v16hi)__X,
305 (__v16hi)__Y);
306}
307
308extern __inline __m256i
309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
310_mm256_hsub_epi16 (__m256i __X, __m256i __Y)
311{
312 return (__m256i) __builtin_ia32_phsubw256 ((__v16hi)__X,
313 (__v16hi)__Y);
314}
315
316extern __inline __m256i
317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
318_mm256_hsub_epi32 (__m256i __X, __m256i __Y)
319{
320 return (__m256i) __builtin_ia32_phsubd256 ((__v8si)__X, (__v8si)__Y);
321}
322
323extern __inline __m256i
324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
325_mm256_hsubs_epi16 (__m256i __X, __m256i __Y)
326{
327 return (__m256i) __builtin_ia32_phsubsw256 ((__v16hi)__X,
328 (__v16hi)__Y);
329}
330
331extern __inline __m256i
332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
333_mm256_maddubs_epi16 (__m256i __X, __m256i __Y)
334{
335 return (__m256i) __builtin_ia32_pmaddubsw256 ((__v32qi)__X,
336 (__v32qi)__Y);
337}
338
339extern __inline __m256i
340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
341_mm256_madd_epi16 (__m256i __A, __m256i __B)
342{
343 return (__m256i)__builtin_ia32_pmaddwd256 ((__v16hi)__A,
344 (__v16hi)__B);
345}
346
347extern __inline __m256i
348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
349_mm256_max_epi8 (__m256i __A, __m256i __B)
350{
351 return (__m256i)__builtin_ia32_pmaxsb256 ((__v32qi)__A, (__v32qi)__B);
352}
353
354extern __inline __m256i
355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
356_mm256_max_epi16 (__m256i __A, __m256i __B)
357{
358 return (__m256i)__builtin_ia32_pmaxsw256 ((__v16hi)__A, (__v16hi)__B);
359}
360
361extern __inline __m256i
362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
363_mm256_max_epi32 (__m256i __A, __m256i __B)
364{
365 return (__m256i)__builtin_ia32_pmaxsd256 ((__v8si)__A, (__v8si)__B);
366}
367
368extern __inline __m256i
369__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
370_mm256_max_epu8 (__m256i __A, __m256i __B)
371{
372 return (__m256i)__builtin_ia32_pmaxub256 ((__v32qi)__A, (__v32qi)__B);
373}
374
375extern __inline __m256i
376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
377_mm256_max_epu16 (__m256i __A, __m256i __B)
378{
379 return (__m256i)__builtin_ia32_pmaxuw256 ((__v16hi)__A, (__v16hi)__B);
380}
381
382extern __inline __m256i
383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
384_mm256_max_epu32 (__m256i __A, __m256i __B)
385{
386 return (__m256i)__builtin_ia32_pmaxud256 ((__v8si)__A, (__v8si)__B);
387}
388
389extern __inline __m256i
390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
391_mm256_min_epi8 (__m256i __A, __m256i __B)
392{
393 return (__m256i)__builtin_ia32_pminsb256 ((__v32qi)__A, (__v32qi)__B);
394}
395
396extern __inline __m256i
397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
398_mm256_min_epi16 (__m256i __A, __m256i __B)
399{
400 return (__m256i)__builtin_ia32_pminsw256 ((__v16hi)__A, (__v16hi)__B);
401}
402
403extern __inline __m256i
404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
405_mm256_min_epi32 (__m256i __A, __m256i __B)
406{
407 return (__m256i)__builtin_ia32_pminsd256 ((__v8si)__A, (__v8si)__B);
408}
409
410extern __inline __m256i
411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
412_mm256_min_epu8 (__m256i __A, __m256i __B)
413{
414 return (__m256i)__builtin_ia32_pminub256 ((__v32qi)__A, (__v32qi)__B);
415}
416
417extern __inline __m256i
418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
419_mm256_min_epu16 (__m256i __A, __m256i __B)
420{
421 return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__A, (__v16hi)__B);
422}
423
424extern __inline __m256i
425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
426_mm256_min_epu32 (__m256i __A, __m256i __B)
427{
428 return (__m256i)__builtin_ia32_pminud256 ((__v8si)__A, (__v8si)__B);
429}
430
431extern __inline int
432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
433_mm256_movemask_epi8 (__m256i __A)
434{
435 return __builtin_ia32_pmovmskb256 ((__v32qi)__A);
436}
437
438extern __inline __m256i
439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
440_mm256_cvtepi8_epi16 (__m128i __X)
441{
442 return (__m256i) __builtin_ia32_pmovsxbw256 ((__v16qi)__X);
443}
444
445extern __inline __m256i
446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
447_mm256_cvtepi8_epi32 (__m128i __X)
448{
449 return (__m256i) __builtin_ia32_pmovsxbd256 ((__v16qi)__X);
450}
451
452extern __inline __m256i
453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
454_mm256_cvtepi8_epi64 (__m128i __X)
455{
456 return (__m256i) __builtin_ia32_pmovsxbq256 ((__v16qi)__X);
457}
458
459extern __inline __m256i
460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
461_mm256_cvtepi16_epi32 (__m128i __X)
462{
463 return (__m256i) __builtin_ia32_pmovsxwd256 ((__v8hi)__X);
464}
465
466extern __inline __m256i
467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
468_mm256_cvtepi16_epi64 (__m128i __X)
469{
470 return (__m256i) __builtin_ia32_pmovsxwq256 ((__v8hi)__X);
471}
472
473extern __inline __m256i
474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
475_mm256_cvtepi32_epi64 (__m128i __X)
476{
477 return (__m256i) __builtin_ia32_pmovsxdq256 ((__v4si)__X);
478}
479
480extern __inline __m256i
481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
482_mm256_cvtepu8_epi16 (__m128i __X)
483{
484 return (__m256i) __builtin_ia32_pmovzxbw256 ((__v16qi)__X);
485}
486
487extern __inline __m256i
488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
489_mm256_cvtepu8_epi32 (__m128i __X)
490{
491 return (__m256i) __builtin_ia32_pmovzxbd256 ((__v16qi)__X);
492}
493
494extern __inline __m256i
495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
496_mm256_cvtepu8_epi64 (__m128i __X)
497{
498 return (__m256i) __builtin_ia32_pmovzxbq256 ((__v16qi)__X);
499}
500
501extern __inline __m256i
502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
503_mm256_cvtepu16_epi32 (__m128i __X)
504{
505 return (__m256i) __builtin_ia32_pmovzxwd256 ((__v8hi)__X);
506}
507
508extern __inline __m256i
509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
510_mm256_cvtepu16_epi64 (__m128i __X)
511{
512 return (__m256i) __builtin_ia32_pmovzxwq256 ((__v8hi)__X);
513}
514
515extern __inline __m256i
516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
517_mm256_cvtepu32_epi64 (__m128i __X)
518{
519 return (__m256i) __builtin_ia32_pmovzxdq256 ((__v4si)__X);
520}
521
522extern __inline __m256i
523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
524_mm256_mul_epi32 (__m256i __X, __m256i __Y)
525{
526 return (__m256i) __builtin_ia32_pmuldq256 ((__v8si)__X, (__v8si)__Y);
527}
528
529extern __inline __m256i
530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
531_mm256_mulhrs_epi16 (__m256i __X, __m256i __Y)
532{
533 return (__m256i) __builtin_ia32_pmulhrsw256 ((__v16hi)__X,
534 (__v16hi)__Y);
535}
536
537extern __inline __m256i
538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
539_mm256_mulhi_epu16 (__m256i __A, __m256i __B)
540{
541 return (__m256i)__builtin_ia32_pmulhuw256 ((__v16hi)__A, (__v16hi)__B);
542}
543
544extern __inline __m256i
545__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
546_mm256_mulhi_epi16 (__m256i __A, __m256i __B)
547{
548 return (__m256i)__builtin_ia32_pmulhw256 ((__v16hi)__A, (__v16hi)__B);
549}
550
551extern __inline __m256i
552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
553_mm256_mullo_epi16 (__m256i __A, __m256i __B)
554{
555 return (__m256i) ((__v16hu)__A * (__v16hu)__B);
556}
557
558extern __inline __m256i
559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
560_mm256_mullo_epi32 (__m256i __A, __m256i __B)
561{
562 return (__m256i) ((__v8su)__A * (__v8su)__B);
563}
564
565extern __inline __m256i
566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
567_mm256_mul_epu32 (__m256i __A, __m256i __B)
568{
569 return (__m256i)__builtin_ia32_pmuludq256 ((__v8si)__A, (__v8si)__B);
570}
571
572extern __inline __m256i
573__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
574_mm256_or_si256 (__m256i __A, __m256i __B)
575{
576 return (__m256i) ((__v4du)__A | (__v4du)__B);
577}
578
579extern __inline __m256i
580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
581_mm256_sad_epu8 (__m256i __A, __m256i __B)
582{
583 return (__m256i)__builtin_ia32_psadbw256 ((__v32qi)__A, (__v32qi)__B);
584}
585
586extern __inline __m256i
587__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
588_mm256_shuffle_epi8 (__m256i __X, __m256i __Y)
589{
590 return (__m256i) __builtin_ia32_pshufb256 ((__v32qi)__X,
591 (__v32qi)__Y);
592}
593
#ifdef __OPTIMIZE__
/* Wraps __builtin_ia32_pshufd256 on the __v8si view of __A; __mask is
   forwarded unchanged.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shuffle_epi32 (__m256i __A, const int __mask)
{
  __v8si __src = (__v8si) __A;

  return (__m256i) __builtin_ia32_pshufd256 (__src, __mask);
}

/* Wraps __builtin_ia32_pshufhw256 on the __v16hi view of __A; __mask is
   forwarded unchanged.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shufflehi_epi16 (__m256i __A, const int __mask)
{
  __v16hi __src = (__v16hi) __A;

  return (__m256i) __builtin_ia32_pshufhw256 (__src, __mask);
}

/* Wraps __builtin_ia32_pshuflw256 on the __v16hi view of __A; __mask is
   forwarded unchanged.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shufflelo_epi16 (__m256i __A, const int __mask)
{
  __v16hi __src = (__v16hi) __A;

  return (__m256i) __builtin_ia32_pshuflw256 (__src, __mask);
}
#else
/* Non-optimizing builds get macros with the same expansions.  */
#define _mm256_shuffle_epi32(A, N) \
  ((__m256i) __builtin_ia32_pshufd256 ((__v8si)(__m256i)(A), (int)(N)))
#define _mm256_shufflehi_epi16(A, N) \
  ((__m256i) __builtin_ia32_pshufhw256 ((__v16hi)(__m256i)(A), (int)(N)))
#define _mm256_shufflelo_epi16(A, N) \
  ((__m256i) __builtin_ia32_pshuflw256 ((__v16hi)(__m256i)(A), (int)(N)))
#endif
623
624extern __inline __m256i
625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
626_mm256_sign_epi8 (__m256i __X, __m256i __Y)
627{
628 return (__m256i) __builtin_ia32_psignb256 ((__v32qi)__X, (__v32qi)__Y);
629}
630
631extern __inline __m256i
632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
633_mm256_sign_epi16 (__m256i __X, __m256i __Y)
634{
635 return (__m256i) __builtin_ia32_psignw256 ((__v16hi)__X, (__v16hi)__Y);
636}
637
638extern __inline __m256i
639__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
640_mm256_sign_epi32 (__m256i __X, __m256i __Y)
641{
642 return (__m256i) __builtin_ia32_psignd256 ((__v8si)__X, (__v8si)__Y);
643}
644
#ifdef __OPTIMIZE__
/* Wraps __builtin_ia32_pslldqi256; the count __N is multiplied by 8
   before it reaches the builtin.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_bslli_epi128 (__m256i __A, const int __N)
{
  return (__m256i) __builtin_ia32_pslldqi256 (__A,
                                              __N * 8);
}

/* Same expansion as _mm256_bslli_epi128, under a second name.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_slli_si256 (__m256i __A, const int __N)
{
  return (__m256i) __builtin_ia32_pslldqi256 (__A,
                                              __N * 8);
}
#else
/* Non-optimizing builds get macros with the same expansions.  */
#define _mm256_bslli_epi128(A, N) \
  ((__m256i) __builtin_ia32_pslldqi256 ((__m256i)(A), \
                                        (int)(N) * 8))
#define _mm256_slli_si256(A, N) \
  ((__m256i) __builtin_ia32_pslldqi256 ((__m256i)(A), \
                                        (int)(N) * 8))
#endif
665
666extern __inline __m256i
667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
668_mm256_slli_epi16 (__m256i __A, int __B)
669{
670 return (__m256i)__builtin_ia32_psllwi256 ((__v16hi)__A, __B);
671}
672
673extern __inline __m256i
674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
675_mm256_sll_epi16 (__m256i __A, __m128i __B)
676{
677 return (__m256i)__builtin_ia32_psllw256((__v16hi)__A, (__v8hi)__B);
678}
679
680extern __inline __m256i
681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
682_mm256_slli_epi32 (__m256i __A, int __B)
683{
684 return (__m256i)__builtin_ia32_pslldi256 ((__v8si)__A, __B);
685}
686
687extern __inline __m256i
688__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
689_mm256_sll_epi32 (__m256i __A, __m128i __B)
690{
691 return (__m256i)__builtin_ia32_pslld256((__v8si)__A, (__v4si)__B);
692}
693
694extern __inline __m256i
695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
696_mm256_slli_epi64 (__m256i __A, int __B)
697{
698 return (__m256i)__builtin_ia32_psllqi256 ((__v4di)__A, __B);
699}
700
701extern __inline __m256i
702__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
703_mm256_sll_epi64 (__m256i __A, __m128i __B)
704{
705 return (__m256i)__builtin_ia32_psllq256((__v4di)__A, (__v2di)__B);
706}
707
708extern __inline __m256i
709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
710_mm256_srai_epi16 (__m256i __A, int __B)
711{
712 return (__m256i)__builtin_ia32_psrawi256 ((__v16hi)__A, __B);
713}
714
715extern __inline __m256i
716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
717_mm256_sra_epi16 (__m256i __A, __m128i __B)
718{
719 return (__m256i)__builtin_ia32_psraw256 ((__v16hi)__A, (__v8hi)__B);
720}
721
722extern __inline __m256i
723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
724_mm256_srai_epi32 (__m256i __A, int __B)
725{
726 return (__m256i)__builtin_ia32_psradi256 ((__v8si)__A, __B);
727}
728
729extern __inline __m256i
730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
731_mm256_sra_epi32 (__m256i __A, __m128i __B)
732{
733 return (__m256i)__builtin_ia32_psrad256 ((__v8si)__A, (__v4si)__B);
734}
735
#ifdef __OPTIMIZE__
/* Wraps __builtin_ia32_psrldqi256; the count __N is multiplied by 8
   before it reaches the builtin.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_bsrli_epi128 (__m256i __A, const int __N)
{
  return (__m256i) __builtin_ia32_psrldqi256 (__A,
                                              __N * 8);
}

/* Same expansion as _mm256_bsrli_epi128, under a second name.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srli_si256 (__m256i __A, const int __N)
{
  return (__m256i) __builtin_ia32_psrldqi256 (__A,
                                              __N * 8);
}
#else
/* Non-optimizing builds get macros with the same expansions.  */
#define _mm256_bsrli_epi128(A, N) \
  ((__m256i) __builtin_ia32_psrldqi256 ((__m256i)(A), \
                                        (int)(N) * 8))
#define _mm256_srli_si256(A, N) \
  ((__m256i) __builtin_ia32_psrldqi256 ((__m256i)(A), \
                                        (int)(N) * 8))
#endif
756
757extern __inline __m256i
758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
759_mm256_srli_epi16 (__m256i __A, int __B)
760{
761 return (__m256i)__builtin_ia32_psrlwi256 ((__v16hi)__A, __B);
762}
763
764extern __inline __m256i
765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
766_mm256_srl_epi16 (__m256i __A, __m128i __B)
767{
768 return (__m256i)__builtin_ia32_psrlw256((__v16hi)__A, (__v8hi)__B);
769}
770
771extern __inline __m256i
772__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
773_mm256_srli_epi32 (__m256i __A, int __B)
774{
775 return (__m256i)__builtin_ia32_psrldi256 ((__v8si)__A, __B);
776}
777
778extern __inline __m256i
779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
780_mm256_srl_epi32 (__m256i __A, __m128i __B)
781{
782 return (__m256i)__builtin_ia32_psrld256((__v8si)__A, (__v4si)__B);
783}
784
785extern __inline __m256i
786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
787_mm256_srli_epi64 (__m256i __A, int __B)
788{
789 return (__m256i)__builtin_ia32_psrlqi256 ((__v4di)__A, __B);
790}
791
792extern __inline __m256i
793__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
794_mm256_srl_epi64 (__m256i __A, __m128i __B)
795{
796 return (__m256i)__builtin_ia32_psrlq256((__v4di)__A, (__v2di)__B);
797}
798
799extern __inline __m256i
800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
801_mm256_sub_epi8 (__m256i __A, __m256i __B)
802{
803 return (__m256i) ((__v32qu)__A - (__v32qu)__B);
804}
805
806extern __inline __m256i
807__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
808_mm256_sub_epi16 (__m256i __A, __m256i __B)
809{
810 return (__m256i) ((__v16hu)__A - (__v16hu)__B);
811}
812
813extern __inline __m256i
814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
815_mm256_sub_epi32 (__m256i __A, __m256i __B)
816{
817 return (__m256i) ((__v8su)__A - (__v8su)__B);
818}
819
820extern __inline __m256i
821__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
822_mm256_sub_epi64 (__m256i __A, __m256i __B)
823{
824 return (__m256i) ((__v4du)__A - (__v4du)__B);
825}
826
827extern __inline __m256i
828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
829_mm256_subs_epi8 (__m256i __A, __m256i __B)
830{
831 return (__m256i)__builtin_ia32_psubsb256 ((__v32qi)__A, (__v32qi)__B);
832}
833
834extern __inline __m256i
835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
836_mm256_subs_epi16 (__m256i __A, __m256i __B)
837{
838 return (__m256i)__builtin_ia32_psubsw256 ((__v16hi)__A, (__v16hi)__B);
839}
840
841extern __inline __m256i
842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
843_mm256_subs_epu8 (__m256i __A, __m256i __B)
844{
845 return (__m256i)__builtin_ia32_psubusb256 ((__v32qi)__A, (__v32qi)__B);
846}
847
848extern __inline __m256i
849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
850_mm256_subs_epu16 (__m256i __A, __m256i __B)
851{
852 return (__m256i)__builtin_ia32_psubusw256 ((__v16hi)__A, (__v16hi)__B);
853}
854
855extern __inline __m256i
856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
857_mm256_unpackhi_epi8 (__m256i __A, __m256i __B)
858{
859 return (__m256i)__builtin_ia32_punpckhbw256 ((__v32qi)__A, (__v32qi)__B);
860}
861
862extern __inline __m256i
863__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
864_mm256_unpackhi_epi16 (__m256i __A, __m256i __B)
865{
866 return (__m256i)__builtin_ia32_punpckhwd256 ((__v16hi)__A, (__v16hi)__B);
867}
868
869extern __inline __m256i
870__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
871_mm256_unpackhi_epi32 (__m256i __A, __m256i __B)
872{
873 return (__m256i)__builtin_ia32_punpckhdq256 ((__v8si)__A, (__v8si)__B);
874}
875
876extern __inline __m256i
877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
878_mm256_unpackhi_epi64 (__m256i __A, __m256i __B)
879{
880 return (__m256i)__builtin_ia32_punpckhqdq256 ((__v4di)__A, (__v4di)__B);
881}
882
883extern __inline __m256i
884__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
885_mm256_unpacklo_epi8 (__m256i __A, __m256i __B)
886{
887 return (__m256i)__builtin_ia32_punpcklbw256 ((__v32qi)__A, (__v32qi)__B);
888}
889
890extern __inline __m256i
891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
892_mm256_unpacklo_epi16 (__m256i __A, __m256i __B)
893{
894 return (__m256i)__builtin_ia32_punpcklwd256 ((__v16hi)__A, (__v16hi)__B);
895}
896
897extern __inline __m256i
898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
899_mm256_unpacklo_epi32 (__m256i __A, __m256i __B)
900{
901 return (__m256i)__builtin_ia32_punpckldq256 ((__v8si)__A, (__v8si)__B);
902}
903
904extern __inline __m256i
905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
906_mm256_unpacklo_epi64 (__m256i __A, __m256i __B)
907{
908 return (__m256i)__builtin_ia32_punpcklqdq256 ((__v4di)__A, (__v4di)__B);
909}
910
911extern __inline __m256i
912__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
913_mm256_xor_si256 (__m256i __A, __m256i __B)
914{
915 return (__m256i) ((__v4du)__A ^ (__v4du)__B);
916}
917
918extern __inline __m256i
919__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
920_mm256_stream_load_si256 (__m256i const *__X)
921{
922 return (__m256i) __builtin_ia32_movntdqa256 ((__v4di *) __X);
923}
924
925extern __inline __m128
926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
927_mm_broadcastss_ps (__m128 __X)
928{
929 return (__m128) __builtin_ia32_vbroadcastss_ps ((__v4sf)__X);
930}
931
932extern __inline __m256
933__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
934_mm256_broadcastss_ps (__m128 __X)
935{
936 return (__m256) __builtin_ia32_vbroadcastss_ps256 ((__v4sf)__X);
937}
938
939extern __inline __m256d
940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
941_mm256_broadcastsd_pd (__m128d __X)
942{
943 return (__m256d) __builtin_ia32_vbroadcastsd_pd256 ((__v2df)__X);
944}
945
946extern __inline __m256i
947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
948_mm256_broadcastsi128_si256 (__m128i __X)
949{
950 return (__m256i) __builtin_ia32_vbroadcastsi256 ((__v2di)__X);
951}
952
#ifdef __OPTIMIZE__
/* Wraps __builtin_ia32_pblendd128 on the __v4si views of __X and __Y;
   __M is forwarded to the builtin unchanged.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_epi32 (__m128i __X, __m128i __Y, const int __M)
{
  __v4si __first = (__v4si) __X;
  __v4si __second = (__v4si) __Y;

  return (__m128i) __builtin_ia32_pblendd128 (__first, __second, __M);
}
#else
/* Non-optimizing builds get a macro with the same expansion.  */
#define _mm_blend_epi32(X, Y, M) \
  ((__m128i) __builtin_ia32_pblendd128 ((__v4si)(__m128i)(X), \
                                        (__v4si)(__m128i)(Y), \
                                        (int)(M)))
#endif
967
#ifdef __OPTIMIZE__
/* Wraps __builtin_ia32_pblendd256 on the __v8si views of __X and __Y;
   __M is forwarded to the builtin unchanged.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blend_epi32 (__m256i __X, __m256i __Y, const int __M)
{
  __v8si __first = (__v8si) __X;
  __v8si __second = (__v8si) __Y;

  return (__m256i) __builtin_ia32_pblendd256 (__first, __second, __M);
}
#else
/* Non-optimizing builds get a macro with the same expansion.  */
#define _mm256_blend_epi32(X, Y, M) \
  ((__m256i) __builtin_ia32_pblendd256 ((__v8si)(__m256i)(X), \
                                        (__v8si)(__m256i)(Y), \
                                        (int)(M)))
#endif
982
983extern __inline __m256i
984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
985_mm256_broadcastb_epi8 (__m128i __X)
986{
987 return (__m256i) __builtin_ia32_pbroadcastb256 ((__v16qi)__X);
988}
989
990extern __inline __m256i
991__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
992_mm256_broadcastw_epi16 (__m128i __X)
993{
994 return (__m256i) __builtin_ia32_pbroadcastw256 ((__v8hi)__X);
995}
996
997extern __inline __m256i
998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
999_mm256_broadcastd_epi32 (__m128i __X)
1000{
1001 return (__m256i) __builtin_ia32_pbroadcastd256 ((__v4si)__X);
1002}
1003
1004extern __inline __m256i
1005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1006_mm256_broadcastq_epi64 (__m128i __X)
1007{
1008 return (__m256i) __builtin_ia32_pbroadcastq256 ((__v2di)__X);
1009}
1010
1011extern __inline __m128i
1012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1013_mm_broadcastb_epi8 (__m128i __X)
1014{
1015 return (__m128i) __builtin_ia32_pbroadcastb128 ((__v16qi)__X);
1016}
1017
1018extern __inline __m128i
1019__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1020_mm_broadcastw_epi16 (__m128i __X)
1021{
1022 return (__m128i) __builtin_ia32_pbroadcastw128 ((__v8hi)__X);
1023}
1024
1025extern __inline __m128i
1026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1027_mm_broadcastd_epi32 (__m128i __X)
1028{
1029 return (__m128i) __builtin_ia32_pbroadcastd128 ((__v4si)__X);
1030}
1031
1032extern __inline __m128i
1033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1034_mm_broadcastq_epi64 (__m128i __X)
1035{
1036 return (__m128i) __builtin_ia32_pbroadcastq128 ((__v2di)__X);
1037}
1038
1039extern __inline __m256i
1040__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1041_mm256_permutevar8x32_epi32 (__m256i __X, __m256i __Y)
1042{
1043 return (__m256i) __builtin_ia32_permvarsi256 ((__v8si)__X, (__v8si)__Y);
1044}
1045
/* _mm256_permute4x64_pd: forwards __X (as __v4df) and the selector __M
   to __builtin_ia32_permdf256.  Inline function when __OPTIMIZE__ is
   defined, macro otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute4x64_pd (__m256d __X, const int __M)
{
  __v4df __in = (__v4df) __X;

  return (__m256d) __builtin_ia32_permdf256 (__in, __M);
}
#else
#define _mm256_permute4x64_pd(X, M)                             \
  ((__m256d) __builtin_ia32_permdf256 ((__v4df)(__m256d)(X),    \
                                       (int)(M)))
#endif
1057
1058extern __inline __m256
1059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1060_mm256_permutevar8x32_ps (__m256 __X, __m256i __Y)
1061{
1062 return (__m256) __builtin_ia32_permvarsf256 ((__v8sf)__X, (__v8si)__Y);
1063}
1064
/* _mm256_permute4x64_epi64: forwards __X (as __v4di) and the selector
   __M to __builtin_ia32_permdi256.  Inline function when __OPTIMIZE__ is
   defined, macro otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute4x64_epi64 (__m256i __X, const int __M)
{
  __v4di __in = (__v4di) __X;

  return (__m256i) __builtin_ia32_permdi256 (__in, __M);
}
#else
#define _mm256_permute4x64_epi64(X, M)                          \
  ((__m256i) __builtin_ia32_permdi256 ((__v4di)(__m256i)(X),    \
                                       (int)(M)))
#endif
1076
1077
/* _mm256_permute2x128_si256: forwards __X and __Y (as __v4di) plus the
   selector __M to __builtin_ia32_permti256.  Inline function when
   __OPTIMIZE__ is defined, macro otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2x128_si256 (__m256i __X, __m256i __Y, const int __M)
{
  __v4di __first = (__v4di) __X;
  __v4di __second = (__v4di) __Y;

  return (__m256i) __builtin_ia32_permti256 (__first, __second, __M);
}
#else
#define _mm256_permute2x128_si256(X, Y, M)                      \
  ((__m256i) __builtin_ia32_permti256 ((__v4di)(__m256i)(X),    \
                                       (__v4di)(__m256i)(Y),    \
                                       (int)(M)))
#endif
1089
/* _mm256_extracti128_si256: forwards __X (as __v4di) and the selector
   __M to __builtin_ia32_extract128i256, returning __m128i.  Inline
   function when __OPTIMIZE__ is defined, macro otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extracti128_si256 (__m256i __X, const int __M)
{
  __v4di __in = (__v4di) __X;

  return (__m128i) __builtin_ia32_extract128i256 (__in, __M);
}
#else
#define _mm256_extracti128_si256(X, M)                                  \
  ((__m128i) __builtin_ia32_extract128i256 ((__v4di)(__m256i)(X),       \
                                            (int)(M)))
#endif
1101
/* _mm256_inserti128_si256: forwards __X (as __v4di), __Y (as __v2di) and
   the selector __M to __builtin_ia32_insert128i256.  Inline function
   when __OPTIMIZE__ is defined, macro otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_inserti128_si256 (__m256i __X, __m128i __Y, const int __M)
{
  __v4di __wide = (__v4di) __X;
  __v2di __narrow = (__v2di) __Y;

  return (__m256i) __builtin_ia32_insert128i256 (__wide, __narrow, __M);
}
#else
#define _mm256_inserti128_si256(X, Y, M)                                \
  ((__m256i) __builtin_ia32_insert128i256 ((__v4di)(__m256i)(X),        \
                                           (__v2di)(__m128i)(Y),        \
                                           (int)(M)))
#endif
1115
1116extern __inline __m256i
1117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1118_mm256_maskload_epi32 (int const *__X, __m256i __M )
1119{
1120 return (__m256i) __builtin_ia32_maskloadd256 ((const __v8si *)__X,
1121 (__v8si)__M);
1122}
1123
1124extern __inline __m256i
1125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1126_mm256_maskload_epi64 (long long const *__X, __m256i __M )
1127{
1128 return (__m256i) __builtin_ia32_maskloadq256 ((const __v4di *)__X,
1129 (__v4di)__M);
1130}
1131
1132extern __inline __m128i
1133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1134_mm_maskload_epi32 (int const *__X, __m128i __M )
1135{
1136 return (__m128i) __builtin_ia32_maskloadd ((const __v4si *)__X,
1137 (__v4si)__M);
1138}
1139
1140extern __inline __m128i
1141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1142_mm_maskload_epi64 (long long const *__X, __m128i __M )
1143{
1144 return (__m128i) __builtin_ia32_maskloadq ((const __v2di *)__X,
1145 (__v2di)__M);
1146}
1147
1148extern __inline void
1149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1150_mm256_maskstore_epi32 (int *__X, __m256i __M, __m256i __Y )
1151{
1152 __builtin_ia32_maskstored256 ((__v8si *)__X, (__v8si)__M, (__v8si)__Y);
1153}
1154
1155extern __inline void
1156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1157_mm256_maskstore_epi64 (long long *__X, __m256i __M, __m256i __Y )
1158{
1159 __builtin_ia32_maskstoreq256 ((__v4di *)__X, (__v4di)__M, (__v4di)__Y);
1160}
1161
1162extern __inline void
1163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1164_mm_maskstore_epi32 (int *__X, __m128i __M, __m128i __Y )
1165{
1166 __builtin_ia32_maskstored ((__v4si *)__X, (__v4si)__M, (__v4si)__Y);
1167}
1168
1169extern __inline void
1170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1171_mm_maskstore_epi64 (long long *__X, __m128i __M, __m128i __Y )
1172{
1173 __builtin_ia32_maskstoreq (( __v2di *)__X, (__v2di)__M, (__v2di)__Y);
1174}
1175
1176extern __inline __m256i
1177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1178_mm256_sllv_epi32 (__m256i __X, __m256i __Y)
1179{
1180 return (__m256i) __builtin_ia32_psllv8si ((__v8si)__X, (__v8si)__Y);
1181}
1182
1183extern __inline __m128i
1184__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1185_mm_sllv_epi32 (__m128i __X, __m128i __Y)
1186{
1187 return (__m128i) __builtin_ia32_psllv4si ((__v4si)__X, (__v4si)__Y);
1188}
1189
1190extern __inline __m256i
1191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1192_mm256_sllv_epi64 (__m256i __X, __m256i __Y)
1193{
1194 return (__m256i) __builtin_ia32_psllv4di ((__v4di)__X, (__v4di)__Y);
1195}
1196
1197extern __inline __m128i
1198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1199_mm_sllv_epi64 (__m128i __X, __m128i __Y)
1200{
1201 return (__m128i) __builtin_ia32_psllv2di ((__v2di)__X, (__v2di)__Y);
1202}
1203
1204extern __inline __m256i
1205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1206_mm256_srav_epi32 (__m256i __X, __m256i __Y)
1207{
1208 return (__m256i) __builtin_ia32_psrav8si ((__v8si)__X, (__v8si)__Y);
1209}
1210
1211extern __inline __m128i
1212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1213_mm_srav_epi32 (__m128i __X, __m128i __Y)
1214{
1215 return (__m128i) __builtin_ia32_psrav4si ((__v4si)__X, (__v4si)__Y);
1216}
1217
1218extern __inline __m256i
1219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1220_mm256_srlv_epi32 (__m256i __X, __m256i __Y)
1221{
1222 return (__m256i) __builtin_ia32_psrlv8si ((__v8si)__X, (__v8si)__Y);
1223}
1224
1225extern __inline __m128i
1226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1227_mm_srlv_epi32 (__m128i __X, __m128i __Y)
1228{
1229 return (__m128i) __builtin_ia32_psrlv4si ((__v4si)__X, (__v4si)__Y);
1230}
1231
1232extern __inline __m256i
1233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1234_mm256_srlv_epi64 (__m256i __X, __m256i __Y)
1235{
1236 return (__m256i) __builtin_ia32_psrlv4di ((__v4di)__X, (__v4di)__Y);
1237}
1238
1239extern __inline __m128i
1240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1241_mm_srlv_epi64 (__m128i __X, __m128i __Y)
1242{
1243 return (__m128i) __builtin_ia32_psrlv2di ((__v2di)__X, (__v2di)__Y);
1244}
1245
1246#ifdef __OPTIMIZE__
1247extern __inline __m128d
1248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1249_mm_i32gather_pd (double const *base, __m128i index, const int scale)
1250{
1251 __v2df zero = _mm_setzero_pd ();
1252 __v2df mask = _mm_cmpeq_pd (zero, zero);
1253
1254 return (__m128d) __builtin_ia32_gathersiv2df (_mm_undefined_pd (),
1255 base,
1256 (__v4si)index,
1257 mask,
1258 scale);
1259}
1260
1261extern __inline __m128d
1262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1263_mm_mask_i32gather_pd (__m128d src, double const *base, __m128i index,
1264 __m128d mask, const int scale)
1265{
1266 return (__m128d) __builtin_ia32_gathersiv2df ((__v2df)src,
1267 base,
1268 (__v4si)index,
1269 (__v2df)mask,
1270 scale);
1271}
1272
1273extern __inline __m256d
1274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1275_mm256_i32gather_pd (double const *base, __m128i index, const int scale)
1276{
1277 __v4df zero = _mm256_setzero_pd ();
1278 __v4df mask = _mm256_cmp_pd (zero, zero, _CMP_EQ_OQ);
1279
1280 return (__m256d) __builtin_ia32_gathersiv4df (_mm256_undefined_pd (),
1281 base,
1282 (__v4si)index,
1283 mask,
1284 scale);
1285}
1286
1287extern __inline __m256d
1288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1289_mm256_mask_i32gather_pd (__m256d src, double const *base,
1290 __m128i index, __m256d mask, const int scale)
1291{
1292 return (__m256d) __builtin_ia32_gathersiv4df ((__v4df)src,
1293 base,
1294 (__v4si)index,
1295 (__v4df)mask,
1296 scale);
1297}
1298
1299extern __inline __m128d
1300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1301_mm_i64gather_pd (double const *base, __m128i index, const int scale)
1302{
1303 __v2df src = _mm_setzero_pd ();
1304 __v2df mask = _mm_cmpeq_pd (src, src);
1305
1306 return (__m128d) __builtin_ia32_gatherdiv2df (src,
1307 base,
1308 (__v2di)index,
1309 mask,
1310 scale);
1311}
1312
1313extern __inline __m128d
1314__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1315_mm_mask_i64gather_pd (__m128d src, double const *base, __m128i index,
1316 __m128d mask, const int scale)
1317{
1318 return (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)src,
1319 base,
1320 (__v2di)index,
1321 (__v2df)mask,
1322 scale);
1323}
1324
1325extern __inline __m256d
1326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1327_mm256_i64gather_pd (double const *base, __m256i index, const int scale)
1328{
1329 __v4df src = _mm256_setzero_pd ();
1330 __v4df mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ);
1331
1332 return (__m256d) __builtin_ia32_gatherdiv4df (src,
1333 base,
1334 (__v4di)index,
1335 mask,
1336 scale);
1337}
1338
1339extern __inline __m256d
1340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1341_mm256_mask_i64gather_pd (__m256d src, double const *base,
1342 __m256i index, __m256d mask, const int scale)
1343{
1344 return (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)src,
1345 base,
1346 (__v4di)index,
1347 (__v4df)mask,
1348 scale);
1349}
1350
1351extern __inline __m128
1352__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1353_mm_i32gather_ps (float const *base, __m128i index, const int scale)
1354{
1355 __v4sf src = _mm_setzero_ps ();
1356 __v4sf mask = _mm_cmpeq_ps (src, src);
1357
1358 return (__m128) __builtin_ia32_gathersiv4sf (src,
1359 base,
1360 (__v4si)index,
1361 mask,
1362 scale);
1363}
1364
1365extern __inline __m128
1366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1367_mm_mask_i32gather_ps (__m128 src, float const *base, __m128i index,
1368 __m128 mask, const int scale)
1369{
1370 return (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)src,
1371 base,
1372 (__v4si)index,
1373 (__v4sf)mask,
1374 scale);
1375}
1376
1377extern __inline __m256
1378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1379_mm256_i32gather_ps (float const *base, __m256i index, const int scale)
1380{
1381 __v8sf src = _mm256_setzero_ps ();
1382 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
1383
1384 return (__m256) __builtin_ia32_gathersiv8sf (src,
1385 base,
1386 (__v8si)index,
1387 mask,
1388 scale);
1389}
1390
1391extern __inline __m256
1392__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1393_mm256_mask_i32gather_ps (__m256 src, float const *base,
1394 __m256i index, __m256 mask, const int scale)
1395{
1396 return (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)src,
1397 base,
1398 (__v8si)index,
1399 (__v8sf)mask,
1400 scale);
1401}
1402
1403extern __inline __m128
1404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1405_mm_i64gather_ps (float const *base, __m128i index, const int scale)
1406{
1407 __v4sf src = _mm_setzero_ps ();
1408 __v4sf mask = _mm_cmpeq_ps (src, src);
1409
1410 return (__m128) __builtin_ia32_gatherdiv4sf (src,
1411 base,
1412 (__v2di)index,
1413 mask,
1414 scale);
1415}
1416
1417extern __inline __m128
1418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1419_mm_mask_i64gather_ps (__m128 src, float const *base, __m128i index,
1420 __m128 mask, const int scale)
1421{
1422 return (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)src,
1423 base,
1424 (__v2di)index,
1425 (__v4sf)mask,
1426 scale);
1427}
1428
1429extern __inline __m128
1430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1431_mm256_i64gather_ps (float const *base, __m256i index, const int scale)
1432{
1433 __v4sf src = _mm_setzero_ps ();
1434 __v4sf mask = _mm_cmpeq_ps (src, src);
1435
1436 return (__m128) __builtin_ia32_gatherdiv4sf256 (src,
1437 base,
1438 (__v4di)index,
1439 mask,
1440 scale);
1441}
1442
1443extern __inline __m128
1444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1445_mm256_mask_i64gather_ps (__m128 src, float const *base,
1446 __m256i index, __m128 mask, const int scale)
1447{
1448 return (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)src,
1449 base,
1450 (__v4di)index,
1451 (__v4sf)mask,
1452 scale);
1453}
1454
1455extern __inline __m128i
1456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1457_mm_i32gather_epi64 (long long int const *base,
1458 __m128i index, const int scale)
1459{
1460 __v2di src = __extension__ (__v2di){ 0, 0 };
1461 __v2di mask = __extension__ (__v2di){ ~0, ~0 };
1462
1463 return (__m128i) __builtin_ia32_gathersiv2di (src,
1464 base,
1465 (__v4si)index,
1466 mask,
1467 scale);
1468}
1469
1470extern __inline __m128i
1471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1472_mm_mask_i32gather_epi64 (__m128i src, long long int const *base,
1473 __m128i index, __m128i mask, const int scale)
1474{
1475 return (__m128i) __builtin_ia32_gathersiv2di ((__v2di)src,
1476 base,
1477 (__v4si)index,
1478 (__v2di)mask,
1479 scale);
1480}
1481
1482extern __inline __m256i
1483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1484_mm256_i32gather_epi64 (long long int const *base,
1485 __m128i index, const int scale)
1486{
1487 __v4di src = __extension__ (__v4di){ 0, 0, 0, 0 };
1488 __v4di mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };
1489
1490 return (__m256i) __builtin_ia32_gathersiv4di (src,
1491 base,
1492 (__v4si)index,
1493 mask,
1494 scale);
1495}
1496
1497extern __inline __m256i
1498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1499_mm256_mask_i32gather_epi64 (__m256i src, long long int const *base,
1500 __m128i index, __m256i mask, const int scale)
1501{
1502 return (__m256i) __builtin_ia32_gathersiv4di ((__v4di)src,
1503 base,
1504 (__v4si)index,
1505 (__v4di)mask,
1506 scale);
1507}
1508
1509extern __inline __m128i
1510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1511_mm_i64gather_epi64 (long long int const *base,
1512 __m128i index, const int scale)
1513{
1514 __v2di src = __extension__ (__v2di){ 0, 0 };
1515 __v2di mask = __extension__ (__v2di){ ~0, ~0 };
1516
1517 return (__m128i) __builtin_ia32_gatherdiv2di (src,
1518 base,
1519 (__v2di)index,
1520 mask,
1521 scale);
1522}
1523
1524extern __inline __m128i
1525__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1526_mm_mask_i64gather_epi64 (__m128i src, long long int const *base, __m128i index,
1527 __m128i mask, const int scale)
1528{
1529 return (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)src,
1530 base,
1531 (__v2di)index,
1532 (__v2di)mask,
1533 scale);
1534}
1535
1536extern __inline __m256i
1537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1538_mm256_i64gather_epi64 (long long int const *base,
1539 __m256i index, const int scale)
1540{
1541 __v4di src = __extension__ (__v4di){ 0, 0, 0, 0 };
1542 __v4di mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };
1543
1544 return (__m256i) __builtin_ia32_gatherdiv4di (src,
1545 base,
1546 (__v4di)index,
1547 mask,
1548 scale);
1549}
1550
1551extern __inline __m256i
1552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1553_mm256_mask_i64gather_epi64 (__m256i src, long long int const *base,
1554 __m256i index, __m256i mask, const int scale)
1555{
1556 return (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)src,
1557 base,
1558 (__v4di)index,
1559 (__v4di)mask,
1560 scale);
1561}
1562
1563extern __inline __m128i
1564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1565_mm_i32gather_epi32 (int const *base, __m128i index, const int scale)
1566{
1567 __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 };
1568 __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
1569
1570 return (__m128i) __builtin_ia32_gathersiv4si (src,
1571 base,
1572 (__v4si)index,
1573 mask,
1574 scale);
1575}
1576
1577extern __inline __m128i
1578__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1579_mm_mask_i32gather_epi32 (__m128i src, int const *base, __m128i index,
1580 __m128i mask, const int scale)
1581{
1582 return (__m128i) __builtin_ia32_gathersiv4si ((__v4si)src,
1583 base,
1584 (__v4si)index,
1585 (__v4si)mask,
1586 scale);
1587}
1588
1589extern __inline __m256i
1590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1591_mm256_i32gather_epi32 (int const *base, __m256i index, const int scale)
1592{
1593 __v8si src = __extension__ (__v8si){ 0, 0, 0, 0, 0, 0, 0, 0 };
1594 __v8si mask = __extension__ (__v8si){ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
1595
1596 return (__m256i) __builtin_ia32_gathersiv8si (src,
1597 base,
1598 (__v8si)index,
1599 mask,
1600 scale);
1601}
1602
1603extern __inline __m256i
1604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1605_mm256_mask_i32gather_epi32 (__m256i src, int const *base,
1606 __m256i index, __m256i mask, const int scale)
1607{
1608 return (__m256i) __builtin_ia32_gathersiv8si ((__v8si)src,
1609 base,
1610 (__v8si)index,
1611 (__v8si)mask,
1612 scale);
1613}
1614
1615extern __inline __m128i
1616__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1617_mm_i64gather_epi32 (int const *base, __m128i index, const int scale)
1618{
1619 __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 };
1620 __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
1621
1622 return (__m128i) __builtin_ia32_gatherdiv4si (src,
1623 base,
1624 (__v2di)index,
1625 mask,
1626 scale);
1627}
1628
1629extern __inline __m128i
1630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1631_mm_mask_i64gather_epi32 (__m128i src, int const *base, __m128i index,
1632 __m128i mask, const int scale)
1633{
1634 return (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)src,
1635 base,
1636 (__v2di)index,
1637 (__v4si)mask,
1638 scale);
1639}
1640
1641extern __inline __m128i
1642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1643_mm256_i64gather_epi32 (int const *base, __m256i index, const int scale)
1644{
1645 __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 };
1646 __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
1647
1648 return (__m128i) __builtin_ia32_gatherdiv4si256 (src,
1649 base,
1650 (__v4di)index,
1651 mask,
1652 scale);
1653}
1654
1655extern __inline __m128i
1656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1657_mm256_mask_i64gather_epi32 (__m128i src, int const *base,
1658 __m256i index, __m128i mask, const int scale)
1659{
1660 return (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)src,
1661 base,
1662 (__v4di)index,
1663 (__v4si)mask,
1664 scale);
1665}
1666#else /* __OPTIMIZE__ */
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm_i32gather_pd(BASE, INDEX, SCALE)                            \
  ((__m128d) __builtin_ia32_gathersiv2df ((__v2df) _mm_setzero_pd (),   \
                                          (double const *)(BASE),       \
                                          (__v4si)(__m128i)(INDEX),     \
                                          (__v2df)_mm_set1_pd (         \
                                            (double)(long long int) -1),\
                                          (int)(SCALE)))
1674
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE)            \
  ((__m128d) __builtin_ia32_gathersiv2df ((__v2df)(__m128d)(SRC),       \
                                          (double const *)(BASE),       \
                                          (__v4si)(__m128i)(INDEX),     \
                                          (__v2df)(__m128d)(MASK),      \
                                          (int)(SCALE)))
1681
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm256_i32gather_pd(BASE, INDEX, SCALE)                         \
  ((__m256d) __builtin_ia32_gathersiv4df ((__v4df) _mm256_setzero_pd (),\
                                          (double const *)(BASE),       \
                                          (__v4si)(__m128i)(INDEX),     \
                                          (__v4df)_mm256_set1_pd (      \
                                            (double)(long long int) -1),\
                                          (int)(SCALE)))
1689
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm256_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE)         \
  ((__m256d) __builtin_ia32_gathersiv4df ((__v4df)(__m256d)(SRC),       \
                                          (double const *)(BASE),       \
                                          (__v4si)(__m128i)(INDEX),     \
                                          (__v4df)(__m256d)(MASK),      \
                                          (int)(SCALE)))
1696
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm_i64gather_pd(BASE, INDEX, SCALE)                            \
  ((__m128d) __builtin_ia32_gatherdiv2df ((__v2df) _mm_setzero_pd (),   \
                                          (double const *)(BASE),       \
                                          (__v2di)(__m128i)(INDEX),     \
                                          (__v2df)_mm_set1_pd (         \
                                            (double)(long long int) -1),\
                                          (int)(SCALE)))
1704
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE)            \
  ((__m128d) __builtin_ia32_gatherdiv2df ((__v2df)(__m128d)(SRC),       \
                                          (double const *)(BASE),       \
                                          (__v2di)(__m128i)(INDEX),     \
                                          (__v2df)(__m128d)(MASK),      \
                                          (int)(SCALE)))
1711
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm256_i64gather_pd(BASE, INDEX, SCALE)                         \
  ((__m256d) __builtin_ia32_gatherdiv4df ((__v4df) _mm256_setzero_pd (),\
                                          (double const *)(BASE),       \
                                          (__v4di)(__m256i)(INDEX),     \
                                          (__v4df)_mm256_set1_pd (      \
                                            (double)(long long int) -1),\
                                          (int)(SCALE)))
1719
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm256_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE)         \
  ((__m256d) __builtin_ia32_gatherdiv4df ((__v4df)(__m256d)(SRC),       \
                                          (double const *)(BASE),       \
                                          (__v4di)(__m256i)(INDEX),     \
                                          (__v4df)(__m256d)(MASK),      \
                                          (int)(SCALE)))
1726
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  The mask gets an explicit __v4sf cast like the
   other float gather macros.  */
#define _mm_i32gather_ps(BASE, INDEX, SCALE)                            \
  ((__m128) __builtin_ia32_gathersiv4sf ((__v4sf) _mm_setzero_ps (),    \
                                         (float const *)(BASE),         \
                                         (__v4si)(__m128i)(INDEX),      \
                                         (__v4sf)_mm_set1_ps (          \
                                           (float)(int) -1),            \
                                         (int)(SCALE)))
1733
/* Macro form used when __OPTIMIZE__ is not defined.  SRC and MASK are
   float vectors, so they are cast through __m128 (not __m128d).  Each
   argument is parenthesized before it is cast, and the whole expansion
   is wrapped, so compound argument expressions bind as expected.  */
#define _mm_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE)            \
  ((__m128) __builtin_ia32_gathersiv4sf ((__v4sf)(__m128)(SRC),         \
                                         (float const *)(BASE),         \
                                         (__v4si)(__m128i)(INDEX),      \
                                         (__v4sf)(__m128)(MASK),        \
                                         (int)(SCALE)))
1740
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm256_i32gather_ps(BASE, INDEX, SCALE)                         \
  ((__m256) __builtin_ia32_gathersiv8sf ((__v8sf) _mm256_setzero_ps (), \
                                         (float const *)(BASE),         \
                                         (__v8si)(__m256i)(INDEX),      \
                                         (__v8sf)_mm256_set1_ps (       \
                                           (float)(int) -1),            \
                                         (int)(SCALE)))
1748
/* Macro form used when __OPTIMIZE__ is not defined.  MASK is a float
   vector, so it is cast through __m256 (not __m256d).  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions bind as expected.  */
#define _mm256_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE)         \
  ((__m256) __builtin_ia32_gathersiv8sf ((__v8sf)(__m256)(SRC),         \
                                         (float const *)(BASE),         \
                                         (__v8si)(__m256i)(INDEX),      \
                                         (__v8sf)(__m256)(MASK),        \
                                         (int)(SCALE)))
1755
/* Macro form used when __OPTIMIZE__ is not defined.  The zero source is
   built with _mm_setzero_ps, matching the float element type (it was
   _mm_setzero_pd).  Each argument is parenthesized before it is cast,
   and the whole expansion is wrapped, so compound argument expressions
   bind as expected.  */
#define _mm_i64gather_ps(BASE, INDEX, SCALE)                            \
  ((__m128) __builtin_ia32_gatherdiv4sf ((__v4sf) _mm_setzero_ps (),    \
                                         (float const *)(BASE),         \
                                         (__v2di)(__m128i)(INDEX),      \
                                         (__v4sf)_mm_set1_ps (          \
                                           (float)(int) -1),            \
                                         (int)(SCALE)))
1763
/* Macro form used when __OPTIMIZE__ is not defined.  MASK is a float
   vector, so it is cast through __m128 (not __m128d).  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions bind as expected.  */
#define _mm_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE)            \
  ((__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)(__m128)(SRC),         \
                                         (float const *)(BASE),         \
                                         (__v2di)(__m128i)(INDEX),      \
                                         (__v4sf)(__m128)(MASK),        \
                                         (int)(SCALE)))
1770
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm256_i64gather_ps(BASE, INDEX, SCALE)                         \
  ((__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf) _mm_setzero_ps (), \
                                            (float const *)(BASE),      \
                                            (__v4di)(__m256i)(INDEX),   \
                                            (__v4sf)_mm_set1_ps (       \
                                              (float)(int) -1),         \
                                            (int)(SCALE)))
1778
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm256_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE)         \
  ((__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)(__m128)(SRC),      \
                                            (float const *)(BASE),      \
                                            (__v4di)(__m256i)(INDEX),   \
                                            (__v4sf)(__m128)(MASK),     \
                                            (int)(SCALE)))
1785
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm_i32gather_epi64(BASE, INDEX, SCALE)                         \
  ((__m128i) __builtin_ia32_gathersiv2di ((__v2di) _mm_setzero_si128 (),\
                                          (long long const *)(BASE),    \
                                          (__v4si)(__m128i)(INDEX),     \
                                          (__v2di)_mm_set1_epi64x (-1), \
                                          (int)(SCALE)))
1792
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE)         \
  ((__m128i) __builtin_ia32_gathersiv2di ((__v2di)(__m128i)(SRC),       \
                                          (long long const *)(BASE),    \
                                          (__v4si)(__m128i)(INDEX),     \
                                          (__v2di)(__m128i)(MASK),      \
                                          (int)(SCALE)))
1799
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm256_i32gather_epi64(BASE, INDEX, SCALE)                      \
  ((__m256i) __builtin_ia32_gathersiv4di (                              \
     (__v4di) _mm256_setzero_si256 (),                                  \
     (long long const *)(BASE),                                         \
     (__v4si)(__m128i)(INDEX),                                          \
     (__v4di)_mm256_set1_epi64x (-1),                                   \
     (int)(SCALE)))
1806
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm256_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE)      \
  ((__m256i) __builtin_ia32_gathersiv4di ((__v4di)(__m256i)(SRC),       \
                                          (long long const *)(BASE),    \
                                          (__v4si)(__m128i)(INDEX),     \
                                          (__v4di)(__m256i)(MASK),      \
                                          (int)(SCALE)))
1813
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm_i64gather_epi64(BASE, INDEX, SCALE)                         \
  ((__m128i) __builtin_ia32_gatherdiv2di ((__v2di) _mm_setzero_si128 (),\
                                          (long long const *)(BASE),    \
                                          (__v2di)(__m128i)(INDEX),     \
                                          (__v2di)_mm_set1_epi64x (-1), \
                                          (int)(SCALE)))
1820
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE)         \
  ((__m128i) __builtin_ia32_gatherdiv2di ((__v2di)(__m128i)(SRC),       \
                                          (long long const *)(BASE),    \
                                          (__v2di)(__m128i)(INDEX),     \
                                          (__v2di)(__m128i)(MASK),      \
                                          (int)(SCALE)))
1827
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm256_i64gather_epi64(BASE, INDEX, SCALE)                      \
  ((__m256i) __builtin_ia32_gatherdiv4di (                              \
     (__v4di) _mm256_setzero_si256 (),                                  \
     (long long const *)(BASE),                                         \
     (__v4di)(__m256i)(INDEX),                                          \
     (__v4di)_mm256_set1_epi64x (-1),                                   \
     (int)(SCALE)))
1834
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm256_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE)      \
  ((__m256i) __builtin_ia32_gatherdiv4di ((__v4di)(__m256i)(SRC),       \
                                          (long long const *)(BASE),    \
                                          (__v4di)(__m256i)(INDEX),     \
                                          (__v4di)(__m256i)(MASK),      \
                                          (int)(SCALE)))
1841
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm_i32gather_epi32(BASE, INDEX, SCALE)                         \
  ((__m128i) __builtin_ia32_gathersiv4si ((__v4si) _mm_setzero_si128 (),\
                                          (int const *)(BASE),          \
                                          (__v4si)(__m128i)(INDEX),     \
                                          (__v4si)_mm_set1_epi32 (-1),  \
                                          (int)(SCALE)))
1848
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE)         \
  ((__m128i) __builtin_ia32_gathersiv4si ((__v4si)(__m128i)(SRC),       \
                                          (int const *)(BASE),          \
                                          (__v4si)(__m128i)(INDEX),     \
                                          (__v4si)(__m128i)(MASK),      \
                                          (int)(SCALE)))
1855
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm256_i32gather_epi32(BASE, INDEX, SCALE)                      \
  ((__m256i) __builtin_ia32_gathersiv8si (                              \
     (__v8si) _mm256_setzero_si256 (),                                  \
     (int const *)(BASE),                                               \
     (__v8si)(__m256i)(INDEX),                                          \
     (__v8si)_mm256_set1_epi32 (-1),                                    \
     (int)(SCALE)))
1862
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm256_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE)      \
  ((__m256i) __builtin_ia32_gathersiv8si ((__v8si)(__m256i)(SRC),       \
                                          (int const *)(BASE),          \
                                          (__v8si)(__m256i)(INDEX),     \
                                          (__v8si)(__m256i)(MASK),      \
                                          (int)(SCALE)))
1869
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm_i64gather_epi32(BASE, INDEX, SCALE)                         \
  ((__m128i) __builtin_ia32_gatherdiv4si ((__v4si) _mm_setzero_si128 (),\
                                          (int const *)(BASE),          \
                                          (__v2di)(__m128i)(INDEX),     \
                                          (__v4si)_mm_set1_epi32 (-1),  \
                                          (int)(SCALE)))
1876
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE)         \
  ((__m128i) __builtin_ia32_gatherdiv4si ((__v4si)(__m128i)(SRC),       \
                                          (int const *)(BASE),          \
                                          (__v2di)(__m128i)(INDEX),     \
                                          (__v4si)(__m128i)(MASK),      \
                                          (int)(SCALE)))
1883
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm256_i64gather_epi32(BASE, INDEX, SCALE)                      \
  ((__m128i) __builtin_ia32_gatherdiv4si256 (                           \
     (__v4si) _mm_setzero_si128 (),                                     \
     (int const *)(BASE),                                               \
     (__v4di)(__m256i)(INDEX),                                          \
     (__v4si)_mm_set1_epi32 (-1),                                       \
     (int)(SCALE)))
1890
/* Macro form used when __OPTIMIZE__ is not defined.  Each argument is
   parenthesized before it is cast, and the whole expansion is wrapped,
   so compound argument expressions and surrounding operators bind as
   the caller expects.  */
#define _mm256_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE)      \
  ((__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)(__m128i)(SRC),    \
                                             (int const *)(BASE),       \
                                             (__v4di)(__m256i)(INDEX),  \
                                             (__v4si)(__m128i)(MASK),   \
                                             (int)(SCALE)))
1897#endif /* __OPTIMIZE__ */
1898
1899#ifdef __DISABLE_AVX2__
1900#undef __DISABLE_AVX2__
1901#pragma GCC pop_options
1902#endif /* __DISABLE_AVX2__ */
1903
1904#endif /* _AVX2INTRIN_H_INCLUDED */
Note: See TracBrowser for help on using the repository browser.