source: Daodan/MSYS2/mingw32/lib/gcc/i686-w64-mingw32/11.2.0/include/avx512dqintrin.h

Last change on this file was 1166, checked in by rossy, 3 years ago

Daodan: Replace MinGW build env with an up-to-date MSYS2 env

File size: 90.7 KB
RevLine 
[1166]1/* Copyright (C) 2014-2021 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512DQINTRIN_H_INCLUDED
29#define _AVX512DQINTRIN_H_INCLUDED
30
31#ifndef __AVX512DQ__
32#pragma GCC push_options
33#pragma GCC target("avx512dq")
34#define __DISABLE_AVX512DQ__
35#endif /* __AVX512DQ__ */
36
37extern __inline unsigned char
38__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
39_ktest_mask8_u8 (__mmask8 __A, __mmask8 __B, unsigned char *__CF)
40{
41 *__CF = (unsigned char) __builtin_ia32_ktestcqi (__A, __B);
42 return (unsigned char) __builtin_ia32_ktestzqi (__A, __B);
43}
44
45extern __inline unsigned char
46__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
47_ktestz_mask8_u8 (__mmask8 __A, __mmask8 __B)
48{
49 return (unsigned char) __builtin_ia32_ktestzqi (__A, __B);
50}
51
52extern __inline unsigned char
53__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
54_ktestc_mask8_u8 (__mmask8 __A, __mmask8 __B)
55{
56 return (unsigned char) __builtin_ia32_ktestcqi (__A, __B);
57}
58
59extern __inline unsigned char
60__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
61_ktest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
62{
63 *__CF = (unsigned char) __builtin_ia32_ktestchi (__A, __B);
64 return (unsigned char) __builtin_ia32_ktestzhi (__A, __B);
65}
66
67extern __inline unsigned char
68__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
69_ktestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
70{
71 return (unsigned char) __builtin_ia32_ktestzhi (__A, __B);
72}
73
74extern __inline unsigned char
75__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
76_ktestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
77{
78 return (unsigned char) __builtin_ia32_ktestchi (__A, __B);
79}
80
81extern __inline unsigned char
82__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
83_kortest_mask8_u8 (__mmask8 __A, __mmask8 __B, unsigned char *__CF)
84{
85 *__CF = (unsigned char) __builtin_ia32_kortestcqi (__A, __B);
86 return (unsigned char) __builtin_ia32_kortestzqi (__A, __B);
87}
88
89extern __inline unsigned char
90__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
91_kortestz_mask8_u8 (__mmask8 __A, __mmask8 __B)
92{
93 return (unsigned char) __builtin_ia32_kortestzqi (__A, __B);
94}
95
96extern __inline unsigned char
97__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
98_kortestc_mask8_u8 (__mmask8 __A, __mmask8 __B)
99{
100 return (unsigned char) __builtin_ia32_kortestcqi (__A, __B);
101}
102
103extern __inline __mmask8
104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
105_kadd_mask8 (__mmask8 __A, __mmask8 __B)
106{
107 return (__mmask8) __builtin_ia32_kaddqi ((__mmask8) __A, (__mmask8) __B);
108}
109
110extern __inline __mmask16
111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
112_kadd_mask16 (__mmask16 __A, __mmask16 __B)
113{
114 return (__mmask16) __builtin_ia32_kaddhi ((__mmask16) __A, (__mmask16) __B);
115}
116
117extern __inline unsigned int
118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
119_cvtmask8_u32 (__mmask8 __A)
120{
121 return (unsigned int) __builtin_ia32_kmovb ((__mmask8 ) __A);
122}
123
124extern __inline __mmask8
125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
126_cvtu32_mask8 (unsigned int __A)
127{
128 return (__mmask8) __builtin_ia32_kmovb ((__mmask8) __A);
129}
130
131extern __inline __mmask8
132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
133_load_mask8 (__mmask8 *__A)
134{
135 return (__mmask8) __builtin_ia32_kmovb (*(__mmask8 *) __A);
136}
137
138extern __inline void
139__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
140_store_mask8 (__mmask8 *__A, __mmask8 __B)
141{
142 *(__mmask8 *) __A = __builtin_ia32_kmovb (__B);
143}
144
145extern __inline __mmask8
146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
147_knot_mask8 (__mmask8 __A)
148{
149 return (__mmask8) __builtin_ia32_knotqi ((__mmask8) __A);
150}
151
152extern __inline __mmask8
153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
154_kor_mask8 (__mmask8 __A, __mmask8 __B)
155{
156 return (__mmask8) __builtin_ia32_korqi ((__mmask8) __A, (__mmask8) __B);
157}
158
159extern __inline __mmask8
160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
161_kxnor_mask8 (__mmask8 __A, __mmask8 __B)
162{
163 return (__mmask8) __builtin_ia32_kxnorqi ((__mmask8) __A, (__mmask8) __B);
164}
165
166extern __inline __mmask8
167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
168_kxor_mask8 (__mmask8 __A, __mmask8 __B)
169{
170 return (__mmask8) __builtin_ia32_kxorqi ((__mmask8) __A, (__mmask8) __B);
171}
172
173extern __inline __mmask8
174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
175_kand_mask8 (__mmask8 __A, __mmask8 __B)
176{
177 return (__mmask8) __builtin_ia32_kandqi ((__mmask8) __A, (__mmask8) __B);
178}
179
180extern __inline __mmask8
181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
182_kandn_mask8 (__mmask8 __A, __mmask8 __B)
183{
184 return (__mmask8) __builtin_ia32_kandnqi ((__mmask8) __A, (__mmask8) __B);
185}
186
187extern __inline __m512d
188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
189_mm512_broadcast_f64x2 (__m128d __A)
190{
191 return (__m512d)
192 __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
193 _mm512_undefined_pd (),
194 (__mmask8) -1);
195}
196
197extern __inline __m512d
198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
199_mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A)
200{
201 return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
202 __A,
203 (__v8df)
204 __O, __M);
205}
206
207extern __inline __m512d
208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
209_mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
210{
211 return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
212 __A,
213 (__v8df)
214 _mm512_setzero_ps (),
215 __M);
216}
217
218extern __inline __m512i
219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
220_mm512_broadcast_i64x2 (__m128i __A)
221{
222 return (__m512i)
223 __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
224 _mm512_undefined_epi32 (),
225 (__mmask8) -1);
226}
227
228extern __inline __m512i
229__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
230_mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A)
231{
232 return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
233 __A,
234 (__v8di)
235 __O, __M);
236}
237
238extern __inline __m512i
239__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
240_mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
241{
242 return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
243 __A,
244 (__v8di)
245 _mm512_setzero_si512 (),
246 __M);
247}
248
249extern __inline __m512
250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
251_mm512_broadcast_f32x2 (__m128 __A)
252{
253 return (__m512)
254 __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
255 (__v16sf)_mm512_undefined_ps (),
256 (__mmask16) -1);
257}
258
259extern __inline __m512
260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
261_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
262{
263 return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
264 (__v16sf)
265 __O, __M);
266}
267
268extern __inline __m512
269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
270_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
271{
272 return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
273 (__v16sf)
274 _mm512_setzero_ps (),
275 __M);
276}
277
278extern __inline __m512i
279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
280_mm512_broadcast_i32x2 (__m128i __A)
281{
282 return (__m512i)
283 __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
284 (__v16si)
285 _mm512_undefined_epi32 (),
286 (__mmask16) -1);
287}
288
289extern __inline __m512i
290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
291_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
292{
293 return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
294 __A,
295 (__v16si)
296 __O, __M);
297}
298
299extern __inline __m512i
300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
301_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
302{
303 return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
304 __A,
305 (__v16si)
306 _mm512_setzero_si512 (),
307 __M);
308}
309
310extern __inline __m512
311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
312_mm512_broadcast_f32x8 (__m256 __A)
313{
314 return (__m512)
315 __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
316 _mm512_undefined_ps (),
317 (__mmask16) -1);
318}
319
320extern __inline __m512
321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
322_mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A)
323{
324 return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
325 (__v16sf)__O,
326 __M);
327}
328
329extern __inline __m512
330__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
331_mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A)
332{
333 return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
334 (__v16sf)
335 _mm512_setzero_ps (),
336 __M);
337}
338
339extern __inline __m512i
340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
341_mm512_broadcast_i32x8 (__m256i __A)
342{
343 return (__m512i)
344 __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
345 (__v16si)
346 _mm512_undefined_epi32 (),
347 (__mmask16) -1);
348}
349
350extern __inline __m512i
351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
352_mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A)
353{
354 return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
355 __A,
356 (__v16si)__O,
357 __M);
358}
359
360extern __inline __m512i
361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
362_mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A)
363{
364 return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
365 __A,
366 (__v16si)
367 _mm512_setzero_si512 (),
368 __M);
369}
370
371extern __inline __m512i
372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
373_mm512_mullo_epi64 (__m512i __A, __m512i __B)
374{
375 return (__m512i) ((__v8du) __A * (__v8du) __B);
376}
377
378extern __inline __m512i
379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
380_mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
381 __m512i __B)
382{
383 return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
384 (__v8di) __B,
385 (__v8di) __W,
386 (__mmask8) __U);
387}
388
389extern __inline __m512i
390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
391_mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
392{
393 return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
394 (__v8di) __B,
395 (__v8di)
396 _mm512_setzero_si512 (),
397 (__mmask8) __U);
398}
399
400extern __inline __m512d
401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
402_mm512_xor_pd (__m512d __A, __m512d __B)
403{
404 return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
405 (__v8df) __B,
406 (__v8df)
407 _mm512_setzero_pd (),
408 (__mmask8) -1);
409}
410
411extern __inline __m512d
412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
413_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A,
414 __m512d __B)
415{
416 return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
417 (__v8df) __B,
418 (__v8df) __W,
419 (__mmask8) __U);
420}
421
422extern __inline __m512d
423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
424_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B)
425{
426 return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
427 (__v8df) __B,
428 (__v8df)
429 _mm512_setzero_pd (),
430 (__mmask8) __U);
431}
432
433extern __inline __m512
434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
435_mm512_xor_ps (__m512 __A, __m512 __B)
436{
437 return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
438 (__v16sf) __B,
439 (__v16sf)
440 _mm512_setzero_ps (),
441 (__mmask16) -1);
442}
443
444extern __inline __m512
445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
446_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
447{
448 return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
449 (__v16sf) __B,
450 (__v16sf) __W,
451 (__mmask16) __U);
452}
453
454extern __inline __m512
455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
456_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B)
457{
458 return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
459 (__v16sf) __B,
460 (__v16sf)
461 _mm512_setzero_ps (),
462 (__mmask16) __U);
463}
464
465extern __inline __m512d
466__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
467_mm512_or_pd (__m512d __A, __m512d __B)
468{
469 return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
470 (__v8df) __B,
471 (__v8df)
472 _mm512_setzero_pd (),
473 (__mmask8) -1);
474}
475
476extern __inline __m512d
477__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
478_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
479{
480 return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
481 (__v8df) __B,
482 (__v8df) __W,
483 (__mmask8) __U);
484}
485
486extern __inline __m512d
487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
488_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B)
489{
490 return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
491 (__v8df) __B,
492 (__v8df)
493 _mm512_setzero_pd (),
494 (__mmask8) __U);
495}
496
497extern __inline __m512
498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
499_mm512_or_ps (__m512 __A, __m512 __B)
500{
501 return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
502 (__v16sf) __B,
503 (__v16sf)
504 _mm512_setzero_ps (),
505 (__mmask16) -1);
506}
507
508extern __inline __m512
509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
510_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
511{
512 return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
513 (__v16sf) __B,
514 (__v16sf) __W,
515 (__mmask16) __U);
516}
517
518extern __inline __m512
519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
520_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B)
521{
522 return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
523 (__v16sf) __B,
524 (__v16sf)
525 _mm512_setzero_ps (),
526 (__mmask16) __U);
527}
528
529extern __inline __m512d
530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
531_mm512_and_pd (__m512d __A, __m512d __B)
532{
533 return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
534 (__v8df) __B,
535 (__v8df)
536 _mm512_setzero_pd (),
537 (__mmask8) -1);
538}
539
540extern __inline __m512d
541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
542_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A,
543 __m512d __B)
544{
545 return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
546 (__v8df) __B,
547 (__v8df) __W,
548 (__mmask8) __U);
549}
550
551extern __inline __m512d
552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
553_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B)
554{
555 return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
556 (__v8df) __B,
557 (__v8df)
558 _mm512_setzero_pd (),
559 (__mmask8) __U);
560}
561
562extern __inline __m512
563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
564_mm512_and_ps (__m512 __A, __m512 __B)
565{
566 return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
567 (__v16sf) __B,
568 (__v16sf)
569 _mm512_setzero_ps (),
570 (__mmask16) -1);
571}
572
573extern __inline __m512
574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
575_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
576{
577 return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
578 (__v16sf) __B,
579 (__v16sf) __W,
580 (__mmask16) __U);
581}
582
583extern __inline __m512
584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
585_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B)
586{
587 return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
588 (__v16sf) __B,
589 (__v16sf)
590 _mm512_setzero_ps (),
591 (__mmask16) __U);
592}
593
594extern __inline __m512d
595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
596_mm512_andnot_pd (__m512d __A, __m512d __B)
597{
598 return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
599 (__v8df) __B,
600 (__v8df)
601 _mm512_setzero_pd (),
602 (__mmask8) -1);
603}
604
605extern __inline __m512d
606__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
607_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A,
608 __m512d __B)
609{
610 return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
611 (__v8df) __B,
612 (__v8df) __W,
613 (__mmask8) __U);
614}
615
616extern __inline __m512d
617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
618_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B)
619{
620 return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
621 (__v8df) __B,
622 (__v8df)
623 _mm512_setzero_pd (),
624 (__mmask8) __U);
625}
626
627extern __inline __m512
628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
629_mm512_andnot_ps (__m512 __A, __m512 __B)
630{
631 return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
632 (__v16sf) __B,
633 (__v16sf)
634 _mm512_setzero_ps (),
635 (__mmask16) -1);
636}
637
638extern __inline __m512
639__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
640_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A,
641 __m512 __B)
642{
643 return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
644 (__v16sf) __B,
645 (__v16sf) __W,
646 (__mmask16) __U);
647}
648
649extern __inline __m512
650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
651_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B)
652{
653 return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
654 (__v16sf) __B,
655 (__v16sf)
656 _mm512_setzero_ps (),
657 (__mmask16) __U);
658}
659
660extern __inline __mmask16
661__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
662_mm512_movepi32_mask (__m512i __A)
663{
664 return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
665}
666
667extern __inline __mmask8
668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
669_mm512_movepi64_mask (__m512i __A)
670{
671 return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
672}
673
674extern __inline __m512i
675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
676_mm512_movm_epi32 (__mmask16 __A)
677{
678 return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
679}
680
681extern __inline __m512i
682__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
683_mm512_movm_epi64 (__mmask8 __A)
684{
685 return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
686}
687
688extern __inline __m512i
689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
690_mm512_cvttpd_epi64 (__m512d __A)
691{
692 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
693 (__v8di)
694 _mm512_setzero_si512 (),
695 (__mmask8) -1,
696 _MM_FROUND_CUR_DIRECTION);
697}
698
699extern __inline __m512i
700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
701_mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
702{
703 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
704 (__v8di) __W,
705 (__mmask8) __U,
706 _MM_FROUND_CUR_DIRECTION);
707}
708
709extern __inline __m512i
710__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
711_mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A)
712{
713 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
714 (__v8di)
715 _mm512_setzero_si512 (),
716 (__mmask8) __U,
717 _MM_FROUND_CUR_DIRECTION);
718}
719
720extern __inline __m512i
721__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722_mm512_cvttpd_epu64 (__m512d __A)
723{
724 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
725 (__v8di)
726 _mm512_setzero_si512 (),
727 (__mmask8) -1,
728 _MM_FROUND_CUR_DIRECTION);
729}
730
731extern __inline __m512i
732__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
733_mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
734{
735 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
736 (__v8di) __W,
737 (__mmask8) __U,
738 _MM_FROUND_CUR_DIRECTION);
739}
740
741extern __inline __m512i
742__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
743_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A)
744{
745 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
746 (__v8di)
747 _mm512_setzero_si512 (),
748 (__mmask8) __U,
749 _MM_FROUND_CUR_DIRECTION);
750}
751
752extern __inline __m512i
753__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
754_mm512_cvttps_epi64 (__m256 __A)
755{
756 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
757 (__v8di)
758 _mm512_setzero_si512 (),
759 (__mmask8) -1,
760 _MM_FROUND_CUR_DIRECTION);
761}
762
763extern __inline __m512i
764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
765_mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
766{
767 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
768 (__v8di) __W,
769 (__mmask8) __U,
770 _MM_FROUND_CUR_DIRECTION);
771}
772
773extern __inline __m512i
774__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
775_mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A)
776{
777 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
778 (__v8di)
779 _mm512_setzero_si512 (),
780 (__mmask8) __U,
781 _MM_FROUND_CUR_DIRECTION);
782}
783
784extern __inline __m512i
785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786_mm512_cvttps_epu64 (__m256 __A)
787{
788 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
789 (__v8di)
790 _mm512_setzero_si512 (),
791 (__mmask8) -1,
792 _MM_FROUND_CUR_DIRECTION);
793}
794
795extern __inline __m512i
796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
798{
799 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
800 (__v8di) __W,
801 (__mmask8) __U,
802 _MM_FROUND_CUR_DIRECTION);
803}
804
805extern __inline __m512i
806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
807_mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A)
808{
809 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
810 (__v8di)
811 _mm512_setzero_si512 (),
812 (__mmask8) __U,
813 _MM_FROUND_CUR_DIRECTION);
814}
815
816extern __inline __m512i
817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
818_mm512_cvtpd_epi64 (__m512d __A)
819{
820 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
821 (__v8di)
822 _mm512_setzero_si512 (),
823 (__mmask8) -1,
824 _MM_FROUND_CUR_DIRECTION);
825}
826
827extern __inline __m512i
828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
829_mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
830{
831 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
832 (__v8di) __W,
833 (__mmask8) __U,
834 _MM_FROUND_CUR_DIRECTION);
835}
836
837extern __inline __m512i
838__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
839_mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A)
840{
841 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
842 (__v8di)
843 _mm512_setzero_si512 (),
844 (__mmask8) __U,
845 _MM_FROUND_CUR_DIRECTION);
846}
847
848extern __inline __m512i
849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
850_mm512_cvtpd_epu64 (__m512d __A)
851{
852 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
853 (__v8di)
854 _mm512_setzero_si512 (),
855 (__mmask8) -1,
856 _MM_FROUND_CUR_DIRECTION);
857}
858
859extern __inline __m512i
860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
861_mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
862{
863 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
864 (__v8di) __W,
865 (__mmask8) __U,
866 _MM_FROUND_CUR_DIRECTION);
867}
868
869extern __inline __m512i
870__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
871_mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A)
872{
873 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
874 (__v8di)
875 _mm512_setzero_si512 (),
876 (__mmask8) __U,
877 _MM_FROUND_CUR_DIRECTION);
878}
879
880extern __inline __m512i
881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882_mm512_cvtps_epi64 (__m256 __A)
883{
884 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
885 (__v8di)
886 _mm512_setzero_si512 (),
887 (__mmask8) -1,
888 _MM_FROUND_CUR_DIRECTION);
889}
890
891extern __inline __m512i
892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
893_mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
894{
895 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
896 (__v8di) __W,
897 (__mmask8) __U,
898 _MM_FROUND_CUR_DIRECTION);
899}
900
901extern __inline __m512i
902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
903_mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A)
904{
905 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
906 (__v8di)
907 _mm512_setzero_si512 (),
908 (__mmask8) __U,
909 _MM_FROUND_CUR_DIRECTION);
910}
911
912extern __inline __m512i
913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
914_mm512_cvtps_epu64 (__m256 __A)
915{
916 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
917 (__v8di)
918 _mm512_setzero_si512 (),
919 (__mmask8) -1,
920 _MM_FROUND_CUR_DIRECTION);
921}
922
923extern __inline __m512i
924__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
925_mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
926{
927 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
928 (__v8di) __W,
929 (__mmask8) __U,
930 _MM_FROUND_CUR_DIRECTION);
931}
932
933extern __inline __m512i
934__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
935_mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A)
936{
937 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
938 (__v8di)
939 _mm512_setzero_si512 (),
940 (__mmask8) __U,
941 _MM_FROUND_CUR_DIRECTION);
942}
943
944extern __inline __m256
945__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
946_mm512_cvtepi64_ps (__m512i __A)
947{
948 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
949 (__v8sf)
950 _mm256_setzero_ps (),
951 (__mmask8) -1,
952 _MM_FROUND_CUR_DIRECTION);
953}
954
955extern __inline __m256
956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
957_mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A)
958{
959 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
960 (__v8sf) __W,
961 (__mmask8) __U,
962 _MM_FROUND_CUR_DIRECTION);
963}
964
965extern __inline __m256
966__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
967_mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A)
968{
969 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
970 (__v8sf)
971 _mm256_setzero_ps (),
972 (__mmask8) __U,
973 _MM_FROUND_CUR_DIRECTION);
974}
975
976extern __inline __m256
977__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
978_mm512_cvtepu64_ps (__m512i __A)
979{
980 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
981 (__v8sf)
982 _mm256_setzero_ps (),
983 (__mmask8) -1,
984 _MM_FROUND_CUR_DIRECTION);
985}
986
987extern __inline __m256
988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
989_mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A)
990{
991 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
992 (__v8sf) __W,
993 (__mmask8) __U,
994 _MM_FROUND_CUR_DIRECTION);
995}
996
997extern __inline __m256
998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
999_mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A)
1000{
1001 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
1002 (__v8sf)
1003 _mm256_setzero_ps (),
1004 (__mmask8) __U,
1005 _MM_FROUND_CUR_DIRECTION);
1006}
1007
1008extern __inline __m512d
1009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1010_mm512_cvtepi64_pd (__m512i __A)
1011{
1012 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1013 (__v8df)
1014 _mm512_setzero_pd (),
1015 (__mmask8) -1,
1016 _MM_FROUND_CUR_DIRECTION);
1017}
1018
1019extern __inline __m512d
1020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1021_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A)
1022{
1023 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1024 (__v8df) __W,
1025 (__mmask8) __U,
1026 _MM_FROUND_CUR_DIRECTION);
1027}
1028
1029extern __inline __m512d
1030__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1031_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A)
1032{
1033 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1034 (__v8df)
1035 _mm512_setzero_pd (),
1036 (__mmask8) __U,
1037 _MM_FROUND_CUR_DIRECTION);
1038}
1039
1040extern __inline __m512d
1041__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1042_mm512_cvtepu64_pd (__m512i __A)
1043{
1044 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1045 (__v8df)
1046 _mm512_setzero_pd (),
1047 (__mmask8) -1,
1048 _MM_FROUND_CUR_DIRECTION);
1049}
1050
1051extern __inline __m512d
1052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1053_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A)
1054{
1055 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1056 (__v8df) __W,
1057 (__mmask8) __U,
1058 _MM_FROUND_CUR_DIRECTION);
1059}
1060
1061extern __inline __m512d
1062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1063_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A)
1064{
1065 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1066 (__v8df)
1067 _mm512_setzero_pd (),
1068 (__mmask8) __U,
1069 _MM_FROUND_CUR_DIRECTION);
1070}
1071
1072#ifdef __OPTIMIZE__
1073extern __inline __mmask8
1074__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1075_kshiftli_mask8 (__mmask8 __A, unsigned int __B)
1076{
1077 return (__mmask8) __builtin_ia32_kshiftliqi ((__mmask8) __A, (__mmask8) __B);
1078}
1079
1080extern __inline __mmask8
1081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1082_kshiftri_mask8 (__mmask8 __A, unsigned int __B)
1083{
1084 return (__mmask8) __builtin_ia32_kshiftriqi ((__mmask8) __A, (__mmask8) __B);
1085}
1086
1087extern __inline __m512d
1088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1089_mm512_range_pd (__m512d __A, __m512d __B, int __C)
1090{
1091 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
1092 (__v8df) __B, __C,
1093 (__v8df)
1094 _mm512_setzero_pd (),
1095 (__mmask8) -1,
1096 _MM_FROUND_CUR_DIRECTION);
1097}
1098
1099extern __inline __m512d
1100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1101_mm512_mask_range_pd (__m512d __W, __mmask8 __U,
1102 __m512d __A, __m512d __B, int __C)
1103{
1104 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
1105 (__v8df) __B, __C,
1106 (__v8df) __W,
1107 (__mmask8) __U,
1108 _MM_FROUND_CUR_DIRECTION);
1109}
1110
1111extern __inline __m512d
1112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1113_mm512_maskz_range_pd (__mmask8 __U, __m512d __A, __m512d __B, int __C)
1114{
1115 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
1116 (__v8df) __B, __C,
1117 (__v8df)
1118 _mm512_setzero_pd (),
1119 (__mmask8) __U,
1120 _MM_FROUND_CUR_DIRECTION);
1121}
1122
1123extern __inline __m512
1124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1125_mm512_range_ps (__m512 __A, __m512 __B, int __C)
1126{
1127 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
1128 (__v16sf) __B, __C,
1129 (__v16sf)
1130 _mm512_setzero_ps (),
1131 (__mmask16) -1,
1132 _MM_FROUND_CUR_DIRECTION);
1133}
1134
1135extern __inline __m512
1136__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1137_mm512_mask_range_ps (__m512 __W, __mmask16 __U,
1138 __m512 __A, __m512 __B, int __C)
1139{
1140 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
1141 (__v16sf) __B, __C,
1142 (__v16sf) __W,
1143 (__mmask16) __U,
1144 _MM_FROUND_CUR_DIRECTION);
1145}
1146
1147extern __inline __m512
1148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1149_mm512_maskz_range_ps (__mmask16 __U, __m512 __A, __m512 __B, int __C)
1150{
1151 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
1152 (__v16sf) __B, __C,
1153 (__v16sf)
1154 _mm512_setzero_ps (),
1155 (__mmask16) __U,
1156 _MM_FROUND_CUR_DIRECTION);
1157}
1158
1159extern __inline __m128d
1160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1161_mm_reduce_sd (__m128d __A, __m128d __B, int __C)
1162{
1163 return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A,
1164 (__v2df) __B, __C,
1165 (__v2df) _mm_setzero_pd (),
1166 (__mmask8) -1);
1167}
1168
1169extern __inline __m128d
1170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1171_mm_reduce_round_sd (__m128d __A, __m128d __B, int __C, const int __R)
1172{
1173 return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A,
1174 (__v2df) __B, __C,
1175 (__v2df)
1176 _mm_setzero_pd (),
1177 (__mmask8) -1, __R);
1178}
1179
1180extern __inline __m128d
1181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1182_mm_mask_reduce_sd (__m128d __W, __mmask8 __U, __m128d __A,
1183 __m128d __B, int __C)
1184{
1185 return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A,
1186 (__v2df) __B, __C,
1187 (__v2df) __W,
1188 (__mmask8) __U);
1189}
1190
1191extern __inline __m128d
1192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1193_mm_mask_reduce_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1194 __m128d __B, int __C, const int __R)
1195{
1196 return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A,
1197 (__v2df) __B, __C,
1198 (__v2df) __W,
1199 __U, __R);
1200}
1201
1202extern __inline __m128d
1203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1204_mm_maskz_reduce_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
1205{
1206 return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A,
1207 (__v2df) __B, __C,
1208 (__v2df) _mm_setzero_pd (),
1209 (__mmask8) __U);
1210}
1211
1212extern __inline __m128d
1213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1214_mm_maskz_reduce_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1215 int __C, const int __R)
1216{
1217 return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A,
1218 (__v2df) __B, __C,
1219 (__v2df)
1220 _mm_setzero_pd (),
1221 __U, __R);
1222}
1223
1224extern __inline __m128
1225__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1226_mm_reduce_ss (__m128 __A, __m128 __B, int __C)
1227{
1228 return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A,
1229 (__v4sf) __B, __C,
1230 (__v4sf) _mm_setzero_ps (),
1231 (__mmask8) -1);
1232}
1233
1234extern __inline __m128
1235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1236_mm_reduce_round_ss (__m128 __A, __m128 __B, int __C, const int __R)
1237{
1238 return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A,
1239 (__v4sf) __B, __C,
1240 (__v4sf)
1241 _mm_setzero_ps (),
1242 (__mmask8) -1, __R);
1243}
1244
1245extern __inline __m128
1246__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1247_mm_mask_reduce_ss (__m128 __W, __mmask8 __U, __m128 __A,
1248 __m128 __B, int __C)
1249{
1250 return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A,
1251 (__v4sf) __B, __C,
1252 (__v4sf) __W,
1253 (__mmask8) __U);
1254}
1255
1256extern __inline __m128
1257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1258_mm_mask_reduce_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
1259 __m128 __B, int __C, const int __R)
1260{
1261 return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A,
1262 (__v4sf) __B, __C,
1263 (__v4sf) __W,
1264 __U, __R);
1265}
1266
1267extern __inline __m128
1268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1269_mm_maskz_reduce_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C)
1270{
1271 return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A,
1272 (__v4sf) __B, __C,
1273 (__v4sf) _mm_setzero_ps (),
1274 (__mmask8) __U);
1275}
1276
1277extern __inline __m128
1278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1279_mm_maskz_reduce_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
1280 int __C, const int __R)
1281{
1282 return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A,
1283 (__v4sf) __B, __C,
1284 (__v4sf)
1285 _mm_setzero_ps (),
1286 __U, __R);
1287}
1288
1289extern __inline __m128d
1290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1291_mm_range_sd (__m128d __A, __m128d __B, int __C)
1292{
1293 return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
1294 (__v2df) __B, __C,
1295 (__v2df)
1296 _mm_setzero_pd (),
1297 (__mmask8) -1,
1298 _MM_FROUND_CUR_DIRECTION);
1299}
1300
1301extern __inline __m128d
1302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1303_mm_mask_range_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, int __C)
1304{
1305 return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
1306 (__v2df) __B, __C,
1307 (__v2df) __W,
1308 (__mmask8) __U,
1309 _MM_FROUND_CUR_DIRECTION);
1310}
1311
1312extern __inline __m128d
1313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1314_mm_maskz_range_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
1315{
1316 return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
1317 (__v2df) __B, __C,
1318 (__v2df)
1319 _mm_setzero_pd (),
1320 (__mmask8) __U,
1321 _MM_FROUND_CUR_DIRECTION);
1322}
1323
1324extern __inline __m128
1325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1326_mm_range_ss (__m128 __A, __m128 __B, int __C)
1327{
1328 return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
1329 (__v4sf) __B, __C,
1330 (__v4sf)
1331 _mm_setzero_ps (),
1332 (__mmask8) -1,
1333 _MM_FROUND_CUR_DIRECTION);
1334}
1335
1336extern __inline __m128
1337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1338_mm_mask_range_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, int __C)
1339{
1340 return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
1341 (__v4sf) __B, __C,
1342 (__v4sf) __W,
1343 (__mmask8) __U,
1344 _MM_FROUND_CUR_DIRECTION);
1345}
1346
1347
1348extern __inline __m128
1349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1350_mm_maskz_range_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C)
1351{
1352 return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
1353 (__v4sf) __B, __C,
1354 (__v4sf)
1355 _mm_setzero_ps (),
1356 (__mmask8) __U,
1357 _MM_FROUND_CUR_DIRECTION);
1358}
1359
1360extern __inline __m128d
1361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1362_mm_range_round_sd (__m128d __A, __m128d __B, int __C, const int __R)
1363{
1364 return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
1365 (__v2df) __B, __C,
1366 (__v2df)
1367 _mm_setzero_pd (),
1368 (__mmask8) -1, __R);
1369}
1370
1371extern __inline __m128d
1372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1373_mm_mask_range_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
1374 int __C, const int __R)
1375{
1376 return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
1377 (__v2df) __B, __C,
1378 (__v2df) __W,
1379 (__mmask8) __U, __R);
1380}
1381
1382extern __inline __m128d
1383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1384_mm_maskz_range_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C,
1385 const int __R)
1386{
1387 return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A,
1388 (__v2df) __B, __C,
1389 (__v2df)
1390 _mm_setzero_pd (),
1391 (__mmask8) __U, __R);
1392}
1393
1394extern __inline __m128
1395__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1396_mm_range_round_ss (__m128 __A, __m128 __B, int __C, const int __R)
1397{
1398 return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
1399 (__v4sf) __B, __C,
1400 (__v4sf)
1401 _mm_setzero_ps (),
1402 (__mmask8) -1, __R);
1403}
1404
1405extern __inline __m128
1406__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1407_mm_mask_range_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
1408 int __C, const int __R)
1409{
1410 return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
1411 (__v4sf) __B, __C,
1412 (__v4sf) __W,
1413 (__mmask8) __U, __R);
1414}
1415
1416extern __inline __m128
1417__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1418_mm_maskz_range_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C,
1419 const int __R)
1420{
1421 return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A,
1422 (__v4sf) __B, __C,
1423 (__v4sf)
1424 _mm_setzero_ps (),
1425 (__mmask8) __U, __R);
1426}
1427
1428extern __inline __mmask8
1429__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1430_mm_fpclass_ss_mask (__m128 __A, const int __imm)
1431{
1432 return (__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) __A, __imm,
1433 (__mmask8) -1);
1434}
1435
1436extern __inline __mmask8
1437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1438_mm_fpclass_sd_mask (__m128d __A, const int __imm)
1439{
1440 return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm,
1441 (__mmask8) -1);
1442}
1443
1444extern __inline __mmask8
1445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1446_mm_mask_fpclass_ss_mask (__mmask8 __U, __m128 __A, const int __imm)
1447{
1448 return (__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) __A, __imm, __U);
1449}
1450
1451extern __inline __mmask8
1452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1453_mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
1454{
1455 return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm, __U);
1456}
1457
1458extern __inline __m512i
1459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1460_mm512_cvtt_roundpd_epi64 (__m512d __A, const int __R)
1461{
1462 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
1463 (__v8di)
1464 _mm512_setzero_si512 (),
1465 (__mmask8) -1,
1466 __R);
1467}
1468
1469extern __inline __m512i
1470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1471_mm512_mask_cvtt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
1472 const int __R)
1473{
1474 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
1475 (__v8di) __W,
1476 (__mmask8) __U,
1477 __R);
1478}
1479
1480extern __inline __m512i
1481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1482_mm512_maskz_cvtt_roundpd_epi64 (__mmask8 __U, __m512d __A,
1483 const int __R)
1484{
1485 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
1486 (__v8di)
1487 _mm512_setzero_si512 (),
1488 (__mmask8) __U,
1489 __R);
1490}
1491
1492extern __inline __m512i
1493__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1494_mm512_cvtt_roundpd_epu64 (__m512d __A, const int __R)
1495{
1496 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
1497 (__v8di)
1498 _mm512_setzero_si512 (),
1499 (__mmask8) -1,
1500 __R);
1501}
1502
1503extern __inline __m512i
1504__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1505_mm512_mask_cvtt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
1506 const int __R)
1507{
1508 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
1509 (__v8di) __W,
1510 (__mmask8) __U,
1511 __R);
1512}
1513
1514extern __inline __m512i
1515__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1516_mm512_maskz_cvtt_roundpd_epu64 (__mmask8 __U, __m512d __A,
1517 const int __R)
1518{
1519 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
1520 (__v8di)
1521 _mm512_setzero_si512 (),
1522 (__mmask8) __U,
1523 __R);
1524}
1525
1526extern __inline __m512i
1527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1528_mm512_cvtt_roundps_epi64 (__m256 __A, const int __R)
1529{
1530 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
1531 (__v8di)
1532 _mm512_setzero_si512 (),
1533 (__mmask8) -1,
1534 __R);
1535}
1536
1537extern __inline __m512i
1538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1539_mm512_mask_cvtt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
1540 const int __R)
1541{
1542 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
1543 (__v8di) __W,
1544 (__mmask8) __U,
1545 __R);
1546}
1547
1548extern __inline __m512i
1549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1550_mm512_maskz_cvtt_roundps_epi64 (__mmask8 __U, __m256 __A,
1551 const int __R)
1552{
1553 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
1554 (__v8di)
1555 _mm512_setzero_si512 (),
1556 (__mmask8) __U,
1557 __R);
1558}
1559
1560extern __inline __m512i
1561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1562_mm512_cvtt_roundps_epu64 (__m256 __A, const int __R)
1563{
1564 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
1565 (__v8di)
1566 _mm512_setzero_si512 (),
1567 (__mmask8) -1,
1568 __R);
1569}
1570
1571extern __inline __m512i
1572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1573_mm512_mask_cvtt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
1574 const int __R)
1575{
1576 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
1577 (__v8di) __W,
1578 (__mmask8) __U,
1579 __R);
1580}
1581
1582extern __inline __m512i
1583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1584_mm512_maskz_cvtt_roundps_epu64 (__mmask8 __U, __m256 __A,
1585 const int __R)
1586{
1587 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
1588 (__v8di)
1589 _mm512_setzero_si512 (),
1590 (__mmask8) __U,
1591 __R);
1592}
1593
1594extern __inline __m512i
1595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1596_mm512_cvt_roundpd_epi64 (__m512d __A, const int __R)
1597{
1598 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
1599 (__v8di)
1600 _mm512_setzero_si512 (),
1601 (__mmask8) -1,
1602 __R);
1603}
1604
1605extern __inline __m512i
1606__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1607_mm512_mask_cvt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
1608 const int __R)
1609{
1610 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
1611 (__v8di) __W,
1612 (__mmask8) __U,
1613 __R);
1614}
1615
1616extern __inline __m512i
1617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1618_mm512_maskz_cvt_roundpd_epi64 (__mmask8 __U, __m512d __A,
1619 const int __R)
1620{
1621 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
1622 (__v8di)
1623 _mm512_setzero_si512 (),
1624 (__mmask8) __U,
1625 __R);
1626}
1627
1628extern __inline __m512i
1629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1630_mm512_cvt_roundpd_epu64 (__m512d __A, const int __R)
1631{
1632 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
1633 (__v8di)
1634 _mm512_setzero_si512 (),
1635 (__mmask8) -1,
1636 __R);
1637}
1638
1639extern __inline __m512i
1640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1641_mm512_mask_cvt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
1642 const int __R)
1643{
1644 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
1645 (__v8di) __W,
1646 (__mmask8) __U,
1647 __R);
1648}
1649
1650extern __inline __m512i
1651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1652_mm512_maskz_cvt_roundpd_epu64 (__mmask8 __U, __m512d __A,
1653 const int __R)
1654{
1655 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
1656 (__v8di)
1657 _mm512_setzero_si512 (),
1658 (__mmask8) __U,
1659 __R);
1660}
1661
1662extern __inline __m512i
1663__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1664_mm512_cvt_roundps_epi64 (__m256 __A, const int __R)
1665{
1666 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
1667 (__v8di)
1668 _mm512_setzero_si512 (),
1669 (__mmask8) -1,
1670 __R);
1671}
1672
1673extern __inline __m512i
1674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1675_mm512_mask_cvt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
1676 const int __R)
1677{
1678 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
1679 (__v8di) __W,
1680 (__mmask8) __U,
1681 __R);
1682}
1683
1684extern __inline __m512i
1685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1686_mm512_maskz_cvt_roundps_epi64 (__mmask8 __U, __m256 __A,
1687 const int __R)
1688{
1689 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
1690 (__v8di)
1691 _mm512_setzero_si512 (),
1692 (__mmask8) __U,
1693 __R);
1694}
1695
1696extern __inline __m512i
1697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1698_mm512_cvt_roundps_epu64 (__m256 __A, const int __R)
1699{
1700 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
1701 (__v8di)
1702 _mm512_setzero_si512 (),
1703 (__mmask8) -1,
1704 __R);
1705}
1706
1707extern __inline __m512i
1708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1709_mm512_mask_cvt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
1710 const int __R)
1711{
1712 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
1713 (__v8di) __W,
1714 (__mmask8) __U,
1715 __R);
1716}
1717
1718extern __inline __m512i
1719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1720_mm512_maskz_cvt_roundps_epu64 (__mmask8 __U, __m256 __A,
1721 const int __R)
1722{
1723 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
1724 (__v8di)
1725 _mm512_setzero_si512 (),
1726 (__mmask8) __U,
1727 __R);
1728}
1729
1730extern __inline __m256
1731__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1732_mm512_cvt_roundepi64_ps (__m512i __A, const int __R)
1733{
1734 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
1735 (__v8sf)
1736 _mm256_setzero_ps (),
1737 (__mmask8) -1,
1738 __R);
1739}
1740
1741extern __inline __m256
1742__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1743_mm512_mask_cvt_roundepi64_ps (__m256 __W, __mmask8 __U, __m512i __A,
1744 const int __R)
1745{
1746 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
1747 (__v8sf) __W,
1748 (__mmask8) __U,
1749 __R);
1750}
1751
1752extern __inline __m256
1753__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1754_mm512_maskz_cvt_roundepi64_ps (__mmask8 __U, __m512i __A,
1755 const int __R)
1756{
1757 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
1758 (__v8sf)
1759 _mm256_setzero_ps (),
1760 (__mmask8) __U,
1761 __R);
1762}
1763
1764extern __inline __m256
1765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1766_mm512_cvt_roundepu64_ps (__m512i __A, const int __R)
1767{
1768 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
1769 (__v8sf)
1770 _mm256_setzero_ps (),
1771 (__mmask8) -1,
1772 __R);
1773}
1774
1775extern __inline __m256
1776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1777_mm512_mask_cvt_roundepu64_ps (__m256 __W, __mmask8 __U, __m512i __A,
1778 const int __R)
1779{
1780 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
1781 (__v8sf) __W,
1782 (__mmask8) __U,
1783 __R);
1784}
1785
1786extern __inline __m256
1787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1788_mm512_maskz_cvt_roundepu64_ps (__mmask8 __U, __m512i __A,
1789 const int __R)
1790{
1791 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
1792 (__v8sf)
1793 _mm256_setzero_ps (),
1794 (__mmask8) __U,
1795 __R);
1796}
1797
1798extern __inline __m512d
1799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1800_mm512_cvt_roundepi64_pd (__m512i __A, const int __R)
1801{
1802 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1803 (__v8df)
1804 _mm512_setzero_pd (),
1805 (__mmask8) -1,
1806 __R);
1807}
1808
1809extern __inline __m512d
1810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1811_mm512_mask_cvt_roundepi64_pd (__m512d __W, __mmask8 __U, __m512i __A,
1812 const int __R)
1813{
1814 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1815 (__v8df) __W,
1816 (__mmask8) __U,
1817 __R);
1818}
1819
1820extern __inline __m512d
1821__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1822_mm512_maskz_cvt_roundepi64_pd (__mmask8 __U, __m512i __A,
1823 const int __R)
1824{
1825 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
1826 (__v8df)
1827 _mm512_setzero_pd (),
1828 (__mmask8) __U,
1829 __R);
1830}
1831
1832extern __inline __m512d
1833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1834_mm512_cvt_roundepu64_pd (__m512i __A, const int __R)
1835{
1836 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1837 (__v8df)
1838 _mm512_setzero_pd (),
1839 (__mmask8) -1,
1840 __R);
1841}
1842
1843extern __inline __m512d
1844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1845_mm512_mask_cvt_roundepu64_pd (__m512d __W, __mmask8 __U, __m512i __A,
1846 const int __R)
1847{
1848 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1849 (__v8df) __W,
1850 (__mmask8) __U,
1851 __R);
1852}
1853
1854extern __inline __m512d
1855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1856_mm512_maskz_cvt_roundepu64_pd (__mmask8 __U, __m512i __A,
1857 const int __R)
1858{
1859 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
1860 (__v8df)
1861 _mm512_setzero_pd (),
1862 (__mmask8) __U,
1863 __R);
1864}
1865
1866extern __inline __m512d
1867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1868_mm512_reduce_pd (__m512d __A, int __B)
1869{
1870 return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
1871 (__v8df)
1872 _mm512_setzero_pd (),
1873 (__mmask8) -1);
1874}
1875
1876extern __inline __m512d
1877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1878_mm512_reduce_round_pd (__m512d __A, int __B, const int __R)
1879{
1880 return (__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df) __A,
1881 __B,
1882 (__v8df)
1883 _mm512_setzero_pd (),
1884 (__mmask8) -1, __R);
1885}
1886
1887extern __inline __m512d
1888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1889_mm512_mask_reduce_pd (__m512d __W, __mmask8 __U, __m512d __A, int __B)
1890{
1891 return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
1892 (__v8df) __W,
1893 (__mmask8) __U);
1894}
1895
1896extern __inline __m512d
1897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1898_mm512_mask_reduce_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1899 int __B, const int __R)
1900{
1901 return (__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df) __A,
1902 __B,
1903 (__v8df) __W,
1904 __U, __R);
1905}
1906
1907extern __inline __m512d
1908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1909_mm512_maskz_reduce_pd (__mmask8 __U, __m512d __A, int __B)
1910{
1911 return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
1912 (__v8df)
1913 _mm512_setzero_pd (),
1914 (__mmask8) __U);
1915}
1916
1917extern __inline __m512d
1918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1919_mm512_maskz_reduce_round_pd (__mmask8 __U, __m512d __A, int __B,
1920 const int __R)
1921{
1922 return (__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df) __A,
1923 __B,
1924 (__v8df)
1925 _mm512_setzero_pd (),
1926 __U, __R);
1927}
1928
1929extern __inline __m512
1930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1931_mm512_reduce_ps (__m512 __A, int __B)
1932{
1933 return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
1934 (__v16sf)
1935 _mm512_setzero_ps (),
1936 (__mmask16) -1);
1937}
1938
1939extern __inline __m512
1940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1941_mm512_reduce_round_ps (__m512 __A, int __B, const int __R)
1942{
1943 return (__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf) __A,
1944 __B,
1945 (__v16sf)
1946 _mm512_setzero_ps (),
1947 (__mmask16) -1, __R);
1948}
1949
1950extern __inline __m512
1951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1952_mm512_mask_reduce_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B)
1953{
1954 return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
1955 (__v16sf) __W,
1956 (__mmask16) __U);
1957}
1958
1959extern __inline __m512
1960__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1961_mm512_mask_reduce_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B,
1962 const int __R)
1963{
1964 return (__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf) __A,
1965 __B,
1966 (__v16sf) __W,
1967 __U, __R);
1968}
1969
1970extern __inline __m512
1971__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1972_mm512_maskz_reduce_ps (__mmask16 __U, __m512 __A, int __B)
1973{
1974 return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
1975 (__v16sf)
1976 _mm512_setzero_ps (),
1977 (__mmask16) __U);
1978}
1979
1980extern __inline __m512
1981__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1982_mm512_maskz_reduce_round_ps (__mmask16 __U, __m512 __A, int __B,
1983 const int __R)
1984{
1985 return (__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf) __A,
1986 __B,
1987 (__v16sf)
1988 _mm512_setzero_ps (),
1989 __U, __R);
1990}
1991
1992extern __inline __m256
1993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1994_mm512_extractf32x8_ps (__m512 __A, const int __imm)
1995{
1996 return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
1997 __imm,
1998 (__v8sf)
1999 _mm256_setzero_ps (),
2000 (__mmask8) -1);
2001}
2002
2003extern __inline __m256
2004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2005_mm512_mask_extractf32x8_ps (__m256 __W, __mmask8 __U, __m512 __A,
2006 const int __imm)
2007{
2008 return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
2009 __imm,
2010 (__v8sf) __W,
2011 (__mmask8) __U);
2012}
2013
2014extern __inline __m256
2015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2016_mm512_maskz_extractf32x8_ps (__mmask8 __U, __m512 __A,
2017 const int __imm)
2018{
2019 return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
2020 __imm,
2021 (__v8sf)
2022 _mm256_setzero_ps (),
2023 (__mmask8) __U);
2024}
2025
2026extern __inline __m128d
2027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028_mm512_extractf64x2_pd (__m512d __A, const int __imm)
2029{
2030 return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
2031 __imm,
2032 (__v2df)
2033 _mm_setzero_pd (),
2034 (__mmask8) -1);
2035}
2036
2037extern __inline __m128d
2038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2039_mm512_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m512d __A,
2040 const int __imm)
2041{
2042 return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
2043 __imm,
2044 (__v2df) __W,
2045 (__mmask8)
2046 __U);
2047}
2048
2049extern __inline __m128d
2050__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2051_mm512_maskz_extractf64x2_pd (__mmask8 __U, __m512d __A,
2052 const int __imm)
2053{
2054 return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
2055 __imm,
2056 (__v2df)
2057 _mm_setzero_pd (),
2058 (__mmask8)
2059 __U);
2060}
2061
2062extern __inline __m256i
2063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2064_mm512_extracti32x8_epi32 (__m512i __A, const int __imm)
2065{
2066 return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
2067 __imm,
2068 (__v8si)
2069 _mm256_setzero_si256 (),
2070 (__mmask8) -1);
2071}
2072
2073extern __inline __m256i
2074__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2075_mm512_mask_extracti32x8_epi32 (__m256i __W, __mmask8 __U, __m512i __A,
2076 const int __imm)
2077{
2078 return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
2079 __imm,
2080 (__v8si) __W,
2081 (__mmask8) __U);
2082}
2083
2084extern __inline __m256i
2085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2086_mm512_maskz_extracti32x8_epi32 (__mmask8 __U, __m512i __A,
2087 const int __imm)
2088{
2089 return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
2090 __imm,
2091 (__v8si)
2092 _mm256_setzero_si256 (),
2093 (__mmask8) __U);
2094}
2095
2096extern __inline __m128i
2097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2098_mm512_extracti64x2_epi64 (__m512i __A, const int __imm)
2099{
2100 return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
2101 __imm,
2102 (__v2di)
2103 _mm_setzero_si128 (),
2104 (__mmask8) -1);
2105}
2106
2107extern __inline __m128i
2108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2109_mm512_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m512i __A,
2110 const int __imm)
2111{
2112 return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
2113 __imm,
2114 (__v2di) __W,
2115 (__mmask8)
2116 __U);
2117}
2118
2119extern __inline __m128i
2120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2121_mm512_maskz_extracti64x2_epi64 (__mmask8 __U, __m512i __A,
2122 const int __imm)
2123{
2124 return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
2125 __imm,
2126 (__v2di)
2127 _mm_setzero_si128 (),
2128 (__mmask8)
2129 __U);
2130}
2131
2132extern __inline __m512d
2133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2134_mm512_range_round_pd (__m512d __A, __m512d __B, int __C,
2135 const int __R)
2136{
2137 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
2138 (__v8df) __B, __C,
2139 (__v8df)
2140 _mm512_setzero_pd (),
2141 (__mmask8) -1,
2142 __R);
2143}
2144
2145extern __inline __m512d
2146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2147_mm512_mask_range_round_pd (__m512d __W, __mmask8 __U,
2148 __m512d __A, __m512d __B, int __C,
2149 const int __R)
2150{
2151 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
2152 (__v8df) __B, __C,
2153 (__v8df) __W,
2154 (__mmask8) __U,
2155 __R);
2156}
2157
2158extern __inline __m512d
2159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2160_mm512_maskz_range_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2161 int __C, const int __R)
2162{
2163 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
2164 (__v8df) __B, __C,
2165 (__v8df)
2166 _mm512_setzero_pd (),
2167 (__mmask8) __U,
2168 __R);
2169}
2170
2171extern __inline __m512
2172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2173_mm512_range_round_ps (__m512 __A, __m512 __B, int __C, const int __R)
2174{
2175 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
2176 (__v16sf) __B, __C,
2177 (__v16sf)
2178 _mm512_setzero_ps (),
2179 (__mmask16) -1,
2180 __R);
2181}
2182
2183extern __inline __m512
2184__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2185_mm512_mask_range_round_ps (__m512 __W, __mmask16 __U,
2186 __m512 __A, __m512 __B, int __C,
2187 const int __R)
2188{
2189 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
2190 (__v16sf) __B, __C,
2191 (__v16sf) __W,
2192 (__mmask16) __U,
2193 __R);
2194}
2195
2196extern __inline __m512
2197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2198_mm512_maskz_range_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2199 int __C, const int __R)
2200{
2201 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
2202 (__v16sf) __B, __C,
2203 (__v16sf)
2204 _mm512_setzero_ps (),
2205 (__mmask16) __U,
2206 __R);
2207}
2208
2209extern __inline __m512i
2210__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2211_mm512_inserti32x8 (__m512i __A, __m256i __B, const int __imm)
2212{
2213 return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
2214 (__v8si) __B,
2215 __imm,
2216 (__v16si)
2217 _mm512_setzero_si512 (),
2218 (__mmask16) -1);
2219}
2220
2221extern __inline __m512i
2222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2223_mm512_mask_inserti32x8 (__m512i __W, __mmask16 __U, __m512i __A,
2224 __m256i __B, const int __imm)
2225{
2226 return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
2227 (__v8si) __B,
2228 __imm,
2229 (__v16si) __W,
2230 (__mmask16) __U);
2231}
2232
2233extern __inline __m512i
2234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2235_mm512_maskz_inserti32x8 (__mmask16 __U, __m512i __A, __m256i __B,
2236 const int __imm)
2237{
2238 return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
2239 (__v8si) __B,
2240 __imm,
2241 (__v16si)
2242 _mm512_setzero_si512 (),
2243 (__mmask16) __U);
2244}
2245
2246extern __inline __m512
2247__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2248_mm512_insertf32x8 (__m512 __A, __m256 __B, const int __imm)
2249{
2250 return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
2251 (__v8sf) __B,
2252 __imm,
2253 (__v16sf)
2254 _mm512_setzero_ps (),
2255 (__mmask16) -1);
2256}
2257
2258extern __inline __m512
2259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2260_mm512_mask_insertf32x8 (__m512 __W, __mmask16 __U, __m512 __A,
2261 __m256 __B, const int __imm)
2262{
2263 return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
2264 (__v8sf) __B,
2265 __imm,
2266 (__v16sf) __W,
2267 (__mmask16) __U);
2268}
2269
2270extern __inline __m512
2271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2272_mm512_maskz_insertf32x8 (__mmask16 __U, __m512 __A, __m256 __B,
2273 const int __imm)
2274{
2275 return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
2276 (__v8sf) __B,
2277 __imm,
2278 (__v16sf)
2279 _mm512_setzero_ps (),
2280 (__mmask16) __U);
2281}
2282
2283extern __inline __m512i
2284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2285_mm512_inserti64x2 (__m512i __A, __m128i __B, const int __imm)
2286{
2287 return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
2288 (__v2di) __B,
2289 __imm,
2290 (__v8di)
2291 _mm512_setzero_si512 (),
2292 (__mmask8) -1);
2293}
2294
2295extern __inline __m512i
2296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2297_mm512_mask_inserti64x2 (__m512i __W, __mmask8 __U, __m512i __A,
2298 __m128i __B, const int __imm)
2299{
2300 return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
2301 (__v2di) __B,
2302 __imm,
2303 (__v8di) __W,
2304 (__mmask8)
2305 __U);
2306}
2307
2308extern __inline __m512i
2309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2310_mm512_maskz_inserti64x2 (__mmask8 __U, __m512i __A, __m128i __B,
2311 const int __imm)
2312{
2313 return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
2314 (__v2di) __B,
2315 __imm,
2316 (__v8di)
2317 _mm512_setzero_si512 (),
2318 (__mmask8)
2319 __U);
2320}
2321
2322extern __inline __m512d
2323__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2324_mm512_insertf64x2 (__m512d __A, __m128d __B, const int __imm)
2325{
2326 return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
2327 (__v2df) __B,
2328 __imm,
2329 (__v8df)
2330 _mm512_setzero_pd (),
2331 (__mmask8) -1);
2332}
2333
2334extern __inline __m512d
2335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2336_mm512_mask_insertf64x2 (__m512d __W, __mmask8 __U, __m512d __A,
2337 __m128d __B, const int __imm)
2338{
2339 return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
2340 (__v2df) __B,
2341 __imm,
2342 (__v8df) __W,
2343 (__mmask8)
2344 __U);
2345}
2346
2347extern __inline __m512d
2348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2349_mm512_maskz_insertf64x2 (__mmask8 __U, __m512d __A, __m128d __B,
2350 const int __imm)
2351{
2352 return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
2353 (__v2df) __B,
2354 __imm,
2355 (__v8df)
2356 _mm512_setzero_pd (),
2357 (__mmask8)
2358 __U);
2359}
2360
2361extern __inline __mmask8
2362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2363_mm512_mask_fpclass_pd_mask (__mmask8 __U, __m512d __A,
2364 const int __imm)
2365{
2366 return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A,
2367 __imm, __U);
2368}
2369
2370extern __inline __mmask8
2371__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2372_mm512_fpclass_pd_mask (__m512d __A, const int __imm)
2373{
2374 return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A,
2375 __imm,
2376 (__mmask8) -1);
2377}
2378
2379extern __inline __mmask16
2380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2381_mm512_mask_fpclass_ps_mask (__mmask16 __U, __m512 __A,
2382 const int __imm)
2383{
2384 return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A,
2385 __imm, __U);
2386}
2387
2388extern __inline __mmask16
2389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2390_mm512_fpclass_ps_mask (__m512 __A, const int __imm)
2391{
2392 return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A,
2393 __imm,
2394 (__mmask16) -1);
2395}
2396
2397#else
/* 8-bit mask shifts.  Both operands are cast to __mmask8 before being
   handed to the kshiftliqi / kshiftriqi builtins, and the result is cast
   back to __mmask8.  */
#define _kshiftli_mask8(X, Y) \
  ((__mmask8) __builtin_ia32_kshiftliqi ( \
    (__mmask8) (X), \
    (__mmask8) (Y)))

#define _kshiftri_mask8(X, Y) \
  ((__mmask8) __builtin_ia32_kshiftriqi ( \
    (__mmask8) (X), \
    (__mmask8) (Y)))
2403
/* Scalar-double range macros over __builtin_ia32_rangesd128_mask_round,
   always passing _MM_FROUND_CUR_DIRECTION as the last operand.
     plain:  all-zero pass-through vector, all-ones mask
     mask:   W as pass-through vector, U as mask
     maskz:  all-zero pass-through vector, U as mask  */
#define _mm_range_sd(A, B, C) \
  ((__m128d) __builtin_ia32_rangesd128_mask_round ( \
    (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (C), \
    (__v2df) _mm_setzero_pd (), (__mmask8) -1, \
    _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_range_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_rangesd128_mask_round ( \
    (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (C), \
    (__v2df) (__m128d) (W), (__mmask8) (U), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_range_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_rangesd128_mask_round ( \
    (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (C), \
    (__v2df) _mm_setzero_pd (), (__mmask8) (U), \
    _MM_FROUND_CUR_DIRECTION))
2418
/* Scalar-float range macros over __builtin_ia32_rangess128_mask_round,
   always passing _MM_FROUND_CUR_DIRECTION as the last operand.
     plain:  all-zero pass-through vector, all-ones mask
     mask:   W as pass-through vector, U as mask
     maskz:  all-zero pass-through vector, U as mask  */
#define _mm_range_ss(A, B, C) \
  ((__m128) __builtin_ia32_rangess128_mask_round ( \
    (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (C), \
    (__v4sf) _mm_setzero_ps (), (__mmask8) -1, \
    _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_range_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_rangess128_mask_round ( \
    (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (C), \
    (__v4sf) (__m128) (W), (__mmask8) (U), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_range_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_rangess128_mask_round ( \
    (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (C), \
    (__v4sf) _mm_setzero_ps (), (__mmask8) (U), \
    _MM_FROUND_CUR_DIRECTION))
2433
/* Scalar-double range macros with a caller-supplied last operand R
   (where the non-_round forms pass _MM_FROUND_CUR_DIRECTION).
     plain:  all-zero pass-through vector, all-ones mask
     mask:   W as pass-through vector, U as mask
     maskz:  all-zero pass-through vector, U as mask  */
#define _mm_range_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_rangesd128_mask_round ( \
    (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (C), \
    (__v2df) _mm_setzero_pd (), (__mmask8) -1, (R)))

#define _mm_mask_range_round_sd(W, U, A, B, C, R) \
  ((__m128d) __builtin_ia32_rangesd128_mask_round ( \
    (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (C), \
    (__v2df) (__m128d) (W), (__mmask8) (U), (R)))

#define _mm_maskz_range_round_sd(U, A, B, C, R) \
  ((__m128d) __builtin_ia32_rangesd128_mask_round ( \
    (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (C), \
    (__v2df) _mm_setzero_pd (), (__mmask8) (U), (R)))
2448
/* Scalar-float range macros with a caller-supplied last operand R
   (where the non-_round forms pass _MM_FROUND_CUR_DIRECTION).
     plain:  all-zero pass-through vector, all-ones mask
     mask:   W as pass-through vector, U as mask
     maskz:  all-zero pass-through vector, U as mask  */
#define _mm_range_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_rangess128_mask_round ( \
    (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (C), \
    (__v4sf) _mm_setzero_ps (), (__mmask8) -1, (R)))

#define _mm_mask_range_round_ss(W, U, A, B, C, R) \
  ((__m128) __builtin_ia32_rangess128_mask_round ( \
    (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (C), \
    (__v4sf) (__m128) (W), (__mmask8) (U), (R)))

#define _mm_maskz_range_round_ss(U, A, B, C, R) \
  ((__m128) __builtin_ia32_rangess128_mask_round ( \
    (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (C), \
    (__v4sf) _mm_setzero_ps (), (__mmask8) (U), (R)))
2463
/* _mm512_{,mask_,maskz_}cvtt_roundpd_epi64: wrappers over
   __builtin_ia32_cvttpd2qq512_mask.  A is the source and B is forwarded
   as the last operand.  The plain and maskz forms pass an all-zero
   vector as second operand, the mask form passes W; the plain form uses
   -1 as the mask.  */
#define _mm512_cvtt_roundpd_epi64(A, B) \
  ((__m512i) __builtin_ia32_cvttpd2qq512_mask ( \
    (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttpd2qq512_mask ( \
    (A), (__v8di) (W), (U), (B)))

#define _mm512_maskz_cvtt_roundpd_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_cvttpd2qq512_mask ( \
    (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
2474
/* _mm512_{,mask_,maskz_}cvtt_roundpd_epu64: wrappers over
   __builtin_ia32_cvttpd2uqq512_mask.  A is the source, B the last
   operand; second operand is an all-zero vector (plain, maskz) or W
   (mask); the plain form uses -1 as the mask.  */
#define _mm512_cvtt_roundpd_epu64(A, B) \
  ((__m512i) __builtin_ia32_cvttpd2uqq512_mask ( \
    (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttpd2uqq512_mask ( \
    (A), (__v8di) (W), (U), (B)))

#define _mm512_maskz_cvtt_roundpd_epu64(U, A, B) \
  ((__m512i) __builtin_ia32_cvttpd2uqq512_mask ( \
    (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
2483
/* _mm512_{,mask_,maskz_}cvtt_roundps_epi64: wrappers over
   __builtin_ia32_cvttps2qq512_mask.  A is the source, B the last
   operand; second operand is an all-zero vector (plain, maskz) or W
   (mask); the plain form uses -1 as the mask.  */
#define _mm512_cvtt_roundps_epi64(A, B) \
  ((__m512i) __builtin_ia32_cvttps2qq512_mask ( \
    (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvtt_roundps_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2qq512_mask ( \
    (A), (__v8di) (W), (U), (B)))

#define _mm512_maskz_cvtt_roundps_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2qq512_mask ( \
    (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
2492
/* _mm512_{,mask_,maskz_}cvtt_roundps_epu64: wrappers over
   __builtin_ia32_cvttps2uqq512_mask.  A is the source, B the last
   operand; second operand is an all-zero vector (plain, maskz) or W
   (mask); the plain form uses -1 as the mask.  */
#define _mm512_cvtt_roundps_epu64(A, B) \
  ((__m512i) __builtin_ia32_cvttps2uqq512_mask ( \
    (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvtt_roundps_epu64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2uqq512_mask ( \
    (A), (__v8di) (W), (U), (B)))

#define _mm512_maskz_cvtt_roundps_epu64(U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2uqq512_mask ( \
    (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
2501
/* _mm512_{,mask_,maskz_}cvt_roundpd_epi64: wrappers over
   __builtin_ia32_cvtpd2qq512_mask.  A is the source, B the last
   operand; second operand is an all-zero vector (plain, maskz) or W
   (mask); the plain form uses -1 as the mask.  */
#define _mm512_cvt_roundpd_epi64(A, B) \
  ((__m512i) __builtin_ia32_cvtpd2qq512_mask ( \
    (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvt_roundpd_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtpd2qq512_mask ( \
    (A), (__v8di) (W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_cvtpd2qq512_mask ( \
    (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
2510
/* _mm512_{,mask_,maskz_}cvt_roundpd_epu64: wrappers over
   __builtin_ia32_cvtpd2uqq512_mask.  A is the source, B the last
   operand; second operand is an all-zero vector (plain, maskz) or W
   (mask); the plain form uses -1 as the mask.  */
#define _mm512_cvt_roundpd_epu64(A, B) \
  ((__m512i) __builtin_ia32_cvtpd2uqq512_mask ( \
    (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvt_roundpd_epu64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtpd2uqq512_mask ( \
    (A), (__v8di) (W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_epu64(U, A, B) \
  ((__m512i) __builtin_ia32_cvtpd2uqq512_mask ( \
    (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
2519
/* _mm512_{,mask_,maskz_}cvt_roundps_epi64: wrappers over
   __builtin_ia32_cvtps2qq512_mask.  A is the source, B the last
   operand; second operand is an all-zero vector (plain, maskz) or W
   (mask); the plain form uses -1 as the mask.  */
#define _mm512_cvt_roundps_epi64(A, B) \
  ((__m512i) __builtin_ia32_cvtps2qq512_mask ( \
    (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvt_roundps_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2qq512_mask ( \
    (A), (__v8di) (W), (U), (B)))

#define _mm512_maskz_cvt_roundps_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2qq512_mask ( \
    (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
2528
/* _mm512_{,mask_,maskz_}cvt_roundps_epu64: wrappers over
   __builtin_ia32_cvtps2uqq512_mask.  A is the source, B the last
   operand; second operand is an all-zero vector (plain, maskz) or W
   (mask); the plain form uses -1 as the mask.  */
#define _mm512_cvt_roundps_epu64(A, B) \
  ((__m512i) __builtin_ia32_cvtps2uqq512_mask ( \
    (A), (__v8di) _mm512_setzero_si512 (), -1, (B)))

#define _mm512_mask_cvt_roundps_epu64(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2uqq512_mask ( \
    (A), (__v8di) (W), (U), (B)))

#define _mm512_maskz_cvt_roundps_epu64(U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2uqq512_mask ( \
    (A), (__v8di) _mm512_setzero_si512 (), (U), (B)))
2537
/* _mm512_{,mask_,maskz_}cvt_roundepi64_ps: wrappers over
   __builtin_ia32_cvtqq2ps512_mask producing a __m256.  A is cast to
   __v8di, B is the last operand; second operand is an all-zero __v8sf
   (plain, maskz) or W passed uncast (mask); the plain form uses -1 as
   the mask.  */
#define _mm512_cvt_roundepi64_ps(A, B) \
  ((__m256) __builtin_ia32_cvtqq2ps512_mask ( \
    (__v8di) (A), (__v8sf) _mm256_setzero_ps (), -1, (B)))

#define _mm512_mask_cvt_roundepi64_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_cvtqq2ps512_mask ( \
    (__v8di) (A), (W), (U), (B)))

#define _mm512_maskz_cvt_roundepi64_ps(U, A, B) \
  ((__m256) __builtin_ia32_cvtqq2ps512_mask ( \
    (__v8di) (A), (__v8sf) _mm256_setzero_ps (), (U), (B)))
2546
/* _mm512_{,mask_,maskz_}cvt_roundepu64_ps: wrappers over
   __builtin_ia32_cvtuqq2ps512_mask producing a __m256.  A is cast to
   __v8di, B is the last operand; second operand is an all-zero __v8sf
   (plain, maskz) or W passed uncast (mask); the plain form uses -1 as
   the mask.  */
#define _mm512_cvt_roundepu64_ps(A, B) \
  ((__m256) __builtin_ia32_cvtuqq2ps512_mask ( \
    (__v8di) (A), (__v8sf) _mm256_setzero_ps (), -1, (B)))

#define _mm512_mask_cvt_roundepu64_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_cvtuqq2ps512_mask ( \
    (__v8di) (A), (W), (U), (B)))

#define _mm512_maskz_cvt_roundepu64_ps(U, A, B) \
  ((__m256) __builtin_ia32_cvtuqq2ps512_mask ( \
    (__v8di) (A), (__v8sf) _mm256_setzero_ps (), (U), (B)))
2555
/* _mm512_{,mask_,maskz_}cvt_roundepi64_pd: wrappers over
   __builtin_ia32_cvtqq2pd512_mask producing a __m512d.  A is cast to
   __v8di, B is the last operand; second operand is an all-zero __v8df
   (plain, maskz) or W passed uncast (mask); the plain form uses -1 as
   the mask.  */
#define _mm512_cvt_roundepi64_pd(A, B) \
  ((__m512d) __builtin_ia32_cvtqq2pd512_mask ( \
    (__v8di) (A), (__v8df) _mm512_setzero_pd (), -1, (B)))

#define _mm512_mask_cvt_roundepi64_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_cvtqq2pd512_mask ( \
    (__v8di) (A), (W), (U), (B)))

#define _mm512_maskz_cvt_roundepi64_pd(U, A, B) \
  ((__m512d) __builtin_ia32_cvtqq2pd512_mask ( \
    (__v8di) (A), (__v8df) _mm512_setzero_pd (), (U), (B)))
2564
/* _mm512_{,mask_,maskz_}cvt_roundepu64_pd: wrappers over
   __builtin_ia32_cvtuqq2pd512_mask producing a __m512d.  A is cast to
   __v8di, B is the last operand; second operand is an all-zero __v8df
   (plain, maskz) or W passed uncast (mask); the plain form uses -1 as
   the mask.  */
#define _mm512_cvt_roundepu64_pd(A, B) \
  ((__m512d) __builtin_ia32_cvtuqq2pd512_mask ( \
    (__v8di) (A), (__v8df) _mm512_setzero_pd (), -1, (B)))

#define _mm512_mask_cvt_roundepu64_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_cvtuqq2pd512_mask ( \
    (__v8di) (A), (W), (U), (B)))

#define _mm512_maskz_cvt_roundepu64_pd(U, A, B) \
  ((__m512d) __builtin_ia32_cvtuqq2pd512_mask ( \
    (__v8di) (A), (__v8df) _mm512_setzero_pd (), (U), (B)))
2573
/* Packed-double reduce macros.  The non-_round forms call
   __builtin_ia32_reducepd512_mask; the _round forms call
   __builtin_ia32_reducepd512_mask_round with R appended.  B is the
   immediate (cast to int).  Plain forms use an all-zero pass-through
   vector and an all-ones mask, mask forms use W and U, maskz forms use
   an all-zero vector and U.  */
#define _mm512_reduce_pd(A, B) \
  ((__m512d) __builtin_ia32_reducepd512_mask ( \
    (__v8df) (__m512d) (A), (int) (B), \
    (__v8df) _mm512_setzero_pd (), (__mmask8) -1))

#define _mm512_reduce_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_reducepd512_mask_round ( \
    (__v8df) (__m512d) (A), (int) (B), \
    (__v8df) _mm512_setzero_pd (), (__mmask8) -1, (R)))

#define _mm512_mask_reduce_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_reducepd512_mask ( \
    (__v8df) (__m512d) (A), (int) (B), \
    (__v8df) (__m512d) (W), (__mmask8) (U)))

#define _mm512_mask_reduce_round_pd(W, U, A, B, R) \
  ((__m512d) __builtin_ia32_reducepd512_mask_round ( \
    (__v8df) (__m512d) (A), (int) (B), \
    (__v8df) (__m512d) (W), (U), (R)))

#define _mm512_maskz_reduce_pd(U, A, B) \
  ((__m512d) __builtin_ia32_reducepd512_mask ( \
    (__v8df) (__m512d) (A), (int) (B), \
    (__v8df) _mm512_setzero_pd (), (__mmask8) (U)))

#define _mm512_maskz_reduce_round_pd(U, A, B, R) \
  ((__m512d) __builtin_ia32_reducepd512_mask_round ( \
    (__v8df) (__m512d) (A), (int) (B), \
    (__v8df) _mm512_setzero_pd (), (U), (R)))
2597
/* Packed-float reduce macros.  The non-_round forms call
   __builtin_ia32_reduceps512_mask; the _round forms call
   __builtin_ia32_reduceps512_mask_round with R appended.  B is the
   immediate (cast to int).  Plain forms use an all-zero pass-through
   vector and an all-ones 16-bit mask, mask forms use W and U, maskz
   forms use an all-zero vector and U.  */
#define _mm512_reduce_ps(A, B) \
  ((__m512) __builtin_ia32_reduceps512_mask ( \
    (__v16sf) (__m512) (A), (int) (B), \
    (__v16sf) _mm512_setzero_ps (), (__mmask16) -1))

#define _mm512_reduce_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_reduceps512_mask_round ( \
    (__v16sf) (__m512) (A), (int) (B), \
    (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, (R)))

#define _mm512_mask_reduce_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_reduceps512_mask ( \
    (__v16sf) (__m512) (A), (int) (B), \
    (__v16sf) (__m512) (W), (__mmask16) (U)))

#define _mm512_mask_reduce_round_ps(W, U, A, B, R) \
  ((__m512) __builtin_ia32_reduceps512_mask_round ( \
    (__v16sf) (__m512) (A), (int) (B), \
    (__v16sf) (__m512) (W), (U), (R)))

#define _mm512_maskz_reduce_ps(U, A, B) \
  ((__m512) __builtin_ia32_reduceps512_mask ( \
    (__v16sf) (__m512) (A), (int) (B), \
    (__v16sf) _mm512_setzero_ps (), (__mmask16) (U)))

#define _mm512_maskz_reduce_round_ps(U, A, B, R) \
  ((__m512) __builtin_ia32_reduceps512_mask_round ( \
    (__v16sf) (__m512) (A), (int) (B), \
    (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), (R)))
2621
/* 256-bit (8 x float) extract macros over
   __builtin_ia32_extractf32x8_mask.  X is the 512-bit source, C the
   immediate.  Plain: all-zero pass-through vector, all-ones mask;
   mask: W and U; maskz: all-zero vector and U.  */
#define _mm512_extractf32x8_ps(X, C) \
  ((__m256) __builtin_ia32_extractf32x8_mask ( \
    (__v16sf) (__m512) (X), (int) (C), \
    (__v8sf) (__m256) _mm256_setzero_ps (), (__mmask8) -1))

#define _mm512_mask_extractf32x8_ps(W, U, X, C) \
  ((__m256) __builtin_ia32_extractf32x8_mask ( \
    (__v16sf) (__m512) (X), (int) (C), \
    (__v8sf) (__m256) (W), (__mmask8) (U)))

#define _mm512_maskz_extractf32x8_ps(U, X, C) \
  ((__m256) __builtin_ia32_extractf32x8_mask ( \
    (__v16sf) (__m512) (X), (int) (C), \
    (__v8sf) (__m256) _mm256_setzero_ps (), (__mmask8) (U)))
2633
/* 128-bit (2 x double) extract macros over
   __builtin_ia32_extractf64x2_512_mask.  X is the 512-bit source, C the
   immediate.  Plain: all-zero pass-through vector, all-ones mask;
   mask: W and U; maskz: all-zero vector and U.  */
#define _mm512_extractf64x2_pd(X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_512_mask ( \
    (__v8df) (__m512d) (X), (int) (C), \
    (__v2df) (__m128d) _mm_setzero_pd (), (__mmask8) -1))

#define _mm512_mask_extractf64x2_pd(W, U, X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_512_mask ( \
    (__v8df) (__m512d) (X), (int) (C), \
    (__v2df) (__m128d) (W), (__mmask8) (U)))

#define _mm512_maskz_extractf64x2_pd(U, X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_512_mask ( \
    (__v8df) (__m512d) (X), (int) (C), \
    (__v2df) (__m128d) _mm_setzero_pd (), (__mmask8) (U)))
2645
/* 256-bit (8 x 32-bit integer) extract macros over
   __builtin_ia32_extracti32x8_mask.  X is the 512-bit source, C the
   immediate.  Plain: all-zero pass-through vector, all-ones mask;
   mask: W and U; maskz: all-zero vector and U.  */
#define _mm512_extracti32x8_epi32(X, C) \
  ((__m256i) __builtin_ia32_extracti32x8_mask ( \
    (__v16si) (__m512i) (X), (int) (C), \
    (__v8si) (__m256i) _mm256_setzero_si256 (), (__mmask8) -1))

#define _mm512_mask_extracti32x8_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_extracti32x8_mask ( \
    (__v16si) (__m512i) (X), (int) (C), \
    (__v8si) (__m256i) (W), (__mmask8) (U)))

#define _mm512_maskz_extracti32x8_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_extracti32x8_mask ( \
    (__v16si) (__m512i) (X), (int) (C), \
    (__v8si) (__m256i) _mm256_setzero_si256 (), (__mmask8) (U)))
2657
/* 128-bit (2 x 64-bit integer) extract macros over
   __builtin_ia32_extracti64x2_512_mask.  X is the 512-bit source, C the
   immediate.  Plain: all-zero pass-through vector, all-ones mask;
   mask: W and U; maskz: all-zero vector and U.  */
#define _mm512_extracti64x2_epi64(X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_512_mask ( \
    (__v8di) (__m512i) (X), (int) (C), \
    (__v2di) (__m128i) _mm_setzero_si128 (), (__mmask8) -1))

#define _mm512_mask_extracti64x2_epi64(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_512_mask ( \
    (__v8di) (__m512i) (X), (int) (C), \
    (__v2di) (__m128i) (W), (__mmask8) (U)))

#define _mm512_maskz_extracti64x2_epi64(U, X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_512_mask ( \
    (__v8di) (__m512i) (X), (int) (C), \
    (__v2di) (__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
2669
/* Packed-double range macros over __builtin_ia32_rangepd512_mask with
   _MM_FROUND_CUR_DIRECTION as the last operand.  C is the immediate.
   Plain: all-zero pass-through vector, all-ones mask; mask: W and U;
   maskz: all-zero vector and U.  */
#define _mm512_range_pd(A, B, C) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
    (__v8df) (__m512d) (A), (__v8df) (__m512d) (B), (int) (C), \
    (__v8df) _mm512_setzero_pd (), (__mmask8) -1, \
    _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_range_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
    (__v8df) (__m512d) (A), (__v8df) (__m512d) (B), (int) (C), \
    (__v8df) (__m512d) (W), (__mmask8) (U), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_range_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
    (__v8df) (__m512d) (A), (__v8df) (__m512d) (B), (int) (C), \
    (__v8df) _mm512_setzero_pd (), (__mmask8) (U), \
    _MM_FROUND_CUR_DIRECTION))
2684
/* Packed-float range macros over __builtin_ia32_rangeps512_mask with
   _MM_FROUND_CUR_DIRECTION as the last operand.  C is the immediate.
   Plain: all-zero pass-through vector, all-ones 16-bit mask; mask: W
   and U; maskz: all-zero vector and U.  */
#define _mm512_range_ps(A, B, C) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
    (__v16sf) (__m512) (A), (__v16sf) (__m512) (B), (int) (C), \
    (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, \
    _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_range_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
    (__v16sf) (__m512) (A), (__v16sf) (__m512) (B), (int) (C), \
    (__v16sf) (__m512) (W), (__mmask16) (U), \
    _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_range_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
    (__v16sf) (__m512) (A), (__v16sf) (__m512) (B), (int) (C), \
    (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), \
    _MM_FROUND_CUR_DIRECTION))
2699
/* Macro forms of the packed-double range_round intrinsics: the same
   __builtin_ia32_rangepd512_mask call as the inline functions of the
   same names, with every argument cast explicitly and R forwarded as
   the last operand.  */
#define _mm512_range_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
    (__v8df) (__m512d) (A), (__v8df) (__m512d) (B), (int) (C), \
    (__v8df) _mm512_setzero_pd (), (__mmask8) -1, (R)))

#define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
    (__v8df) (__m512d) (A), (__v8df) (__m512d) (B), (int) (C), \
    (__v8df) (__m512d) (W), (__mmask8) (U), (R)))

#define _mm512_maskz_range_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_rangepd512_mask ( \
    (__v8df) (__m512d) (A), (__v8df) (__m512d) (B), (int) (C), \
    (__v8df) _mm512_setzero_pd (), (__mmask8) (U), (R)))
2714
/* Macro forms of the packed-float range_round intrinsics: the same
   __builtin_ia32_rangeps512_mask call as the inline functions of the
   same names, with every argument cast explicitly and R forwarded as
   the last operand.  */
#define _mm512_range_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
    (__v16sf) (__m512) (A), (__v16sf) (__m512) (B), (int) (C), \
    (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, (R)))

#define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
    (__v16sf) (__m512) (A), (__v16sf) (__m512) (B), (int) (C), \
    (__v16sf) (__m512) (W), (__mmask16) (U), (R)))

#define _mm512_maskz_range_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_rangeps512_mask ( \
    (__v16sf) (__m512) (A), (__v16sf) (__m512) (B), (int) (C), \
    (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), (R)))
2729
/* Unmasked insert of the 2 x double vector Y into X at position C.
   The pass-through operand is an all-zero vector, exactly as in the
   inline-function definition of this intrinsic earlier in the header.
   Passing X there a second time would expand the macro argument twice,
   so an argument with side effects would be evaluated twice in this
   macro form but only once in the inline form.  */
#define _mm512_insertf64x2(X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x2_512_mask ( \
    (__v8df) (__m512d) (X), (__v2df) (__m128d) (Y), (int) (C), \
    (__v8df) (__m512d) _mm512_setzero_pd (), (__mmask8) -1))
2734
/* Masked and zero-masked 2 x double insert macros over
   __builtin_ia32_insertf64x2_512_mask.  X is the 512-bit destination
   source, Y the 128-bit vector to insert, C the immediate.  The mask
   form passes W as pass-through operand, the maskz form an all-zero
   vector; U is the mask in both.  */
#define _mm512_mask_insertf64x2(W, U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x2_512_mask ( \
    (__v8df) (__m512d) (X), (__v2df) (__m128d) (Y), (int) (C), \
    (__v8df) (__m512d) (W), (__mmask8) (U)))

#define _mm512_maskz_insertf64x2(U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x2_512_mask ( \
    (__v8df) (__m512d) (X), (__v2df) (__m128d) (Y), (int) (C), \
    (__v8df) (__m512d) _mm512_setzero_pd (), (__mmask8) (U)))
2744
/* Unmasked insert of the 2 x 64-bit integer vector Y into X at position
   C.  The pass-through operand is an all-zero vector, exactly as in the
   inline-function definition of this intrinsic earlier in the header.
   Passing X there a second time would expand the macro argument twice,
   so an argument with side effects would be evaluated twice in this
   macro form but only once in the inline form.  */
#define _mm512_inserti64x2(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x2_512_mask ( \
    (__v8di) (__m512i) (X), (__v2di) (__m128i) (Y), (int) (C), \
    (__v8di) (__m512i) _mm512_setzero_si512 (), (__mmask8) -1))
2748
/* Masked and zero-masked 2 x 64-bit integer insert macros over
   __builtin_ia32_inserti64x2_512_mask.  X is the 512-bit destination
   source, Y the 128-bit vector to insert, C the immediate.  The mask
   form passes W as pass-through operand, the maskz form an all-zero
   vector; U is the mask in both.  */
#define _mm512_mask_inserti64x2(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x2_512_mask ( \
    (__v8di) (__m512i) (X), (__v2di) (__m128i) (Y), (int) (C), \
    (__v8di) (__m512i) (W), (__mmask8) (U)))

#define _mm512_maskz_inserti64x2(U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x2_512_mask ( \
    (__v8di) (__m512i) (X), (__v2di) (__m128i) (Y), (int) (C), \
    (__v8di) (__m512i) _mm512_setzero_si512 (), (__mmask8) (U)))
2758
/* 8 x float insert macros over __builtin_ia32_insertf32x8_mask.  X is
   the 512-bit destination source, Y the 256-bit vector to insert, C the
   immediate.  Plain: all-zero pass-through vector, all-ones 16-bit
   mask; mask: W and U; maskz: all-zero vector and U.  */
#define _mm512_insertf32x8(X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x8_mask ( \
    (__v16sf) (__m512) (X), (__v8sf) (__m256) (Y), (int) (C), \
    (__v16sf) (__m512) _mm512_setzero_ps (), (__mmask16) -1))

#define _mm512_mask_insertf32x8(W, U, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x8_mask ( \
    (__v16sf) (__m512) (X), (__v8sf) (__m256) (Y), (int) (C), \
    (__v16sf) (__m512) (W), (__mmask16) (U)))

#define _mm512_maskz_insertf32x8(U, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x8_mask ( \
    (__v16sf) (__m512) (X), (__v8sf) (__m256) (Y), (int) (C), \
    (__v16sf) (__m512) _mm512_setzero_ps (), (__mmask16) (U)))
2776
/* 8 x 32-bit integer insert macros over
   __builtin_ia32_inserti32x8_mask.  X is the 512-bit destination
   source, Y the 256-bit vector to insert, C the immediate.  Plain:
   all-zero pass-through vector, all-ones 16-bit mask; mask: W and U;
   maskz: all-zero vector and U.  */
#define _mm512_inserti32x8(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x8_mask ( \
    (__v16si) (__m512i) (X), (__v8si) (__m256i) (Y), (int) (C), \
    (__v16si) (__m512i) _mm512_setzero_si512 (), (__mmask16) -1))

#define _mm512_mask_inserti32x8(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x8_mask ( \
    (__v16si) (__m512i) (X), (__v8si) (__m256i) (Y), (int) (C), \
    (__v16si) (__m512i) (W), (__mmask16) (U)))

#define _mm512_maskz_inserti32x8(U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x8_mask ( \
    (__v16si) (__m512i) (X), (__v8si) (__m256i) (Y), (int) (C), \
    (__v16si) (__m512i) _mm512_setzero_si512 (), (__mmask16) (U)))
2794
/* Unmasked scalar fpclass macros: X is the source vector, C the
   immediate, and the builtin's mask operand is all-ones.  The final
   line of each definition deliberately carries no line-continuation
   backslash, so the macro ends where its expression ends and cannot
   absorb whatever line happens to follow it.  */
#define _mm_fpclass_ss_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X), \
                                             (int) (C), (__mmask8) (-1)))

#define _mm_fpclass_sd_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), \
                                             (int) (C), (__mmask8) (-1)))
2802
/* Masked scalar fpclass macros.  Note the argument order: source vector
   X first, immediate C second, mask U last.  */
#define _mm_mask_fpclass_ss_mask(X, C, U) \
  ((__mmask8) __builtin_ia32_fpclassss_mask ( \
    (__v4sf) (__m128) (X), (int) (C), (__mmask8) (U)))

#define _mm_mask_fpclass_sd_mask(X, C, U) \
  ((__mmask8) __builtin_ia32_fpclasssd_mask ( \
    (__v2df) (__m128d) (X), (int) (C), (__mmask8) (U)))
2810
/* Masked packed-double fpclass: u is the 8-bit mask, X the 512-bit
   source, C the immediate.  */
#define _mm512_mask_fpclass_pd_mask(u, X, C) \
  ((__mmask8) __builtin_ia32_fpclasspd512_mask ( \
    (__v8df) (__m512d) (X), (int) (C), (__mmask8) (u)))
2814
/* Masked packed-float fpclass: u is the mask, x the 512-bit source, c
   the immediate.  A __m512 holds sixteen floats, so the mask is cast to
   __mmask16 (as the inline-function form of this intrinsic declares
   it); a __mmask8 cast would discard the upper eight mask bits.  */
#define _mm512_mask_fpclass_ps_mask(u, x, c) \
  ((__mmask16) __builtin_ia32_fpclassps512_mask ( \
    (__v16sf) (__m512) (x), (int) (c), (__mmask16) (u)))
2818
/* Unmasked packed-double fpclass: X is the 512-bit source, C the
   immediate; the builtin is called with an all-ones 8-bit mask.  */
#define _mm512_fpclass_pd_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclasspd512_mask ( \
    (__v8df) (__m512d) (X), (int) (C), (__mmask8) -1))
2822
/* Unmasked packed-float fpclass: x is the 512-bit source, c the
   immediate.  The all-ones mask must be sixteen bits wide, matching the
   inline-function form of this intrinsic: (__mmask8) -1 is 0x00ff once
   widened, which would leave the upper eight mask bits clear.  */
#define _mm512_fpclass_ps_mask(x, c) \
  ((__mmask16) __builtin_ia32_fpclassps512_mask ( \
    (__v16sf) (__m512) (x), (int) (c), (__mmask16) -1))
2826
/* Scalar-double reduce macros over __builtin_ia32_reducesd_mask.  A and
   B are the two source vectors, C the immediate.  Plain: all-zero
   pass-through vector, all-ones mask; mask: W and U; maskz: all-zero
   vector and U.  */
#define _mm_reduce_sd(A, B, C) \
  ((__m128d) __builtin_ia32_reducesd_mask ( \
    (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (C), \
    (__v2df) _mm_setzero_pd (), (__mmask8) -1))

#define _mm_mask_reduce_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_reducesd_mask ( \
    (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (C), \
    (__v2df) (__m128d) (W), (__mmask8) (U)))

#define _mm_maskz_reduce_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_reducesd_mask ( \
    (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (C), \
    (__v2df) _mm_setzero_pd (), (__mmask8) (U)))
2840
/* Unmasked scalar-double reduce with explicit operand R.  Built on
   __builtin_ia32_reducesd_mask_round, the builtin the mask_/maskz_
   round macros use, with an all-zero pass-through vector and an
   all-ones mask (the same convention as _mm_reduce_sd).  The macro has
   no U parameter, so the expansion must not mention U: doing so would
   pick up an unrelated identifier from the caller's scope or fail to
   compile.  */
#define _mm_reduce_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_reducesd_mask_round ( \
    (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (C), \
    (__v2df) _mm_setzero_pd (), (__mmask8) -1, (int) (R)))
2844
/* Masked and zero-masked scalar-double reduce macros with explicit
   operand R, over __builtin_ia32_reducesd_mask_round.  The mask form
   passes W as pass-through vector, the maskz form an all-zero vector;
   U is the mask in both.  */
#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \
  ((__m128d) __builtin_ia32_reducesd_mask_round ( \
    (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (C), \
    (__v2df) (__m128d) (W), (__mmask8) (U), (int) (R)))

#define _mm_maskz_reduce_round_sd(U, A, B, C, R) \
  ((__m128d) __builtin_ia32_reducesd_mask_round ( \
    (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (C), \
    (__v2df) _mm_setzero_pd (), (__mmask8) (U), (int) (R)))
2854
/* Scalar-float reduce macros over __builtin_ia32_reducess_mask.  A and
   B are the two source vectors, C the immediate.  Plain: all-zero
   pass-through vector, all-ones mask; mask: W and U; maskz: all-zero
   vector and U.  */
#define _mm_reduce_ss(A, B, C) \
  ((__m128) __builtin_ia32_reducess_mask ( \
    (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (C), \
    (__v4sf) _mm_setzero_ps (), (__mmask8) -1))

#define _mm_mask_reduce_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_reducess_mask ( \
    (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (C), \
    (__v4sf) (__m128) (W), (__mmask8) (U)))

#define _mm_maskz_reduce_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_reducess_mask ( \
    (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (C), \
    (__v4sf) _mm_setzero_ps (), (__mmask8) (U)))
2868
/* Unmasked scalar-float reduce with explicit operand R.  Built on
   __builtin_ia32_reducess_mask_round, the builtin that
   _mm_mask_reduce_round_ss uses, with an all-zero pass-through vector
   and an all-ones mask (the same convention as _mm_reduce_ss).  The
   macro has no U parameter, so the expansion must not mention U: doing
   so would pick up an unrelated identifier from the caller's scope or
   fail to compile.  */
#define _mm_reduce_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_reducess_mask_round ( \
    (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (C), \
    (__v4sf) _mm_setzero_ps (), (__mmask8) -1, (int) (R)))
2872
/* Masked scalar-float reduce with explicit operand R: W is the
   pass-through vector, U the mask, C the immediate.  */
#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \
  ((__m128) __builtin_ia32_reducess_mask_round ( \
    (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (C), \
    (__v4sf) (__m128) (W), (__mmask8) (U), (int) (R)))
2877
/* Zero-masked scalar-float reduce with explicit operand R: all-zero
   pass-through vector, U as mask.  This must call the single-precision
   builtin (reducess), like _mm_mask_reduce_round_ss; the operands are
   __v4sf, so the double-precision reducesd builtin is the wrong
   callee.  */
#define _mm_maskz_reduce_round_ss(U, A, B, C, R) \
  ((__m128) __builtin_ia32_reducess_mask_round ( \
    (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (C), \
    (__v4sf) _mm_setzero_ps (), (__mmask8) (U), (int) (R)))
2882
2883
2884#endif
2885
2886#ifdef __DISABLE_AVX512DQ__
2887#undef __DISABLE_AVX512DQ__
2888#pragma GCC pop_options
2889#endif /* __DISABLE_AVX512DQ__ */
2890
2891#endif /* _AVX512DQINTRIN_H_INCLUDED */
/* Note: See TracBrowser for help on using the repository browser.  */