source: Daodan/MSYS2/mingw32/lib/gcc/i686-w64-mingw32/11.2.0/include/emmintrin.h

Last change on this file was 1166, checked in by rossy, 3 years ago

Daodan: Replace MinGW build env with an up-to-date MSYS2 env

1/* Copyright (C) 2003-2021 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24/* Implemented from the specification included in the Intel C++ Compiler
25 User Guide and Reference, version 9.0. */
26
27#ifndef _EMMINTRIN_H_INCLUDED
28#define _EMMINTRIN_H_INCLUDED
29
30/* We need definitions from the SSE header files.  */
31#include <xmmintrin.h>
32
33#ifndef __SSE2__
34#pragma GCC push_options
35#pragma GCC target("sse2")
36#define __DISABLE_SSE2__
37#endif /* __SSE2__ */
38
39/* SSE2 */
40typedef double __v2df __attribute__ ((__vector_size__ (16)));
41typedef long long __v2di __attribute__ ((__vector_size__ (16)));
42typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));
43typedef int __v4si __attribute__ ((__vector_size__ (16)));
44typedef unsigned int __v4su __attribute__ ((__vector_size__ (16)));
45typedef short __v8hi __attribute__ ((__vector_size__ (16)));
46typedef unsigned short __v8hu __attribute__ ((__vector_size__ (16)));
47typedef char __v16qi __attribute__ ((__vector_size__ (16)));
48typedef signed char __v16qs __attribute__ ((__vector_size__ (16)));
49typedef unsigned char __v16qu __attribute__ ((__vector_size__ (16)));
50
51/* The Intel API is flexible enough that we must allow aliasing with other
52 vector types, and their scalar components. */
53typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
54typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
55
56/* Unaligned version of the same types. */
57typedef long long __m128i_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
58typedef double __m128d_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
59
60/* Create a selector for use with the SHUFPD instruction. */
61#define _MM_SHUFFLE2(fp1,fp0) \
62 (((fp1) << 1) | (fp0))
63
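/* Editor's note: an illustrative sketch, not part of the original GCC header,
   showing how _MM_SHUFFLE2 builds the immediate for _mm_shuffle_pd (defined
   further below).  Compile as a separate translation unit with -msse2.  */
#if 0
#include <emmintrin.h>
static void example_shuffle2 (void)
{
  __m128d a = _mm_set_pd (2.0, 1.0);  /* elements {1.0, 2.0} (low, high) */
  __m128d b = _mm_set_pd (4.0, 3.0);  /* elements {3.0, 4.0} */
  /* Bit 0 selects the low result element from a, bit 1 the high one from b:
     _MM_SHUFFLE2 (1, 0) == 2, so the result is {a[0], b[1]} == {1.0, 4.0}.  */
  __m128d r = _mm_shuffle_pd (a, b, _MM_SHUFFLE2 (1, 0));
  (void) r;
}
#endif
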
64/* Create a vector with element 0 as F and the rest zero. */
65extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
66_mm_set_sd (double __F)
67{
68 return __extension__ (__m128d){ __F, 0.0 };
69}
70
71/* Create a vector with both elements equal to F. */
72extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
73_mm_set1_pd (double __F)
74{
75 return __extension__ (__m128d){ __F, __F };
76}
77
78extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
79_mm_set_pd1 (double __F)
80{
81 return _mm_set1_pd (__F);
82}
83
84/* Create a vector with the lower value X and upper value W. */
85extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
86_mm_set_pd (double __W, double __X)
87{
88 return __extension__ (__m128d){ __X, __W };
89}
90
91/* Create a vector with the lower value W and upper value X. */
92extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
93_mm_setr_pd (double __W, double __X)
94{
95 return __extension__ (__m128d){ __W, __X };
96}
97
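/* Editor's sketch (not part of the original header): the argument order of
   _mm_set_pd is high element first, while _mm_setr_pd takes the elements in
   memory order.  */
#if 0
#include <emmintrin.h>
static void example_set_order (void)
{
  double out[2];
  _mm_storeu_pd (out, _mm_set_pd (2.0, 1.0));   /* out == {1.0, 2.0} */
  _mm_storeu_pd (out, _mm_setr_pd (1.0, 2.0));  /* out == {1.0, 2.0} as well */
}
#endif
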
98/* Create an undefined vector. */
99extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
100_mm_undefined_pd (void)
101{
102 __m128d __Y = __Y;
103 return __Y;
104}
105
106/* Create a vector of zeros. */
107extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
108_mm_setzero_pd (void)
109{
110 return __extension__ (__m128d){ 0.0, 0.0 };
111}
112
113/* Sets the low DPFP value of A from the low value of B. */
114extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
115_mm_move_sd (__m128d __A, __m128d __B)
116{
117 return __extension__ (__m128d) __builtin_shuffle ((__v2df)__A, (__v2df)__B, (__v2di){2, 1});
118}
119
120/* Load two DPFP values from P. The address must be 16-byte aligned. */
121extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
122_mm_load_pd (double const *__P)
123{
124 return *(__m128d *)__P;
125}
126
127/* Load two DPFP values from P. The address need not be 16-byte aligned. */
128extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
129_mm_loadu_pd (double const *__P)
130{
131 return *(__m128d_u *)__P;
132}
133
134/* Create a vector with both elements equal to *P. */
135extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
136_mm_load1_pd (double const *__P)
137{
138 return _mm_set1_pd (*__P);
139}
140
141/* Create a vector with element 0 as *P and the rest zero. */
142extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
143_mm_load_sd (double const *__P)
144{
145 return _mm_set_sd (*__P);
146}
147
148extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
149_mm_load_pd1 (double const *__P)
150{
151 return _mm_load1_pd (__P);
152}
153
154/* Load two DPFP values in reverse order. The address must be aligned. */
155extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
156_mm_loadr_pd (double const *__P)
157{
158 __m128d __tmp = _mm_load_pd (__P);
159 return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
160}
161
162/* Store two DPFP values. The address must be 16-byte aligned. */
163extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
164_mm_store_pd (double *__P, __m128d __A)
165{
166 *(__m128d *)__P = __A;
167}
168
169/* Store two DPFP values. The address need not be 16-byte aligned. */
170extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
171_mm_storeu_pd (double *__P, __m128d __A)
172{
173 *(__m128d_u *)__P = __A;
174}
175
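/* Editor's sketch (an assumption, not from the original source): choosing between
   the aligned and unaligned load/store forms above.  */
#if 0
#include <emmintrin.h>
static void example_pd_memory (const double *p, double *q)
{
  __m128d v = _mm_loadu_pd (p);        /* p may have any alignment */
  _mm_storeu_pd (q, v);                /* q may have any alignment */

  double buf[2] __attribute__ ((aligned (16)));
  _mm_store_pd (buf, v);               /* buf is 16-byte aligned, so the aligned form is safe */
}
#endif
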
176/* Stores the lower DPFP value. */
177extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
178_mm_store_sd (double *__P, __m128d __A)
179{
180 *__P = ((__v2df)__A)[0];
181}
182
183extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
184_mm_cvtsd_f64 (__m128d __A)
185{
186 return ((__v2df)__A)[0];
187}
188
189extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
190_mm_storel_pd (double *__P, __m128d __A)
191{
192 _mm_store_sd (__P, __A);
193}
194
195/* Stores the upper DPFP value. */
196extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
197_mm_storeh_pd (double *__P, __m128d __A)
198{
199 *__P = ((__v2df)__A)[1];
200}
201
202/* Store the lower DPFP value across two words.
203 The address must be 16-byte aligned. */
204extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
205_mm_store1_pd (double *__P, __m128d __A)
206{
207 _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
208}
209
210extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
211_mm_store_pd1 (double *__P, __m128d __A)
212{
213 _mm_store1_pd (__P, __A);
214}
215
216/* Store two DPFP values in reverse order. The address must be aligned. */
217extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
218_mm_storer_pd (double *__P, __m128d __A)
219{
220 _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
221}
222
223extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
224_mm_cvtsi128_si32 (__m128i __A)
225{
226 return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
227}
228
229#ifdef __x86_64__
230/* Intel intrinsic. */
231extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
232_mm_cvtsi128_si64 (__m128i __A)
233{
234 return ((__v2di)__A)[0];
235}
236
237/* Microsoft intrinsic. */
238extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
239_mm_cvtsi128_si64x (__m128i __A)
240{
241 return ((__v2di)__A)[0];
242}
243#endif
244
245extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
246_mm_add_pd (__m128d __A, __m128d __B)
247{
248 return (__m128d) ((__v2df)__A + (__v2df)__B);
249}
250
251extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
252_mm_add_sd (__m128d __A, __m128d __B)
253{
254 return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
255}
256
257extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
258_mm_sub_pd (__m128d __A, __m128d __B)
259{
260 return (__m128d) ((__v2df)__A - (__v2df)__B);
261}
262
263extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
264_mm_sub_sd (__m128d __A, __m128d __B)
265{
266 return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
267}
268
269extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
270_mm_mul_pd (__m128d __A, __m128d __B)
271{
272 return (__m128d) ((__v2df)__A * (__v2df)__B);
273}
274
275extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
276_mm_mul_sd (__m128d __A, __m128d __B)
277{
278 return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
279}
280
281extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
282_mm_div_pd (__m128d __A, __m128d __B)
283{
284 return (__m128d) ((__v2df)__A / (__v2df)__B);
285}
286
287extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
288_mm_div_sd (__m128d __A, __m128d __B)
289{
290 return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
291}
292
293extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
294_mm_sqrt_pd (__m128d __A)
295{
296 return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
297}
298
299/* Return pair {sqrt (B[0]), A[1]}. */
300extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
301_mm_sqrt_sd (__m128d __A, __m128d __B)
302{
303 __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
304 return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
305}
306
307extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
308_mm_min_pd (__m128d __A, __m128d __B)
309{
310 return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
311}
312
313extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
314_mm_min_sd (__m128d __A, __m128d __B)
315{
316 return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
317}
318
319extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
320_mm_max_pd (__m128d __A, __m128d __B)
321{
322 return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
323}
324
325extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
326_mm_max_sd (__m128d __A, __m128d __B)
327{
328 return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
329}
330
331extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
332_mm_and_pd (__m128d __A, __m128d __B)
333{
334 return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
335}
336
337extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
338_mm_andnot_pd (__m128d __A, __m128d __B)
339{
340 return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
341}
342
343extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
344_mm_or_pd (__m128d __A, __m128d __B)
345{
346 return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
347}
348
349extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
350_mm_xor_pd (__m128d __A, __m128d __B)
351{
352 return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
353}
354
355extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
356_mm_cmpeq_pd (__m128d __A, __m128d __B)
357{
358 return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
359}
360
361extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
362_mm_cmplt_pd (__m128d __A, __m128d __B)
363{
364 return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
365}
366
367extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
368_mm_cmple_pd (__m128d __A, __m128d __B)
369{
370 return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
371}
372
373extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
374_mm_cmpgt_pd (__m128d __A, __m128d __B)
375{
376 return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
377}
378
379extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
380_mm_cmpge_pd (__m128d __A, __m128d __B)
381{
382 return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
383}
384
385extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
386_mm_cmpneq_pd (__m128d __A, __m128d __B)
387{
388 return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
389}
390
391extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
392_mm_cmpnlt_pd (__m128d __A, __m128d __B)
393{
394 return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
395}
396
397extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
398_mm_cmpnle_pd (__m128d __A, __m128d __B)
399{
400 return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
401}
402
403extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
404_mm_cmpngt_pd (__m128d __A, __m128d __B)
405{
406 return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
407}
408
409extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
410_mm_cmpnge_pd (__m128d __A, __m128d __B)
411{
412 return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
413}
414
415extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
416_mm_cmpord_pd (__m128d __A, __m128d __B)
417{
418 return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
419}
420
421extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
422_mm_cmpunord_pd (__m128d __A, __m128d __B)
423{
424 return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
425}
426
427extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
428_mm_cmpeq_sd (__m128d __A, __m128d __B)
429{
430 return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
431}
432
433extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
434_mm_cmplt_sd (__m128d __A, __m128d __B)
435{
436 return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
437}
438
439extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
440_mm_cmple_sd (__m128d __A, __m128d __B)
441{
442 return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
443}
444
445extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
446_mm_cmpgt_sd (__m128d __A, __m128d __B)
447{
448 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
449 (__v2df)
450 __builtin_ia32_cmpltsd ((__v2df) __B,
451 (__v2df)
452 __A));
453}
454
455extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
456_mm_cmpge_sd (__m128d __A, __m128d __B)
457{
458 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
459 (__v2df)
460 __builtin_ia32_cmplesd ((__v2df) __B,
461 (__v2df)
462 __A));
463}
464
465extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
466_mm_cmpneq_sd (__m128d __A, __m128d __B)
467{
468 return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
469}
470
471extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
472_mm_cmpnlt_sd (__m128d __A, __m128d __B)
473{
474 return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
475}
476
477extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
478_mm_cmpnle_sd (__m128d __A, __m128d __B)
479{
480 return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
481}
482
483extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
484_mm_cmpngt_sd (__m128d __A, __m128d __B)
485{
486 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
487 (__v2df)
488 __builtin_ia32_cmpnltsd ((__v2df) __B,
489 (__v2df)
490 __A));
491}
492
493extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
494_mm_cmpnge_sd (__m128d __A, __m128d __B)
495{
496 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
497 (__v2df)
498 __builtin_ia32_cmpnlesd ((__v2df) __B,
499 (__v2df)
500 __A));
501}
502
503extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
504_mm_cmpord_sd (__m128d __A, __m128d __B)
505{
506 return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
507}
508
509extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
510_mm_cmpunord_sd (__m128d __A, __m128d __B)
511{
512 return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
513}
514
515extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
516_mm_comieq_sd (__m128d __A, __m128d __B)
517{
518 return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
519}
520
521extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
522_mm_comilt_sd (__m128d __A, __m128d __B)
523{
524 return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
525}
526
527extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
528_mm_comile_sd (__m128d __A, __m128d __B)
529{
530 return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
531}
532
533extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
534_mm_comigt_sd (__m128d __A, __m128d __B)
535{
536 return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
537}
538
539extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
540_mm_comige_sd (__m128d __A, __m128d __B)
541{
542 return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
543}
544
545extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
546_mm_comineq_sd (__m128d __A, __m128d __B)
547{
548 return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
549}
550
551extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
552_mm_ucomieq_sd (__m128d __A, __m128d __B)
553{
554 return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
555}
556
557extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
558_mm_ucomilt_sd (__m128d __A, __m128d __B)
559{
560 return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
561}
562
563extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
564_mm_ucomile_sd (__m128d __A, __m128d __B)
565{
566 return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
567}
568
569extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
570_mm_ucomigt_sd (__m128d __A, __m128d __B)
571{
572 return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
573}
574
575extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
576_mm_ucomige_sd (__m128d __A, __m128d __B)
577{
578 return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
579}
580
581extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
582_mm_ucomineq_sd (__m128d __A, __m128d __B)
583{
584 return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
585}
586
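/* Editor's sketch (not part of the original header): the _mm_comi* and _mm_ucomi*
   intrinsics compare only the low doubles and return an int, which is convenient
   for branching on scalar values kept in vector registers.  */
#if 0
#include <emmintrin.h>
static int example_scalar_less (double a, double b)
{
  /* Returns 1 when a < b, 0 otherwise (including the unordered NaN case).  */
  return _mm_comilt_sd (_mm_set_sd (a), _mm_set_sd (b));
}
#endif
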
587/* Create a vector of Qi, where i is the element number. */
588
589extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
590_mm_set_epi64x (long long __q1, long long __q0)
591{
592 return __extension__ (__m128i)(__v2di){ __q0, __q1 };
593}
594
595extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
596_mm_set_epi64 (__m64 __q1, __m64 __q0)
597{
598 return _mm_set_epi64x ((long long)__q1, (long long)__q0);
599}
600
601extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
602_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
603{
604 return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
605}
606
607extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
608_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
609 short __q3, short __q2, short __q1, short __q0)
610{
611 return __extension__ (__m128i)(__v8hi){
612 __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
613}
614
615extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
616_mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
617 char __q11, char __q10, char __q09, char __q08,
618 char __q07, char __q06, char __q05, char __q04,
619 char __q03, char __q02, char __q01, char __q00)
620{
621 return __extension__ (__m128i)(__v16qi){
622 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
623 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
624 };
625}
626
627/* Set all of the elements of the vector to A. */
628
629extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
630_mm_set1_epi64x (long long __A)
631{
632 return _mm_set_epi64x (__A, __A);
633}
634
635extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
636_mm_set1_epi64 (__m64 __A)
637{
638 return _mm_set_epi64 (__A, __A);
639}
640
641extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
642_mm_set1_epi32 (int __A)
643{
644 return _mm_set_epi32 (__A, __A, __A, __A);
645}
646
647extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
648_mm_set1_epi16 (short __A)
649{
650 return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
651}
652
653extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
654_mm_set1_epi8 (char __A)
655{
656 return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
657 __A, __A, __A, __A, __A, __A, __A, __A);
658}
659
660/* Create a vector of Qi, where i is the element number.
661 The parameter order is reversed from the _mm_set_epi* functions. */
662
663extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
664_mm_setr_epi64 (__m64 __q0, __m64 __q1)
665{
666 return _mm_set_epi64 (__q1, __q0);
667}
668
669extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
670_mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
671{
672 return _mm_set_epi32 (__q3, __q2, __q1, __q0);
673}
674
675extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
676_mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
677 short __q4, short __q5, short __q6, short __q7)
678{
679 return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
680}
681
682extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
683_mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
684 char __q04, char __q05, char __q06, char __q07,
685 char __q08, char __q09, char __q10, char __q11,
686 char __q12, char __q13, char __q14, char __q15)
687{
688 return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
689 __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
690}
691
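/* Editor's sketch (assumption): element order of the integer set/setr constructors,
   mirroring the double-precision versions above.  */
#if 0
#include <emmintrin.h>
static void example_set_epi32 (void)
{
  int out[4];
  _mm_storeu_si128 ((__m128i_u *) out, _mm_set_epi32 (3, 2, 1, 0));   /* out == {0, 1, 2, 3} */
  _mm_storeu_si128 ((__m128i_u *) out, _mm_setr_epi32 (0, 1, 2, 3));  /* out == {0, 1, 2, 3} */
}
#endif
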
692/* Load 128 bits of integer data.  _mm_load_si128 requires a 16-byte aligned
   address; the _u and narrower variants below do not.  */
693
694extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
695_mm_load_si128 (__m128i const *__P)
696{
697 return *__P;
698}
699
700extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
701_mm_loadu_si128 (__m128i_u const *__P)
702{
703 return *__P;
704}
705
706extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
707_mm_loadl_epi64 (__m128i_u const *__P)
708{
709 return _mm_set_epi64 ((__m64)0LL, *(__m64_u *)__P);
710}
711
712extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
713_mm_loadu_si64 (void const *__P)
714{
715 return _mm_loadl_epi64 ((__m128i_u *)__P);
716}
717
718extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
719_mm_loadu_si32 (void const *__P)
720{
721 return _mm_set_epi32 (*(int *)__P, (int)0, (int)0, (int)0);
722}
723
724extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
725_mm_loadu_si16 (void const *__P)
726{
727 return _mm_set_epi16 (*(short *)__P, (short)0, (short)0, (short)0,
728 (short)0, (short)0, (short)0, (short)0);
729}
730
731extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
732_mm_store_si128 (__m128i *__P, __m128i __B)
733{
734 *__P = __B;
735}
736
737extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
738_mm_storeu_si128 (__m128i_u *__P, __m128i __B)
739{
740 *__P = __B;
741}
742
743extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
744_mm_storel_epi64 (__m128i_u *__P, __m128i __B)
745{
746 *(__m64_u *)__P = (__m64) ((__v2di)__B)[0];
747}
748
749extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
750_mm_storeu_si64 (void *__P, __m128i __B)
751{
752 _mm_storel_epi64 ((__m128i_u *)__P, __B);
753}
754
755extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
756_mm_storeu_si32 (void *__P, __m128i __B)
757{
758 *(__m32_u *)__P = (__m32) ((__v4si)__B)[0];
759}
760
761extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
762_mm_storeu_si16 (void *__P, __m128i __B)
763{
764 *(__m16_u *)__P = (__m16) ((__v8hi)__B)[0];
765}
766
767extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
768_mm_movepi64_pi64 (__m128i __B)
769{
770 return (__m64) ((__v2di)__B)[0];
771}
772
773extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
774_mm_movpi64_epi64 (__m64 __A)
775{
776 return _mm_set_epi64 ((__m64)0LL, __A);
777}
778
779extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
780_mm_move_epi64 (__m128i __A)
781{
782 return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
783}
784
785/* Create an undefined vector. */
786extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
787_mm_undefined_si128 (void)
788{
789 __m128i __Y = __Y;
790 return __Y;
791}
792
793/* Create a vector of zeros. */
794extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
795_mm_setzero_si128 (void)
796{
797 return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
798}
799
800extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
801_mm_cvtepi32_pd (__m128i __A)
802{
803 return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
804}
805
806extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
807_mm_cvtepi32_ps (__m128i __A)
808{
809 return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
810}
811
812extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
813_mm_cvtpd_epi32 (__m128d __A)
814{
815 return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
816}
817
818extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
819_mm_cvtpd_pi32 (__m128d __A)
820{
821 return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
822}
823
824extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
825_mm_cvtpd_ps (__m128d __A)
826{
827 return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
828}
829
830extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
831_mm_cvttpd_epi32 (__m128d __A)
832{
833 return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
834}
835
836extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
837_mm_cvttpd_pi32 (__m128d __A)
838{
839 return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
840}
841
842extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
843_mm_cvtpi32_pd (__m64 __A)
844{
845 return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
846}
847
848extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
849_mm_cvtps_epi32 (__m128 __A)
850{
851 return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
852}
853
854extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
855_mm_cvttps_epi32 (__m128 __A)
856{
857 return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
858}
859
860extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
861_mm_cvtps_pd (__m128 __A)
862{
863 return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
864}
865
866extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
867_mm_cvtsd_si32 (__m128d __A)
868{
869 return __builtin_ia32_cvtsd2si ((__v2df) __A);
870}
871
872#ifdef __x86_64__
873/* Intel intrinsic. */
874extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
875_mm_cvtsd_si64 (__m128d __A)
876{
877 return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
878}
879
880/* Microsoft intrinsic. */
881extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
882_mm_cvtsd_si64x (__m128d __A)
883{
884 return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
885}
886#endif
887
888extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
889_mm_cvttsd_si32 (__m128d __A)
890{
891 return __builtin_ia32_cvttsd2si ((__v2df) __A);
892}
893
894#ifdef __x86_64__
895/* Intel intrinsic. */
896extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
897_mm_cvttsd_si64 (__m128d __A)
898{
899 return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
900}
901
902/* Microsoft intrinsic. */
903extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
904_mm_cvttsd_si64x (__m128d __A)
905{
906 return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
907}
908#endif
909
910extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
911_mm_cvtsd_ss (__m128 __A, __m128d __B)
912{
913 return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
914}
915
916extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
917_mm_cvtsi32_sd (__m128d __A, int __B)
918{
919 return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
920}
921
922#ifdef __x86_64__
923/* Intel intrinsic. */
924extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
925_mm_cvtsi64_sd (__m128d __A, long long __B)
926{
927 return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
928}
929
930/* Microsoft intrinsic. */
931extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
932_mm_cvtsi64x_sd (__m128d __A, long long __B)
933{
934 return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
935}
936#endif
937
938extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
939_mm_cvtss_sd (__m128d __A, __m128 __B)
940{
941 return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
942}
943
944#ifdef __OPTIMIZE__
945extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
946_mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
947{
948 return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
949}
950#else
951#define _mm_shuffle_pd(A, B, N) \
952 ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A), \
953 (__v2df)(__m128d)(B), (int)(N)))
954#endif
955
956extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
957_mm_unpackhi_pd (__m128d __A, __m128d __B)
958{
959 return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
960}
961
962extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
963_mm_unpacklo_pd (__m128d __A, __m128d __B)
964{
965 return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
966}
967
968extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
969_mm_loadh_pd (__m128d __A, double const *__B)
970{
971 return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
972}
973
974extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
975_mm_loadl_pd (__m128d __A, double const *__B)
976{
977 return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
978}
979
980extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
981_mm_movemask_pd (__m128d __A)
982{
983 return __builtin_ia32_movmskpd ((__v2df)__A);
984}
985
986extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
987_mm_packs_epi16 (__m128i __A, __m128i __B)
988{
989 return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
990}
991
992extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
993_mm_packs_epi32 (__m128i __A, __m128i __B)
994{
995 return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
996}
997
998extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
999_mm_packus_epi16 (__m128i __A, __m128i __B)
1000{
1001 return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
1002}
1003
1004extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1005_mm_unpackhi_epi8 (__m128i __A, __m128i __B)
1006{
1007 return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
1008}
1009
1010extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1011_mm_unpackhi_epi16 (__m128i __A, __m128i __B)
1012{
1013 return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
1014}
1015
1016extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1017_mm_unpackhi_epi32 (__m128i __A, __m128i __B)
1018{
1019 return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
1020}
1021
1022extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1023_mm_unpackhi_epi64 (__m128i __A, __m128i __B)
1024{
1025 return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
1026}
1027
1028extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1029_mm_unpacklo_epi8 (__m128i __A, __m128i __B)
1030{
1031 return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
1032}
1033
1034extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1035_mm_unpacklo_epi16 (__m128i __A, __m128i __B)
1036{
1037 return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
1038}
1039
1040extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1041_mm_unpacklo_epi32 (__m128i __A, __m128i __B)
1042{
1043 return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
1044}
1045
1046extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1047_mm_unpacklo_epi64 (__m128i __A, __m128i __B)
1048{
1049 return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
1050}
1051
1052extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1053_mm_add_epi8 (__m128i __A, __m128i __B)
1054{
1055 return (__m128i) ((__v16qu)__A + (__v16qu)__B);
1056}
1057
1058extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1059_mm_add_epi16 (__m128i __A, __m128i __B)
1060{
1061 return (__m128i) ((__v8hu)__A + (__v8hu)__B);
1062}
1063
1064extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1065_mm_add_epi32 (__m128i __A, __m128i __B)
1066{
1067 return (__m128i) ((__v4su)__A + (__v4su)__B);
1068}
1069
1070extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1071_mm_add_epi64 (__m128i __A, __m128i __B)
1072{
1073 return (__m128i) ((__v2du)__A + (__v2du)__B);
1074}
1075
1076extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1077_mm_adds_epi8 (__m128i __A, __m128i __B)
1078{
1079 return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
1080}
1081
1082extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1083_mm_adds_epi16 (__m128i __A, __m128i __B)
1084{
1085 return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
1086}
1087
1088extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1089_mm_adds_epu8 (__m128i __A, __m128i __B)
1090{
1091 return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
1092}
1093
1094extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1095_mm_adds_epu16 (__m128i __A, __m128i __B)
1096{
1097 return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
1098}
1099
1100extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1101_mm_sub_epi8 (__m128i __A, __m128i __B)
1102{
1103 return (__m128i) ((__v16qu)__A - (__v16qu)__B);
1104}
1105
1106extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1107_mm_sub_epi16 (__m128i __A, __m128i __B)
1108{
1109 return (__m128i) ((__v8hu)__A - (__v8hu)__B);
1110}
1111
1112extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1113_mm_sub_epi32 (__m128i __A, __m128i __B)
1114{
1115 return (__m128i) ((__v4su)__A - (__v4su)__B);
1116}
1117
1118extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1119_mm_sub_epi64 (__m128i __A, __m128i __B)
1120{
1121 return (__m128i) ((__v2du)__A - (__v2du)__B);
1122}
1123
1124extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1125_mm_subs_epi8 (__m128i __A, __m128i __B)
1126{
1127 return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
1128}
1129
1130extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1131_mm_subs_epi16 (__m128i __A, __m128i __B)
1132{
1133 return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
1134}
1135
1136extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1137_mm_subs_epu8 (__m128i __A, __m128i __B)
1138{
1139 return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
1140}
1141
1142extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1143_mm_subs_epu16 (__m128i __A, __m128i __B)
1144{
1145 return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
1146}
1147
1148extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1149_mm_madd_epi16 (__m128i __A, __m128i __B)
1150{
1151 return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
1152}
1153
1154extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1155_mm_mulhi_epi16 (__m128i __A, __m128i __B)
1156{
1157 return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
1158}
1159
1160extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1161_mm_mullo_epi16 (__m128i __A, __m128i __B)
1162{
1163 return (__m128i) ((__v8hu)__A * (__v8hu)__B);
1164}
1165
1166extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1167_mm_mul_su32 (__m64 __A, __m64 __B)
1168{
1169 return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
1170}
1171
1172extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1173_mm_mul_epu32 (__m128i __A, __m128i __B)
1174{
1175 return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
1176}
1177
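/* Editor's sketch (not part of the original header): _mm_mul_epu32 multiplies the
   even-indexed unsigned 32-bit elements (0 and 2), giving two full 64-bit products.  */
#if 0
#include <emmintrin.h>
static __m128i example_widening_mul (void)
{
  __m128i a = _mm_set_epi32 (0, 3, 0, 2);    /* elements {2, 0, 3, 0} */
  __m128i b = _mm_set_epi32 (0, 10, 0, 10);  /* elements {10, 0, 10, 0} */
  return _mm_mul_epu32 (a, b);               /* 64-bit lanes {2 * 10, 3 * 10} == {20, 30} */
}
#endif
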
1178extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1179_mm_slli_epi16 (__m128i __A, int __B)
1180{
1181 return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
1182}
1183
1184extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1185_mm_slli_epi32 (__m128i __A, int __B)
1186{
1187 return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
1188}
1189
1190extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1191_mm_slli_epi64 (__m128i __A, int __B)
1192{
1193 return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
1194}
1195
1196extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1197_mm_srai_epi16 (__m128i __A, int __B)
1198{
1199 return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
1200}
1201
1202extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1203_mm_srai_epi32 (__m128i __A, int __B)
1204{
1205 return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
1206}
1207
1208#ifdef __OPTIMIZE__
1209extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1210_mm_bsrli_si128 (__m128i __A, const int __N)
1211{
1212 return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
1213}
1214
1215extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1216_mm_bslli_si128 (__m128i __A, const int __N)
1217{
1218 return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
1219}
1220
1221extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1222_mm_srli_si128 (__m128i __A, const int __N)
1223{
1224 return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
1225}
1226
1227extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1228_mm_slli_si128 (__m128i __A, const int __N)
1229{
1230 return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
1231}
1232#else
1233#define _mm_bsrli_si128(A, N) \
1234 ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
1235#define _mm_bslli_si128(A, N) \
1236 ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
1237#define _mm_srli_si128(A, N) \
1238 ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
1239#define _mm_slli_si128(A, N) \
1240 ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
1241#endif
1242
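/* Editor's sketch (assumption): the si128 shifts above move the whole 128-bit value
   by N bytes, unlike the per-element bit shifts elsewhere in this header.  */
#if 0
#include <emmintrin.h>
static void example_byte_vs_bit_shift (void)
{
  __m128i v = _mm_set_epi32 (0, 0, 0, 0x11223344);
  __m128i bytes = _mm_slli_si128 (v, 4);  /* whole register shifted left by 4 bytes */
  __m128i bits  = _mm_slli_epi32 (v, 4);  /* each 32-bit element shifted left by 4 bits */
  (void) bytes; (void) bits;
}
#endif
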
1243extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1244_mm_srli_epi16 (__m128i __A, int __B)
1245{
1246 return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
1247}
1248
1249extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1250_mm_srli_epi32 (__m128i __A, int __B)
1251{
1252 return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
1253}
1254
1255extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1256_mm_srli_epi64 (__m128i __A, int __B)
1257{
1258 return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
1259}
1260
1261extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1262_mm_sll_epi16 (__m128i __A, __m128i __B)
1263{
1264 return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
1265}
1266
1267extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1268_mm_sll_epi32 (__m128i __A, __m128i __B)
1269{
1270 return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
1271}
1272
1273extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1274_mm_sll_epi64 (__m128i __A, __m128i __B)
1275{
1276 return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
1277}
1278
1279extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1280_mm_sra_epi16 (__m128i __A, __m128i __B)
1281{
1282 return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
1283}
1284
1285extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1286_mm_sra_epi32 (__m128i __A, __m128i __B)
1287{
1288 return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
1289}
1290
1291extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1292_mm_srl_epi16 (__m128i __A, __m128i __B)
1293{
1294 return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
1295}
1296
1297extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1298_mm_srl_epi32 (__m128i __A, __m128i __B)
1299{
1300 return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
1301}
1302
1303extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1304_mm_srl_epi64 (__m128i __A, __m128i __B)
1305{
1306 return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
1307}
1308
1309extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1310_mm_and_si128 (__m128i __A, __m128i __B)
1311{
1312 return (__m128i) ((__v2du)__A & (__v2du)__B);
1313}
1314
1315extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1316_mm_andnot_si128 (__m128i __A, __m128i __B)
1317{
1318 return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
1319}
1320
1321extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1322_mm_or_si128 (__m128i __A, __m128i __B)
1323{
1324 return (__m128i) ((__v2du)__A | (__v2du)__B);
1325}
1326
1327extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1328_mm_xor_si128 (__m128i __A, __m128i __B)
1329{
1330 return (__m128i) ((__v2du)__A ^ (__v2du)__B);
1331}
1332
1333extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1334_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
1335{
1336 return (__m128i) ((__v16qi)__A == (__v16qi)__B);
1337}
1338
1339extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1340_mm_cmpeq_epi16 (__m128i __A, __m128i __B)
1341{
1342 return (__m128i) ((__v8hi)__A == (__v8hi)__B);
1343}
1344
1345extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1346_mm_cmpeq_epi32 (__m128i __A, __m128i __B)
1347{
1348 return (__m128i) ((__v4si)__A == (__v4si)__B);
1349}
1350
1351extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1352_mm_cmplt_epi8 (__m128i __A, __m128i __B)
1353{
1354 return (__m128i) ((__v16qs)__A < (__v16qs)__B);
1355}
1356
1357extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1358_mm_cmplt_epi16 (__m128i __A, __m128i __B)
1359{
1360 return (__m128i) ((__v8hi)__A < (__v8hi)__B);
1361}
1362
1363extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1364_mm_cmplt_epi32 (__m128i __A, __m128i __B)
1365{
1366 return (__m128i) ((__v4si)__A < (__v4si)__B);
1367}
1368
1369extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1370_mm_cmpgt_epi8 (__m128i __A, __m128i __B)
1371{
1372 return (__m128i) ((__v16qs)__A > (__v16qs)__B);
1373}
1374
1375extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1376_mm_cmpgt_epi16 (__m128i __A, __m128i __B)
1377{
1378 return (__m128i) ((__v8hi)__A > (__v8hi)__B);
1379}
1380
1381extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1382_mm_cmpgt_epi32 (__m128i __A, __m128i __B)
1383{
1384 return (__m128i) ((__v4si)__A > (__v4si)__B);
1385}
1386
1387#ifdef __OPTIMIZE__
1388extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1389_mm_extract_epi16 (__m128i const __A, int const __N)
1390{
1391 return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
1392}
1393
1394extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1395_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
1396{
1397 return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
1398}
1399#else
1400#define _mm_extract_epi16(A, N) \
1401 ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
1402#define _mm_insert_epi16(A, D, N) \
1403 ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \
1404 (int)(D), (int)(N)))
1405#endif
1406
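/* Editor's sketch (not from the original source): reading and replacing one 16-bit
   lane; the lane index must be a compile-time constant.  */
#if 0
#include <emmintrin.h>
static __m128i example_extract_insert (__m128i v)
{
  int lane3 = _mm_extract_epi16 (v, 3);       /* element 3, zero-extended to int */
  return _mm_insert_epi16 (v, lane3 + 1, 3);  /* same vector with element 3 incremented */
}
#endif
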
1407extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1408_mm_max_epi16 (__m128i __A, __m128i __B)
1409{
1410 return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
1411}
1412
1413extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1414_mm_max_epu8 (__m128i __A, __m128i __B)
1415{
1416 return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
1417}
1418
1419extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1420_mm_min_epi16 (__m128i __A, __m128i __B)
1421{
1422 return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
1423}
1424
1425extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1426_mm_min_epu8 (__m128i __A, __m128i __B)
1427{
1428 return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
1429}
1430
1431extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1432_mm_movemask_epi8 (__m128i __A)
1433{
1434 return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
1435}
1436
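/* Editor's sketch (not part of the original header): a common idiom that combines a
   byte compare with _mm_movemask_epi8 to test whether two 16-byte blocks are equal.  */
#if 0
#include <emmintrin.h>
static int example_blocks_equal (const void *a, const void *b)
{
  __m128i va = _mm_loadu_si128 ((__m128i_u const *) a);
  __m128i vb = _mm_loadu_si128 ((__m128i_u const *) b);
  /* Each equal byte compares to 0xFF, so the mask is 0xFFFF iff all 16 bytes match.  */
  return _mm_movemask_epi8 (_mm_cmpeq_epi8 (va, vb)) == 0xFFFF;
}
#endif
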
1437extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1438_mm_mulhi_epu16 (__m128i __A, __m128i __B)
1439{
1440 return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
1441}
1442
1443#ifdef __OPTIMIZE__
1444extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1445_mm_shufflehi_epi16 (__m128i __A, const int __mask)
1446{
1447 return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask);
1448}
1449
1450extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1451_mm_shufflelo_epi16 (__m128i __A, const int __mask)
1452{
1453 return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask);
1454}
1455
1456extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1457_mm_shuffle_epi32 (__m128i __A, const int __mask)
1458{
1459 return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask);
1460}
1461#else
1462#define _mm_shufflehi_epi16(A, N) \
1463 ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N)))
1464#define _mm_shufflelo_epi16(A, N) \
1465 ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N)))
1466#define _mm_shuffle_epi32(A, N) \
1467 ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N)))
1468#endif
1469
1470extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1471_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
1472{
1473 __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
1474}
1475
1476extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1477_mm_avg_epu8 (__m128i __A, __m128i __B)
1478{
1479 return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
1480}
1481
1482extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1483_mm_avg_epu16 (__m128i __A, __m128i __B)
1484{
1485 return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
1486}
1487
1488extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1489_mm_sad_epu8 (__m128i __A, __m128i __B)
1490{
1491 return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
1492}
1493
1494extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1495_mm_stream_si32 (int *__A, int __B)
1496{
1497 __builtin_ia32_movnti (__A, __B);
1498}
1499
1500#ifdef __x86_64__
1501extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1502_mm_stream_si64 (long long int *__A, long long int __B)
1503{
1504 __builtin_ia32_movnti64 (__A, __B);
1505}
1506#endif
1507
1508extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1509_mm_stream_si128 (__m128i *__A, __m128i __B)
1510{
1511 __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
1512}
1513
1514extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1515_mm_stream_pd (double *__A, __m128d __B)
1516{
1517 __builtin_ia32_movntpd (__A, (__v2df)__B);
1518}
1519
1520extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1521_mm_clflush (void const *__A)
1522{
1523 __builtin_ia32_clflush (__A);
1524}
1525
1526extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1527_mm_lfence (void)
1528{
1529 __builtin_ia32_lfence ();
1530}
1531
1532extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1533_mm_mfence (void)
1534{
1535 __builtin_ia32_mfence ();
1536}
1537
1538extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1539_mm_cvtsi32_si128 (int __A)
1540{
1541 return _mm_set_epi32 (0, 0, 0, __A);
1542}
1543
1544#ifdef __x86_64__
1545/* Intel intrinsic. */
1546extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1547_mm_cvtsi64_si128 (long long __A)
1548{
1549 return _mm_set_epi64x (0, __A);
1550}
1551
1552/* Microsoft intrinsic. */
1553extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1554_mm_cvtsi64x_si128 (long long __A)
1555{
1556 return _mm_set_epi64x (0, __A);
1557}
1558#endif
1559
1560/* Casts between various SP, DP, INT vector types. Note that these do no
1561 conversion of values, they just change the type. */
1562extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1563_mm_castpd_ps(__m128d __A)
1564{
1565 return (__m128) __A;
1566}
1567
1568extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1569_mm_castpd_si128(__m128d __A)
1570{
1571 return (__m128i) __A;
1572}
1573
1574extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1575_mm_castps_pd(__m128 __A)
1576{
1577 return (__m128d) __A;
1578}
1579
1580extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1581_mm_castps_si128(__m128 __A)
1582{
1583 return (__m128i) __A;
1584}
1585
1586extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1587_mm_castsi128_ps(__m128i __A)
1588{
1589 return (__m128) __A;
1590}
1591
1592extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1593_mm_castsi128_pd(__m128i __A)
1594{
1595 return (__m128d) __A;
1596}
1597
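/* Editor's sketch (assumption): the casts above only relabel the 128 bits, whereas the
   cvt intrinsics earlier in this header perform numeric conversion.  */
#if 0
#include <emmintrin.h>
static void example_cast_vs_convert (void)
{
  __m128i ones = _mm_set1_epi32 (1);
  __m128 bits   = _mm_castsi128_ps (ones);  /* same bit pattern, read as four tiny denormal floats */
  __m128 values = _mm_cvtepi32_ps (ones);   /* numeric conversion: {1.0f, 1.0f, 1.0f, 1.0f} */
  (void) bits; (void) values;
}
#endif
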
1598#ifdef __DISABLE_SSE2__
1599#undef __DISABLE_SSE2__
1600#pragma GCC pop_options
1601#endif /* __DISABLE_SSE2__ */
1602
1603#endif /* _EMMINTRIN_H_INCLUDED */