source: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/emmintrin.h@ 1088

Last change on this file since 1088 was 1046, checked in by alloc, 8 years ago

Daodan: Added Windows MinGW and build batch file

File size: 49.6 KB
RevLine 
[1046]1/* Copyright (C) 2003-2015 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24/* Implemented from the specification included in the Intel C++ Compiler
25 User Guide and Reference, version 9.0. */
26
27#ifndef _EMMINTRIN_H_INCLUDED
28#define _EMMINTRIN_H_INCLUDED
29
/* We need definitions from the SSE header files.  */
31#include <xmmintrin.h>
32
/* When SSE2 is not already enabled, save the current target options and
   switch to "sse2" for the definitions that follow.  __DISABLE_SSE2__
   records that this header made the switch (presumably so it can be
   undone later; that part is not visible here).  */
#if !defined (__SSE2__)
#pragma GCC push_options
#pragma GCC target("sse2")
#define __DISABLE_SSE2__
#endif /* !__SSE2__ */
38
/* SSE2 internal vector types.  Each is a 16-byte GCC vector; the name
   encodes the element count and element kind.  */
typedef double __attribute__ ((__vector_size__ (16))) __v2df;
typedef long long __attribute__ ((__vector_size__ (16))) __v2di;
typedef unsigned long long __attribute__ ((__vector_size__ (16))) __v2du;
typedef int __attribute__ ((__vector_size__ (16))) __v4si;
typedef unsigned int __attribute__ ((__vector_size__ (16))) __v4su;
typedef short __attribute__ ((__vector_size__ (16))) __v8hi;
typedef unsigned short __attribute__ ((__vector_size__ (16))) __v8hu;
typedef char __attribute__ ((__vector_size__ (16))) __v16qi;
typedef unsigned char __attribute__ ((__vector_size__ (16))) __v16qu;
49
/* Public 128-bit vector types.  The Intel API lets callers view these
   through other vector types and through their scalar components, so
   both are marked __may_alias__.  */
typedef long long __attribute__ ((__vector_size__ (16), __may_alias__)) __m128i;
typedef double __attribute__ ((__vector_size__ (16), __may_alias__)) __m128d;
54
/* Build the 2-bit selector used with the SHUFPD instruction:
   fp1 becomes bit 1 and fp0 becomes bit 0.  */
#define _MM_SHUFFLE2(fp1,fp0) (((fp1) << 1) | (fp0))
58
59/* Create a vector with element 0 as F and the rest zero. */
60extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
61_mm_set_sd (double __F)
62{
63 return __extension__ (__m128d){ __F, 0.0 };
64}
65
66/* Create a vector with both elements equal to F. */
67extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
68_mm_set1_pd (double __F)
69{
70 return __extension__ (__m128d){ __F, __F };
71}
72
73extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
74_mm_set_pd1 (double __F)
75{
76 return _mm_set1_pd (__F);
77}
78
79/* Create a vector with the lower value X and upper value W. */
80extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
81_mm_set_pd (double __W, double __X)
82{
83 return __extension__ (__m128d){ __X, __W };
84}
85
86/* Create a vector with the lower value W and upper value X. */
87extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
88_mm_setr_pd (double __W, double __X)
89{
90 return __extension__ (__m128d){ __W, __X };
91}
92
93/* Create an undefined vector. */
94extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
95_mm_undefined_pd (void)
96{
97 __m128d __Y = __Y;
98 return __Y;
99}
100
101/* Create a vector of zeros. */
102extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
103_mm_setzero_pd (void)
104{
105 return __extension__ (__m128d){ 0.0, 0.0 };
106}
107
108/* Sets the low DPFP value of A from the low value of B. */
109extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
110_mm_move_sd (__m128d __A, __m128d __B)
111{
112 return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
113}
114
115/* Load two DPFP values from P. The address must be 16-byte aligned. */
116extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
117_mm_load_pd (double const *__P)
118{
119 return *(__m128d *)__P;
120}
121
122/* Load two DPFP values from P. The address need not be 16-byte aligned. */
123extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
124_mm_loadu_pd (double const *__P)
125{
126 return __builtin_ia32_loadupd (__P);
127}
128
129/* Create a vector with all two elements equal to *P. */
130extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
131_mm_load1_pd (double const *__P)
132{
133 return _mm_set1_pd (*__P);
134}
135
136/* Create a vector with element 0 as *P and the rest zero. */
137extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
138_mm_load_sd (double const *__P)
139{
140 return _mm_set_sd (*__P);
141}
142
143extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
144_mm_load_pd1 (double const *__P)
145{
146 return _mm_load1_pd (__P);
147}
148
149/* Load two DPFP values in reverse order. The address must be aligned. */
150extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
151_mm_loadr_pd (double const *__P)
152{
153 __m128d __tmp = _mm_load_pd (__P);
154 return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
155}
156
157/* Store two DPFP values. The address must be 16-byte aligned. */
158extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
159_mm_store_pd (double *__P, __m128d __A)
160{
161 *(__m128d *)__P = __A;
162}
163
164/* Store two DPFP values. The address need not be 16-byte aligned. */
165extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
166_mm_storeu_pd (double *__P, __m128d __A)
167{
168 __builtin_ia32_storeupd (__P, __A);
169}
170
171/* Stores the lower DPFP value. */
172extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
173_mm_store_sd (double *__P, __m128d __A)
174{
175 *__P = ((__v2df)__A)[0];
176}
177
178extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
179_mm_cvtsd_f64 (__m128d __A)
180{
181 return ((__v2df)__A)[0];
182}
183
184extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
185_mm_storel_pd (double *__P, __m128d __A)
186{
187 _mm_store_sd (__P, __A);
188}
189
190/* Stores the upper DPFP value. */
191extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
192_mm_storeh_pd (double *__P, __m128d __A)
193{
194 *__P = ((__v2df)__A)[1];
195}
196
197/* Store the lower DPFP value across two words.
198 The address must be 16-byte aligned. */
199extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
200_mm_store1_pd (double *__P, __m128d __A)
201{
202 _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
203}
204
205extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
206_mm_store_pd1 (double *__P, __m128d __A)
207{
208 _mm_store1_pd (__P, __A);
209}
210
211/* Store two DPFP values in reverse order. The address must be aligned. */
212extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
213_mm_storer_pd (double *__P, __m128d __A)
214{
215 _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
216}
217
218extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
219_mm_cvtsi128_si32 (__m128i __A)
220{
221 return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
222}
223
224#ifdef __x86_64__
225/* Intel intrinsic. */
226extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
227_mm_cvtsi128_si64 (__m128i __A)
228{
229 return ((__v2di)__A)[0];
230}
231
232/* Microsoft intrinsic. */
233extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
234_mm_cvtsi128_si64x (__m128i __A)
235{
236 return ((__v2di)__A)[0];
237}
238#endif
239
240extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
241_mm_add_pd (__m128d __A, __m128d __B)
242{
243 return (__m128d) ((__v2df)__A + (__v2df)__B);
244}
245
246extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
247_mm_add_sd (__m128d __A, __m128d __B)
248{
249 return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
250}
251
252extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
253_mm_sub_pd (__m128d __A, __m128d __B)
254{
255 return (__m128d) ((__v2df)__A - (__v2df)__B);
256}
257
258extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
259_mm_sub_sd (__m128d __A, __m128d __B)
260{
261 return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
262}
263
264extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
265_mm_mul_pd (__m128d __A, __m128d __B)
266{
267 return (__m128d) ((__v2df)__A * (__v2df)__B);
268}
269
270extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
271_mm_mul_sd (__m128d __A, __m128d __B)
272{
273 return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
274}
275
276extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
277_mm_div_pd (__m128d __A, __m128d __B)
278{
279 return (__m128d) ((__v2df)__A / (__v2df)__B);
280}
281
282extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
283_mm_div_sd (__m128d __A, __m128d __B)
284{
285 return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
286}
287
288extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
289_mm_sqrt_pd (__m128d __A)
290{
291 return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
292}
293
294/* Return pair {sqrt (B[0]), A[1]}. */
295extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
296_mm_sqrt_sd (__m128d __A, __m128d __B)
297{
298 __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
299 return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
300}
301
302extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
303_mm_min_pd (__m128d __A, __m128d __B)
304{
305 return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
306}
307
308extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
309_mm_min_sd (__m128d __A, __m128d __B)
310{
311 return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
312}
313
314extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
315_mm_max_pd (__m128d __A, __m128d __B)
316{
317 return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
318}
319
320extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
321_mm_max_sd (__m128d __A, __m128d __B)
322{
323 return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
324}
325
326extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
327_mm_and_pd (__m128d __A, __m128d __B)
328{
329 return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
330}
331
332extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
333_mm_andnot_pd (__m128d __A, __m128d __B)
334{
335 return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
336}
337
338extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
339_mm_or_pd (__m128d __A, __m128d __B)
340{
341 return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
342}
343
344extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
345_mm_xor_pd (__m128d __A, __m128d __B)
346{
347 return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
348}
349
350extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
351_mm_cmpeq_pd (__m128d __A, __m128d __B)
352{
353 return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
354}
355
356extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
357_mm_cmplt_pd (__m128d __A, __m128d __B)
358{
359 return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
360}
361
362extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
363_mm_cmple_pd (__m128d __A, __m128d __B)
364{
365 return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
366}
367
368extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
369_mm_cmpgt_pd (__m128d __A, __m128d __B)
370{
371 return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
372}
373
374extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
375_mm_cmpge_pd (__m128d __A, __m128d __B)
376{
377 return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
378}
379
380extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
381_mm_cmpneq_pd (__m128d __A, __m128d __B)
382{
383 return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
384}
385
386extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
387_mm_cmpnlt_pd (__m128d __A, __m128d __B)
388{
389 return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
390}
391
392extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
393_mm_cmpnle_pd (__m128d __A, __m128d __B)
394{
395 return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
396}
397
398extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
399_mm_cmpngt_pd (__m128d __A, __m128d __B)
400{
401 return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
402}
403
404extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
405_mm_cmpnge_pd (__m128d __A, __m128d __B)
406{
407 return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
408}
409
410extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
411_mm_cmpord_pd (__m128d __A, __m128d __B)
412{
413 return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
414}
415
416extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
417_mm_cmpunord_pd (__m128d __A, __m128d __B)
418{
419 return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
420}
421
422extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
423_mm_cmpeq_sd (__m128d __A, __m128d __B)
424{
425 return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
426}
427
428extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
429_mm_cmplt_sd (__m128d __A, __m128d __B)
430{
431 return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
432}
433
434extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
435_mm_cmple_sd (__m128d __A, __m128d __B)
436{
437 return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
438}
439
440extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
441_mm_cmpgt_sd (__m128d __A, __m128d __B)
442{
443 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
444 (__v2df)
445 __builtin_ia32_cmpltsd ((__v2df) __B,
446 (__v2df)
447 __A));
448}
449
450extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
451_mm_cmpge_sd (__m128d __A, __m128d __B)
452{
453 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
454 (__v2df)
455 __builtin_ia32_cmplesd ((__v2df) __B,
456 (__v2df)
457 __A));
458}
459
460extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
461_mm_cmpneq_sd (__m128d __A, __m128d __B)
462{
463 return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
464}
465
466extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
467_mm_cmpnlt_sd (__m128d __A, __m128d __B)
468{
469 return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
470}
471
472extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
473_mm_cmpnle_sd (__m128d __A, __m128d __B)
474{
475 return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
476}
477
478extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
479_mm_cmpngt_sd (__m128d __A, __m128d __B)
480{
481 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
482 (__v2df)
483 __builtin_ia32_cmpnltsd ((__v2df) __B,
484 (__v2df)
485 __A));
486}
487
488extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
489_mm_cmpnge_sd (__m128d __A, __m128d __B)
490{
491 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
492 (__v2df)
493 __builtin_ia32_cmpnlesd ((__v2df) __B,
494 (__v2df)
495 __A));
496}
497
498extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
499_mm_cmpord_sd (__m128d __A, __m128d __B)
500{
501 return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
502}
503
504extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
505_mm_cmpunord_sd (__m128d __A, __m128d __B)
506{
507 return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
508}
509
510extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
511_mm_comieq_sd (__m128d __A, __m128d __B)
512{
513 return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
514}
515
516extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
517_mm_comilt_sd (__m128d __A, __m128d __B)
518{
519 return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
520}
521
522extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
523_mm_comile_sd (__m128d __A, __m128d __B)
524{
525 return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
526}
527
528extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
529_mm_comigt_sd (__m128d __A, __m128d __B)
530{
531 return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
532}
533
534extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
535_mm_comige_sd (__m128d __A, __m128d __B)
536{
537 return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
538}
539
540extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
541_mm_comineq_sd (__m128d __A, __m128d __B)
542{
543 return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
544}
545
546extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
547_mm_ucomieq_sd (__m128d __A, __m128d __B)
548{
549 return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
550}
551
552extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
553_mm_ucomilt_sd (__m128d __A, __m128d __B)
554{
555 return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
556}
557
558extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
559_mm_ucomile_sd (__m128d __A, __m128d __B)
560{
561 return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
562}
563
564extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
565_mm_ucomigt_sd (__m128d __A, __m128d __B)
566{
567 return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
568}
569
570extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
571_mm_ucomige_sd (__m128d __A, __m128d __B)
572{
573 return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
574}
575
576extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
577_mm_ucomineq_sd (__m128d __A, __m128d __B)
578{
579 return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
580}
581
582/* Create a vector of Qi, where i is the element number. */
583
584extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
585_mm_set_epi64x (long long __q1, long long __q0)
586{
587 return __extension__ (__m128i)(__v2di){ __q0, __q1 };
588}
589
590extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
591_mm_set_epi64 (__m64 __q1, __m64 __q0)
592{
593 return _mm_set_epi64x ((long long)__q1, (long long)__q0);
594}
595
596extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
597_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
598{
599 return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
600}
601
602extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
603_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
604 short __q3, short __q2, short __q1, short __q0)
605{
606 return __extension__ (__m128i)(__v8hi){
607 __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
608}
609
610extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
611_mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
612 char __q11, char __q10, char __q09, char __q08,
613 char __q07, char __q06, char __q05, char __q04,
614 char __q03, char __q02, char __q01, char __q00)
615{
616 return __extension__ (__m128i)(__v16qi){
617 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
618 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
619 };
620}
621
622/* Set all of the elements of the vector to A. */
623
624extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
625_mm_set1_epi64x (long long __A)
626{
627 return _mm_set_epi64x (__A, __A);
628}
629
630extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
631_mm_set1_epi64 (__m64 __A)
632{
633 return _mm_set_epi64 (__A, __A);
634}
635
636extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
637_mm_set1_epi32 (int __A)
638{
639 return _mm_set_epi32 (__A, __A, __A, __A);
640}
641
642extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
643_mm_set1_epi16 (short __A)
644{
645 return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
646}
647
648extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
649_mm_set1_epi8 (char __A)
650{
651 return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
652 __A, __A, __A, __A, __A, __A, __A, __A);
653}
654
655/* Create a vector of Qi, where i is the element number.
656 The parameter order is reversed from the _mm_set_epi* functions. */
657
658extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
659_mm_setr_epi64 (__m64 __q0, __m64 __q1)
660{
661 return _mm_set_epi64 (__q1, __q0);
662}
663
664extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
665_mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
666{
667 return _mm_set_epi32 (__q3, __q2, __q1, __q0);
668}
669
670extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
671_mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
672 short __q4, short __q5, short __q6, short __q7)
673{
674 return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
675}
676
677extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
678_mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
679 char __q04, char __q05, char __q06, char __q07,
680 char __q08, char __q09, char __q10, char __q11,
681 char __q12, char __q13, char __q14, char __q15)
682{
683 return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
684 __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
685}
686
/* Load and store 128-bit integer vectors, or only their low 64 bits.  */
688
689extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
690_mm_load_si128 (__m128i const *__P)
691{
692 return *__P;
693}
694
695extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
696_mm_loadu_si128 (__m128i const *__P)
697{
698 return (__m128i) __builtin_ia32_loaddqu ((char const *)__P);
699}
700
701extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
702_mm_loadl_epi64 (__m128i const *__P)
703{
704 return _mm_set_epi64 ((__m64)0LL, *(__m64 *)__P);
705}
706
707extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
708_mm_store_si128 (__m128i *__P, __m128i __B)
709{
710 *__P = __B;
711}
712
713extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
714_mm_storeu_si128 (__m128i *__P, __m128i __B)
715{
716 __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B);
717}
718
719extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
720_mm_storel_epi64 (__m128i *__P, __m128i __B)
721{
722 *(long long *)__P = ((__v2di)__B)[0];
723}
724
725extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
726_mm_movepi64_pi64 (__m128i __B)
727{
728 return (__m64) ((__v2di)__B)[0];
729}
730
731extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
732_mm_movpi64_epi64 (__m64 __A)
733{
734 return _mm_set_epi64 ((__m64)0LL, __A);
735}
736
737extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
738_mm_move_epi64 (__m128i __A)
739{
740 return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
741}
742
743/* Create an undefined vector. */
744extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
745_mm_undefined_si128 (void)
746{
747 __m128i __Y = __Y;
748 return __Y;
749}
750
751/* Create a vector of zeros. */
752extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
753_mm_setzero_si128 (void)
754{
755 return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
756}
757
758extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
759_mm_cvtepi32_pd (__m128i __A)
760{
761 return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
762}
763
764extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
765_mm_cvtepi32_ps (__m128i __A)
766{
767 return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
768}
769
770extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
771_mm_cvtpd_epi32 (__m128d __A)
772{
773 return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
774}
775
776extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
777_mm_cvtpd_pi32 (__m128d __A)
778{
779 return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
780}
781
782extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
783_mm_cvtpd_ps (__m128d __A)
784{
785 return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
786}
787
788extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
789_mm_cvttpd_epi32 (__m128d __A)
790{
791 return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
792}
793
794extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
795_mm_cvttpd_pi32 (__m128d __A)
796{
797 return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
798}
799
800extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
801_mm_cvtpi32_pd (__m64 __A)
802{
803 return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
804}
805
806extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
807_mm_cvtps_epi32 (__m128 __A)
808{
809 return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
810}
811
812extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
813_mm_cvttps_epi32 (__m128 __A)
814{
815 return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
816}
817
818extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
819_mm_cvtps_pd (__m128 __A)
820{
821 return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
822}
823
824extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
825_mm_cvtsd_si32 (__m128d __A)
826{
827 return __builtin_ia32_cvtsd2si ((__v2df) __A);
828}
829
830#ifdef __x86_64__
831/* Intel intrinsic. */
832extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
833_mm_cvtsd_si64 (__m128d __A)
834{
835 return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
836}
837
838/* Microsoft intrinsic. */
839extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
840_mm_cvtsd_si64x (__m128d __A)
841{
842 return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
843}
844#endif
845
846extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
847_mm_cvttsd_si32 (__m128d __A)
848{
849 return __builtin_ia32_cvttsd2si ((__v2df) __A);
850}
851
852#ifdef __x86_64__
853/* Intel intrinsic. */
854extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
855_mm_cvttsd_si64 (__m128d __A)
856{
857 return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
858}
859
860/* Microsoft intrinsic. */
861extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
862_mm_cvttsd_si64x (__m128d __A)
863{
864 return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
865}
866#endif
867
868extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
869_mm_cvtsd_ss (__m128 __A, __m128d __B)
870{
871 return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
872}
873
874extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
875_mm_cvtsi32_sd (__m128d __A, int __B)
876{
877 return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
878}
879
880#ifdef __x86_64__
881/* Intel intrinsic. */
882extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
883_mm_cvtsi64_sd (__m128d __A, long long __B)
884{
885 return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
886}
887
888/* Microsoft intrinsic. */
889extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
890_mm_cvtsi64x_sd (__m128d __A, long long __B)
891{
892 return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
893}
894#endif
895
896extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
897_mm_cvtss_sd (__m128d __A, __m128 __B)
898{
899 return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
900}
901
/* Shuffle A and B with the SHUFPD builtin under the selector given as
   the third argument.  When optimizing, this is an always-inline
   function; otherwise it is a macro that passes N straight to the
   builtin (NOTE(review): presumably because the selector must reach the
   builtin as a compile-time constant -- confirm).  */
#if defined (__OPTIMIZE__)
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
{
  __v2df __first = (__v2df)__A;
  __v2df __second = (__v2df)__B;
  return (__m128d)__builtin_ia32_shufpd (__first, __second, __mask);
}
#else
#define _mm_shuffle_pd(A, B, N)						\
  ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A),		\
				   (__v2df)(__m128d)(B),		\
				   (int)(N)))
#endif
913
914extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
915_mm_unpackhi_pd (__m128d __A, __m128d __B)
916{
917 return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
918}
919
920extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
921_mm_unpacklo_pd (__m128d __A, __m128d __B)
922{
923 return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
924}
925
926extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
927_mm_loadh_pd (__m128d __A, double const *__B)
928{
929 return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
930}
931
932extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
933_mm_loadl_pd (__m128d __A, double const *__B)
934{
935 return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
936}
937
938extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
939_mm_movemask_pd (__m128d __A)
940{
941 return __builtin_ia32_movmskpd ((__v2df)__A);
942}
943
944extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
945_mm_packs_epi16 (__m128i __A, __m128i __B)
946{
947 return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
948}
949
950extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
951_mm_packs_epi32 (__m128i __A, __m128i __B)
952{
953 return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
954}
955
956extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
957_mm_packus_epi16 (__m128i __A, __m128i __B)
958{
959 return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
960}
961
962extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
963_mm_unpackhi_epi8 (__m128i __A, __m128i __B)
964{
965 return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
966}
967
968extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
969_mm_unpackhi_epi16 (__m128i __A, __m128i __B)
970{
971 return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
972}
973
974extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
975_mm_unpackhi_epi32 (__m128i __A, __m128i __B)
976{
977 return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
978}
979
980extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
981_mm_unpackhi_epi64 (__m128i __A, __m128i __B)
982{
983 return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
984}
985
986extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
987_mm_unpacklo_epi8 (__m128i __A, __m128i __B)
988{
989 return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
990}
991
992extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
993_mm_unpacklo_epi16 (__m128i __A, __m128i __B)
994{
995 return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
996}
997
998extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
999_mm_unpacklo_epi32 (__m128i __A, __m128i __B)
1000{
1001 return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
1002}
1003
1004extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1005_mm_unpacklo_epi64 (__m128i __A, __m128i __B)
1006{
1007 return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
1008}
1009
1010extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1011_mm_add_epi8 (__m128i __A, __m128i __B)
1012{
1013 return (__m128i) ((__v16qu)__A + (__v16qu)__B);
1014}
1015
1016extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1017_mm_add_epi16 (__m128i __A, __m128i __B)
1018{
1019 return (__m128i) ((__v8hu)__A + (__v8hu)__B);
1020}
1021
1022extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1023_mm_add_epi32 (__m128i __A, __m128i __B)
1024{
1025 return (__m128i) ((__v4su)__A + (__v4su)__B);
1026}
1027
1028extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1029_mm_add_epi64 (__m128i __A, __m128i __B)
1030{
1031 return (__m128i) ((__v2du)__A + (__v2du)__B);
1032}
1033
1034extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1035_mm_adds_epi8 (__m128i __A, __m128i __B)
1036{
1037 return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
1038}
1039
1040extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1041_mm_adds_epi16 (__m128i __A, __m128i __B)
1042{
1043 return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
1044}
1045
1046extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1047_mm_adds_epu8 (__m128i __A, __m128i __B)
1048{
1049 return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
1050}
1051
1052extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1053_mm_adds_epu16 (__m128i __A, __m128i __B)
1054{
1055 return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
1056}
1057
1058extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1059_mm_sub_epi8 (__m128i __A, __m128i __B)
1060{
1061 return (__m128i) ((__v16qu)__A - (__v16qu)__B);
1062}
1063
1064extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1065_mm_sub_epi16 (__m128i __A, __m128i __B)
1066{
1067 return (__m128i) ((__v8hu)__A - (__v8hu)__B);
1068}
1069
1070extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1071_mm_sub_epi32 (__m128i __A, __m128i __B)
1072{
1073 return (__m128i) ((__v4su)__A - (__v4su)__B);
1074}
1075
1076extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1077_mm_sub_epi64 (__m128i __A, __m128i __B)
1078{
1079 return (__m128i) ((__v2du)__A - (__v2du)__B);
1080}
1081
1082extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1083_mm_subs_epi8 (__m128i __A, __m128i __B)
1084{
1085 return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
1086}
1087
1088extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1089_mm_subs_epi16 (__m128i __A, __m128i __B)
1090{
1091 return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
1092}
1093
1094extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1095_mm_subs_epu8 (__m128i __A, __m128i __B)
1096{
1097 return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
1098}
1099
1100extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1101_mm_subs_epu16 (__m128i __A, __m128i __B)
1102{
1103 return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
1104}
1105
1106extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1107_mm_madd_epi16 (__m128i __A, __m128i __B)
1108{
1109 return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
1110}
1111
1112extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1113_mm_mulhi_epi16 (__m128i __A, __m128i __B)
1114{
1115 return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
1116}
1117
1118extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1119_mm_mullo_epi16 (__m128i __A, __m128i __B)
1120{
1121 return (__m128i) ((__v8hu)__A * (__v8hu)__B);
1122}
1123
1124extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1125_mm_mul_su32 (__m64 __A, __m64 __B)
1126{
1127 return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
1128}
1129
1130extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1131_mm_mul_epu32 (__m128i __A, __m128i __B)
1132{
1133 return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
1134}
1135
1136extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1137_mm_slli_epi16 (__m128i __A, int __B)
1138{
1139 return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
1140}
1141
1142extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1143_mm_slli_epi32 (__m128i __A, int __B)
1144{
1145 return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
1146}
1147
1148extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1149_mm_slli_epi64 (__m128i __A, int __B)
1150{
1151 return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
1152}
1153
1154extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1155_mm_srai_epi16 (__m128i __A, int __B)
1156{
1157 return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
1158}
1159
1160extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1161_mm_srai_epi32 (__m128i __A, int __B)
1162{
1163 return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
1164}
1165
#ifdef __OPTIMIZE__
/* Whole-register shifts.  Every variant multiplies its count by 8 before
   calling the psrldqi128 / pslldqi128 builtin.  The `bs' spellings and the
   plain spellings issue identical builtin calls.  The scaled count is
   written inline in the call so it stays a constant expression.  */

/* Right shift, `bs' spelling.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_bsrli_si128 (__m128i __A, const int __N)
{
  return (__m128i) __builtin_ia32_psrldqi128 (__A, 8 * __N);
}

/* Right shift, plain spelling.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_si128 (__m128i __A, const int __N)
{
  return (__m128i) __builtin_ia32_psrldqi128 (__A, 8 * __N);
}

/* Left shift, `bs' spelling.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_bslli_si128 (__m128i __A, const int __N)
{
  return (__m128i) __builtin_ia32_pslldqi128 (__A, 8 * __N);
}

/* Left shift, plain spelling.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_si128 (__m128i __A, const int __N)
{
  return (__m128i) __builtin_ia32_pslldqi128 (__A, 8 * __N);
}
#else
/* Macro forms of the same four builtin calls for non-optimizing builds.  */
#define _mm_bsrli_si128(A, N) \
  ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
#define _mm_srli_si128(A, N) \
  ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
#define _mm_bslli_si128(A, N) \
  ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
#define _mm_slli_si128(A, N) \
  ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
#endif
1200
1201extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1202_mm_srli_epi16 (__m128i __A, int __B)
1203{
1204 return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
1205}
1206
1207extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1208_mm_srli_epi32 (__m128i __A, int __B)
1209{
1210 return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
1211}
1212
1213extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1214_mm_srli_epi64 (__m128i __A, int __B)
1215{
1216 return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
1217}
1218
1219extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1220_mm_sll_epi16 (__m128i __A, __m128i __B)
1221{
1222 return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
1223}
1224
1225extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1226_mm_sll_epi32 (__m128i __A, __m128i __B)
1227{
1228 return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
1229}
1230
1231extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1232_mm_sll_epi64 (__m128i __A, __m128i __B)
1233{
1234 return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
1235}
1236
1237extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1238_mm_sra_epi16 (__m128i __A, __m128i __B)
1239{
1240 return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
1241}
1242
1243extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1244_mm_sra_epi32 (__m128i __A, __m128i __B)
1245{
1246 return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
1247}
1248
1249extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1250_mm_srl_epi16 (__m128i __A, __m128i __B)
1251{
1252 return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
1253}
1254
1255extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1256_mm_srl_epi32 (__m128i __A, __m128i __B)
1257{
1258 return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
1259}
1260
1261extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1262_mm_srl_epi64 (__m128i __A, __m128i __B)
1263{
1264 return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
1265}
1266
1267extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1268_mm_and_si128 (__m128i __A, __m128i __B)
1269{
1270 return (__m128i) ((__v2du)__A & (__v2du)__B);
1271}
1272
1273extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1274_mm_andnot_si128 (__m128i __A, __m128i __B)
1275{
1276 return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
1277}
1278
1279extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1280_mm_or_si128 (__m128i __A, __m128i __B)
1281{
1282 return (__m128i) ((__v2du)__A | (__v2du)__B);
1283}
1284
1285extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1286_mm_xor_si128 (__m128i __A, __m128i __B)
1287{
1288 return (__m128i) ((__v2du)__A ^ (__v2du)__B);
1289}
1290
1291extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1292_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
1293{
1294 return (__m128i) ((__v16qi)__A == (__v16qi)__B);
1295}
1296
1297extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1298_mm_cmpeq_epi16 (__m128i __A, __m128i __B)
1299{
1300 return (__m128i) ((__v8hi)__A == (__v8hi)__B);
1301}
1302
1303extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1304_mm_cmpeq_epi32 (__m128i __A, __m128i __B)
1305{
1306 return (__m128i) ((__v4si)__A == (__v4si)__B);
1307}
1308
1309extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1310_mm_cmplt_epi8 (__m128i __A, __m128i __B)
1311{
1312 return (__m128i) ((__v16qi)__A < (__v16qi)__B);
1313}
1314
1315extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1316_mm_cmplt_epi16 (__m128i __A, __m128i __B)
1317{
1318 return (__m128i) ((__v8hi)__A < (__v8hi)__B);
1319}
1320
1321extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1322_mm_cmplt_epi32 (__m128i __A, __m128i __B)
1323{
1324 return (__m128i) ((__v4si)__A < (__v4si)__B);
1325}
1326
1327extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1328_mm_cmpgt_epi8 (__m128i __A, __m128i __B)
1329{
1330 return (__m128i) ((__v16qi)__A > (__v16qi)__B);
1331}
1332
1333extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1334_mm_cmpgt_epi16 (__m128i __A, __m128i __B)
1335{
1336 return (__m128i) ((__v8hi)__A > (__v8hi)__B);
1337}
1338
1339extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1340_mm_cmpgt_epi32 (__m128i __A, __m128i __B)
1341{
1342 return (__m128i) ((__v4si)__A > (__v4si)__B);
1343}
1344
#ifdef __OPTIMIZE__
/* Reads lane __N of __A (viewed as eight shorts) through the vec_ext_v8hi
   builtin.  The value is narrowed to unsigned short before being widened
   to the int return type, so the result is never negative.  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi16 (__m128i const __A, int const __N)
{
  const unsigned short __lane
    = (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi) __A, __N);
  return __lane;
}

/* Calls the vec_set_v8hi builtin with __A (viewed as eight shorts), the
   value __D and the lane selector __N, and returns the resulting vector.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
{
  const __v8hi __src = (__v8hi) __A;
  return (__m128i) __builtin_ia32_vec_set_v8hi (__src, __D, __N);
}
#else
/* Macro forms of the same builtin calls for non-optimizing builds.  */
#define _mm_extract_epi16(A, N) \
  ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_insert_epi16(A, D, N)				\
  ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A),	\
					  (int)(D), (int)(N)))
#endif
1364
1365extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1366_mm_max_epi16 (__m128i __A, __m128i __B)
1367{
1368 return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
1369}
1370
1371extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1372_mm_max_epu8 (__m128i __A, __m128i __B)
1373{
1374 return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
1375}
1376
1377extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1378_mm_min_epi16 (__m128i __A, __m128i __B)
1379{
1380 return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
1381}
1382
1383extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1384_mm_min_epu8 (__m128i __A, __m128i __B)
1385{
1386 return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
1387}
1388
1389extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1390_mm_movemask_epi8 (__m128i __A)
1391{
1392 return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
1393}
1394
1395extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1396_mm_mulhi_epu16 (__m128i __A, __m128i __B)
1397{
1398 return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
1399}
1400
#ifdef __OPTIMIZE__
/* Integer shuffles.  Each function forwards its vector operand and the
   selector __mask to the matching builtin.  __mask is passed through
   untouched; presumably the builtins need it as a compile-time constant,
   which is why macro forms are used when not optimizing.  */

/* pshufhw builtin on __A viewed as eight shorts.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflehi_epi16 (__m128i __A, const int __mask)
{
  const __v8hi __src = (__v8hi) __A;
  return (__m128i) __builtin_ia32_pshufhw (__src, __mask);
}

/* pshuflw builtin on __A viewed as eight shorts.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflelo_epi16 (__m128i __A, const int __mask)
{
  const __v8hi __src = (__v8hi) __A;
  return (__m128i) __builtin_ia32_pshuflw (__src, __mask);
}

/* pshufd builtin on __A viewed as four ints.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi32 (__m128i __A, const int __mask)
{
  const __v4si __src = (__v4si) __A;
  return (__m128i) __builtin_ia32_pshufd (__src, __mask);
}
#else
/* Macro forms of the same three builtin calls for non-optimizing builds.  */
#define _mm_shufflehi_epi16(A, N) \
  ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_shufflelo_epi16(A, N) \
  ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_shuffle_epi32(A, N) \
  ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N)))
#endif
1427
1428extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1429_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
1430{
1431 __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
1432}
1433
1434extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1435_mm_avg_epu8 (__m128i __A, __m128i __B)
1436{
1437 return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
1438}
1439
1440extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1441_mm_avg_epu16 (__m128i __A, __m128i __B)
1442{
1443 return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
1444}
1445
1446extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1447_mm_sad_epu8 (__m128i __A, __m128i __B)
1448{
1449 return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
1450}
1451
/* Stores the int __B to *__A through the movnti builtin.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si32 (int *__A, int __B)
{
  int *const __dst = __A;
  const int __val = __B;
  __builtin_ia32_movnti (__dst, __val);
}
1457
1458#ifdef __x86_64__
/* Stores the 64-bit integer __B to *__A through the movnti64 builtin.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si64 (long long int *__A, long long int __B)
{
  long long int *const __dst = __A;
  const long long int __val = __B;
  __builtin_ia32_movnti64 (__dst, __val);
}
1464#endif
1465
1466extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1467_mm_stream_si128 (__m128i *__A, __m128i __B)
1468{
1469 __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
1470}
1471
1472extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1473_mm_stream_pd (double *__A, __m128d __B)
1474{
1475 __builtin_ia32_movntpd (__A, (__v2df)__B);
1476}
1477
/* Invokes the clflush builtin on the address __A.  The pointer is const,
   so the pointed-to data is not modified through it here.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clflush (void const *__A)
{
  void const *const __addr = __A;
  __builtin_ia32_clflush (__addr);
}
1483
/* Issues the lfence builtin.  Takes no arguments and returns nothing.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_lfence (void)
{
  __builtin_ia32_lfence ();
  return;
}
1489
/* Issues the mfence builtin.  Takes no arguments and returns nothing.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mfence (void)
{
  __builtin_ia32_mfence ();
  return;
}
1495
1496extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1497_mm_cvtsi32_si128 (int __A)
1498{
1499 return _mm_set_epi32 (0, 0, 0, __A);
1500}
1501
1502#ifdef __x86_64__
1503/* Intel intrinsic. */
1504extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1505_mm_cvtsi64_si128 (long long __A)
1506{
1507 return _mm_set_epi64x (0, __A);
1508}
1509
1510/* Microsoft intrinsic. */
1511extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1512_mm_cvtsi64x_si128 (long long __A)
1513{
1514 return _mm_set_epi64x (0, __A);
1515}
1516#endif
1517
1518/* Casts between various SP, DP, INT vector types. Note that these do no
1519 conversion of values, they just change the type. */
1520extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1521_mm_castpd_ps(__m128d __A)
1522{
1523 return (__m128) __A;
1524}
1525
1526extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1527_mm_castpd_si128(__m128d __A)
1528{
1529 return (__m128i) __A;
1530}
1531
1532extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1533_mm_castps_pd(__m128 __A)
1534{
1535 return (__m128d) __A;
1536}
1537
1538extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1539_mm_castps_si128(__m128 __A)
1540{
1541 return (__m128i) __A;
1542}
1543
1544extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1545_mm_castsi128_ps(__m128i __A)
1546{
1547 return (__m128) __A;
1548}
1549
1550extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1551_mm_castsi128_pd(__m128i __A)
1552{
1553 return (__m128d) __A;
1554}
1555
1556#ifdef __DISABLE_SSE2__
1557#undef __DISABLE_SSE2__
1558#pragma GCC pop_options
1559#endif /* __DISABLE_SSE2__ */
1560
1561#endif /* _EMMINTRIN_H_INCLUDED */
Note: See TracBrowser for help on using the repository browser.