source: Daodan/MSYS2/mingw32/lib/gcc/i686-w64-mingw32/11.2.0/include/mmintrin.h

Last change on this file was 1166, checked in by rossy, 3 years ago

Daodan: Replace MinGW build env with an up-to-date MSYS2 env

File size: 31.1 KB
Line 
1/* Copyright (C) 2002-2021 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24/* Implemented from the specification included in the Intel C++ Compiler
25 User Guide and Reference, version 9.0. */
26
27#ifndef _MMINTRIN_H_INCLUDED
28#define _MMINTRIN_H_INCLUDED
29
/* If the needed ISA is not already enabled (MMX missing, or SSE missing on
   x86-64), save the current options and select a suitable target.
   __DISABLE_MMX__ records that the saved options must be popped again. */
#if (defined (__x86_64__) && !defined (__SSE__)) || !defined (__MMX__)
# pragma GCC push_options
# if defined (__MMX_WITH_SSE__)
#  pragma GCC target("sse2")
# elif defined (__x86_64__)
#  pragma GCC target("sse,mmx")
# else
#  pragma GCC target("mmx")
# endif
# define __DISABLE_MMX__
#endif /* ISA not enabled by default */
41
/* Public vector types. They are marked __may_alias__ because the Intel API
   is flexible enough that they must be allowed to alias other vector types
   and their scalar components. */
typedef int __m64
  __attribute__ ((__vector_size__ (8), __may_alias__));
typedef int __m32
  __attribute__ ((__vector_size__ (4), __may_alias__));
typedef short __m16
  __attribute__ ((__vector_size__ (2), __may_alias__));
47
/* Unaligned counterparts of __m64, __m32 and __m16: same element type and
   size, but declared with __aligned__ (1). */
typedef int __m64_u
  __attribute__ ((__vector_size__ (8), __may_alias__, __aligned__ (1)));
typedef int __m32_u
  __attribute__ ((__vector_size__ (4), __may_alias__, __aligned__ (1)));
typedef short __m16_u
  __attribute__ ((__vector_size__ (2), __may_alias__, __aligned__ (1)));
54
/* 8-byte vector types used internally to implement the intrinsics; one per
   element type (int, short, char, long long, float). */
typedef int       __v2si __attribute__ ((__vector_size__ (8)));
typedef short     __v4hi __attribute__ ((__vector_size__ (8)));
typedef char      __v8qi __attribute__ ((__vector_size__ (8)));
typedef long long __v1di __attribute__ ((__vector_size__ (8)));
typedef float     __v2sf __attribute__ ((__vector_size__ (8)));
61
/* Empty the multimedia state by invoking the emms builtin. */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_empty (void)
{
  __builtin_ia32_emms ();
}
68
/* Same as _mm_empty; provided under the _m_empty name. */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_m_empty (void)
{
  _mm_empty ();
}
74
75/* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */
76extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
77_mm_cvtsi32_si64 (int __i)
78{
79 return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
80}
81
82extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
83_m_from_int (int __i)
84{
85 return _mm_cvtsi32_si64 (__i);
86}
87
88#ifdef __x86_64__
89/* Convert I to a __m64 object. */
90
91/* Intel intrinsic. */
92extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
93_m_from_int64 (long long __i)
94{
95 return (__m64) __i;
96}
97
98extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
99_mm_cvtsi64_m64 (long long __i)
100{
101 return (__m64) __i;
102}
103
104/* Microsoft intrinsic. */
105extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
106_mm_cvtsi64x_si64 (long long __i)
107{
108 return (__m64) __i;
109}
110
111extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
112_mm_set_pi64x (long long __i)
113{
114 return (__m64) __i;
115}
116#endif
117
118/* Convert the lower 32 bits of the __m64 object into an integer. */
119extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
120_mm_cvtsi64_si32 (__m64 __i)
121{
122 return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
123}
124
125extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
126_m_to_int (__m64 __i)
127{
128 return _mm_cvtsi64_si32 (__i);
129}
130
131#ifdef __x86_64__
132/* Convert the __m64 object to a 64bit integer. */
133
134/* Intel intrinsic. */
135extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
136_m_to_int64 (__m64 __i)
137{
138 return (long long)__i;
139}
140
141extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
142_mm_cvtm64_si64 (__m64 __i)
143{
144 return (long long)__i;
145}
146
147/* Microsoft intrinsic. */
148extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
149_mm_cvtsi64_si64x (__m64 __i)
150{
151 return (long long)__i;
152}
153#endif
154
155/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
156 the result, and the four 16-bit values from M2 into the upper four 8-bit
157 values of the result, all with signed saturation. */
158extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
159_mm_packs_pi16 (__m64 __m1, __m64 __m2)
160{
161 return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
162}
163
164extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
165_m_packsswb (__m64 __m1, __m64 __m2)
166{
167 return _mm_packs_pi16 (__m1, __m2);
168}
169
170/* Pack the two 32-bit values from M1 in to the lower two 16-bit values of
171 the result, and the two 32-bit values from M2 into the upper two 16-bit
172 values of the result, all with signed saturation. */
173extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
174_mm_packs_pi32 (__m64 __m1, __m64 __m2)
175{
176 return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
177}
178
179extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
180_m_packssdw (__m64 __m1, __m64 __m2)
181{
182 return _mm_packs_pi32 (__m1, __m2);
183}
184
185/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
186 the result, and the four 16-bit values from M2 into the upper four 8-bit
187 values of the result, all with unsigned saturation. */
188extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
189_mm_packs_pu16 (__m64 __m1, __m64 __m2)
190{
191 return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
192}
193
194extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
195_m_packuswb (__m64 __m1, __m64 __m2)
196{
197 return _mm_packs_pu16 (__m1, __m2);
198}
199
200/* Interleave the four 8-bit values from the high half of M1 with the four
201 8-bit values from the high half of M2. */
202extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
203_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
204{
205 return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
206}
207
208extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
209_m_punpckhbw (__m64 __m1, __m64 __m2)
210{
211 return _mm_unpackhi_pi8 (__m1, __m2);
212}
213
214/* Interleave the two 16-bit values from the high half of M1 with the two
215 16-bit values from the high half of M2. */
216extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
217_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
218{
219 return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
220}
221
222extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
223_m_punpckhwd (__m64 __m1, __m64 __m2)
224{
225 return _mm_unpackhi_pi16 (__m1, __m2);
226}
227
228/* Interleave the 32-bit value from the high half of M1 with the 32-bit
229 value from the high half of M2. */
230extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
231_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
232{
233 return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
234}
235
236extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
237_m_punpckhdq (__m64 __m1, __m64 __m2)
238{
239 return _mm_unpackhi_pi32 (__m1, __m2);
240}
241
242/* Interleave the four 8-bit values from the low half of M1 with the four
243 8-bit values from the low half of M2. */
244extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
245_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
246{
247 return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
248}
249
250extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
251_m_punpcklbw (__m64 __m1, __m64 __m2)
252{
253 return _mm_unpacklo_pi8 (__m1, __m2);
254}
255
256/* Interleave the two 16-bit values from the low half of M1 with the two
257 16-bit values from the low half of M2. */
258extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
259_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
260{
261 return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
262}
263
264extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
265_m_punpcklwd (__m64 __m1, __m64 __m2)
266{
267 return _mm_unpacklo_pi16 (__m1, __m2);
268}
269
270/* Interleave the 32-bit value from the low half of M1 with the 32-bit
271 value from the low half of M2. */
272extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
273_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
274{
275 return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
276}
277
278extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
279_m_punpckldq (__m64 __m1, __m64 __m2)
280{
281 return _mm_unpacklo_pi32 (__m1, __m2);
282}
283
284/* Add the 8-bit values in M1 to the 8-bit values in M2. */
285extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
286_mm_add_pi8 (__m64 __m1, __m64 __m2)
287{
288 return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
289}
290
291extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
292_m_paddb (__m64 __m1, __m64 __m2)
293{
294 return _mm_add_pi8 (__m1, __m2);
295}
296
297/* Add the 16-bit values in M1 to the 16-bit values in M2. */
298extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
299_mm_add_pi16 (__m64 __m1, __m64 __m2)
300{
301 return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
302}
303
304extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
305_m_paddw (__m64 __m1, __m64 __m2)
306{
307 return _mm_add_pi16 (__m1, __m2);
308}
309
310/* Add the 32-bit values in M1 to the 32-bit values in M2. */
311extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
312_mm_add_pi32 (__m64 __m1, __m64 __m2)
313{
314 return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
315}
316
317extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
318_m_paddd (__m64 __m1, __m64 __m2)
319{
320 return _mm_add_pi32 (__m1, __m2);
321}
322
323/* Add the 64-bit values in M1 to the 64-bit values in M2. */
324#ifndef __SSE2__
325#pragma GCC push_options
326#ifdef __MMX_WITH_SSE__
327#pragma GCC target("sse2")
328#else
329#pragma GCC target("sse2,mmx")
330#endif
331#define __DISABLE_SSE2__
332#endif /* __SSE2__ */
333
334extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
335_mm_add_si64 (__m64 __m1, __m64 __m2)
336{
337 return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
338}
339#ifdef __DISABLE_SSE2__
340#undef __DISABLE_SSE2__
341#pragma GCC pop_options
342#endif /* __DISABLE_SSE2__ */
343
344/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
345 saturated arithmetic. */
346extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
347_mm_adds_pi8 (__m64 __m1, __m64 __m2)
348{
349 return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
350}
351
352extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
353_m_paddsb (__m64 __m1, __m64 __m2)
354{
355 return _mm_adds_pi8 (__m1, __m2);
356}
357
358/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
359 saturated arithmetic. */
360extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
361_mm_adds_pi16 (__m64 __m1, __m64 __m2)
362{
363 return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
364}
365
366extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
367_m_paddsw (__m64 __m1, __m64 __m2)
368{
369 return _mm_adds_pi16 (__m1, __m2);
370}
371
372/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
373 saturated arithmetic. */
374extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
375_mm_adds_pu8 (__m64 __m1, __m64 __m2)
376{
377 return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
378}
379
380extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
381_m_paddusb (__m64 __m1, __m64 __m2)
382{
383 return _mm_adds_pu8 (__m1, __m2);
384}
385
386/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
387 saturated arithmetic. */
388extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
389_mm_adds_pu16 (__m64 __m1, __m64 __m2)
390{
391 return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
392}
393
394extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
395_m_paddusw (__m64 __m1, __m64 __m2)
396{
397 return _mm_adds_pu16 (__m1, __m2);
398}
399
400/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
401extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
402_mm_sub_pi8 (__m64 __m1, __m64 __m2)
403{
404 return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
405}
406
407extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
408_m_psubb (__m64 __m1, __m64 __m2)
409{
410 return _mm_sub_pi8 (__m1, __m2);
411}
412
413/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
414extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
415_mm_sub_pi16 (__m64 __m1, __m64 __m2)
416{
417 return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
418}
419
420extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
421_m_psubw (__m64 __m1, __m64 __m2)
422{
423 return _mm_sub_pi16 (__m1, __m2);
424}
425
426/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
427extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
428_mm_sub_pi32 (__m64 __m1, __m64 __m2)
429{
430 return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
431}
432
433extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
434_m_psubd (__m64 __m1, __m64 __m2)
435{
436 return _mm_sub_pi32 (__m1, __m2);
437}
438
/* Subtract the 64-bit value in M2 from the 64-bit value in M1. */
440#ifndef __SSE2__
441#pragma GCC push_options
442#ifdef __MMX_WITH_SSE__
443#pragma GCC target("sse2")
444#else
445#pragma GCC target("sse2,mmx")
446#endif
447#define __DISABLE_SSE2__
448#endif /* __SSE2__ */
449
450extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
451_mm_sub_si64 (__m64 __m1, __m64 __m2)
452{
453 return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
454}
455#ifdef __DISABLE_SSE2__
456#undef __DISABLE_SSE2__
457#pragma GCC pop_options
458#endif /* __DISABLE_SSE2__ */
459
460/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
461 saturating arithmetic. */
462extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
463_mm_subs_pi8 (__m64 __m1, __m64 __m2)
464{
465 return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
466}
467
468extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
469_m_psubsb (__m64 __m1, __m64 __m2)
470{
471 return _mm_subs_pi8 (__m1, __m2);
472}
473
474/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
475 signed saturating arithmetic. */
476extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
477_mm_subs_pi16 (__m64 __m1, __m64 __m2)
478{
479 return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
480}
481
482extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
483_m_psubsw (__m64 __m1, __m64 __m2)
484{
485 return _mm_subs_pi16 (__m1, __m2);
486}
487
488/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
489 unsigned saturating arithmetic. */
490extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
491_mm_subs_pu8 (__m64 __m1, __m64 __m2)
492{
493 return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
494}
495
496extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
497_m_psubusb (__m64 __m1, __m64 __m2)
498{
499 return _mm_subs_pu8 (__m1, __m2);
500}
501
502/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
503 unsigned saturating arithmetic. */
504extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
505_mm_subs_pu16 (__m64 __m1, __m64 __m2)
506{
507 return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
508}
509
510extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
511_m_psubusw (__m64 __m1, __m64 __m2)
512{
513 return _mm_subs_pu16 (__m1, __m2);
514}
515
516/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
517 four 32-bit intermediate results, which are then summed by pairs to
518 produce two 32-bit results. */
519extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
520_mm_madd_pi16 (__m64 __m1, __m64 __m2)
521{
522 return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
523}
524
525extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
526_m_pmaddwd (__m64 __m1, __m64 __m2)
527{
528 return _mm_madd_pi16 (__m1, __m2);
529}
530
531/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
532 M2 and produce the high 16 bits of the 32-bit results. */
533extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
534_mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
535{
536 return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
537}
538
539extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
540_m_pmulhw (__m64 __m1, __m64 __m2)
541{
542 return _mm_mulhi_pi16 (__m1, __m2);
543}
544
545/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
546 the low 16 bits of the results. */
547extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
548_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
549{
550 return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
551}
552
553extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
554_m_pmullw (__m64 __m1, __m64 __m2)
555{
556 return _mm_mullo_pi16 (__m1, __m2);
557}
558
559/* Shift four 16-bit values in M left by COUNT. */
560extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
561_mm_sll_pi16 (__m64 __m, __m64 __count)
562{
563 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
564}
565
566extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
567_m_psllw (__m64 __m, __m64 __count)
568{
569 return _mm_sll_pi16 (__m, __count);
570}
571
572extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
573_mm_slli_pi16 (__m64 __m, int __count)
574{
575 return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
576}
577
578extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
579_m_psllwi (__m64 __m, int __count)
580{
581 return _mm_slli_pi16 (__m, __count);
582}
583
584/* Shift two 32-bit values in M left by COUNT. */
585extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
586_mm_sll_pi32 (__m64 __m, __m64 __count)
587{
588 return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
589}
590
591extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
592_m_pslld (__m64 __m, __m64 __count)
593{
594 return _mm_sll_pi32 (__m, __count);
595}
596
597extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
598_mm_slli_pi32 (__m64 __m, int __count)
599{
600 return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
601}
602
603extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
604_m_pslldi (__m64 __m, int __count)
605{
606 return _mm_slli_pi32 (__m, __count);
607}
608
609/* Shift the 64-bit value in M left by COUNT. */
610extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
611_mm_sll_si64 (__m64 __m, __m64 __count)
612{
613 return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
614}
615
616extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
617_m_psllq (__m64 __m, __m64 __count)
618{
619 return _mm_sll_si64 (__m, __count);
620}
621
622extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
623_mm_slli_si64 (__m64 __m, int __count)
624{
625 return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
626}
627
628extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
629_m_psllqi (__m64 __m, int __count)
630{
631 return _mm_slli_si64 (__m, __count);
632}
633
634/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */
635extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
636_mm_sra_pi16 (__m64 __m, __m64 __count)
637{
638 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
639}
640
641extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
642_m_psraw (__m64 __m, __m64 __count)
643{
644 return _mm_sra_pi16 (__m, __count);
645}
646
647extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
648_mm_srai_pi16 (__m64 __m, int __count)
649{
650 return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
651}
652
653extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
654_m_psrawi (__m64 __m, int __count)
655{
656 return _mm_srai_pi16 (__m, __count);
657}
658
659/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */
660extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
661_mm_sra_pi32 (__m64 __m, __m64 __count)
662{
663 return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
664}
665
666extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
667_m_psrad (__m64 __m, __m64 __count)
668{
669 return _mm_sra_pi32 (__m, __count);
670}
671
672extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
673_mm_srai_pi32 (__m64 __m, int __count)
674{
675 return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
676}
677
678extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
679_m_psradi (__m64 __m, int __count)
680{
681 return _mm_srai_pi32 (__m, __count);
682}
683
684/* Shift four 16-bit values in M right by COUNT; shift in zeros. */
685extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
686_mm_srl_pi16 (__m64 __m, __m64 __count)
687{
688 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
689}
690
691extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
692_m_psrlw (__m64 __m, __m64 __count)
693{
694 return _mm_srl_pi16 (__m, __count);
695}
696
697extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
698_mm_srli_pi16 (__m64 __m, int __count)
699{
700 return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
701}
702
703extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
704_m_psrlwi (__m64 __m, int __count)
705{
706 return _mm_srli_pi16 (__m, __count);
707}
708
709/* Shift two 32-bit values in M right by COUNT; shift in zeros. */
710extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
711_mm_srl_pi32 (__m64 __m, __m64 __count)
712{
713 return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
714}
715
716extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
717_m_psrld (__m64 __m, __m64 __count)
718{
719 return _mm_srl_pi32 (__m, __count);
720}
721
722extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
723_mm_srli_pi32 (__m64 __m, int __count)
724{
725 return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
726}
727
728extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
729_m_psrldi (__m64 __m, int __count)
730{
731 return _mm_srli_pi32 (__m, __count);
732}
733
/* Shift the 64-bit value in M right by COUNT; shift in zeros. */
735extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
736_mm_srl_si64 (__m64 __m, __m64 __count)
737{
738 return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
739}
740
741extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
742_m_psrlq (__m64 __m, __m64 __count)
743{
744 return _mm_srl_si64 (__m, __count);
745}
746
747extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
748_mm_srli_si64 (__m64 __m, int __count)
749{
750 return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
751}
752
753extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
754_m_psrlqi (__m64 __m, int __count)
755{
756 return _mm_srli_si64 (__m, __count);
757}
758
759/* Bit-wise AND the 64-bit values in M1 and M2. */
760extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
761_mm_and_si64 (__m64 __m1, __m64 __m2)
762{
763 return __builtin_ia32_pand (__m1, __m2);
764}
765
766extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
767_m_pand (__m64 __m1, __m64 __m2)
768{
769 return _mm_and_si64 (__m1, __m2);
770}
771
772/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
773 64-bit value in M2. */
774extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
775_mm_andnot_si64 (__m64 __m1, __m64 __m2)
776{
777 return __builtin_ia32_pandn (__m1, __m2);
778}
779
780extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
781_m_pandn (__m64 __m1, __m64 __m2)
782{
783 return _mm_andnot_si64 (__m1, __m2);
784}
785
786/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
787extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
788_mm_or_si64 (__m64 __m1, __m64 __m2)
789{
790 return __builtin_ia32_por (__m1, __m2);
791}
792
793extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
794_m_por (__m64 __m1, __m64 __m2)
795{
796 return _mm_or_si64 (__m1, __m2);
797}
798
799/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */
800extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
801_mm_xor_si64 (__m64 __m1, __m64 __m2)
802{
803 return __builtin_ia32_pxor (__m1, __m2);
804}
805
806extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
807_m_pxor (__m64 __m1, __m64 __m2)
808{
809 return _mm_xor_si64 (__m1, __m2);
810}
811
812/* Compare eight 8-bit values. The result of the comparison is 0xFF if the
813 test is true and zero if false. */
814extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
815_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
816{
817 return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
818}
819
820extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
821_m_pcmpeqb (__m64 __m1, __m64 __m2)
822{
823 return _mm_cmpeq_pi8 (__m1, __m2);
824}
825
826extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
827_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
828{
829 return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
830}
831
832extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
833_m_pcmpgtb (__m64 __m1, __m64 __m2)
834{
835 return _mm_cmpgt_pi8 (__m1, __m2);
836}
837
838/* Compare four 16-bit values. The result of the comparison is 0xFFFF if
839 the test is true and zero if false. */
840extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
841_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
842{
843 return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
844}
845
846extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
847_m_pcmpeqw (__m64 __m1, __m64 __m2)
848{
849 return _mm_cmpeq_pi16 (__m1, __m2);
850}
851
852extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
853_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
854{
855 return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
856}
857
858extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
859_m_pcmpgtw (__m64 __m1, __m64 __m2)
860{
861 return _mm_cmpgt_pi16 (__m1, __m2);
862}
863
864/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
865 the test is true and zero if false. */
866extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
867_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
868{
869 return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
870}
871
872extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
873_m_pcmpeqd (__m64 __m1, __m64 __m2)
874{
875 return _mm_cmpeq_pi32 (__m1, __m2);
876}
877
878extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
879_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
880{
881 return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
882}
883
884extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
885_m_pcmpgtd (__m64 __m1, __m64 __m2)
886{
887 return _mm_cmpgt_pi32 (__m1, __m2);
888}
889
890/* Creates a 64-bit zero. */
891extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
892_mm_setzero_si64 (void)
893{
894 return (__m64)0LL;
895}
896
897/* Creates a vector of two 32-bit values; I0 is least significant. */
898extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
899_mm_set_pi32 (int __i1, int __i0)
900{
901 return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
902}
903
904/* Creates a vector of four 16-bit values; W0 is least significant. */
905extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
906_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
907{
908 return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
909}
910
911/* Creates a vector of eight 8-bit values; B0 is least significant. */
912extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
913_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
914 char __b3, char __b2, char __b1, char __b0)
915{
916 return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
917 __b4, __b5, __b6, __b7);
918}
919
920/* Similar, but with the arguments in reverse order. */
921extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
922_mm_setr_pi32 (int __i0, int __i1)
923{
924 return _mm_set_pi32 (__i1, __i0);
925}
926
927extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
928_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
929{
930 return _mm_set_pi16 (__w3, __w2, __w1, __w0);
931}
932
933extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
934_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
935 char __b4, char __b5, char __b6, char __b7)
936{
937 return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
938}
939
940/* Creates a vector of two 32-bit values, both elements containing I. */
941extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
942_mm_set1_pi32 (int __i)
943{
944 return _mm_set_pi32 (__i, __i);
945}
946
947/* Creates a vector of four 16-bit values, all elements containing W. */
948extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
949_mm_set1_pi16 (short __w)
950{
951 return _mm_set_pi16 (__w, __w, __w, __w);
952}
953
954/* Creates a vector of eight 8-bit values, all elements containing B. */
955extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
956_mm_set1_pi8 (char __b)
957{
958 return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
959}
/* If __DISABLE_MMX__ is set, the options were pushed earlier in this
   header: clear the marker and pop them. */
#if defined (__DISABLE_MMX__)
# undef __DISABLE_MMX__
# pragma GCC pop_options
#endif /* __DISABLE_MMX__ */
964
965#endif /* _MMINTRIN_H_INCLUDED */
Note: See TracBrowser for help on using the repository browser.