Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log
Repository URL

mmintrin.h@ 1174

Last change on this file since 1174 was 1166, checked in by rossy, 4 years ago
Daodan: Replace MinGW build env with an up-to-date MSYS2 env
File size: 31.1 KB

Line
1	/* Copyright (C) 2002-2021 Free Software Foundation, Inc.
2
3	This file is part of GCC.
4
5	GCC is free software; you can redistribute it and/or modify
6	it under the terms of the GNU General Public License as published by
7	the Free Software Foundation; either version 3, or (at your option)
8	any later version.
9
10	GCC is distributed in the hope that it will be useful,
11	but WITHOUT ANY WARRANTY; without even the implied warranty of
12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	GNU General Public License for more details.
14
15	Under Section 7 of GPL version 3, you are granted additional
16	permissions described in the GCC Runtime Library Exception, version
17	3.1, as published by the Free Software Foundation.
18
19	You should have received a copy of the GNU General Public License and
20	a copy of the GCC Runtime Library Exception along with this program;
21	see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22	<http://www.gnu.org/licenses/>. */
23
24	/* Implemented from the specification included in the Intel C++ Compiler
25	User Guide and Reference, version 9.0. */
26
27	#ifndef _MMINTRIN_H_INCLUDED
28	#define _MMINTRIN_H_INCLUDED
29
/* Enable the instruction sets this header needs when the translation
   unit was not compiled with them: either MMX is missing, or this is
   x86-64 without SSE.  The previous options are saved with push_options
   and __DISABLE_MMX__ records that a matching pop_options is owed at the
   end of the header.  */
#if (defined __x86_64__ && !defined __SSE__) || !defined __MMX__
#pragma GCC push_options
#ifdef __MMX_WITH_SSE__
#pragma GCC target("sse2")
#elif defined __x86_64__
#pragma GCC target("sse,mmx")
#else
#pragma GCC target("mmx")
#endif
#define __DISABLE_MMX__
#endif /* __MMX__ */
41
/* Public vector types.  The Intel API is flexible enough that these must
   be allowed to alias other vector types and their scalar components,
   hence __may_alias__.  */
typedef int __m64 __attribute__ ((__may_alias__, __vector_size__ (8)));
typedef int __m32 __attribute__ ((__may_alias__, __vector_size__ (4)));
typedef short __m16 __attribute__ ((__may_alias__, __vector_size__ (2)));
47
/* Variants of __m64, __m32 and __m16 declared with __aligned__ (1),
   i.e. without their natural alignment requirement.  */
typedef int __m64_u
  __attribute__ ((__vector_size__ (8), __may_alias__, __aligned__ (1)));
typedef int __m32_u
  __attribute__ ((__vector_size__ (4), __may_alias__, __aligned__ (1)));
typedef short __m16_u
  __attribute__ ((__vector_size__ (2), __may_alias__, __aligned__ (1)));
54
/* Internal 8-byte vector types, one per element type, used to implement
   the intrinsics.  */
typedef char __v8qi __attribute__ ((__vector_size__ (8)));
typedef short __v4hi __attribute__ ((__vector_size__ (8)));
typedef int __v2si __attribute__ ((__vector_size__ (8)));
typedef long long __v1di __attribute__ ((__vector_size__ (8)));
typedef float __v2sf __attribute__ ((__vector_size__ (8)));
61
/* Empty the multimedia state by issuing the emms builtin.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_empty (void)
{
  __builtin_ia32_emms ();
}
68
/* Alternate name for _mm_empty; issues the same emms builtin.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_m_empty (void)
{
  __builtin_ia32_emms ();
}
74
75	/* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */
76	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
77	_mm_cvtsi32_si64 (int __i)
78	{
79	return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
80	}
81
82	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
83	_m_from_int (int __i)
84	{
85	return _mm_cvtsi32_si64 (__i);
86	}
87
88	#ifdef __x86_64__
89	/* Convert I to a __m64 object. */
90
91	/* Intel intrinsic. */
92	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
93	_m_from_int64 (long long __i)
94	{
95	return (__m64) __i;
96	}
97
98	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
99	_mm_cvtsi64_m64 (long long __i)
100	{
101	return (__m64) __i;
102	}
103
104	/* Microsoft intrinsic. */
105	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
106	_mm_cvtsi64x_si64 (long long __i)
107	{
108	return (__m64) __i;
109	}
110
111	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
112	_mm_set_pi64x (long long __i)
113	{
114	return (__m64) __i;
115	}
116	#endif
117
118	/* Convert the lower 32 bits of the __m64 object into an integer. */
119	extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
120	_mm_cvtsi64_si32 (__m64 __i)
121	{
122	return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
123	}
124
125	extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
126	_m_to_int (__m64 __i)
127	{
128	return _mm_cvtsi64_si32 (__i);
129	}
130
131	#ifdef __x86_64__
132	/* Convert the __m64 object to a 64bit integer. */
133
134	/* Intel intrinsic. */
135	extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
136	_m_to_int64 (__m64 __i)
137	{
138	return (long long)__i;
139	}
140
141	extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
142	_mm_cvtm64_si64 (__m64 __i)
143	{
144	return (long long)__i;
145	}
146
147	/* Microsoft intrinsic. */
148	extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
149	_mm_cvtsi64_si64x (__m64 __i)
150	{
151	return (long long)__i;
152	}
153	#endif
154
155	/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
156	the result, and the four 16-bit values from M2 into the upper four 8-bit
157	values of the result, all with signed saturation. */
158	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
159	_mm_packs_pi16 (__m64 __m1, __m64 __m2)
160	{
161	return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
162	}
163
164	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
165	_m_packsswb (__m64 __m1, __m64 __m2)
166	{
167	return _mm_packs_pi16 (__m1, __m2);
168	}
169
170	/* Pack the two 32-bit values from M1 in to the lower two 16-bit values of
171	the result, and the two 32-bit values from M2 into the upper two 16-bit
172	values of the result, all with signed saturation. */
173	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
174	_mm_packs_pi32 (__m64 __m1, __m64 __m2)
175	{
176	return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
177	}
178
179	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
180	_m_packssdw (__m64 __m1, __m64 __m2)
181	{
182	return _mm_packs_pi32 (__m1, __m2);
183	}
184
185	/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
186	the result, and the four 16-bit values from M2 into the upper four 8-bit
187	values of the result, all with unsigned saturation. */
188	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
189	_mm_packs_pu16 (__m64 __m1, __m64 __m2)
190	{
191	return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
192	}
193
194	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
195	_m_packuswb (__m64 __m1, __m64 __m2)
196	{
197	return _mm_packs_pu16 (__m1, __m2);
198	}
199
200	/* Interleave the four 8-bit values from the high half of M1 with the four
201	8-bit values from the high half of M2. */
202	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
203	_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
204	{
205	return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
206	}
207
208	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
209	_m_punpckhbw (__m64 __m1, __m64 __m2)
210	{
211	return _mm_unpackhi_pi8 (__m1, __m2);
212	}
213
214	/* Interleave the two 16-bit values from the high half of M1 with the two
215	16-bit values from the high half of M2. */
216	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
217	_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
218	{
219	return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
220	}
221
222	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
223	_m_punpckhwd (__m64 __m1, __m64 __m2)
224	{
225	return _mm_unpackhi_pi16 (__m1, __m2);
226	}
227
228	/* Interleave the 32-bit value from the high half of M1 with the 32-bit
229	value from the high half of M2. */
230	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
231	_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
232	{
233	return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
234	}
235
236	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
237	_m_punpckhdq (__m64 __m1, __m64 __m2)
238	{
239	return _mm_unpackhi_pi32 (__m1, __m2);
240	}
241
242	/* Interleave the four 8-bit values from the low half of M1 with the four
243	8-bit values from the low half of M2. */
244	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
245	_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
246	{
247	return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
248	}
249
250	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
251	_m_punpcklbw (__m64 __m1, __m64 __m2)
252	{
253	return _mm_unpacklo_pi8 (__m1, __m2);
254	}
255
256	/* Interleave the two 16-bit values from the low half of M1 with the two
257	16-bit values from the low half of M2. */
258	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
259	_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
260	{
261	return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
262	}
263
264	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
265	_m_punpcklwd (__m64 __m1, __m64 __m2)
266	{
267	return _mm_unpacklo_pi16 (__m1, __m2);
268	}
269
270	/* Interleave the 32-bit value from the low half of M1 with the 32-bit
271	value from the low half of M2. */
272	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
273	_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
274	{
275	return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
276	}
277
278	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
279	_m_punpckldq (__m64 __m1, __m64 __m2)
280	{
281	return _mm_unpacklo_pi32 (__m1, __m2);
282	}
283
284	/* Add the 8-bit values in M1 to the 8-bit values in M2. */
285	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
286	_mm_add_pi8 (__m64 __m1, __m64 __m2)
287	{
288	return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
289	}
290
291	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
292	_m_paddb (__m64 __m1, __m64 __m2)
293	{
294	return _mm_add_pi8 (__m1, __m2);
295	}
296
297	/* Add the 16-bit values in M1 to the 16-bit values in M2. */
298	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
299	_mm_add_pi16 (__m64 __m1, __m64 __m2)
300	{
301	return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
302	}
303
304	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
305	_m_paddw (__m64 __m1, __m64 __m2)
306	{
307	return _mm_add_pi16 (__m1, __m2);
308	}
309
310	/* Add the 32-bit values in M1 to the 32-bit values in M2. */
311	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
312	_mm_add_pi32 (__m64 __m1, __m64 __m2)
313	{
314	return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
315	}
316
317	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
318	_m_paddd (__m64 __m1, __m64 __m2)
319	{
320	return _mm_add_pi32 (__m1, __m2);
321	}
322
323	/* Add the 64-bit values in M1 to the 64-bit values in M2. */
324	#ifndef __SSE2__
325	#pragma GCC push_options
326	#ifdef __MMX_WITH_SSE__
327	#pragma GCC target("sse2")
328	#else
329	#pragma GCC target("sse2,mmx")
330	#endif
331	#define __DISABLE_SSE2__
332	#endif /* __SSE2__ */
333
334	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
335	_mm_add_si64 (__m64 __m1, __m64 __m2)
336	{
337	return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
338	}
339	#ifdef __DISABLE_SSE2__
340	#undef __DISABLE_SSE2__
341	#pragma GCC pop_options
342	#endif /* __DISABLE_SSE2__ */
343
344	/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
345	saturated arithmetic. */
346	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
347	_mm_adds_pi8 (__m64 __m1, __m64 __m2)
348	{
349	return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
350	}
351
352	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
353	_m_paddsb (__m64 __m1, __m64 __m2)
354	{
355	return _mm_adds_pi8 (__m1, __m2);
356	}
357
358	/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
359	saturated arithmetic. */
360	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
361	_mm_adds_pi16 (__m64 __m1, __m64 __m2)
362	{
363	return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
364	}
365
366	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
367	_m_paddsw (__m64 __m1, __m64 __m2)
368	{
369	return _mm_adds_pi16 (__m1, __m2);
370	}
371
372	/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
373	saturated arithmetic. */
374	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
375	_mm_adds_pu8 (__m64 __m1, __m64 __m2)
376	{
377	return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
378	}
379
380	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
381	_m_paddusb (__m64 __m1, __m64 __m2)
382	{
383	return _mm_adds_pu8 (__m1, __m2);
384	}
385
386	/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
387	saturated arithmetic. */
388	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
389	_mm_adds_pu16 (__m64 __m1, __m64 __m2)
390	{
391	return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
392	}
393
394	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
395	_m_paddusw (__m64 __m1, __m64 __m2)
396	{
397	return _mm_adds_pu16 (__m1, __m2);
398	}
399
400	/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
401	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
402	_mm_sub_pi8 (__m64 __m1, __m64 __m2)
403	{
404	return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
405	}
406
407	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
408	_m_psubb (__m64 __m1, __m64 __m2)
409	{
410	return _mm_sub_pi8 (__m1, __m2);
411	}
412
413	/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
414	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
415	_mm_sub_pi16 (__m64 __m1, __m64 __m2)
416	{
417	return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
418	}
419
420	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
421	_m_psubw (__m64 __m1, __m64 __m2)
422	{
423	return _mm_sub_pi16 (__m1, __m2);
424	}
425
426	/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
427	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
428	_mm_sub_pi32 (__m64 __m1, __m64 __m2)
429	{
430	return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
431	}
432
433	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
434	_m_psubd (__m64 __m1, __m64 __m2)
435	{
436	return _mm_sub_pi32 (__m1, __m2);
437	}
438
439	/* Add the 64-bit values in M1 to the 64-bit values in M2. */
440	#ifndef __SSE2__
441	#pragma GCC push_options
442	#ifdef __MMX_WITH_SSE__
443	#pragma GCC target("sse2")
444	#else
445	#pragma GCC target("sse2,mmx")
446	#endif
447	#define __DISABLE_SSE2__
448	#endif /* __SSE2__ */
449
450	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
451	_mm_sub_si64 (__m64 __m1, __m64 __m2)
452	{
453	return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
454	}
455	#ifdef __DISABLE_SSE2__
456	#undef __DISABLE_SSE2__
457	#pragma GCC pop_options
458	#endif /* __DISABLE_SSE2__ */
459
460	/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
461	saturating arithmetic. */
462	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
463	_mm_subs_pi8 (__m64 __m1, __m64 __m2)
464	{
465	return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
466	}
467
468	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
469	_m_psubsb (__m64 __m1, __m64 __m2)
470	{
471	return _mm_subs_pi8 (__m1, __m2);
472	}
473
474	/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
475	signed saturating arithmetic. */
476	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
477	_mm_subs_pi16 (__m64 __m1, __m64 __m2)
478	{
479	return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
480	}
481
482	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
483	_m_psubsw (__m64 __m1, __m64 __m2)
484	{
485	return _mm_subs_pi16 (__m1, __m2);
486	}
487
488	/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
489	unsigned saturating arithmetic. */
490	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
491	_mm_subs_pu8 (__m64 __m1, __m64 __m2)
492	{
493	return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
494	}
495
496	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
497	_m_psubusb (__m64 __m1, __m64 __m2)
498	{
499	return _mm_subs_pu8 (__m1, __m2);
500	}
501
502	/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
503	unsigned saturating arithmetic. */
504	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
505	_mm_subs_pu16 (__m64 __m1, __m64 __m2)
506	{
507	return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
508	}
509
510	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
511	_m_psubusw (__m64 __m1, __m64 __m2)
512	{
513	return _mm_subs_pu16 (__m1, __m2);
514	}
515
516	/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
517	four 32-bit intermediate results, which are then summed by pairs to
518	produce two 32-bit results. */
519	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
520	_mm_madd_pi16 (__m64 __m1, __m64 __m2)
521	{
522	return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
523	}
524
525	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
526	_m_pmaddwd (__m64 __m1, __m64 __m2)
527	{
528	return _mm_madd_pi16 (__m1, __m2);
529	}
530
531	/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
532	M2 and produce the high 16 bits of the 32-bit results. */
533	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
534	_mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
535	{
536	return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
537	}
538
539	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
540	_m_pmulhw (__m64 __m1, __m64 __m2)
541	{
542	return _mm_mulhi_pi16 (__m1, __m2);
543	}
544
545	/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
546	the low 16 bits of the results. */
547	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
548	_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
549	{
550	return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
551	}
552
553	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
554	_m_pmullw (__m64 __m1, __m64 __m2)
555	{
556	return _mm_mullo_pi16 (__m1, __m2);
557	}
558
559	/* Shift four 16-bit values in M left by COUNT. */
560	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
561	_mm_sll_pi16 (__m64 __m, __m64 __count)
562	{
563	return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
564	}
565
566	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
567	_m_psllw (__m64 __m, __m64 __count)
568	{
569	return _mm_sll_pi16 (__m, __count);
570	}
571
572	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
573	_mm_slli_pi16 (__m64 __m, int __count)
574	{
575	return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
576	}
577
578	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
579	_m_psllwi (__m64 __m, int __count)
580	{
581	return _mm_slli_pi16 (__m, __count);
582	}
583
584	/* Shift two 32-bit values in M left by COUNT. */
585	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
586	_mm_sll_pi32 (__m64 __m, __m64 __count)
587	{
588	return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
589	}
590
591	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
592	_m_pslld (__m64 __m, __m64 __count)
593	{
594	return _mm_sll_pi32 (__m, __count);
595	}
596
597	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
598	_mm_slli_pi32 (__m64 __m, int __count)
599	{
600	return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
601	}
602
603	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
604	_m_pslldi (__m64 __m, int __count)
605	{
606	return _mm_slli_pi32 (__m, __count);
607	}
608
609	/* Shift the 64-bit value in M left by COUNT. */
610	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
611	_mm_sll_si64 (__m64 __m, __m64 __count)
612	{
613	return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
614	}
615
616	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
617	_m_psllq (__m64 __m, __m64 __count)
618	{
619	return _mm_sll_si64 (__m, __count);
620	}
621
622	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
623	_mm_slli_si64 (__m64 __m, int __count)
624	{
625	return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
626	}
627
628	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
629	_m_psllqi (__m64 __m, int __count)
630	{
631	return _mm_slli_si64 (__m, __count);
632	}
633
634	/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */
635	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
636	_mm_sra_pi16 (__m64 __m, __m64 __count)
637	{
638	return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
639	}
640
641	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
642	_m_psraw (__m64 __m, __m64 __count)
643	{
644	return _mm_sra_pi16 (__m, __count);
645	}
646
647	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
648	_mm_srai_pi16 (__m64 __m, int __count)
649	{
650	return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
651	}
652
653	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
654	_m_psrawi (__m64 __m, int __count)
655	{
656	return _mm_srai_pi16 (__m, __count);
657	}
658
659	/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */
660	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
661	_mm_sra_pi32 (__m64 __m, __m64 __count)
662	{
663	return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
664	}
665
666	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
667	_m_psrad (__m64 __m, __m64 __count)
668	{
669	return _mm_sra_pi32 (__m, __count);
670	}
671
672	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
673	_mm_srai_pi32 (__m64 __m, int __count)
674	{
675	return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
676	}
677
678	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
679	_m_psradi (__m64 __m, int __count)
680	{
681	return _mm_srai_pi32 (__m, __count);
682	}
683
684	/* Shift four 16-bit values in M right by COUNT; shift in zeros. */
685	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
686	_mm_srl_pi16 (__m64 __m, __m64 __count)
687	{
688	return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
689	}
690
691	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
692	_m_psrlw (__m64 __m, __m64 __count)
693	{
694	return _mm_srl_pi16 (__m, __count);
695	}
696
697	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
698	_mm_srli_pi16 (__m64 __m, int __count)
699	{
700	return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
701	}
702
703	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
704	_m_psrlwi (__m64 __m, int __count)
705	{
706	return _mm_srli_pi16 (__m, __count);
707	}
708
709	/* Shift two 32-bit values in M right by COUNT; shift in zeros. */
710	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
711	_mm_srl_pi32 (__m64 __m, __m64 __count)
712	{
713	return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
714	}
715
716	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
717	_m_psrld (__m64 __m, __m64 __count)
718	{
719	return _mm_srl_pi32 (__m, __count);
720	}
721
722	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
723	_mm_srli_pi32 (__m64 __m, int __count)
724	{
725	return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
726	}
727
728	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
729	_m_psrldi (__m64 __m, int __count)
730	{
731	return _mm_srli_pi32 (__m, __count);
732	}
733
734	/* Shift the 64-bit value in M left by COUNT; shift in zeros. */
735	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
736	_mm_srl_si64 (__m64 __m, __m64 __count)
737	{
738	return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
739	}
740
741	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
742	_m_psrlq (__m64 __m, __m64 __count)
743	{
744	return _mm_srl_si64 (__m, __count);
745	}
746
747	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
748	_mm_srli_si64 (__m64 __m, int __count)
749	{
750	return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
751	}
752
753	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
754	_m_psrlqi (__m64 __m, int __count)
755	{
756	return _mm_srli_si64 (__m, __count);
757	}
758
759	/* Bit-wise AND the 64-bit values in M1 and M2. */
760	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
761	_mm_and_si64 (__m64 __m1, __m64 __m2)
762	{
763	return __builtin_ia32_pand (__m1, __m2);
764	}
765
766	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
767	_m_pand (__m64 __m1, __m64 __m2)
768	{
769	return _mm_and_si64 (__m1, __m2);
770	}
771
772	/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
773	64-bit value in M2. */
774	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
775	_mm_andnot_si64 (__m64 __m1, __m64 __m2)
776	{
777	return __builtin_ia32_pandn (__m1, __m2);
778	}
779
780	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
781	_m_pandn (__m64 __m1, __m64 __m2)
782	{
783	return _mm_andnot_si64 (__m1, __m2);
784	}
785
786	/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
787	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
788	_mm_or_si64 (__m64 __m1, __m64 __m2)
789	{
790	return __builtin_ia32_por (__m1, __m2);
791	}
792
793	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
794	_m_por (__m64 __m1, __m64 __m2)
795	{
796	return _mm_or_si64 (__m1, __m2);
797	}
798
799	/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */
800	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
801	_mm_xor_si64 (__m64 __m1, __m64 __m2)
802	{
803	return __builtin_ia32_pxor (__m1, __m2);
804	}
805
806	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
807	_m_pxor (__m64 __m1, __m64 __m2)
808	{
809	return _mm_xor_si64 (__m1, __m2);
810	}
811
812	/* Compare eight 8-bit values. The result of the comparison is 0xFF if the
813	test is true and zero if false. */
814	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
815	_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
816	{
817	return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
818	}
819
820	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
821	_m_pcmpeqb (__m64 __m1, __m64 __m2)
822	{
823	return _mm_cmpeq_pi8 (__m1, __m2);
824	}
825
826	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
827	_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
828	{
829	return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
830	}
831
832	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
833	_m_pcmpgtb (__m64 __m1, __m64 __m2)
834	{
835	return _mm_cmpgt_pi8 (__m1, __m2);
836	}
837
838	/* Compare four 16-bit values. The result of the comparison is 0xFFFF if
839	the test is true and zero if false. */
840	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
841	_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
842	{
843	return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
844	}
845
846	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
847	_m_pcmpeqw (__m64 __m1, __m64 __m2)
848	{
849	return _mm_cmpeq_pi16 (__m1, __m2);
850	}
851
852	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
853	_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
854	{
855	return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
856	}
857
858	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
859	_m_pcmpgtw (__m64 __m1, __m64 __m2)
860	{
861	return _mm_cmpgt_pi16 (__m1, __m2);
862	}
863
864	/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
865	the test is true and zero if false. */
866	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
867	_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
868	{
869	return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
870	}
871
872	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
873	_m_pcmpeqd (__m64 __m1, __m64 __m2)
874	{
875	return _mm_cmpeq_pi32 (__m1, __m2);
876	}
877
878	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
879	_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
880	{
881	return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
882	}
883
884	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
885	_m_pcmpgtd (__m64 __m1, __m64 __m2)
886	{
887	return _mm_cmpgt_pi32 (__m1, __m2);
888	}
889
890	/* Creates a 64-bit zero. */
891	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
892	_mm_setzero_si64 (void)
893	{
894	return (__m64)0LL;
895	}
896
897	/* Creates a vector of two 32-bit values; I0 is least significant. */
898	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
899	_mm_set_pi32 (int __i1, int __i0)
900	{
901	return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
902	}
903
904	/* Creates a vector of four 16-bit values; W0 is least significant. */
905	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
906	_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
907	{
908	return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
909	}
910
911	/* Creates a vector of eight 8-bit values; B0 is least significant. */
912	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
913	_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
914	char __b3, char __b2, char __b1, char __b0)
915	{
916	return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
917	__b4, __b5, __b6, __b7);
918	}
919
920	/* Similar, but with the arguments in reverse order. */
921	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
922	_mm_setr_pi32 (int __i0, int __i1)
923	{
924	return _mm_set_pi32 (__i1, __i0);
925	}
926
927	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
928	_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
929	{
930	return _mm_set_pi16 (__w3, __w2, __w1, __w0);
931	}
932
933	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
934	_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
935	char __b4, char __b5, char __b6, char __b7)
936	{
937	return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
938	}
939
940	/* Creates a vector of two 32-bit values, both elements containing I. */
941	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
942	_mm_set1_pi32 (int __i)
943	{
944	return _mm_set_pi32 (__i, __i);
945	}
946
947	/* Creates a vector of four 16-bit values, all elements containing W. */
948	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
949	_mm_set1_pi16 (short __w)
950	{
951	return _mm_set_pi16 (__w, __w, __w, __w);
952	}
953
954	/* Creates a vector of eight 8-bit values, all elements containing B. */
955	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
956	_mm_set1_pi8 (char __b)
957	{
958	return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
959	}
/* If the top of this header pushed a target override (marked by
   __DISABLE_MMX__), drop the marker and restore the saved options.  */
#if defined (__DISABLE_MMX__)
#undef __DISABLE_MMX__
#pragma GCC pop_options
#endif /* __DISABLE_MMX__ */
964
965	#endif /* _MMINTRIN_H_INCLUDED */

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: Daodan/MSYS2/mingw32/lib/gcc/i686-w64-mingw32/11.2.0/include/mmintrin.h@ 1174

Download in other formats: