Context Navigation

mmintrin.h

Last change on this file was 1166, checked in by rossy, 3 years ago
Daodan: Replace MinGW build env with an up-to-date MSYS2 env
File size: 31.1 KB

Rev	Line
[1166]	1	/* Copyright (C) 2002-2021 Free Software Foundation, Inc.
	2
	3	This file is part of GCC.
	4
	5	GCC is free software; you can redistribute it and/or modify
	6	it under the terms of the GNU General Public License as published by
	7	the Free Software Foundation; either version 3, or (at your option)
	8	any later version.
	9
	10	GCC is distributed in the hope that it will be useful,
	11	but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	13	GNU General Public License for more details.
	14
	15	Under Section 7 of GPL version 3, you are granted additional
	16	permissions described in the GCC Runtime Library Exception, version
	17	3.1, as published by the Free Software Foundation.
	18
	19	You should have received a copy of the GNU General Public License and
	20	a copy of the GCC Runtime Library Exception along with this program;
	21	see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
	22	<http://www.gnu.org/licenses/>. */
	23
	24	/* Implemented from the specification included in the Intel C++ Compiler
	25	User Guide and Reference, version 9.0. */
	26
	27	#ifndef _MMINTRIN_H_INCLUDED
	28	#define _MMINTRIN_H_INCLUDED
	29
	30	#if defined __x86_64__ && !defined __SSE__ || !defined __MMX__ /* target options must be switched for this header */
	31	#pragma GCC push_options
	32	#ifdef __MMX_WITH_SSE__ /* pick the target string used from here on */
	33	#pragma GCC target("sse2")
	34	#elif defined __x86_64__
	35	#pragma GCC target("sse,mmx")
	36	#else
	37	#pragma GCC target("mmx")
	38	#endif
	39	#define __DISABLE_MMX__ /* records that options were pushed above; presumably tested later to pop them */
	40	#endif /* __x86_64__ && !__SSE__ || !__MMX__ */
	41
	42	/* Public vector types. The Intel API is flexible enough that we must allow aliasing with other
	43	vector types, and their scalar components; hence the __may_alias__ attribute on each. */
	44	typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__)); /* 8-byte vector of int */
	45	typedef int __m32 __attribute__ ((__vector_size__ (4), __may_alias__)); /* 4-byte vector of int */
	46	typedef short __m16 __attribute__ ((__vector_size__ (2), __may_alias__)); /* 2-byte vector of short */
	47
	48	/* Unaligned versions of the same types: identical apart from the added __aligned__ (1). */
	49	typedef int __m64_u __attribute__ ((__vector_size__ (8), __may_alias__, __aligned__ (1)));
	50	typedef int __m32_u __attribute__ ((__vector_size__ (4), \
	51	__may_alias__, __aligned__ (1)));
	52	typedef short __m16_u __attribute__ ((__vector_size__ (2), \
	53	__may_alias__, __aligned__ (1)));
	54
	55	/* Internal data types for implementing the intrinsics: 8-byte vectors used as cast targets for the builtins. */
	56	typedef int __v2si __attribute__ ((__vector_size__ (8))); /* 8-byte vector of int */
	57	typedef short __v4hi __attribute__ ((__vector_size__ (8))); /* 8-byte vector of short */
	58	typedef char __v8qi __attribute__ ((__vector_size__ (8))); /* 8-byte vector of char */
	59	typedef long long __v1di __attribute__ ((__vector_size__ (8))); /* 8-byte vector of long long */
	60	typedef float __v2sf __attribute__ ((__vector_size__ (8))); /* 8-byte vector of float */
	61
	62	/* Empty the multimedia state by invoking the emms builtin. */
	63	extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	64	_mm_empty (void)
	65	{
	66	__builtin_ia32_emms ();
	67	}
	68
	69	extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	70	_m_empty (void) /* Alias: forwards to _mm_empty. */
	71	{
	72	_mm_empty ();
	73	}
	74
	75	/* Convert I to a __m64 object. The integer is zero-extended to 64 bits: I is the first 32-bit element, the second is 0. */
	76	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	77	_mm_cvtsi32_si64 (int __i)
	78	{
	79	return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
	80	}
	81
	82	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	83	_m_from_int (int __i) /* Alias: forwards to _mm_cvtsi32_si64. */
	84	{
	85	return _mm_cvtsi32_si64 (__i);
	86	}
	87
	88	#ifdef __x86_64__
	89	/* Convert I to a __m64 object. Only provided when __x86_64__ is defined; all four variants are the same cast of a long long to __m64. */
	90
	91	/* Intel intrinsic. */
	92	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	93	_m_from_int64 (long long __i)
	94	{
	95	return (__m64) __i;
	96	}
	97
	98	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	99	_mm_cvtsi64_m64 (long long __i) /* Same cast as _m_from_int64. */
	100	{
	101	return (__m64) __i;
	102	}
	103
	104	/* Microsoft intrinsic. */
	105	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	106	_mm_cvtsi64x_si64 (long long __i)
	107	{
	108	return (__m64) __i;
	109	}
	110
	111	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	112	_mm_set_pi64x (long long __i) /* Same cast as _mm_cvtsi64x_si64. */
	113	{
	114	return (__m64) __i;
	115	}
	116	#endif
	117
	118	/* Convert the lower 32 bits of the __m64 object into an integer (element 0 of its __v2si view). */
	119	extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	120	_mm_cvtsi64_si32 (__m64 __i)
	121	{
	122	return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
	123	}
	124
	125	extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	126	_m_to_int (__m64 __i) /* Alias: forwards to _mm_cvtsi64_si32. */
	127	{
	128	return _mm_cvtsi64_si32 (__i);
	129	}
	130
	131	#ifdef __x86_64__
	132	/* Convert the __m64 object to a 64-bit integer. Only provided when __x86_64__ is defined; all three variants are the same cast to long long. */
	133
	134	/* Intel intrinsic. */
	135	extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	136	_m_to_int64 (__m64 __i)
	137	{
	138	return (long long)__i;
	139	}
	140
	141	extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	142	_mm_cvtm64_si64 (__m64 __i) /* Same cast as _m_to_int64. */
	143	{
	144	return (long long)__i;
	145	}
	146
	147	/* Microsoft intrinsic. */
	148	extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	149	_mm_cvtsi64_si64x (__m64 __i)
	150	{
	151	return (long long)__i;
	152	}
	153	#endif
	154
	155	/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
	156	the result, and the four 16-bit values from M2 into the upper four 8-bit
	157	values of the result, all with signed saturation. */
	158	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	159	_mm_packs_pi16 (__m64 __m1, __m64 __m2)
	160	{
	161	return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
	162	}
	163
	164	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	165	_m_packsswb (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_packs_pi16. */
	166	{
	167	return _mm_packs_pi16 (__m1, __m2);
	168	}
	169
	170	/* Pack the two 32-bit values from M1 into the lower two 16-bit values of
	171	the result, and the two 32-bit values from M2 into the upper two 16-bit
	172	values of the result, all with signed saturation. */
	173	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	174	_mm_packs_pi32 (__m64 __m1, __m64 __m2)
	175	{
	176	return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
	177	}
	178
	179	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	180	_m_packssdw (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_packs_pi32. */
	181	{
	182	return _mm_packs_pi32 (__m1, __m2);
	183	}
	184
	185	/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
	186	the result, and the four 16-bit values from M2 into the upper four 8-bit
	187	values of the result, all with unsigned saturation. */
	188	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	189	_mm_packs_pu16 (__m64 __m1, __m64 __m2)
	190	{
	191	return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
	192	}
	193
	194	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	195	_m_packuswb (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_packs_pu16. */
	196	{
	197	return _mm_packs_pu16 (__m1, __m2);
	198	}
	199
	200	/* Interleave the four 8-bit values from the high half of M1 with the four
	201	8-bit values from the high half of M2. */
	202	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	203	_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
	204	{
	205	return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
	206	}
	207
	208	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	209	_m_punpckhbw (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_unpackhi_pi8. */
	210	{
	211	return _mm_unpackhi_pi8 (__m1, __m2);
	212	}
	213
	214	/* Interleave the two 16-bit values from the high half of M1 with the two
	215	16-bit values from the high half of M2. */
	216	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	217	_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
	218	{
	219	return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
	220	}
	221
	222	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	223	_m_punpckhwd (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_unpackhi_pi16. */
	224	{
	225	return _mm_unpackhi_pi16 (__m1, __m2);
	226	}
	227
	228	/* Interleave the 32-bit value from the high half of M1 with the 32-bit
	229	value from the high half of M2. */
	230	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	231	_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
	232	{
	233	return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
	234	}
	235
	236	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	237	_m_punpckhdq (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_unpackhi_pi32. */
	238	{
	239	return _mm_unpackhi_pi32 (__m1, __m2);
	240	}
	241
	242	/* Interleave the four 8-bit values from the low half of M1 with the four
	243	8-bit values from the low half of M2. */
	244	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	245	_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
	246	{
	247	return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
	248	}
	249
	250	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	251	_m_punpcklbw (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_unpacklo_pi8. */
	252	{
	253	return _mm_unpacklo_pi8 (__m1, __m2);
	254	}
	255
	256	/* Interleave the two 16-bit values from the low half of M1 with the two
	257	16-bit values from the low half of M2. */
	258	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	259	_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
	260	{
	261	return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
	262	}
	263
	264	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	265	_m_punpcklwd (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_unpacklo_pi16. */
	266	{
	267	return _mm_unpacklo_pi16 (__m1, __m2);
	268	}
	269
	270	/* Interleave the 32-bit value from the low half of M1 with the 32-bit
	271	value from the low half of M2. */
	272	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	273	_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
	274	{
	275	return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
	276	}
	277
	278	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	279	_m_punpckldq (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_unpacklo_pi32. */
	280	{
	281	return _mm_unpacklo_pi32 (__m1, __m2);
	282	}
	283
	284	/* Add the 8-bit values in M1 to the 8-bit values in M2. */
	285	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	286	_mm_add_pi8 (__m64 __m1, __m64 __m2)
	287	{
	288	return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
	289	}
	290
	291	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	292	_m_paddb (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_add_pi8. */
	293	{
	294	return _mm_add_pi8 (__m1, __m2);
	295	}
	296
	297	/* Add the 16-bit values in M1 to the 16-bit values in M2. */
	298	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	299	_mm_add_pi16 (__m64 __m1, __m64 __m2)
	300	{
	301	return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
	302	}
	303
	304	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	305	_m_paddw (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_add_pi16. */
	306	{
	307	return _mm_add_pi16 (__m1, __m2);
	308	}
	309
	310	/* Add the 32-bit values in M1 to the 32-bit values in M2. */
	311	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	312	_mm_add_pi32 (__m64 __m1, __m64 __m2)
	313	{
	314	return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
	315	}
	316
	317	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	318	_m_paddd (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_add_pi32. */
	319	{
	320	return _mm_add_pi32 (__m1, __m2);
	321	}
	322
	323	/* Add the 64-bit value in M1 to the 64-bit value in M2. When __SSE2__ is not defined, the sse2 target is pushed for this one function and popped afterwards. */
	324	#ifndef __SSE2__
	325	#pragma GCC push_options
	326	#ifdef __MMX_WITH_SSE__
	327	#pragma GCC target("sse2")
	328	#else
	329	#pragma GCC target("sse2,mmx")
	330	#endif
	331	#define __DISABLE_SSE2__ /* remembers that options were pushed, so they are popped below */
	332	#endif /* __SSE2__ */
	333
	334	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	335	_mm_add_si64 (__m64 __m1, __m64 __m2)
	336	{
	337	return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
	338	}
	339	#ifdef __DISABLE_SSE2__
	340	#undef __DISABLE_SSE2__
	341	#pragma GCC pop_options
	342	#endif /* __DISABLE_SSE2__ */
	343
	344	/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
	345	saturated arithmetic. */
	346	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	347	_mm_adds_pi8 (__m64 __m1, __m64 __m2)
	348	{
	349	return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
	350	}
	351
	352	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	353	_m_paddsb (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_adds_pi8. */
	354	{
	355	return _mm_adds_pi8 (__m1, __m2);
	356	}
	357
	358	/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
	359	saturated arithmetic. */
	360	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	361	_mm_adds_pi16 (__m64 __m1, __m64 __m2)
	362	{
	363	return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
	364	}
	365
	366	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	367	_m_paddsw (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_adds_pi16. */
	368	{
	369	return _mm_adds_pi16 (__m1, __m2);
	370	}
	371
	372	/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
	373	saturated arithmetic. */
	374	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	375	_mm_adds_pu8 (__m64 __m1, __m64 __m2)
	376	{
	377	return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
	378	}
	379
	380	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	381	_m_paddusb (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_adds_pu8. */
	382	{
	383	return _mm_adds_pu8 (__m1, __m2);
	384	}
	385
	386	/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
	387	saturated arithmetic. */
	388	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	389	_mm_adds_pu16 (__m64 __m1, __m64 __m2)
	390	{
	391	return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
	392	}
	393
	394	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	395	_m_paddusw (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_adds_pu16. */
	396	{
	397	return _mm_adds_pu16 (__m1, __m2);
	398	}
	399
	400	/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
	401	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	402	_mm_sub_pi8 (__m64 __m1, __m64 __m2)
	403	{
	404	return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
	405	}
	406
	407	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	408	_m_psubb (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_sub_pi8. */
	409	{
	410	return _mm_sub_pi8 (__m1, __m2);
	411	}
	412
	413	/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
	414	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	415	_mm_sub_pi16 (__m64 __m1, __m64 __m2)
	416	{
	417	return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
	418	}
	419
	420	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	421	_m_psubw (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_sub_pi16. */
	422	{
	423	return _mm_sub_pi16 (__m1, __m2);
	424	}
	425
	426	/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
	427	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	428	_mm_sub_pi32 (__m64 __m1, __m64 __m2)
	429	{
	430	return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
	431	}
	432
	433	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	434	_m_psubd (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_sub_pi32. */
	435	{
	436	return _mm_sub_pi32 (__m1, __m2);
	437	}
	438
	439	/* Subtract the 64-bit value in M2 from the 64-bit value in M1. When __SSE2__ is not defined, the sse2 target is pushed for this one function and popped afterwards. */
	440	#ifndef __SSE2__
	441	#pragma GCC push_options
	442	#ifdef __MMX_WITH_SSE__
	443	#pragma GCC target("sse2")
	444	#else
	445	#pragma GCC target("sse2,mmx")
	446	#endif
	447	#define __DISABLE_SSE2__ /* remembers that options were pushed, so they are popped below */
	448	#endif /* __SSE2__ */
	449
	450	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	451	_mm_sub_si64 (__m64 __m1, __m64 __m2)
	452	{
	453	return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
	454	}
	455	#ifdef __DISABLE_SSE2__
	456	#undef __DISABLE_SSE2__
	457	#pragma GCC pop_options
	458	#endif /* __DISABLE_SSE2__ */
	459
	460	/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
	461	saturating arithmetic. */
	462	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	463	_mm_subs_pi8 (__m64 __m1, __m64 __m2)
	464	{
	465	return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
	466	}
	467
	468	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	469	_m_psubsb (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_subs_pi8. */
	470	{
	471	return _mm_subs_pi8 (__m1, __m2);
	472	}
	473
	474	/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
	475	signed saturating arithmetic. */
	476	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	477	_mm_subs_pi16 (__m64 __m1, __m64 __m2)
	478	{
	479	return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
	480	}
	481
	482	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	483	_m_psubsw (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_subs_pi16. */
	484	{
	485	return _mm_subs_pi16 (__m1, __m2);
	486	}
	487
	488	/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
	489	unsigned saturating arithmetic. */
	490	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	491	_mm_subs_pu8 (__m64 __m1, __m64 __m2)
	492	{
	493	return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
	494	}
	495
	496	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	497	_m_psubusb (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_subs_pu8. */
	498	{
	499	return _mm_subs_pu8 (__m1, __m2);
	500	}
	501
	502	/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
	503	unsigned saturating arithmetic. */
	504	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	505	_mm_subs_pu16 (__m64 __m1, __m64 __m2)
	506	{
	507	return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
	508	}
	509
	510	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	511	_m_psubusw (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_subs_pu16. */
	512	{
	513	return _mm_subs_pu16 (__m1, __m2);
	514	}
	515
	516	/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
	517	four 32-bit intermediate results, which are then summed by pairs to
	518	produce two 32-bit results. */
	519	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	520	_mm_madd_pi16 (__m64 __m1, __m64 __m2)
	521	{
	522	return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
	523	}
	524
	525	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	526	_m_pmaddwd (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_madd_pi16. */
	527	{
	528	return _mm_madd_pi16 (__m1, __m2);
	529	}
	530
	531	/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
	532	M2 and produce the high 16 bits of the 32-bit results. */
	533	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	534	_mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
	535	{
	536	return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
	537	}
	538
	539	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	540	_m_pmulhw (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_mulhi_pi16. */
	541	{
	542	return _mm_mulhi_pi16 (__m1, __m2);
	543	}
	544
	545	/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
	546	the low 16 bits of the results. */
	547	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	548	_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
	549	{
	550	return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
	551	}
	552
	553	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	554	_m_pmullw (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_mullo_pi16. */
	555	{
	556	return _mm_mullo_pi16 (__m1, __m2);
	557	}
	558
	559	/* Shift four 16-bit values in M left by COUNT. COUNT is either an __m64 (sll) or an int (slli). */
	560	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	561	_mm_sll_pi16 (__m64 __m, __m64 __count)
	562	{
	563	return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
	564	}
	565
	566	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	567	_m_psllw (__m64 __m, __m64 __count) /* Alias: forwards to _mm_sll_pi16. */
	568	{
	569	return _mm_sll_pi16 (__m, __count);
	570	}
	571
	572	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	573	_mm_slli_pi16 (__m64 __m, int __count) /* Variant taking COUNT as an int (psllwi builtin). */
	574	{
	575	return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
	576	}
	577
	578	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	579	_m_psllwi (__m64 __m, int __count) /* Alias: forwards to _mm_slli_pi16. */
	580	{
	581	return _mm_slli_pi16 (__m, __count);
	582	}
	583
	584	/* Shift two 32-bit values in M left by COUNT. COUNT is either an __m64 (sll) or an int (slli). */
	585	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	586	_mm_sll_pi32 (__m64 __m, __m64 __count)
	587	{
	588	return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
	589	}
	590
	591	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	592	_m_pslld (__m64 __m, __m64 __count) /* Alias: forwards to _mm_sll_pi32. */
	593	{
	594	return _mm_sll_pi32 (__m, __count);
	595	}
	596
	597	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	598	_mm_slli_pi32 (__m64 __m, int __count) /* Variant taking COUNT as an int (pslldi builtin). */
	599	{
	600	return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
	601	}
	602
	603	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	604	_m_pslldi (__m64 __m, int __count) /* Alias: forwards to _mm_slli_pi32. */
	605	{
	606	return _mm_slli_pi32 (__m, __count);
	607	}
	608
	609	/* Shift the 64-bit value in M left by COUNT. COUNT is either an __m64 (sll) or an int (slli). */
	610	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	611	_mm_sll_si64 (__m64 __m, __m64 __count)
	612	{
	613	return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
	614	}
	615
	616	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	617	_m_psllq (__m64 __m, __m64 __count) /* Alias: forwards to _mm_sll_si64. */
	618	{
	619	return _mm_sll_si64 (__m, __count);
	620	}
	621
	622	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	623	_mm_slli_si64 (__m64 __m, int __count) /* Variant taking COUNT as an int (psllqi builtin). */
	624	{
	625	return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
	626	}
	627
	628	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	629	_m_psllqi (__m64 __m, int __count) /* Alias: forwards to _mm_slli_si64. */
	630	{
	631	return _mm_slli_si64 (__m, __count);
	632	}
	633
	634	/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. COUNT is either an __m64 (sra) or an int (srai). */
	635	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	636	_mm_sra_pi16 (__m64 __m, __m64 __count)
	637	{
	638	return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
	639	}
	640
	641	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	642	_m_psraw (__m64 __m, __m64 __count) /* Alias: forwards to _mm_sra_pi16. */
	643	{
	644	return _mm_sra_pi16 (__m, __count);
	645	}
	646
	647	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	648	_mm_srai_pi16 (__m64 __m, int __count) /* Variant taking COUNT as an int (psrawi builtin). */
	649	{
	650	return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
	651	}
	652
	653	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	654	_m_psrawi (__m64 __m, int __count) /* Alias: forwards to _mm_srai_pi16. */
	655	{
	656	return _mm_srai_pi16 (__m, __count);
	657	}
	658
	659	/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. COUNT is either an __m64 (sra) or an int (srai). */
	660	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	661	_mm_sra_pi32 (__m64 __m, __m64 __count)
	662	{
	663	return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
	664	}
	665
	666	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	667	_m_psrad (__m64 __m, __m64 __count) /* Alias: forwards to _mm_sra_pi32. */
	668	{
	669	return _mm_sra_pi32 (__m, __count);
	670	}
	671
	672	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	673	_mm_srai_pi32 (__m64 __m, int __count) /* Variant taking COUNT as an int (psradi builtin). */
	674	{
	675	return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
	676	}
	677
	678	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	679	_m_psradi (__m64 __m, int __count) /* Alias: forwards to _mm_srai_pi32. */
	680	{
	681	return _mm_srai_pi32 (__m, __count);
	682	}
	683
	684	/* Shift four 16-bit values in M right by COUNT; shift in zeros. COUNT is either an __m64 (srl) or an int (srli). */
	685	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	686	_mm_srl_pi16 (__m64 __m, __m64 __count)
	687	{
	688	return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
	689	}
	690
	691	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	692	_m_psrlw (__m64 __m, __m64 __count) /* Alias: forwards to _mm_srl_pi16. */
	693	{
	694	return _mm_srl_pi16 (__m, __count);
	695	}
	696
	697	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	698	_mm_srli_pi16 (__m64 __m, int __count) /* Variant taking COUNT as an int (psrlwi builtin). */
	699	{
	700	return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
	701	}
	702
	703	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	704	_m_psrlwi (__m64 __m, int __count) /* Alias: forwards to _mm_srli_pi16. */
	705	{
	706	return _mm_srli_pi16 (__m, __count);
	707	}
	708
	709	/* Shift two 32-bit values in M right by COUNT; shift in zeros. COUNT is either an __m64 (srl) or an int (srli). */
	710	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	711	_mm_srl_pi32 (__m64 __m, __m64 __count)
	712	{
	713	return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
	714	}
	715
	716	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	717	_m_psrld (__m64 __m, __m64 __count) /* Alias: forwards to _mm_srl_pi32. */
	718	{
	719	return _mm_srl_pi32 (__m, __count);
	720	}
	721
	722	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	723	_mm_srli_pi32 (__m64 __m, int __count) /* Variant taking COUNT as an int (psrldi builtin). */
	724	{
	725	return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
	726	}
	727
	728	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	729	_m_psrldi (__m64 __m, int __count) /* Alias: forwards to _mm_srli_pi32. */
	730	{
	731	return _mm_srli_pi32 (__m, __count);
	732	}
	733
	734	/* Shift the 64-bit value in M right by COUNT; shift in zeros. COUNT is either an __m64 (srl) or an int (srli). */
	735	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	736	_mm_srl_si64 (__m64 __m, __m64 __count)
	737	{
	738	return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
	739	}
	740
	741	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	742	_m_psrlq (__m64 __m, __m64 __count) /* Alias: forwards to _mm_srl_si64. */
	743	{
	744	return _mm_srl_si64 (__m, __count);
	745	}
	746
	747	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	748	_mm_srli_si64 (__m64 __m, int __count) /* Variant taking COUNT as an int (psrlqi builtin). */
	749	{
	750	return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
	751	}
	752
	753	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	754	_m_psrlqi (__m64 __m, int __count) /* Alias: forwards to _mm_srli_si64. */
	755	{
	756	return _mm_srli_si64 (__m, __count);
	757	}
	758
	759	/* Bit-wise AND the 64-bit values in M1 and M2. The operands go to the builtin as __m64, without a cast. */
	760	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	761	_mm_and_si64 (__m64 __m1, __m64 __m2)
	762	{
	763	return __builtin_ia32_pand (__m1, __m2);
	764	}
	765
	766	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	767	_m_pand (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_and_si64. */
	768	{
	769	return _mm_and_si64 (__m1, __m2);
	770	}
	771
	772	/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
	773	64-bit value in M2. Note that only M1 is complemented. */
	774	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	775	_mm_andnot_si64 (__m64 __m1, __m64 __m2)
	776	{
	777	return __builtin_ia32_pandn (__m1, __m2);
	778	}
	779
	780	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	781	_m_pandn (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_andnot_si64. */
	782	{
	783	return _mm_andnot_si64 (__m1, __m2);
	784	}
	785
	786	/* Bit-wise inclusive OR the 64-bit values in M1 and M2. The operands go to the builtin as __m64, without a cast. */
	787	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	788	_mm_or_si64 (__m64 __m1, __m64 __m2)
	789	{
	790	return __builtin_ia32_por (__m1, __m2);
	791	}
	792
	793	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	794	_m_por (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_or_si64. */
	795	{
	796	return _mm_or_si64 (__m1, __m2);
	797	}
	798
	799	/* Bit-wise exclusive OR the 64-bit values in M1 and M2. The operands go to the builtin as __m64, without a cast. */
	800	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	801	_mm_xor_si64 (__m64 __m1, __m64 __m2)
	802	{
	803	return __builtin_ia32_pxor (__m1, __m2);
	804	}
	805
	806	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	807	_m_pxor (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_xor_si64. */
	808	{
	809	return _mm_xor_si64 (__m1, __m2);
	810	}
	811
	812	/* Compare eight 8-bit values. The result of the comparison is 0xFF if the
	813	test is true and zero if false. Two tests are provided: "eq" and "gt". */
	814	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	815	_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2) /* "eq" test: uses the pcmpeqb builtin. */
	816	{
	817	return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
	818	}
	819
	820	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	821	_m_pcmpeqb (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_cmpeq_pi8. */
	822	{
	823	return _mm_cmpeq_pi8 (__m1, __m2);
	824	}
	825
	826	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	827	_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2) /* "gt" test: uses the pcmpgtb builtin. */
	828	{
	829	return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
	830	}
	831
	832	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	833	_m_pcmpgtb (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_cmpgt_pi8. */
	834	{
	835	return _mm_cmpgt_pi8 (__m1, __m2);
	836	}
	837
	838	/* Compare four 16-bit values. The result of the comparison is 0xFFFF if
	839	the test is true and zero if false. Two tests are provided: "eq" and "gt". */
	840	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	841	_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2) /* "eq" test: uses the pcmpeqw builtin. */
	842	{
	843	return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
	844	}
	845
	846	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	847	_m_pcmpeqw (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_cmpeq_pi16. */
	848	{
	849	return _mm_cmpeq_pi16 (__m1, __m2);
	850	}
	851
	852	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	853	_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2) /* "gt" test: uses the pcmpgtw builtin. */
	854	{
	855	return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
	856	}
	857
	858	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	859	_m_pcmpgtw (__m64 __m1, __m64 __m2) /* Alias: forwards to _mm_cmpgt_pi16. */
	860	{
	861	return _mm_cmpgt_pi16 (__m1, __m2);
	862	}
	863
	864	/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
	865	the test is true and zero if false.
	   _mm_cmpeq_pi32 reinterprets both __m64 operands as __v2si, passes them
	   to __builtin_ia32_pcmpeqd in the order given, and casts the builtin's
	   result back to __m64.
	   NOTE(review): the "cmpeq" / "pcmpeqd" names suggest the test is
	   element-wise equality -- confirm against the Intel intrinsics
	   reference. */
	866	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	867	_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
	868	{
	869	return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
	870	}
	871
	/* Alternate name for _mm_cmpeq_pi32: forwards __m1 and __m2 in the same
	   order and returns that function's result unchanged. */
	872	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	873	_m_pcmpeqd (__m64 __m1, __m64 __m2)
	874	{
	875	return _mm_cmpeq_pi32 (__m1, __m2);
	876	}
	877
	/* Compare two 32-bit values; as for the other 32-bit compare in this
	   file, the result is 0xFFFFFFFF if the test is true and zero if false.
	   Both __m64 operands are reinterpreted as __v2si and passed to
	   __builtin_ia32_pcmpgtd with __m1 first and __m2 second; the builtin's
	   result is cast back to __m64.
	   NOTE(review): presumably the test is a signed "__m1 greater than __m2"
	   per element -- signedness is not visible here, confirm against the
	   Intel intrinsics reference. */
	878	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	879	_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
	880	{
	881	return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
	882	}
	883
	/* Alternate name for _mm_cmpgt_pi32: forwards __m1 and __m2 in the same
	   order and returns that function's result unchanged. */
	884	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	885	_m_pcmpgtd (__m64 __m1, __m64 __m2)
	886	{
	887	return _mm_cmpgt_pi32 (__m1, __m2);
	888	}
	889
	890	/* Creates a 64-bit zero. Takes no arguments; the returned value is the
	   integer constant 0LL cast to __m64. */
	891	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	892	_mm_setzero_si64 (void)
	893	{
	894	return (__m64)0LL;
	895	}
	896
	897	/* Creates a vector of two 32-bit values; I0 is least significant.
	   The parameters are declared most-significant first (__i1, then __i0)
	   and are handed to __builtin_ia32_vec_init_v2si in the opposite order
	   (__i0 first); the builtin's result is cast to __m64. */
	898	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	899	_mm_set_pi32 (int __i1, int __i0)
	900	{
	901	return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
	902	}
	903
	904	/* Creates a vector of four 16-bit values; W0 is least significant.
	   The parameters are declared most-significant first (__w3 down to __w0)
	   and are handed to __builtin_ia32_vec_init_v4hi in the opposite order
	   (__w0 first); the builtin's result is cast to __m64. */
	905	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	906	_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
	907	{
	908	return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
	909	}
	910
	911	/* Creates a vector of eight 8-bit values; B0 is least significant.
	   The parameters are declared most-significant first (__b7 down to __b0)
	   and are handed to __builtin_ia32_vec_init_v8qi in the opposite order
	   (__b0 first); the builtin's result is cast to __m64. */
	912	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	913	_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
	914	char __b3, char __b2, char __b1, char __b0)
	915	{
	916	return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
	917	__b4, __b5, __b6, __b7);
	918	}
	919
	920	/* Similar, but with the arguments in reverse order.
	   _mm_setr_pi32 takes __i0 first and __i1 second, and calls
	   _mm_set_pi32 (__i1, __i0), returning its result unchanged. */
	921	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	922	_mm_setr_pi32 (int __i0, int __i1)
	923	{
	924	return _mm_set_pi32 (__i1, __i0);
	925	}
	926
	/* Reversed-argument form of _mm_set_pi16: takes __w0 first and __w3
	   last, and calls _mm_set_pi16 (__w3, __w2, __w1, __w0), returning its
	   result unchanged. */
	927	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	928	_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
	929	{
	930	return _mm_set_pi16 (__w3, __w2, __w1, __w0);
	931	}
	932
	/* Reversed-argument form of _mm_set_pi8: takes __b0 first and __b7 last,
	   and calls _mm_set_pi8 (__b7, ..., __b0), returning its result
	   unchanged. */
	933	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	934	_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
	935	char __b4, char __b5, char __b6, char __b7)
	936	{
	937	return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
	938	}
	939
	940	/* Creates a vector of two 32-bit values, both elements containing I.
	   Implemented by passing __i as both arguments of _mm_set_pi32. */
	941	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	942	_mm_set1_pi32 (int __i)
	943	{
	944	return _mm_set_pi32 (__i, __i);
	945	}
	946
	947	/* Creates a vector of four 16-bit values, all elements containing W.
	   Implemented by passing __w as all four arguments of _mm_set_pi16. */
	948	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	949	_mm_set1_pi16 (short __w)
	950	{
	951	return _mm_set_pi16 (__w, __w, __w, __w);
	952	}
	953
	954	/* Creates a vector of eight 8-bit values, all elements containing B.
	   Implemented by passing __b as all eight arguments of _mm_set_pi8. */
	955	extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
	956	_mm_set1_pi8 (char __b)
	957	{
	958	return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
	959	}
	/* Undo the target override made near the top of this header.
	   __DISABLE_MMX__ is defined there only in the branch that issued
	   "#pragma GCC push_options", so the matching pop_options below runs
	   only in that case; the marker macro is removed first so that it is not
	   left defined after this header. */
	960	#ifdef __DISABLE_MMX__
	961	#undef __DISABLE_MMX__
	962	#pragma GCC pop_options
	963	#endif /* __DISABLE_MMX__ */
	964
	965	#endif /* _MMINTRIN_H_INCLUDED */

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: Daodan/MSYS2/mingw32/lib/gcc/i686-w64-mingw32/11.2.0/include/mmintrin.h

Download in other formats: