Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log
Repository URL

dvec.h@ 1194

Last change on this file since 1194 was 1166, checked in by rossy, 4 years ago
Daodan: Replace MinGW build env with an up-to-date MSYS2 env
File size: 32.5 KB

Rev	Line
[1166]	1	/**
	2	* This file has no copyright assigned and is placed in the Public Domain.
	3	* This file is part of the mingw-w64 runtime package.
	4	* No warranty is given; refer to the file DISCLAIMER.PD within this package.
	5	*/
	6	#ifndef _DVEC_H_INCLUDED
	7	#define _DVEC_H_INCLUDED
	8	#ifndef RC_INVOKED
	9
	10	#if !defined __cplusplus
	11	#error This file is only supported in C++ compilations!
	12	#endif
	13
	14	#include <intrin.h>
	15	#include <assert.h>
	16	#include <fvec.h>
	17	#include <crtdefs.h>
	18
	19	#pragma pack(push,_CRT_PACKING)
	20
	21	#if defined(_ENABLE_VEC_DEBUG)
	22	#include <iostream>
	23	#endif
	24
	25	#ifdef __SSE__
	26
	27	#pragma pack(push,16)
	28
	29	#define EXPLICIT explicit
	30
	31	class I8vec16;
	32	class Is8vec16;
	33	class Iu8vec16;
	34	class I16vec8;
	35	class Is16vec8;
	36	class Iu16vec8;
	37	class I32vec4;
	38	class Is32vec4;
	39	class Iu32vec4;
	40	class I64vec2;
	41	class I128vec1;
	42
	43	#define _MM_16UB(element,vector) (((unsigned char)&(vector) + (element)))
	44	#define _MM_16B(element,vector) (((signed char)&(vector) + (element)))
	45
	46	#define _MM_8UW(element,vector) (((unsigned short)&(vector) + (element)))
	47	#define _MM_8W(element,vector) (((short)&(vector) + (element)))
	48
	49	#define _MM_4UDW(element,vector) (((unsigned int)&(vector) + (element)))
	50	#define _MM_4DW(element,vector) (((int)&(vector) + (element)))
	51
	52	#define _MM_2QW(element,vector) (((__int64)&(vector) + (element)))
	53
	54	__MINGW_EXTENSION inline const __m128i get_mask128()
	55	{
	56	static const __m128i mask128 = _mm_set1_epi64(M64((__int64)0xffffffffffffffffll));
	57	return mask128;
	58	}
	59
	60	class M128
	61	{
	62	protected:
	63	__m128i vec;
	64
	65	public:
	66	M128() { }
	67	M128(__m128i mm) { vec = mm; }
	68
	69	operator __m128i() const { return vec; }
	70
	71	M128& operator&=(const M128 &a) { return *this = (M128) _mm_and_si128(vec,a); }
	72	M128& operator\|=(const M128 &a) { return *this = (M128) _mm_or_si128(vec,a); }
	73	M128& operator^=(const M128 &a) { return *this = (M128) _mm_xor_si128(vec,a); }
	74
	75	};
	76
	77	inline M128 operator&(const M128 &a,const M128 &b) { return _mm_and_si128(a,b); }
	78	inline M128 operator\|(const M128 &a,const M128 &b) { return _mm_or_si128(a,b); }
	79	inline M128 operator^(const M128 &a,const M128 &b) { return _mm_xor_si128(a,b); }
	80	inline M128 andnot(const M128 &a,const M128 &b) { return _mm_andnot_si128(a,b); }
	81
	82	class I128vec1 : public M128
	83	{
	84	public:
	85	I128vec1() { }
	86	I128vec1(__m128i mm) : M128(mm) { }
	87
	88	I128vec1& operator= (const M128 &a) { return *this = (I128vec1) a; }
	89	I128vec1& operator&=(const M128 &a) { return *this = (I128vec1) _mm_and_si128(vec,a); }
	90	I128vec1& operator\|=(const M128 &a) { return *this = (I128vec1) _mm_or_si128(vec,a); }
	91	I128vec1& operator^=(const M128 &a) { return *this = (I128vec1) _mm_xor_si128(vec,a); }
	92
	93	};
	94
	95	class I64vec2 : public M128
	96	{
	97	public:
	98	I64vec2() { }
	99	I64vec2(__m128i mm) : M128(mm) { }
	100
	101	__MINGW_EXTENSION I64vec2(__m64 q1,__m64 q0)
	102	{
	103	_MM_2QW(0,vec) = (__int64)&q0;
	104	_MM_2QW(1,vec) = (__int64)&q1;
	105	}
	106
	107	I64vec2& operator= (const M128 &a) { return *this = (I64vec2) a; }
	108
	109	I64vec2& operator&=(const M128 &a) { return *this = (I64vec2) _mm_and_si128(vec,a); }
	110	I64vec2& operator\|=(const M128 &a) { return *this = (I64vec2) _mm_or_si128(vec,a); }
	111	I64vec2& operator^=(const M128 &a) { return *this = (I64vec2) _mm_xor_si128(vec,a); }
	112
	113	I64vec2& operator +=(const I64vec2 &a) { return *this = (I64vec2) _mm_add_epi64(vec,a); }
	114	I64vec2& operator -=(const I64vec2 &a) { return *this = (I64vec2) _mm_sub_epi64(vec,a); }
	115
	116	I64vec2 operator<<(const I64vec2 &a) { return _mm_sll_epi64(vec,a); }
	117	I64vec2 operator<<(int count) { return _mm_slli_epi64(vec,count); }
	118	I64vec2& operator<<=(const I64vec2 &a) { return *this = (I64vec2) _mm_sll_epi64(vec,a); }
	119	I64vec2& operator<<=(int count) { return *this = (I64vec2) _mm_slli_epi64(vec,count); }
	120	I64vec2 operator>>(const I64vec2 &a) { return _mm_srl_epi64(vec,a); }
	121	I64vec2 operator>>(int count) { return _mm_srli_epi64(vec,count); }
	122	I64vec2& operator>>=(const I64vec2 &a) { return *this = (I64vec2) _mm_srl_epi64(vec,a); }
	123	I64vec2& operator>>=(int count) { return *this = (I64vec2) _mm_srli_epi64(vec,count); }
	124
	125	__MINGW_EXTENSION const __int64& operator[](int i)const
	126	{
	127	assert(static_cast<unsigned int>(i) < 2);
	128	return _MM_2QW(i,vec);
	129	}
	130
	131	__MINGW_EXTENSION __int64& operator[](int i)
	132	{
	133	assert(static_cast<unsigned int>(i) < 2);
	134	return _MM_2QW(i,vec);
	135	}
	136
	137	};
	138
	139	inline I64vec2 unpack_low(const I64vec2 &a,const I64vec2 &b) {return _mm_unpacklo_epi64(a,b); }
	140	inline I64vec2 unpack_high(const I64vec2 &a,const I64vec2 &b) {return _mm_unpackhi_epi64(a,b); }
	141
	142	class I32vec4 : public M128
	143	{
	144	public:
	145	I32vec4() { }
	146	I32vec4(__m128i mm) : M128(mm) { }
	147
	148	I32vec4& operator= (const M128 &a) { return *this = (I32vec4) a; }
	149
	150	I32vec4& operator&=(const M128 &a) { return *this = (I32vec4) _mm_and_si128(vec,a); }
	151	I32vec4& operator\|=(const M128 &a) { return *this = (I32vec4) _mm_or_si128(vec,a); }
	152	I32vec4& operator^=(const M128 &a) { return *this = (I32vec4) _mm_xor_si128(vec,a); }
	153
	154	I32vec4& operator +=(const I32vec4 &a) { return *this = (I32vec4)_mm_add_epi32(vec,a); }
	155	I32vec4& operator -=(const I32vec4 &a) { return *this = (I32vec4)_mm_sub_epi32(vec,a); }
	156
	157	I32vec4 operator<<(const I32vec4 &a) { return _mm_sll_epi32(vec,a); }
	158	I32vec4 operator<<(int count) { return _mm_slli_epi32(vec,count); }
	159	I32vec4& operator<<=(const I32vec4 &a) { return *this = (I32vec4)_mm_sll_epi32(vec,a); }
	160	I32vec4& operator<<=(int count) { return *this = (I32vec4)_mm_slli_epi32(vec,count); }
	161
	162	};
	163
	164	inline I32vec4 cmpeq(const I32vec4 &a,const I32vec4 &b) { return _mm_cmpeq_epi32(a,b); }
	165	inline I32vec4 cmpneq(const I32vec4 &a,const I32vec4 &b) { return _mm_andnot_si128(_mm_cmpeq_epi32(a,b),get_mask128()); }
	166
	167	inline I32vec4 unpack_low(const I32vec4 &a,const I32vec4 &b) { return _mm_unpacklo_epi32(a,b); }
	168	inline I32vec4 unpack_high(const I32vec4 &a,const I32vec4 &b) { return _mm_unpackhi_epi32(a,b); }
	169
	170	class Is32vec4 : public I32vec4
	171	{
	172	public:
	173	Is32vec4() { }
	174	Is32vec4(__m128i mm) : I32vec4(mm) { }
	175	Is32vec4(int i3,int i2,int i1,int i0)
	176	{
	177	_MM_4DW(0,vec) = i0;
	178	_MM_4DW(1,vec) = i1;
	179	_MM_4DW(2,vec) = i2;
	180	_MM_4DW(3,vec) = i3;
	181	}
	182
	183	Is32vec4& operator= (const M128 &a) { return *this = (Is32vec4) a; }
	184
	185	Is32vec4& operator&=(const M128 &a) { return *this = (Is32vec4) _mm_and_si128(vec,a); }
	186	Is32vec4& operator\|=(const M128 &a) { return *this = (Is32vec4) _mm_or_si128(vec,a); }
	187	Is32vec4& operator^=(const M128 &a) { return *this = (Is32vec4) _mm_xor_si128(vec,a); }
	188
	189	Is32vec4& operator +=(const I32vec4 &a) { return *this = (Is32vec4)_mm_add_epi32(vec,a); }
	190	Is32vec4& operator -=(const I32vec4 &a) { return *this = (Is32vec4)_mm_sub_epi32(vec,a); }
	191
	192	Is32vec4 operator<<(const M128 &a) { return _mm_sll_epi32(vec,a); }
	193	Is32vec4 operator<<(int count) { return _mm_slli_epi32(vec,count); }
	194	Is32vec4& operator<<=(const M128 &a) { return *this = (Is32vec4)_mm_sll_epi32(vec,a); }
	195	Is32vec4& operator<<=(int count) { return *this = (Is32vec4)_mm_slli_epi32(vec,count); }
	196
	197	Is32vec4 operator>>(const M128 &a) { return _mm_sra_epi32(vec,a); }
	198	Is32vec4 operator>>(int count) { return _mm_srai_epi32(vec,count); }
	199	Is32vec4& operator>>=(const M128 &a) { return *this = (Is32vec4) _mm_sra_epi32(vec,a); }
	200	Is32vec4& operator>>=(int count) { return *this = (Is32vec4) _mm_srai_epi32(vec,count); }
	201
	202	#if defined(_ENABLE_VEC_DEBUG)
	203
	204	friend std::ostream& operator<< (std::ostream &os,const Is32vec4 &a)
	205	{
	206	os << "[3]:" << _MM_4DW(3,a)
	207	<< " [2]:" << _MM_4DW(2,a)
	208	<< " [1]:" << _MM_4DW(1,a)
	209	<< " [0]:" << _MM_4DW(0,a);
	210	return os;
	211	}
	212	#endif
	213
	214	const int& operator[](int i)const
	215	{
	216	assert(static_cast<unsigned int>(i) < 4);
	217	return _MM_4DW(i,vec);
	218	}
	219
	220	int& operator[](int i)
	221	{
	222	assert(static_cast<unsigned int>(i) < 4);
	223	return _MM_4DW(i,vec);
	224	}
	225	};
	226
	227	inline Is32vec4 cmpeq(const Is32vec4 &a,const Is32vec4 &b) { return _mm_cmpeq_epi32(a,b); }
	228	inline Is32vec4 cmpneq(const Is32vec4 &a,const Is32vec4 &b) { return _mm_andnot_si128(_mm_cmpeq_epi32(a,b),get_mask128()); }
	229	inline Is32vec4 cmpgt(const Is32vec4 &a,const Is32vec4 &b) { return _mm_cmpgt_epi32(a,b); }
	230	inline Is32vec4 cmplt(const Is32vec4 &a,const Is32vec4 &b) { return _mm_cmpgt_epi32(b,a); }
	231
	232	inline Is32vec4 unpack_low(const Is32vec4 &a,const Is32vec4 &b) { return _mm_unpacklo_epi32(a,b); }
	233	inline Is32vec4 unpack_high(const Is32vec4 &a,const Is32vec4 &b) { return _mm_unpackhi_epi32(a,b); }
	234
	235	class Iu32vec4 : public I32vec4
	236	{
	237	public:
	238	Iu32vec4() { }
	239	Iu32vec4(__m128i mm) : I32vec4(mm) { }
	240	Iu32vec4(unsigned int ui3,unsigned int ui2,unsigned int ui1,unsigned int ui0)
	241	{
	242	_MM_4UDW(0,vec) = ui0;
	243	_MM_4UDW(1,vec) = ui1;
	244	_MM_4UDW(2,vec) = ui2;
	245	_MM_4UDW(3,vec) = ui3;
	246	}
	247
	248	Iu32vec4& operator= (const M128 &a) { return *this = (Iu32vec4) a; }
	249
	250	Iu32vec4& operator&=(const M128 &a) { return *this = (Iu32vec4) _mm_and_si128(vec,a); }
	251	Iu32vec4& operator\|=(const M128 &a) { return *this = (Iu32vec4) _mm_or_si128(vec,a); }
	252	Iu32vec4& operator^=(const M128 &a) { return *this = (Iu32vec4) _mm_xor_si128(vec,a); }
	253
	254	Iu32vec4& operator +=(const I32vec4 &a) { return *this = (Iu32vec4)_mm_add_epi32(vec,a); }
	255	Iu32vec4& operator -=(const I32vec4 &a) { return *this = (Iu32vec4)_mm_sub_epi32(vec,a); }
	256
	257	Iu32vec4 operator<<(const M128 &a) { return _mm_sll_epi32(vec,a); }
	258	Iu32vec4 operator<<(int count) { return _mm_slli_epi32(vec,count); }
	259	Iu32vec4& operator<<=(const M128 &a) { return *this = (Iu32vec4)_mm_sll_epi32(vec,a); }
	260	Iu32vec4& operator<<=(int count) { return *this = (Iu32vec4)_mm_slli_epi32(vec,count); }
	261	Iu32vec4 operator>>(const M128 &a) { return _mm_srl_epi32(vec,a); }
	262	Iu32vec4 operator>>(int count) { return _mm_srli_epi32(vec,count); }
	263	Iu32vec4& operator>>=(const M128 &a) { return *this = (Iu32vec4) _mm_srl_epi32(vec,a); }
	264	Iu32vec4& operator>>=(int count) { return *this = (Iu32vec4) _mm_srli_epi32(vec,count); }
	265
	266	#if defined(_ENABLE_VEC_DEBUG)
	267
	268	friend std::ostream& operator<< (std::ostream &os,const Iu32vec4 &a)
	269	{
	270	os << "[3]:" << _MM_4UDW(3,a)
	271	<< " [2]:" << _MM_4UDW(2,a)
	272	<< " [1]:" << _MM_4UDW(1,a)
	273	<< " [0]:" << _MM_4UDW(0,a);
	274	return os;
	275	}
	276	#endif
	277
	278	const unsigned int& operator[](int i)const
	279	{
	280	assert(static_cast<unsigned int>(i) < 4);
	281	return _MM_4UDW(i,vec);
	282	}
	283
	284	unsigned int& operator[](int i)
	285	{
	286	assert(static_cast<unsigned int>(i) < 4);
	287	return _MM_4UDW(i,vec);
	288	}
	289	};
	290
	291	inline I64vec2 operator*(const Iu32vec4 &a,const Iu32vec4 &b) { return _mm_mul_epu32(a,b); }
	292	inline Iu32vec4 cmpeq(const Iu32vec4 &a,const Iu32vec4 &b) { return _mm_cmpeq_epi32(a,b); }
	293	inline Iu32vec4 cmpneq(const Iu32vec4 &a,const Iu32vec4 &b) { return _mm_andnot_si128(_mm_cmpeq_epi32(a,b),get_mask128()); }
	294
	295	inline Iu32vec4 unpack_low(const Iu32vec4 &a,const Iu32vec4 &b) { return _mm_unpacklo_epi32(a,b); }
	296	inline Iu32vec4 unpack_high(const Iu32vec4 &a,const Iu32vec4 &b) { return _mm_unpackhi_epi32(a,b); }
	297
	298	class I16vec8 : public M128
	299	{
	300	public:
	301	I16vec8() { }
	302	I16vec8(__m128i mm) : M128(mm) { }
	303
	304	I16vec8& operator= (const M128 &a) { return *this = (I16vec8) a; }
	305
	306	I16vec8& operator&=(const M128 &a) { return *this = (I16vec8) _mm_and_si128(vec,a); }
	307	I16vec8& operator\|=(const M128 &a) { return *this = (I16vec8) _mm_or_si128(vec,a); }
	308	I16vec8& operator^=(const M128 &a) { return *this = (I16vec8) _mm_xor_si128(vec,a); }
	309
	310	I16vec8& operator +=(const I16vec8 &a) { return *this = (I16vec8) _mm_add_epi16(vec,a); }
	311	I16vec8& operator -=(const I16vec8 &a) { return *this = (I16vec8) _mm_sub_epi16(vec,a); }
	312	I16vec8& operator =(const I16vec8 &a) { return this = (I16vec8) _mm_mullo_epi16(vec,a); }
	313
	314	I16vec8 operator<<(const M128 &a) { return _mm_sll_epi16(vec,a); }
	315	I16vec8 operator<<(int count) { return _mm_slli_epi16(vec,count); }
	316	I16vec8& operator<<=(const M128 &a) { return *this = (I16vec8)_mm_sll_epi16(vec,a); }
	317	I16vec8& operator<<=(int count) { return *this = (I16vec8)_mm_slli_epi16(vec,count); }
	318
	319	};
	320
	321	inline I16vec8 operator*(const I16vec8 &a,const I16vec8 &b) { return _mm_mullo_epi16(a,b); }
	322
	323	inline I16vec8 cmpeq(const I16vec8 &a,const I16vec8 &b) { return _mm_cmpeq_epi16(a,b); }
	324	inline I16vec8 cmpneq(const I16vec8 &a,const I16vec8 &b) { return _mm_andnot_si128(_mm_cmpeq_epi16(a,b),get_mask128()); }
	325
	326	inline I16vec8 unpack_low(const I16vec8 &a,const I16vec8 &b) { return _mm_unpacklo_epi16(a,b); }
	327	inline I16vec8 unpack_high(const I16vec8 &a,const I16vec8 &b) { return _mm_unpackhi_epi16(a,b); }
	328
	329	class Is16vec8 : public I16vec8
	330	{
	331	public:
	332	Is16vec8() { }
	333	Is16vec8(__m128i mm) : I16vec8(mm) { }
	334	Is16vec8(signed short s7,signed short s6,signed short s5,signed short s4,signed short s3,signed short s2,signed short s1,signed short s0)
	335	{
	336	_MM_8W(0,vec) = s0;
	337	_MM_8W(1,vec) = s1;
	338	_MM_8W(2,vec) = s2;
	339	_MM_8W(3,vec) = s3;
	340	_MM_8W(4,vec) = s4;
	341	_MM_8W(5,vec) = s5;
	342	_MM_8W(6,vec) = s6;
	343	_MM_8W(7,vec) = s7;
	344	}
	345
	346	Is16vec8& operator= (const M128 &a) { return *this = (Is16vec8) a; }
	347
	348	Is16vec8& operator&=(const M128 &a) { return *this = (Is16vec8) _mm_and_si128(vec,a); }
	349	Is16vec8& operator\|=(const M128 &a) { return *this = (Is16vec8) _mm_or_si128(vec,a); }
	350	Is16vec8& operator^=(const M128 &a) { return *this = (Is16vec8) _mm_xor_si128(vec,a); }
	351
	352	Is16vec8& operator +=(const I16vec8 &a) { return *this = (Is16vec8) _mm_add_epi16(vec,a); }
	353	Is16vec8& operator -=(const I16vec8 &a) { return *this = (Is16vec8) _mm_sub_epi16(vec,a); }
	354	Is16vec8& operator =(const I16vec8 &a) { return this = (Is16vec8) _mm_mullo_epi16(vec,a); }
	355
	356	Is16vec8 operator<<(const M128 &a) { return _mm_sll_epi16(vec,a); }
	357	Is16vec8 operator<<(int count) { return _mm_slli_epi16(vec,count); }
	358	Is16vec8& operator<<=(const M128 &a) { return *this = (Is16vec8)_mm_sll_epi16(vec,a); }
	359	Is16vec8& operator<<=(int count) { return *this = (Is16vec8)_mm_slli_epi16(vec,count); }
	360
	361	Is16vec8 operator>>(const M128 &a) { return _mm_sra_epi16(vec,a); }
	362	Is16vec8 operator>>(int count) { return _mm_srai_epi16(vec,count); }
	363	Is16vec8& operator>>=(const M128 &a) { return *this = (Is16vec8)_mm_sra_epi16(vec,a); }
	364	Is16vec8& operator>>=(int count) { return *this = (Is16vec8)_mm_srai_epi16(vec,count); }
	365
	366	#if defined(_ENABLE_VEC_DEBUG)
	367
	368	friend std::ostream& operator<< (std::ostream &os,const Is16vec8 &a)
	369	{
	370	os << "[7]:" << _MM_8W(7,a)
	371	<< " [6]:" << _MM_8W(6,a)
	372	<< " [5]:" << _MM_8W(5,a)
	373	<< " [4]:" << _MM_8W(4,a)
	374	<< " [3]:" << _MM_8W(3,a)
	375	<< " [2]:" << _MM_8W(2,a)
	376	<< " [1]:" << _MM_8W(1,a)
	377	<< " [0]:" << _MM_8W(0,a);
	378	return os;
	379	}
	380	#endif
	381
	382	const signed short& operator[](int i)const
	383	{
	384	assert(static_cast<unsigned int>(i) < 8);
	385	return _MM_8W(i,vec);
	386	}
	387
	388	signed short& operator[](int i)
	389	{
	390	assert(static_cast<unsigned int>(i) < 8);
	391	return _MM_8W(i,vec);
	392	}
	393	};
	394
	395	inline Is16vec8 operator*(const Is16vec8 &a,const Is16vec8 &b) { return _mm_mullo_epi16(a,b); }
	396
	397	inline Is16vec8 cmpeq(const Is16vec8 &a,const Is16vec8 &b) { return _mm_cmpeq_epi16(a,b); }
	398	inline Is16vec8 cmpneq(const Is16vec8 &a,const Is16vec8 &b) { return _mm_andnot_si128(_mm_cmpeq_epi16(a,b),get_mask128()); }
	399	inline Is16vec8 cmpgt(const Is16vec8 &a,const Is16vec8 &b) { return _mm_cmpgt_epi16(a,b); }
	400	inline Is16vec8 cmplt(const Is16vec8 &a,const Is16vec8 &b) { return _mm_cmpgt_epi16(b,a); }
	401
	402	inline Is16vec8 unpack_low(const Is16vec8 &a,const Is16vec8 &b) { return _mm_unpacklo_epi16(a,b); }
	403	inline Is16vec8 unpack_high(const Is16vec8 &a,const Is16vec8 &b) { return _mm_unpackhi_epi16(a,b); }
	404
	405	inline Is16vec8 mul_high(const Is16vec8 &a,const Is16vec8 &b) { return _mm_mulhi_epi16(a,b); }
	406	inline Is32vec4 mul_add(const Is16vec8 &a,const Is16vec8 &b) { return _mm_madd_epi16(a,b);}
	407
	408	inline Is16vec8 sat_add(const Is16vec8 &a,const Is16vec8 &b) { return _mm_adds_epi16(a,b); }
	409	inline Is16vec8 sat_sub(const Is16vec8 &a,const Is16vec8 &b) { return _mm_subs_epi16(a,b); }
	410
	411	inline Is16vec8 simd_max(const Is16vec8 &a,const Is16vec8 &b) { return _mm_max_epi16(a,b); }
	412	inline Is16vec8 simd_min(const Is16vec8 &a,const Is16vec8 &b) { return _mm_min_epi16(a,b); }
	413
	414	class Iu16vec8 : public I16vec8
	415	{
	416	public:
	417	Iu16vec8() { }
	418	Iu16vec8(__m128i mm) : I16vec8(mm) { }
	419	Iu16vec8(unsigned short s7,unsigned short s6,unsigned short s5,unsigned short s4,unsigned short s3,unsigned short s2,unsigned short s1,unsigned short s0)
	420	{
	421	_MM_8UW(0,vec) = s0;
	422	_MM_8UW(1,vec) = s1;
	423	_MM_8UW(2,vec) = s2;
	424	_MM_8UW(3,vec) = s3;
	425	_MM_8UW(4,vec) = s4;
	426	_MM_8UW(5,vec) = s5;
	427	_MM_8UW(6,vec) = s6;
	428	_MM_8UW(7,vec) = s7;
	429	}
	430
	431	Iu16vec8& operator= (const M128 &a) { return *this = (Iu16vec8) a; }
	432
	433	Iu16vec8& operator&=(const M128 &a) { return *this = (Iu16vec8) _mm_and_si128(vec,a); }
	434	Iu16vec8& operator\|=(const M128 &a) { return *this = (Iu16vec8) _mm_or_si128(vec,a); }
	435	Iu16vec8& operator^=(const M128 &a) { return *this = (Iu16vec8) _mm_xor_si128(vec,a); }
	436
	437	Iu16vec8& operator +=(const I16vec8 &a) { return *this = (Iu16vec8) _mm_add_epi16(vec,a); }
	438	Iu16vec8& operator -=(const I16vec8 &a) { return *this = (Iu16vec8) _mm_sub_epi16(vec,a); }
	439	Iu16vec8& operator =(const I16vec8 &a) { return this = (Iu16vec8) _mm_mullo_epi16(vec,a); }
	440
	441	Iu16vec8 operator<<(const M128 &a) { return _mm_sll_epi16(vec,a); }
	442	Iu16vec8 operator<<(int count) { return _mm_slli_epi16(vec,count); }
	443	Iu16vec8& operator<<=(const M128 &a) { return *this = (Iu16vec8)_mm_sll_epi16(vec,a); }
	444	Iu16vec8& operator<<=(int count) { return *this = (Iu16vec8)_mm_slli_epi16(vec,count); }
	445	Iu16vec8 operator>>(const M128 &a) { return _mm_srl_epi16(vec,a); }
	446	Iu16vec8 operator>>(int count) { return _mm_srli_epi16(vec,count); }
	447	Iu16vec8& operator>>=(const M128 &a) { return *this = (Iu16vec8) _mm_srl_epi16(vec,a); }
	448	Iu16vec8& operator>>=(int count) { return *this = (Iu16vec8) _mm_srli_epi16(vec,count); }
	449
	450	#if defined(_ENABLE_VEC_DEBUG)
	451
	452	friend std::ostream& operator << (std::ostream &os,const Iu16vec8 &a)
	453	{
	454	os << "[7]:" << unsigned short(_MM_8UW(7,a))
	455	<< " [6]:" << unsigned short(_MM_8UW(6,a))
	456	<< " [5]:" << unsigned short(_MM_8UW(5,a))
	457	<< " [4]:" << unsigned short(_MM_8UW(4,a))
	458	<< " [3]:" << unsigned short(_MM_8UW(3,a))
	459	<< " [2]:" << unsigned short(_MM_8UW(2,a))
	460	<< " [1]:" << unsigned short(_MM_8UW(1,a))
	461	<< " [0]:" << unsigned short(_MM_8UW(0,a));
	462	return os;
	463	}
	464	#endif
	465
	466	const unsigned short& operator[](int i)const
	467	{
	468	assert(static_cast<unsigned int>(i) < 8);
	469	return _MM_8UW(i,vec);
	470	}
	471
	472	unsigned short& operator[](int i)
	473	{
	474	assert(static_cast<unsigned int>(i) < 8);
	475	return _MM_8UW(i,vec);
	476	}
	477	};
	478
	479	inline Iu16vec8 operator*(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_mullo_epi16(a,b); }
	480
	481	inline Iu16vec8 cmpeq(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_cmpeq_epi16(a,b); }
	482	inline Iu16vec8 cmpneq(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_andnot_si128(_mm_cmpeq_epi16(a,b),get_mask128()); }
	483
	484	inline Iu16vec8 unpack_low(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_unpacklo_epi16(a,b); }
	485	inline Iu16vec8 unpack_high(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_unpackhi_epi16(a,b); }
	486
	487	inline Iu16vec8 sat_add(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_adds_epu16(a,b); }
	488	inline Iu16vec8 sat_sub(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_subs_epu16(a,b); }
	489
	490	inline Iu16vec8 simd_avg(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_avg_epu16(a,b); }
	491	inline I16vec8 mul_high(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_mulhi_epu16(a,b); }
	492
	493	class I8vec16 : public M128
	494	{
	495	public:
	496	I8vec16() { }
	497	I8vec16(__m128i mm) : M128(mm) { }
	498
	499	I8vec16& operator= (const M128 &a) { return *this = (I8vec16) a; }
	500
	501	I8vec16& operator&=(const M128 &a) { return *this = (I8vec16) _mm_and_si128(vec,a); }
	502	I8vec16& operator\|=(const M128 &a) { return *this = (I8vec16) _mm_or_si128(vec,a); }
	503	I8vec16& operator^=(const M128 &a) { return *this = (I8vec16) _mm_xor_si128(vec,a); }
	504
	505	I8vec16& operator +=(const I8vec16 &a) { return *this = (I8vec16) _mm_add_epi8(vec,a); }
	506	I8vec16& operator -=(const I8vec16 &a) { return *this = (I8vec16) _mm_sub_epi8(vec,a); }
	507
	508	};
	509
	510	inline I8vec16 cmpeq(const I8vec16 &a,const I8vec16 &b) { return _mm_cmpeq_epi8(a,b); }
	511	inline I8vec16 cmpneq(const I8vec16 &a,const I8vec16 &b) { return _mm_andnot_si128(_mm_cmpeq_epi8(a,b),get_mask128()); }
	512
	513	inline I8vec16 unpack_low(const I8vec16 &a,const I8vec16 &b) { return _mm_unpacklo_epi8(a,b); }
	514	inline I8vec16 unpack_high(const I8vec16 &a,const I8vec16 &b) { return _mm_unpackhi_epi8(a,b); }
	515
	516	class Is8vec16 : public I8vec16
	517	{
	518	public:
	519	Is8vec16() { }
	520	Is8vec16(__m128i mm) : I8vec16(mm) { }
	521
	522	Is8vec16& operator= (const M128 &a) { return *this = (Is8vec16) a; }
	523
	524	Is8vec16& operator&=(const M128 &a) { return *this = (Is8vec16) _mm_and_si128(vec,a); }
	525	Is8vec16& operator\|=(const M128 &a) { return *this = (Is8vec16) _mm_or_si128(vec,a); }
	526	Is8vec16& operator^=(const M128 &a) { return *this = (Is8vec16) _mm_xor_si128(vec,a); }
	527
	528	Is8vec16& operator +=(const I8vec16 &a) { return *this = (Is8vec16) _mm_add_epi8(vec,a); }
	529	Is8vec16& operator -=(const I8vec16 &a) { return *this = (Is8vec16) _mm_sub_epi8(vec,a); }
	530
	531	#if defined(_ENABLE_VEC_DEBUG)
	532
	533	friend std::ostream& operator << (std::ostream &os,const Is8vec16 &a)
	534	{
	535	os << "[15]:" << short(_MM_16B(15,a))
	536	<< " [14]:" << short(_MM_16B(14,a))
	537	<< " [13]:" << short(_MM_16B(13,a))
	538	<< " [12]:" << short(_MM_16B(12,a))
	539	<< " [11]:" << short(_MM_16B(11,a))
	540	<< " [10]:" << short(_MM_16B(10,a))
	541	<< " [9]:" << short(_MM_16B(9,a))
	542	<< " [8]:" << short(_MM_16B(8,a))
	543	<< " [7]:" << short(_MM_16B(7,a))
	544	<< " [6]:" << short(_MM_16B(6,a))
	545	<< " [5]:" << short(_MM_16B(5,a))
	546	<< " [4]:" << short(_MM_16B(4,a))
	547	<< " [3]:" << short(_MM_16B(3,a))
	548	<< " [2]:" << short(_MM_16B(2,a))
	549	<< " [1]:" << short(_MM_16B(1,a))
	550	<< " [0]:" << short(_MM_16B(0,a));
	551	return os;
	552	}
	553	#endif
	554
	555	const signed char& operator[](int i)const
	556	{
	557	assert(static_cast<unsigned int>(i) < 16);
	558	return _MM_16B(i,vec);
	559	}
	560
	561	signed char& operator[](int i)
	562	{
	563	assert(static_cast<unsigned int>(i) < 16);
	564	return _MM_16B(i,vec);
	565	}
	566
	567	};
	568
	569	inline Is8vec16 cmpeq(const Is8vec16 &a,const Is8vec16 &b) { return _mm_cmpeq_epi8(a,b); }
	570	inline Is8vec16 cmpneq(const Is8vec16 &a,const Is8vec16 &b) { return _mm_andnot_si128(_mm_cmpeq_epi8(a,b),get_mask128()); }
	571	inline Is8vec16 cmpgt(const Is8vec16 &a,const Is8vec16 &b) { return _mm_cmpgt_epi8(a,b); }
	572	inline Is8vec16 cmplt(const Is8vec16 &a,const Is8vec16 &b) { return _mm_cmplt_epi8(a,b); }
	573
	574	inline Is8vec16 unpack_low(const Is8vec16 &a,const Is8vec16 &b) { return _mm_unpacklo_epi8(a,b); }
	575	inline Is8vec16 unpack_high(const Is8vec16 &a,const Is8vec16 &b) { return _mm_unpackhi_epi8(a,b); }
	576
	577	inline Is8vec16 sat_add(const Is8vec16 &a,const Is8vec16 &b) { return _mm_adds_epi8(a,b); }
	578	inline Is8vec16 sat_sub(const Is8vec16 &a,const Is8vec16 &b) { return _mm_subs_epi8(a,b); }
	579
	580	class Iu8vec16 : public I8vec16
	581	{
	582	public:
	583	Iu8vec16() { }
	584	Iu8vec16(__m128i mm) : I8vec16(mm) { }
	585
	586	Iu8vec16& operator= (const M128 &a) { return *this = (Iu8vec16) a; }
	587
	588	Iu8vec16& operator&=(const M128 &a) { return *this = (Iu8vec16) _mm_and_si128(vec,a); }
	589	Iu8vec16& operator\|=(const M128 &a) { return *this = (Iu8vec16) _mm_or_si128(vec,a); }
	590	Iu8vec16& operator^=(const M128 &a) { return *this = (Iu8vec16) _mm_xor_si128(vec,a); }
	591
	592	Iu8vec16& operator +=(const I8vec16 &a) { return *this = (Iu8vec16) _mm_add_epi8(vec,a); }
	593	Iu8vec16& operator -=(const I8vec16 &a) { return *this = (Iu8vec16) _mm_sub_epi8(vec,a); }
	594
	595	#if defined(_ENABLE_VEC_DEBUG)
	596
	597	friend std::ostream& operator << (std::ostream &os,const Iu8vec16 &a)
	598	{
	599	os << "[15]:" << unsigned short(_MM_16UB(15,a))
	600	<< " [14]:" << unsigned short(_MM_16UB(14,a))
	601	<< " [13]:" << unsigned short(_MM_16UB(13,a))
	602	<< " [12]:" << unsigned short(_MM_16UB(12,a))
	603	<< " [11]:" << unsigned short(_MM_16UB(11,a))
	604	<< " [10]:" << unsigned short(_MM_16UB(10,a))
	605	<< " [9]:" << unsigned short(_MM_16UB(9,a))
	606	<< " [8]:" << unsigned short(_MM_16UB(8,a))
	607	<< " [7]:" << unsigned short(_MM_16UB(7,a))
	608	<< " [6]:" << unsigned short(_MM_16UB(6,a))
	609	<< " [5]:" << unsigned short(_MM_16UB(5,a))
	610	<< " [4]:" << unsigned short(_MM_16UB(4,a))
	611	<< " [3]:" << unsigned short(_MM_16UB(3,a))
	612	<< " [2]:" << unsigned short(_MM_16UB(2,a))
	613	<< " [1]:" << unsigned short(_MM_16UB(1,a))
	614	<< " [0]:" << unsigned short(_MM_16UB(0,a));
	615	return os;
	616	}
	617	#endif
	618
	619	const unsigned char& operator[](int i)const
	620	{
	621	assert(static_cast<unsigned int>(i) < 16);
	622	return _MM_16UB(i,vec);
	623	}
	624
	625	unsigned char& operator[](int i)
	626	{
	627	assert(static_cast<unsigned int>(i) < 16);
	628	return _MM_16UB(i,vec);
	629	}
	630
	631	};
	632
	633	inline Iu8vec16 cmpeq(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_cmpeq_epi8(a,b); }
	634	inline Iu8vec16 cmpneq(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_andnot_si128(_mm_cmpeq_epi8(a,b),get_mask128()); }
	635
	636	inline Iu8vec16 unpack_low(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_unpacklo_epi8(a,b); }
	637	inline Iu8vec16 unpack_high(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_unpackhi_epi8(a,b); }
	638
	639	inline Iu8vec16 sat_add(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_adds_epu8(a,b); }
	640	inline Iu8vec16 sat_sub(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_subs_epu8(a,b); }
	641
	642	inline I64vec2 sum_abs(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_sad_epu8(a,b); }
	643
	644	inline Iu8vec16 simd_avg(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_avg_epu8(a,b); }
	645	inline Iu8vec16 simd_max(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_max_epu8(a,b); }
	646	inline Iu8vec16 simd_min(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_min_epu8(a,b); }
	647
	648	inline Is16vec8 pack_sat(const Is32vec4 &a,const Is32vec4 &b) { return _mm_packs_epi32(a,b); }
	649	inline Is8vec16 pack_sat(const Is16vec8 &a,const Is16vec8 &b) { return _mm_packs_epi16(a,b); }
	650	inline Iu8vec16 packu_sat(const Is16vec8 &a,const Is16vec8 &b) { return _mm_packus_epi16(a,b);}
	651
	652	#define IVEC128_LOGICALS(vect,element) inline I##vect##vec##element operator& (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_and_si128(a,b); } inline I##vect##vec##element operator\| (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_or_si128(a,b); } inline I##vect##vec##element operator^ (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_xor_si128(a,b); } inline I##vect##vec##element andnot (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_andnot_si128(a,b); }
	653
	654	IVEC128_LOGICALS(8,16)
	655	IVEC128_LOGICALS(u8,16)
	656	IVEC128_LOGICALS(s8,16)
	657	IVEC128_LOGICALS(16,8)
	658	IVEC128_LOGICALS(u16,8)
	659	IVEC128_LOGICALS(s16,8)
	660	IVEC128_LOGICALS(32,4)
	661	IVEC128_LOGICALS(u32,4)
	662	IVEC128_LOGICALS(s32,4)
	663	IVEC128_LOGICALS(64,2)
	664	IVEC128_LOGICALS(128,1)
	665	#undef IVEC128_LOGICALS
	666
	667	#define IVEC128_ADD_SUB(vect,element,opsize) inline I##vect##vec##element operator+ (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_add_##opsize(a,b); } inline I##vect##vec##element operator- (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_sub_##opsize(a,b); }
	668
	669	IVEC128_ADD_SUB(8,16,epi8)
	670	IVEC128_ADD_SUB(u8,16,epi8)
	671	IVEC128_ADD_SUB(s8,16,epi8)
	672	IVEC128_ADD_SUB(16,8,epi16)
	673	IVEC128_ADD_SUB(u16,8,epi16)
	674	IVEC128_ADD_SUB(s16,8,epi16)
	675	IVEC128_ADD_SUB(32,4,epi32)
	676	IVEC128_ADD_SUB(u32,4,epi32)
	677	IVEC128_ADD_SUB(s32,4,epi32)
	678	IVEC128_ADD_SUB(64,2,epi64)
	679	#undef IVEC128_ADD_SUB
	680
	681	#define IVEC128_SELECT(vect12,vect34,element,selop,arg1,arg2) inline I##vect34##vec##element select_##selop (const I##vect12##vec##element &a,const I##vect12##vec##element &b,const I##vect34##vec##element &c,const I##vect34##vec##element &d) { I##vect12##vec##element mask = cmp##selop(a,b); return(I##vect34##vec##element ((mask & arg1) \| I##vect12##vec##element ((_mm_andnot_si128(mask,arg2))))); }
	682	IVEC128_SELECT(8,s8,16,eq,c,d)
	683	IVEC128_SELECT(8,u8,16,eq,c,d)
	684	IVEC128_SELECT(8,8,16,eq,c,d)
	685	IVEC128_SELECT(8,s8,16,neq,c,d)
	686	IVEC128_SELECT(8,u8,16,neq,c,d)
	687	IVEC128_SELECT(8,8,16,neq,c,d)
	688
	689	IVEC128_SELECT(16,s16,8,eq,c,d)
	690	IVEC128_SELECT(16,u16,8,eq,c,d)
	691	IVEC128_SELECT(16,16,8,eq,c,d)
	692	IVEC128_SELECT(16,s16,8,neq,c,d)
	693	IVEC128_SELECT(16,u16,8,neq,c,d)
	694	IVEC128_SELECT(16,16,8,neq,c,d)
	695
	696	IVEC128_SELECT(32,s32,4,eq,c,d)
	697	IVEC128_SELECT(32,u32,4,eq,c,d)
	698	IVEC128_SELECT(32,32,4,eq,c,d)
	699	IVEC128_SELECT(32,s32,4,neq,c,d)
	700	IVEC128_SELECT(32,u32,4,neq,c,d)
	701	IVEC128_SELECT(32,32,4,neq,c,d)
	702
	703	IVEC128_SELECT(s8,s8,16,gt,c,d)
	704	IVEC128_SELECT(s8,u8,16,gt,c,d)
	705	IVEC128_SELECT(s8,8,16,gt,c,d)
	706	IVEC128_SELECT(s8,s8,16,lt,c,d)
	707	IVEC128_SELECT(s8,u8,16,lt,c,d)
	708	IVEC128_SELECT(s8,8,16,lt,c,d)
	709
	710	IVEC128_SELECT(s16,s16,8,gt,c,d)
	711	IVEC128_SELECT(s16,u16,8,gt,c,d)
	712	IVEC128_SELECT(s16,16,8,gt,c,d)
	713	IVEC128_SELECT(s16,s16,8,lt,c,d)
	714	IVEC128_SELECT(s16,u16,8,lt,c,d)
	715	IVEC128_SELECT(s16,16,8,lt,c,d)
	716
	717	#undef IVEC128_SELECT
	718
	719	class F64vec2
	720	{
	721	protected:
	722	__m128d vec;
	723	public:
	724
	725	F64vec2() {}
	726
	727	F64vec2(__m128d m) { vec = m;}
	728
	729	F64vec2(double d1,double d0) { vec= _mm_set_pd(d1,d0); }
	730
	731	EXPLICIT F64vec2(double d) { vec = _mm_set1_pd(d); }
	732
	733	operator __m128d() const { return vec; }
	734
	735	friend F64vec2 operator &(const F64vec2 &a,const F64vec2 &b) { return _mm_and_pd(a,b); }
	736	friend F64vec2 operator \|(const F64vec2 &a,const F64vec2 &b) { return _mm_or_pd(a,b); }
	737	friend F64vec2 operator ^(const F64vec2 &a,const F64vec2 &b) { return _mm_xor_pd(a,b); }
	738
	739	friend F64vec2 operator +(const F64vec2 &a,const F64vec2 &b) { return _mm_add_pd(a,b); }
	740	friend F64vec2 operator -(const F64vec2 &a,const F64vec2 &b) { return _mm_sub_pd(a,b); }
	741	friend F64vec2 operator *(const F64vec2 &a,const F64vec2 &b) { return _mm_mul_pd(a,b); }
	742	friend F64vec2 operator /(const F64vec2 &a,const F64vec2 &b) { return _mm_div_pd(a,b); }
	743
	744	F64vec2& operator +=(F64vec2 &a) { return *this = _mm_add_pd(vec,a); }
	745	F64vec2& operator -=(F64vec2 &a) { return *this = _mm_sub_pd(vec,a); }
	746	F64vec2& operator =(F64vec2 &a) { return this = _mm_mul_pd(vec,a); }
	747	F64vec2& operator /=(F64vec2 &a) { return *this = _mm_div_pd(vec,a); }
	748	F64vec2& operator &=(F64vec2 &a) { return *this = _mm_and_pd(vec,a); }
	749	F64vec2& operator \|=(F64vec2 &a) { return *this = _mm_or_pd(vec,a); }
	750	F64vec2& operator ^=(F64vec2 &a) { return *this = _mm_xor_pd(vec,a); }
	751
	752	friend double add_horizontal(F64vec2 &a)
	753	{
	754	F64vec2 ftemp = _mm_add_sd(a,_mm_shuffle_pd(a,a,1));
	755	return ftemp[0];
	756	}
	757
	758	friend F64vec2 andnot(const F64vec2 &a,const F64vec2 &b) { return _mm_andnot_pd(a,b); }
	759
	760	friend F64vec2 sqrt(const F64vec2 &a) { return _mm_sqrt_pd(a); }
	761
	762	#define F64vec2_COMP(op) friend F64vec2 cmp##op (const F64vec2 &a,const F64vec2 &b) { return _mm_cmp##op##_pd(a,b); }
	763	F64vec2_COMP(eq)
	764	F64vec2_COMP(lt)
	765	F64vec2_COMP(le)
	766	F64vec2_COMP(gt)
	767	F64vec2_COMP(ge)
	768	F64vec2_COMP(ngt)
	769	F64vec2_COMP(nge)
	770	F64vec2_COMP(neq)
	771	F64vec2_COMP(nlt)
	772	F64vec2_COMP(nle)
	773	#undef F64vec2_COMP
	774
	775	friend F64vec2 simd_min(const F64vec2 &a,const F64vec2 &b) { return _mm_min_pd(a,b); }
	776	friend F64vec2 simd_max(const F64vec2 &a,const F64vec2 &b) { return _mm_max_pd(a,b); }
	777
	778	#define F64vec2_COMI(op) friend int comi##op (const F64vec2 &a,const F64vec2 &b) { return _mm_comi##op##_sd(a,b); }
	779	F64vec2_COMI(eq)
	780	F64vec2_COMI(lt)
	781	F64vec2_COMI(le)
	782	F64vec2_COMI(gt)
	783	F64vec2_COMI(ge)
	784	F64vec2_COMI(neq)
	785	#undef F64vec2_COMI
	786
	787	#define F64vec2_UCOMI(op) friend int ucomi##op (const F64vec2 &a,const F64vec2 &b) { return _mm_ucomi##op##_sd(a,b); }
	788	F64vec2_UCOMI(eq)
	789	F64vec2_UCOMI(lt)
	790	F64vec2_UCOMI(le)
	791	F64vec2_UCOMI(gt)
	792	F64vec2_UCOMI(ge)
	793	F64vec2_UCOMI(neq)
	794	#undef F64vec2_UCOMI
	795
	796	#if defined(_ENABLE_VEC_DEBUG)
	797
	798	friend std::ostream & operator<<(std::ostream & os,const F64vec2 &a) {
	799	double dp = (double)&a;
	800	os << " [1]:" << *(dp+1)
	801	<< " [0]:" << *dp;
	802	return os;
	803	}
	804	#endif
	805
	806	const double &operator[](int i) const {
	807	assert((0 <= i) && (i <= 1));
	808	double dp = (double)&vec;
	809	return *(dp+i);
	810	}
	811
	812	double &operator[](int i) {
	813	assert((0 <= i) && (i <= 1));
	814	double dp = (double)&vec;
	815	return *(dp+i);
	816	}
	817	};
	818
	819	inline F64vec2 unpack_low(const F64vec2 &a,const F64vec2 &b) { return _mm_unpacklo_pd(a,b); }
	820	inline F64vec2 unpack_high(const F64vec2 &a,const F64vec2 &b) { return _mm_unpackhi_pd(a,b); }
	821	inline int move_mask(const F64vec2 &a) { return _mm_movemask_pd(a); }
	822	inline void loadu(F64vec2 &a,double *p) { a = _mm_loadu_pd(p); }
	823	inline void storeu(double *p,const F64vec2 &a) { _mm_storeu_pd(p,a); }
	824	inline void store_nta(double *p,F64vec2 &a) { _mm_stream_pd(p,a); }
	825
	826	#define F64vec2_SELECT(op) inline F64vec2 select_##op (const F64vec2 &a,const F64vec2 &b,const F64vec2 &c,const F64vec2 &d) { F64vec2 mask = _mm_cmp##op##_pd(a,b); return((mask & c) \| F64vec2((_mm_andnot_pd(mask,d)))); }
	827	F64vec2_SELECT(eq)
	828	F64vec2_SELECT(lt)
	829	F64vec2_SELECT(le)
	830	F64vec2_SELECT(gt)
	831	F64vec2_SELECT(ge)
	832	F64vec2_SELECT(neq)
	833	F64vec2_SELECT(nlt)
	834	F64vec2_SELECT(nle)
	835	#undef F64vec2_SELECT
	836
	837	inline int F64vec2ToInt(const F64vec2 &a) { return _mm_cvttsd_si32(a); }
	838	inline F64vec2 F32vec4ToF64vec2(const F32vec4 &a) { return _mm_cvtps_pd(a); }
	839	inline F32vec4 F64vec2ToF32vec4(const F64vec2 &a) { return _mm_cvtpd_ps(a); }
	840	inline F64vec2 IntToF64vec2(const F64vec2 &a,int b) { return _mm_cvtsi32_sd(a,b); }
	841
	842	#pragma pack(pop)
	843
	844	#endif /* ifdef __SSE__ */
	845
	846	#pragma pack(pop)
	847	#endif
	848	#endif

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: Daodan/MSYS2/mingw32/i686-w64-mingw32/include/dvec.h@ 1194

Download in other formats: