Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log
Repository URL

fvec.h@ 1181

Last change on this file since 1181 was 1166, checked in by rossy, 3 years ago
Daodan: Replace MinGW build env with an up-to-date MSYS2 env
File size: 9.7 KB

Rev	Line
[1166]	1	/**
	2	* This file has no copyright assigned and is placed in the Public Domain.
	3	* This file is part of the mingw-w64 runtime package.
	4	* No warranty is given; refer to the file DISCLAIMER.PD within this package.
	5	*/
	6	#ifndef _FVEC_H_INCLUDED
	7	#define _FVEC_H_INCLUDED
	8
	9	#ifndef RC_INVOKED
	10	#ifndef __cplusplus
	11	#error ERROR: This file is only supported in C++ compilations!
	12	#endif
	13
	14	#include <intrin.h>
	15	#include <assert.h>
	16	#include <crtdefs.h>
	17
	18	#if defined(_ENABLE_VEC_DEBUG)
	19	#include <iostream>
	20	#endif
	21
	22	#pragma pack(push,_CRT_PACKING)
	23
	24	#ifdef __SSE__
	25
	26	#pragma pack(push,16)
	27
	28	#define EXPLICIT explicit
	29
	30	class F32vec4 {
	31	protected:
	32	__m128 vec;
	33	public:
	34	F32vec4() {}
	35	F32vec4(__m128 m) { vec = m;}
	36	F32vec4(float f3,float f2,float f1,float f0) { vec= _mm_set_ps(f3,f2,f1,f0); }
	37	EXPLICIT F32vec4(float f) { vec = _mm_set_ps1(f); }
	38	EXPLICIT F32vec4(double d) { vec = _mm_set_ps1((float) d); }
	39	F32vec4& operator =(float f) { vec = _mm_set_ps1(f); return *this; }
	40	F32vec4& operator =(double d) { vec = _mm_set_ps1((float) d); return *this; }
	41	operator __m128() const { return vec; }
	42	friend F32vec4 operator &(const F32vec4 &a,const F32vec4 &b) { return _mm_and_ps(a,b); }
	43	friend F32vec4 operator \|(const F32vec4 &a,const F32vec4 &b) { return _mm_or_ps(a,b); }
	44	friend F32vec4 operator ^(const F32vec4 &a,const F32vec4 &b) { return _mm_xor_ps(a,b); }
	45	friend F32vec4 operator +(const F32vec4 &a,const F32vec4 &b) { return _mm_add_ps(a,b); }
	46	friend F32vec4 operator -(const F32vec4 &a,const F32vec4 &b) { return _mm_sub_ps(a,b); }
	47	friend F32vec4 operator *(const F32vec4 &a,const F32vec4 &b) { return _mm_mul_ps(a,b); }
	48	friend F32vec4 operator /(const F32vec4 &a,const F32vec4 &b) { return _mm_div_ps(a,b); }
	49	F32vec4& operator =(const F32vec4 &a) { vec = a.vec; return *this; }
	50	F32vec4& operator =(const __m128 &avec) { vec = avec; return *this; }
	51	F32vec4& operator +=(F32vec4 &a) { return *this = _mm_add_ps(vec,a); }
	52	F32vec4& operator -=(F32vec4 &a) { return *this = _mm_sub_ps(vec,a); }
	53	F32vec4& operator =(F32vec4 &a) { return this = _mm_mul_ps(vec,a); }
	54	F32vec4& operator /=(F32vec4 &a) { return *this = _mm_div_ps(vec,a); }
	55	F32vec4& operator &=(F32vec4 &a) { return *this = _mm_and_ps(vec,a); }
	56	F32vec4& operator \|=(F32vec4 &a) { return *this = _mm_or_ps(vec,a); }
	57	F32vec4& operator ^=(F32vec4 &a) { return *this = _mm_xor_ps(vec,a); }
	58	friend float add_horizontal(F32vec4 &a) {
	59	F32vec4 ftemp = _mm_add_ss(a,_mm_add_ss(_mm_shuffle_ps(a,a,1),_mm_add_ss(_mm_shuffle_ps(a,a,2),_mm_shuffle_ps(a,a,3))));
	60	return ftemp[0];
	61	}
	62	friend F32vec4 sqrt(const F32vec4 &a) { return _mm_sqrt_ps(a); }
	63	friend F32vec4 rcp(const F32vec4 &a) { return _mm_rcp_ps(a); }
	64	friend F32vec4 rsqrt(const F32vec4 &a) { return _mm_rsqrt_ps(a); }
	65	friend F32vec4 rcp_nr(const F32vec4 &a) {
	66	F32vec4 Ra0 = _mm_rcp_ps(a);
	67	return _mm_sub_ps(_mm_add_ps(Ra0,Ra0),_mm_mul_ps(_mm_mul_ps(Ra0,a),Ra0));
	68	}
	69	friend F32vec4 rsqrt_nr(const F32vec4 &a) {
	70	static const F32vec4 fvecf0pt5(0.5f);
	71	static const F32vec4 fvecf3pt0(3.0f);
	72	F32vec4 Ra0 = _mm_rsqrt_ps(a);
	73	return (fvecf0pt5 Ra0) (fvecf3pt0 - (a Ra0) Ra0);
	74
	75	}
	76	#define Fvec32s4_COMP(op) friend F32vec4 cmp##op (const F32vec4 &a,const F32vec4 &b) { return _mm_cmp##op##_ps(a,b); }
	77	Fvec32s4_COMP(eq)
	78	Fvec32s4_COMP(lt)
	79	Fvec32s4_COMP(le)
	80	Fvec32s4_COMP(gt)
	81	Fvec32s4_COMP(ge)
	82	Fvec32s4_COMP(neq)
	83	Fvec32s4_COMP(nlt)
	84	Fvec32s4_COMP(nle)
	85	Fvec32s4_COMP(ngt)
	86	Fvec32s4_COMP(nge)
	87	#undef Fvec32s4_COMP
	88
	89	friend F32vec4 simd_min(const F32vec4 &a,const F32vec4 &b) { return _mm_min_ps(a,b); }
	90	friend F32vec4 simd_max(const F32vec4 &a,const F32vec4 &b) { return _mm_max_ps(a,b); }
	91
	92	#if defined(_ENABLE_VEC_DEBUG)
	93	friend std::ostream & operator<<(std::ostream & os,const F32vec4 &a) {
	94	float fp = (float)&a;
	95	os << "[3]:" << *(fp+3)
	96	<< " [2]:" << *(fp+2)
	97	<< " [1]:" << *(fp+1)
	98	<< " [0]:" << *fp;
	99	return os;
	100	}
	101	#endif
	102	const float& operator[](int i) const {
	103	assert((0 <= i) && (i <= 3));
	104	float fp = (float)&vec;
	105	return *(fp+i);
	106	}
	107	float& operator[](int i) {
	108	assert((0 <= i) && (i <= 3));
	109	float fp = (float)&vec;
	110	return *(fp+i);
	111	}
	112	};
	113
	114	inline F32vec4 unpack_low(const F32vec4 &a,const F32vec4 &b) { return _mm_unpacklo_ps(a,b); }
	115	inline F32vec4 unpack_high(const F32vec4 &a,const F32vec4 &b) { return _mm_unpackhi_ps(a,b); }
	116	inline int move_mask(const F32vec4 &a) { return _mm_movemask_ps(a); }
	117	inline void loadu(F32vec4 &a,float *p) { a = _mm_loadu_ps(p); }
	118	inline void storeu(float *p,const F32vec4 &a) { _mm_storeu_ps(p,a); }
	119	inline void store_nta(float *p,F32vec4 &a) { _mm_stream_ps(p,a); }
	120
	121	#define Fvec32s4_SELECT(op) inline F32vec4 select_##op (const F32vec4 &a,const F32vec4 &b,const F32vec4 &c,const F32vec4 &d) { F32vec4 mask = _mm_cmp##op##_ps(a,b); return((mask & c) \| F32vec4((_mm_andnot_ps(mask,d)))); }
	122	Fvec32s4_SELECT(eq)
	123	Fvec32s4_SELECT(lt)
	124	Fvec32s4_SELECT(le)
	125	Fvec32s4_SELECT(gt)
	126	Fvec32s4_SELECT(ge)
	127	Fvec32s4_SELECT(neq)
	128	Fvec32s4_SELECT(nlt)
	129	Fvec32s4_SELECT(nle)
	130	Fvec32s4_SELECT(ngt)
	131	Fvec32s4_SELECT(nge)
	132	#undef Fvec32s4_SELECT
	133
	134	#if 0 /* Commented until required types are defined */
	135	inline Is16vec4 simd_max(const Is16vec4 &a,const Is16vec4 &b) { return _m_pmaxsw(a,b); }
	136	inline Is16vec4 simd_min(const Is16vec4 &a,const Is16vec4 &b) { return _m_pminsw(a,b); }
	137	inline Iu8vec8 simd_max(const Iu8vec8 &a,const Iu8vec8 &b) { return _m_pmaxub(a,b); }
	138	inline Iu8vec8 simd_min(const Iu8vec8 &a,const Iu8vec8 &b) { return _m_pminub(a,b); }
	139	inline Iu16vec4 simd_avg(const Iu16vec4 &a,const Iu16vec4 &b) { return _m_pavgw(a,b); }
	140	inline Iu8vec8 simd_avg(const Iu8vec8 &a,const Iu8vec8 &b) { return _m_pavgb(a,b); }
	141	inline int move_mask(const I8vec8 &a) { return _m_pmovmskb(a); }
	142	inline Iu16vec4 mul_high(const Iu16vec4 &a,const Iu16vec4 &b) { return _m_pmulhuw(a,b); }
	143	inline void mask_move(const I8vec8 &a,const I8vec8 &b,char *addr) { _m_maskmovq(a,b,addr); }
	144	inline void store_nta(__m64 *p,M64 &a) { _mm_stream_pi(p,a); }
	145	inline int F32vec4ToInt(const F32vec4 &a) { return _mm_cvtt_ss2si(a); }
	146	inline Is32vec2 F32vec4ToIs32vec2 (const F32vec4 &a) {
	147	__m64 result;
	148	result = _mm_cvtt_ps2pi(a);
	149	return Is32vec2(result);
	150	}
	151	#endif
	152
	153	inline F32vec4 IntToF32vec4(const F32vec4 &a,int i) {
	154	__m128 result;
	155	result = _mm_cvt_si2ss(a,i);
	156	return F32vec4(result);
	157	}
	158
	159	#if 0 /* Commented until required types are defined */
	160	inline F32vec4 Is32vec2ToF32vec4(const F32vec4 &a,const Is32vec2 &b) {
	161	__m128 result;
	162	result = _mm_cvt_pi2ps(a,b);
	163	return F32vec4(result);
	164	}
	165	#endif
	166
	167	class F32vec1 {
	168	protected:
	169	__m128 vec;
	170	public:
	171	F32vec1() {}
	172	F32vec1(int i) { vec = _mm_cvt_si2ss(vec,i);};
	173	EXPLICIT F32vec1(float f) { vec = _mm_set_ss(f); }
	174	EXPLICIT F32vec1(double d) { vec = _mm_set_ss((float) d); }
	175	F32vec1(__m128 m) { vec = m; }
	176	operator __m128() const { return vec; }
	177	friend F32vec1 operator &(const F32vec1 &a,const F32vec1 &b) { return _mm_and_ps(a,b); }
	178	friend F32vec1 operator \|(const F32vec1 &a,const F32vec1 &b) { return _mm_or_ps(a,b); }
	179	friend F32vec1 operator ^(const F32vec1 &a,const F32vec1 &b) { return _mm_xor_ps(a,b); }
	180	friend F32vec1 operator +(const F32vec1 &a,const F32vec1 &b) { return _mm_add_ss(a,b); }
	181	friend F32vec1 operator -(const F32vec1 &a,const F32vec1 &b) { return _mm_sub_ss(a,b); }
	182	friend F32vec1 operator *(const F32vec1 &a,const F32vec1 &b) { return _mm_mul_ss(a,b); }
	183	friend F32vec1 operator /(const F32vec1 &a,const F32vec1 &b) { return _mm_div_ss(a,b); }
	184	F32vec1& operator +=(F32vec1 &a) { return *this = _mm_add_ss(vec,a); }
	185	F32vec1& operator -=(F32vec1 &a) { return *this = _mm_sub_ss(vec,a); }
	186	F32vec1& operator =(F32vec1 &a) { return this = _mm_mul_ss(vec,a); }
	187	F32vec1& operator /=(F32vec1 &a) { return *this = _mm_div_ss(vec,a); }
	188	F32vec1& operator &=(F32vec1 &a) { return *this = _mm_and_ps(vec,a); }
	189	F32vec1& operator \|=(F32vec1 &a) { return *this = _mm_or_ps(vec,a); }
	190	F32vec1& operator ^=(F32vec1 &a) { return *this = _mm_xor_ps(vec,a); }
	191	friend F32vec1 sqrt(const F32vec1 &a) { return _mm_sqrt_ss(a); }
	192	friend F32vec1 rcp(const F32vec1 &a) { return _mm_rcp_ss(a); }
	193	friend F32vec1 rsqrt(const F32vec1 &a) { return _mm_rsqrt_ss(a); }
	194	friend F32vec1 rcp_nr(const F32vec1 &a) {
	195	F32vec1 Ra0 = _mm_rcp_ss(a);
	196	return _mm_sub_ss(_mm_add_ss(Ra0,Ra0),_mm_mul_ss(_mm_mul_ss(Ra0,a),Ra0));
	197	}
	198	friend F32vec1 rsqrt_nr(const F32vec1 &a) {
	199	static const F32vec1 fvecf0pt5(0.5f);
	200	static const F32vec1 fvecf3pt0(3.0f);
	201	F32vec1 Ra0 = _mm_rsqrt_ss(a);
	202	return (fvecf0pt5 Ra0) (fvecf3pt0 - (a Ra0) Ra0);
	203	}
	204	#define Fvec32s1_COMP(op) friend F32vec1 cmp##op (const F32vec1 &a,const F32vec1 &b) { return _mm_cmp##op##_ss(a,b); }
	205	Fvec32s1_COMP(eq)
	206	Fvec32s1_COMP(lt)
	207	Fvec32s1_COMP(le)
	208	Fvec32s1_COMP(gt)
	209	Fvec32s1_COMP(ge)
	210	Fvec32s1_COMP(neq)
	211	Fvec32s1_COMP(nlt)
	212	Fvec32s1_COMP(nle)
	213	Fvec32s1_COMP(ngt)
	214	Fvec32s1_COMP(nge)
	215	#undef Fvec32s1_COMP
	216
	217	friend F32vec1 simd_min(const F32vec1 &a,const F32vec1 &b) { return _mm_min_ss(a,b); }
	218	friend F32vec1 simd_max(const F32vec1 &a,const F32vec1 &b) { return _mm_max_ss(a,b); }
	219
	220	#if defined(_ENABLE_VEC_DEBUG)
	221	friend std::ostream & operator<<(std::ostream & os,const F32vec1 &a) {
	222	float fp = (float)&a;
	223	os << "float:" << *fp;
	224	return os;
	225	}
	226	#endif
	227	};
	228
	229	#define Fvec32s1_SELECT(op) inline F32vec1 select_##op (const F32vec1 &a,const F32vec1 &b,const F32vec1 &c,const F32vec1 &d) { F32vec1 mask = _mm_cmp##op##_ss(a,b); return((mask & c) \| F32vec1((_mm_andnot_ps(mask,d)))); }
	230	Fvec32s1_SELECT(eq)
	231	Fvec32s1_SELECT(lt)
	232	Fvec32s1_SELECT(le)
	233	Fvec32s1_SELECT(gt)
	234	Fvec32s1_SELECT(ge)
	235	Fvec32s1_SELECT(neq)
	236	Fvec32s1_SELECT(nlt)
	237	Fvec32s1_SELECT(nle)
	238	Fvec32s1_SELECT(ngt)
	239	Fvec32s1_SELECT(nge)
	240	#undef Fvec32s1_SELECT
	241
	242	inline int F32vec1ToInt(const F32vec1 &a)
	243	{
	244	return _mm_cvtt_ss2si(a);
	245	}
	246
	247	#pragma pack(pop)
	248
	249	#endif /* #ifdef __SSE__ */
	250	#pragma pack(pop)
	251
	252	#include <ivec.h>
	253
	254	#endif
	255	#endif

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: Daodan/MSYS2/mingw32/i686-w64-mingw32/include/fvec.h@ 1181

Download in other formats: