Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include-fixed/README
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include-fixed/README	(revision 1046)
+++ 	(revision )
@@ -1,14 +1,0 @@
-This README file is copied into the directory for GCC-only header files
-when fixincludes is run by the makefile for GCC.
-
-Many of the files in this directory were automatically edited from the
-standard system header files by the fixincludes process.  They are
-system-specific, and will not work on any other kind of system.  They
-are also not part of GCC.  The reason we have to do this is because
-GCC requires ANSI C headers and many vendors supply ANSI-incompatible
-headers.
-
-Because this is an automated process, sometimes headers get "fixed"
-that do not, strictly speaking, need a fix.  As long as nothing is broken
-by the process, it is just an unfortunate collateral inconvenience.
-We would like to rectify it, if it is not "too inconvenient".
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include-fixed/limits.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include-fixed/limits.h	(revision 1046)
+++ 	(revision )
@@ -1,171 +1,0 @@
-/* Copyright (C) 1992-2015 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 3, or (at your option) any later
-version.
-
-GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-/* This administrivia gets added to the beginning of limits.h
-   if the system has its own version of limits.h.  */
-
-/* We use _GCC_LIMITS_H_ because we want this not to match
-   any macros that the system's limits.h uses for its own purposes.  */
-#ifndef _GCC_LIMITS_H_  /* Terminated in limity.h.  */
-#define _GCC_LIMITS_H_
-
-#ifndef _LIBC_LIMITS_H_
-/* Use "..." so that we find syslimits.h only in this same directory.  */
-#include "syslimits.h"
-#endif
-/* Copyright (C) 1991-2015 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 3, or (at your option) any later
-version.
-
-GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-#ifndef _LIMITS_H___
-#define _LIMITS_H___
-
-/* Number of bits in a `char'.  */
-#undef CHAR_BIT
-#define CHAR_BIT __CHAR_BIT__
-
-/* Maximum length of a multibyte character.  */
-#ifndef MB_LEN_MAX
-#define MB_LEN_MAX 1
-#endif
-
-/* Minimum and maximum values a `signed char' can hold.  */
-#undef SCHAR_MIN
-#define SCHAR_MIN (-SCHAR_MAX - 1)
-#undef SCHAR_MAX
-#define SCHAR_MAX __SCHAR_MAX__
-
-/* Maximum value an `unsigned char' can hold.  (Minimum is 0).  */
-#undef UCHAR_MAX
-#if __SCHAR_MAX__ == __INT_MAX__
-# define UCHAR_MAX (SCHAR_MAX * 2U + 1U)
-#else
-# define UCHAR_MAX (SCHAR_MAX * 2 + 1)
-#endif
-
-/* Minimum and maximum values a `char' can hold.  */
-#ifdef __CHAR_UNSIGNED__
-# undef CHAR_MIN
-# if __SCHAR_MAX__ == __INT_MAX__
-#  define CHAR_MIN 0U
-# else
-#  define CHAR_MIN 0
-# endif
-# undef CHAR_MAX
-# define CHAR_MAX UCHAR_MAX
-#else
-# undef CHAR_MIN
-# define CHAR_MIN SCHAR_MIN
-# undef CHAR_MAX
-# define CHAR_MAX SCHAR_MAX
-#endif
-
-/* Minimum and maximum values a `signed short int' can hold.  */
-#undef SHRT_MIN
-#define SHRT_MIN (-SHRT_MAX - 1)
-#undef SHRT_MAX
-#define SHRT_MAX __SHRT_MAX__
-
-/* Maximum value an `unsigned short int' can hold.  (Minimum is 0).  */
-#undef USHRT_MAX
-#if __SHRT_MAX__ == __INT_MAX__
-# define USHRT_MAX (SHRT_MAX * 2U + 1U)
-#else
-# define USHRT_MAX (SHRT_MAX * 2 + 1)
-#endif
-
-/* Minimum and maximum values a `signed int' can hold.  */
-#undef INT_MIN
-#define INT_MIN (-INT_MAX - 1)
-#undef INT_MAX
-#define INT_MAX __INT_MAX__
-
-/* Maximum value an `unsigned int' can hold.  (Minimum is 0).  */
-#undef UINT_MAX
-#define UINT_MAX (INT_MAX * 2U + 1U)
-
-/* Minimum and maximum values a `signed long int' can hold.
-   (Same as `int').  */
-#undef LONG_MIN
-#define LONG_MIN (-LONG_MAX - 1L)
-#undef LONG_MAX
-#define LONG_MAX __LONG_MAX__
-
-/* Maximum value an `unsigned long int' can hold.  (Minimum is 0).  */
-#undef ULONG_MAX
-#define ULONG_MAX (LONG_MAX * 2UL + 1UL)
-
-#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
-/* Minimum and maximum values a `signed long long int' can hold.  */
-# undef LLONG_MIN
-# define LLONG_MIN (-LLONG_MAX - 1LL)
-# undef LLONG_MAX
-# define LLONG_MAX __LONG_LONG_MAX__
-
-/* Maximum value an `unsigned long long int' can hold.  (Minimum is 0).  */
-# undef ULLONG_MAX
-# define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL)
-#endif
-
-#if defined (__GNU_LIBRARY__) ? defined (__USE_GNU) : !defined (__STRICT_ANSI__)
-/* Minimum and maximum values a `signed long long int' can hold.  */
-# undef LONG_LONG_MIN
-# define LONG_LONG_MIN (-LONG_LONG_MAX - 1LL)
-# undef LONG_LONG_MAX
-# define LONG_LONG_MAX __LONG_LONG_MAX__
-
-/* Maximum value an `unsigned long long int' can hold.  (Minimum is 0).  */
-# undef ULONG_LONG_MAX
-# define ULONG_LONG_MAX (LONG_LONG_MAX * 2ULL + 1ULL)
-#endif
-
-#endif /* _LIMITS_H___ */
-/* This administrivia gets added to the end of limits.h
-   if the system has its own version of limits.h.  */
-
-#else /* not _GCC_LIMITS_H_ */
-
-#ifdef _GCC_NEXT_LIMITS_H
-#include_next <limits.h>		/* recurse down to the real one */
-#endif
-
-#endif /* not _GCC_LIMITS_H_ */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include-fixed/syslimits.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include-fixed/syslimits.h	(revision 1046)
+++ 	(revision )
@@ -1,8 +1,0 @@
-/* syslimits.h stands for the system's own limits.h file.
-   If we can use it ok unmodified, then we install this text.
-   If fixincludes fixes it, then the fixed version is installed
-   instead of this text.  */
-
-#define _GCC_NEXT_LIMITS_H		/* tell gcc's limits.h to recurse */
-#include_next <limits.h>
-#undef _GCC_NEXT_LIMITS_H
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/adxintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/adxintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,81 +1,0 @@
-/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <adxintrin.h> directly; include <x86intrin.h> instead."
-#endif
-
-#ifndef _ADXINTRIN_H_INCLUDED
-#define _ADXINTRIN_H_INCLUDED
-
-extern __inline unsigned char
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_subborrow_u32 (unsigned char __CF, unsigned int __X,
-	        unsigned int __Y, unsigned int *__P)
-{
-    return __builtin_ia32_sbb_u32 (__CF, __Y, __X, __P);
-}
-
-extern __inline unsigned char
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_addcarry_u32 (unsigned char __CF, unsigned int __X,
-	       unsigned int __Y, unsigned int *__P)
-{
-    return __builtin_ia32_addcarryx_u32 (__CF, __X, __Y, __P);
-}
-
-extern __inline unsigned char
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_addcarryx_u32 (unsigned char __CF, unsigned int __X,
-		unsigned int __Y, unsigned int *__P)
-{
-    return __builtin_ia32_addcarryx_u32 (__CF, __X, __Y, __P);
-}
-
-#ifdef __x86_64__
-extern __inline unsigned char
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_subborrow_u64 (unsigned char __CF, unsigned long long __X,
-	        unsigned long long __Y, unsigned long long *__P)
-{
-    return __builtin_ia32_sbb_u64 (__CF, __Y, __X, __P);
-}
-
-extern __inline unsigned char
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_addcarry_u64 (unsigned char __CF, unsigned long long __X,
-	       unsigned long long __Y, unsigned long long *__P)
-{
-    return __builtin_ia32_addcarryx_u64 (__CF, __X, __Y, __P);
-}
-
-extern __inline unsigned char
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_addcarryx_u64 (unsigned char __CF, unsigned long long __X,
-		unsigned long long __Y, unsigned long long *__P)
-{
-    return __builtin_ia32_addcarryx_u64 (__CF, __X, __Y, __P);
-}
-#endif
-
-#endif /* _ADXINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/ammintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/ammintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,93 +1,0 @@
-/* Copyright (C) 2007-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* Implemented from the specification included in the AMD Programmers
-   Manual Update, version 2.x */
-
-#ifndef _AMMINTRIN_H_INCLUDED
-#define _AMMINTRIN_H_INCLUDED
-
-/* We need definitions from the SSE3, SSE2 and SSE header files*/
-#include <pmmintrin.h>
-
-#ifndef __SSE4A__
-#pragma GCC push_options
-#pragma GCC target("sse4a")
-#define __DISABLE_SSE4A__
-#endif /* __SSE4A__ */
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_stream_sd (double * __P, __m128d __Y)
-{
-  __builtin_ia32_movntsd (__P, (__v2df) __Y);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_stream_ss (float * __P, __m128 __Y)
-{
-  __builtin_ia32_movntss (__P, (__v4sf) __Y);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_extract_si64 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_extracti_si64 (__m128i __X, unsigned const int __I, unsigned const int __L)
-{
-  return (__m128i) __builtin_ia32_extrqi ((__v2di) __X, __I, __L);
-}
-#else
-#define _mm_extracti_si64(X, I, L)					\
-  ((__m128i) __builtin_ia32_extrqi ((__v2di)(__m128i)(X),		\
-				    (unsigned int)(I), (unsigned int)(L)))
-#endif
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_insert_si64 (__m128i __X,__m128i __Y)
-{
-  return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_inserti_si64(__m128i __X, __m128i __Y, unsigned const int __I, unsigned const int __L)
-{
-  return (__m128i) __builtin_ia32_insertqi ((__v2di)__X, (__v2di)__Y, __I, __L);
-}
-#else
-#define _mm_inserti_si64(X, Y, I, L)					\
-  ((__m128i) __builtin_ia32_insertqi ((__v2di)(__m128i)(X),		\
-				      (__v2di)(__m128i)(Y),		\
-				      (unsigned int)(I), (unsigned int)(L)))
-#endif
-
-#ifdef __DISABLE_SSE4A__
-#undef __DISABLE_SSE4A__
-#pragma GCC pop_options
-#endif /* __DISABLE_SSE4A__ */
-
-#endif /* _AMMINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx2intrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx2intrin.h	(revision 1046)
+++ 	(revision )
@@ -1,1904 +1,0 @@
-/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-# error "Never use <avx2intrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX2INTRIN_H_INCLUDED
-#define _AVX2INTRIN_H_INCLUDED
-
-#ifndef __AVX2__
-#pragma GCC push_options
-#pragma GCC target("avx2")
-#define __DISABLE_AVX2__
-#endif /* __AVX2__ */
-
-/* Sum absolute 8-bit integer difference of adjacent groups of 4
-   byte integers in the first 2 operands.  Starting offsets within
-   operands are determined by the 3rd mask operand.  */
-#ifdef __OPTIMIZE__
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mpsadbw_epu8 (__m256i __X, __m256i __Y, const int __M)
-{
-  return (__m256i) __builtin_ia32_mpsadbw256 ((__v32qi)__X,
-					      (__v32qi)__Y, __M);
-}
-#else
-#define _mm256_mpsadbw_epu8(X, Y, M)					\
-  ((__m256i) __builtin_ia32_mpsadbw256 ((__v32qi)(__m256i)(X),		\
-					(__v32qi)(__m256i)(Y), (int)(M)))
-#endif
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_abs_epi8 (__m256i __A)
-{
-  return (__m256i)__builtin_ia32_pabsb256 ((__v32qi)__A);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_abs_epi16 (__m256i __A)
-{
-  return (__m256i)__builtin_ia32_pabsw256 ((__v16hi)__A);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_abs_epi32 (__m256i __A)
-{
-  return (__m256i)__builtin_ia32_pabsd256 ((__v8si)__A);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_packs_epi32 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_packssdw256 ((__v8si)__A, (__v8si)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_packs_epi16 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_packsswb256 ((__v16hi)__A, (__v16hi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_packus_epi32 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_packusdw256 ((__v8si)__A, (__v8si)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_packus_epi16 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_packuswb256 ((__v16hi)__A, (__v16hi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_add_epi8 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v32qu)__A + (__v32qu)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_add_epi16 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v16hu)__A + (__v16hu)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_add_epi32 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v8su)__A + (__v8su)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_add_epi64 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v4du)__A + (__v4du)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_adds_epi8 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_paddsb256 ((__v32qi)__A, (__v32qi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_adds_epi16 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_paddsw256 ((__v16hi)__A, (__v16hi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_adds_epu8 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_paddusb256 ((__v32qi)__A, (__v32qi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_adds_epu16 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_paddusw256 ((__v16hi)__A, (__v16hi)__B);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_alignr_epi8 (__m256i __A, __m256i __B, const int __N)
-{
-  return (__m256i) __builtin_ia32_palignr256 ((__v4di)__A,
-					      (__v4di)__B,
-					      __N * 8);
-}
-#else
-/* In that case (__N*8) will be in vreg, and insn will not be matched. */
-/* Use define instead */
-#define _mm256_alignr_epi8(A, B, N)				   \
-  ((__m256i) __builtin_ia32_palignr256 ((__v4di)(__m256i)(A),	   \
-					(__v4di)(__m256i)(B),	   \
-					(int)(N) * 8))
-#endif
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_and_si256 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v4du)__A & (__v4du)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_andnot_si256 (__m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_andnotsi256 ((__v4di)__A, (__v4di)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_avg_epu8 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_pavgb256 ((__v32qi)__A, (__v32qi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_avg_epu16 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_pavgw256 ((__v16hi)__A, (__v16hi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_blendv_epi8 (__m256i __X, __m256i __Y, __m256i __M)
-{
-  return (__m256i) __builtin_ia32_pblendvb256 ((__v32qi)__X,
-					       (__v32qi)__Y,
-					       (__v32qi)__M);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_blend_epi16 (__m256i __X, __m256i __Y, const int __M)
-{
-  return (__m256i) __builtin_ia32_pblendw256 ((__v16hi)__X,
-					      (__v16hi)__Y,
-					       __M);
-}
-#else
-#define _mm256_blend_epi16(X, Y, M)					\
-  ((__m256i) __builtin_ia32_pblendw256 ((__v16hi)(__m256i)(X),		\
-					(__v16hi)(__m256i)(Y), (int)(M)))
-#endif
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpeq_epi8 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v32qi)__A == (__v32qi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpeq_epi16 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v16hi)__A == (__v16hi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpeq_epi32 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v8si)__A == (__v8si)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpeq_epi64 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v4di)__A == (__v4di)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpgt_epi8 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v32qi)__A > (__v32qi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpgt_epi16 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v16hi)__A > (__v16hi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpgt_epi32 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v8si)__A > (__v8si)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpgt_epi64 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v4di)__A > (__v4di)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_hadd_epi16 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_phaddw256 ((__v16hi)__X,
-					     (__v16hi)__Y);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_hadd_epi32 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_phaddd256 ((__v8si)__X, (__v8si)__Y);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_hadds_epi16 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_phaddsw256 ((__v16hi)__X,
-					      (__v16hi)__Y);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_hsub_epi16 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_phsubw256 ((__v16hi)__X,
-					     (__v16hi)__Y);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_hsub_epi32 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_phsubd256 ((__v8si)__X, (__v8si)__Y);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_hsubs_epi16 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_phsubsw256 ((__v16hi)__X,
-					      (__v16hi)__Y);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maddubs_epi16 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_pmaddubsw256 ((__v32qi)__X,
-						(__v32qi)__Y);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_madd_epi16 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_pmaddwd256 ((__v16hi)__A,
-					     (__v16hi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_max_epi8 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_pmaxsb256 ((__v32qi)__A, (__v32qi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_max_epi16 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_pmaxsw256 ((__v16hi)__A, (__v16hi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_max_epi32 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_pmaxsd256 ((__v8si)__A, (__v8si)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_max_epu8 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_pmaxub256 ((__v32qi)__A, (__v32qi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_max_epu16 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_pmaxuw256 ((__v16hi)__A, (__v16hi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_max_epu32 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_pmaxud256 ((__v8si)__A, (__v8si)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_min_epi8 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_pminsb256 ((__v32qi)__A, (__v32qi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_min_epi16 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_pminsw256 ((__v16hi)__A, (__v16hi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_min_epi32 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_pminsd256 ((__v8si)__A, (__v8si)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_min_epu8 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_pminub256 ((__v32qi)__A, (__v32qi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_min_epu16 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__A, (__v16hi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_min_epu32 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_pminud256 ((__v8si)__A, (__v8si)__B);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_movemask_epi8 (__m256i __A)
-{
-  return __builtin_ia32_pmovmskb256 ((__v32qi)__A);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepi8_epi16 (__m128i __X)
-{
-  return (__m256i) __builtin_ia32_pmovsxbw256 ((__v16qi)__X);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepi8_epi32 (__m128i __X)
-{
-  return (__m256i) __builtin_ia32_pmovsxbd256 ((__v16qi)__X);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepi8_epi64 (__m128i __X)
-{
-  return (__m256i) __builtin_ia32_pmovsxbq256 ((__v16qi)__X);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepi16_epi32 (__m128i __X)
-{
-  return (__m256i) __builtin_ia32_pmovsxwd256 ((__v8hi)__X);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepi16_epi64 (__m128i __X)
-{
-  return (__m256i) __builtin_ia32_pmovsxwq256 ((__v8hi)__X);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepi32_epi64 (__m128i __X)
-{
-  return (__m256i) __builtin_ia32_pmovsxdq256 ((__v4si)__X);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepu8_epi16 (__m128i __X)
-{
-  return (__m256i) __builtin_ia32_pmovzxbw256 ((__v16qi)__X);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepu8_epi32 (__m128i __X)
-{
-  return (__m256i) __builtin_ia32_pmovzxbd256 ((__v16qi)__X);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepu8_epi64 (__m128i __X)
-{
-  return (__m256i) __builtin_ia32_pmovzxbq256 ((__v16qi)__X);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepu16_epi32 (__m128i __X)
-{
-  return (__m256i) __builtin_ia32_pmovzxwd256 ((__v8hi)__X);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepu16_epi64 (__m128i __X)
-{
-  return (__m256i) __builtin_ia32_pmovzxwq256 ((__v8hi)__X);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepu32_epi64 (__m128i __X)
-{
-  return (__m256i) __builtin_ia32_pmovzxdq256 ((__v4si)__X);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mul_epi32 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_pmuldq256 ((__v8si)__X, (__v8si)__Y);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mulhrs_epi16 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_pmulhrsw256 ((__v16hi)__X,
-					       (__v16hi)__Y);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mulhi_epu16 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_pmulhuw256 ((__v16hi)__A, (__v16hi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mulhi_epi16 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_pmulhw256 ((__v16hi)__A, (__v16hi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mullo_epi16 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v16hu)__A * (__v16hu)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mullo_epi32 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v8su)__A * (__v8su)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mul_epu32 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_pmuludq256 ((__v8si)__A, (__v8si)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_or_si256 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v4du)__A | (__v4du)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_sad_epu8 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_psadbw256 ((__v32qi)__A, (__v32qi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_shuffle_epi8 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_pshufb256 ((__v32qi)__X,
-					     (__v32qi)__Y);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_shuffle_epi32 (__m256i __A, const int __mask)
-{
-  return (__m256i)__builtin_ia32_pshufd256 ((__v8si)__A, __mask);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_shufflehi_epi16 (__m256i __A, const int __mask)
-{
-  return (__m256i)__builtin_ia32_pshufhw256 ((__v16hi)__A, __mask);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_shufflelo_epi16 (__m256i __A, const int __mask)
-{
-  return (__m256i)__builtin_ia32_pshuflw256 ((__v16hi)__A, __mask);
-}
-#else
-#define _mm256_shuffle_epi32(A, N) \
-  ((__m256i)__builtin_ia32_pshufd256 ((__v8si)(__m256i)(A), (int)(N)))
-#define _mm256_shufflehi_epi16(A, N) \
-  ((__m256i)__builtin_ia32_pshufhw256 ((__v16hi)(__m256i)(A), (int)(N)))
-#define _mm256_shufflelo_epi16(A, N) \
-  ((__m256i)__builtin_ia32_pshuflw256 ((__v16hi)(__m256i)(A), (int)(N)))
-#endif
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_sign_epi8 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_psignb256 ((__v32qi)__X, (__v32qi)__Y);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_sign_epi16 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_psignw256 ((__v16hi)__X, (__v16hi)__Y);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_sign_epi32 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_psignd256 ((__v8si)__X, (__v8si)__Y);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_bslli_epi128 (__m256i __A, const int __N)
-{
-  return (__m256i)__builtin_ia32_pslldqi256 (__A, __N * 8);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_slli_si256 (__m256i __A, const int __N)
-{
-  return (__m256i)__builtin_ia32_pslldqi256 (__A, __N * 8);
-}
-#else
-#define _mm256_bslli_epi128(A, N) \
-  ((__m256i)__builtin_ia32_pslldqi256 ((__m256i)(A), (int)(N) * 8))
-#define _mm256_slli_si256(A, N) \
-  ((__m256i)__builtin_ia32_pslldqi256 ((__m256i)(A), (int)(N) * 8))
-#endif
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_slli_epi16 (__m256i __A, int __B)
-{
-  return (__m256i)__builtin_ia32_psllwi256 ((__v16hi)__A, __B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_sll_epi16 (__m256i __A, __m128i __B)
-{
-  return (__m256i)__builtin_ia32_psllw256((__v16hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_slli_epi32 (__m256i __A, int __B)
-{
-  return (__m256i)__builtin_ia32_pslldi256 ((__v8si)__A, __B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_sll_epi32 (__m256i __A, __m128i __B)
-{
-  return (__m256i)__builtin_ia32_pslld256((__v8si)__A, (__v4si)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_slli_epi64 (__m256i __A, int __B)
-{
-  return (__m256i)__builtin_ia32_psllqi256 ((__v4di)__A, __B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_sll_epi64 (__m256i __A, __m128i __B)
-{
-  return (__m256i)__builtin_ia32_psllq256((__v4di)__A, (__v2di)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_srai_epi16 (__m256i __A, int __B)
-{
-  return (__m256i)__builtin_ia32_psrawi256 ((__v16hi)__A, __B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_sra_epi16 (__m256i __A, __m128i __B)
-{
-  return (__m256i)__builtin_ia32_psraw256 ((__v16hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_srai_epi32 (__m256i __A, int __B)
-{
-  return (__m256i)__builtin_ia32_psradi256 ((__v8si)__A, __B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_sra_epi32 (__m256i __A, __m128i __B)
-{
-  return (__m256i)__builtin_ia32_psrad256 ((__v8si)__A, (__v4si)__B);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_bsrli_epi128 (__m256i __A, const int __N)
-{
-  return (__m256i)__builtin_ia32_psrldqi256 (__A, __N * 8);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_srli_si256 (__m256i __A, const int __N)
-{
-  return (__m256i)__builtin_ia32_psrldqi256 (__A, __N * 8);
-}
-#else
-#define _mm256_bsrli_epi128(A, N) \
-  ((__m256i)__builtin_ia32_psrldqi256 ((__m256i)(A), (int)(N) * 8))
-#define _mm256_srli_si256(A, N) \
-  ((__m256i)__builtin_ia32_psrldqi256 ((__m256i)(A), (int)(N) * 8))
-#endif
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_srli_epi16 (__m256i __A, int __B)
-{
-  return (__m256i)__builtin_ia32_psrlwi256 ((__v16hi)__A, __B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_srl_epi16 (__m256i __A, __m128i __B)
-{
-  return (__m256i)__builtin_ia32_psrlw256((__v16hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_srli_epi32 (__m256i __A, int __B)
-{
-  return (__m256i)__builtin_ia32_psrldi256 ((__v8si)__A, __B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_srl_epi32 (__m256i __A, __m128i __B)
-{
-  return (__m256i)__builtin_ia32_psrld256((__v8si)__A, (__v4si)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_srli_epi64 (__m256i __A, int __B)
-{
-  return (__m256i)__builtin_ia32_psrlqi256 ((__v4di)__A, __B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_srl_epi64 (__m256i __A, __m128i __B)
-{
-  return (__m256i)__builtin_ia32_psrlq256((__v4di)__A, (__v2di)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_sub_epi8 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v32qu)__A - (__v32qu)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_sub_epi16 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v16hu)__A - (__v16hu)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_sub_epi32 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v8su)__A - (__v8su)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_sub_epi64 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v4du)__A - (__v4du)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_subs_epi8 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_psubsb256 ((__v32qi)__A, (__v32qi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_subs_epi16 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_psubsw256 ((__v16hi)__A, (__v16hi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_subs_epu8 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_psubusb256 ((__v32qi)__A, (__v32qi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_subs_epu16 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_psubusw256 ((__v16hi)__A, (__v16hi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_unpackhi_epi8 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_punpckhbw256 ((__v32qi)__A, (__v32qi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_unpackhi_epi16 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_punpckhwd256 ((__v16hi)__A, (__v16hi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_unpackhi_epi32 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_punpckhdq256 ((__v8si)__A, (__v8si)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_unpackhi_epi64 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_punpckhqdq256 ((__v4di)__A, (__v4di)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_unpacklo_epi8 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_punpcklbw256 ((__v32qi)__A, (__v32qi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_unpacklo_epi16 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_punpcklwd256 ((__v16hi)__A, (__v16hi)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_unpacklo_epi32 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_punpckldq256 ((__v8si)__A, (__v8si)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_unpacklo_epi64 (__m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_punpcklqdq256 ((__v4di)__A, (__v4di)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_xor_si256 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v4du)__A ^ (__v4du)__B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_stream_load_si256 (__m256i const *__X)
-{
-  return (__m256i) __builtin_ia32_movntdqa256 ((__v4di *) __X);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_broadcastss_ps (__m128 __X)
-{
-  return (__m128) __builtin_ia32_vbroadcastss_ps ((__v4sf)__X);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_broadcastss_ps (__m128 __X)
-{
-  return (__m256) __builtin_ia32_vbroadcastss_ps256 ((__v4sf)__X);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_broadcastsd_pd (__m128d __X)
-{
-  return (__m256d) __builtin_ia32_vbroadcastsd_pd256 ((__v2df)__X);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_broadcastsi128_si256 (__m128i __X)
-{
-  return (__m256i) __builtin_ia32_vbroadcastsi256 ((__v2di)__X);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_blend_epi32 (__m128i __X, __m128i __Y, const int __M)
-{
-  return (__m128i) __builtin_ia32_pblendd128 ((__v4si)__X,
-					      (__v4si)__Y,
-					      __M);
-}
-#else
-#define _mm_blend_epi32(X, Y, M)					\
-  ((__m128i) __builtin_ia32_pblendd128 ((__v4si)(__m128i)(X),		\
-					(__v4si)(__m128i)(Y), (int)(M)))
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_blend_epi32 (__m256i __X, __m256i __Y, const int __M)
-{
-  return (__m256i) __builtin_ia32_pblendd256 ((__v8si)__X,
-					      (__v8si)__Y,
-					      __M);
-}
-#else
-#define _mm256_blend_epi32(X, Y, M)					\
-  ((__m256i) __builtin_ia32_pblendd256 ((__v8si)(__m256i)(X),		\
-					(__v8si)(__m256i)(Y), (int)(M)))
-#endif
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_broadcastb_epi8 (__m128i __X)
-{
-  return (__m256i) __builtin_ia32_pbroadcastb256 ((__v16qi)__X);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_broadcastw_epi16 (__m128i __X)
-{
-  return (__m256i) __builtin_ia32_pbroadcastw256 ((__v8hi)__X);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_broadcastd_epi32 (__m128i __X)
-{
-  return (__m256i) __builtin_ia32_pbroadcastd256 ((__v4si)__X);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_broadcastq_epi64 (__m128i __X)
-{
-  return (__m256i) __builtin_ia32_pbroadcastq256 ((__v2di)__X);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_broadcastb_epi8 (__m128i __X)
-{
-  return (__m128i) __builtin_ia32_pbroadcastb128 ((__v16qi)__X);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_broadcastw_epi16 (__m128i __X)
-{
-  return (__m128i) __builtin_ia32_pbroadcastw128 ((__v8hi)__X);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_broadcastd_epi32 (__m128i __X)
-{
-  return (__m128i) __builtin_ia32_pbroadcastd128 ((__v4si)__X);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_broadcastq_epi64 (__m128i __X)
-{
-  return (__m128i) __builtin_ia32_pbroadcastq128 ((__v2di)__X);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permutevar8x32_epi32 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_permvarsi256 ((__v8si)__X, (__v8si)__Y);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permute4x64_pd (__m256d __X, const int __M)
-{
-  return (__m256d) __builtin_ia32_permdf256 ((__v4df)__X, __M);
-}
-#else
-#define _mm256_permute4x64_pd(X, M)			       \
-  ((__m256d) __builtin_ia32_permdf256 ((__v4df)(__m256d)(X), (int)(M)))
-#endif
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permutevar8x32_ps (__m256 __X, __m256i __Y)
-{
-  return (__m256) __builtin_ia32_permvarsf256 ((__v8sf)__X, (__v8si)__Y);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permute4x64_epi64 (__m256i __X, const int __M)
-{
-  return (__m256i) __builtin_ia32_permdi256 ((__v4di)__X, __M);
-}
-#else
-#define _mm256_permute4x64_epi64(X, M)			       \
-  ((__m256i) __builtin_ia32_permdi256 ((__v4di)(__m256i)(X), (int)(M)))
-#endif
-
-
-#ifdef __OPTIMIZE__
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permute2x128_si256 (__m256i __X, __m256i __Y, const int __M)
-{
-  return (__m256i) __builtin_ia32_permti256 ((__v4di)__X, (__v4di)__Y, __M);
-}
-#else
-#define _mm256_permute2x128_si256(X, Y, M)				\
-  ((__m256i) __builtin_ia32_permti256 ((__v4di)(__m256i)(X), (__v4di)(__m256i)(Y), (int)(M)))
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_extracti128_si256 (__m256i __X, const int __M)
-{
-  return (__m128i) __builtin_ia32_extract128i256 ((__v4di)__X, __M);
-}
-#else
-#define _mm256_extracti128_si256(X, M)				\
-  ((__m128i) __builtin_ia32_extract128i256 ((__v4di)(__m256i)(X), (int)(M)))
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_inserti128_si256 (__m256i __X, __m128i __Y, const int __M)
-{
-  return (__m256i) __builtin_ia32_insert128i256 ((__v4di)__X, (__v2di)__Y, __M);
-}
-#else
-#define _mm256_inserti128_si256(X, Y, M)			 \
-  ((__m256i) __builtin_ia32_insert128i256 ((__v4di)(__m256i)(X), \
-					   (__v2di)(__m128i)(Y), \
-					   (int)(M)))
-#endif
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskload_epi32 (int const *__X, __m256i __M )
-{
-  return (__m256i) __builtin_ia32_maskloadd256 ((const __v8si *)__X,
-						(__v8si)__M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskload_epi64 (long long const *__X, __m256i __M )
-{
-  return (__m256i) __builtin_ia32_maskloadq256 ((const __v4di *)__X,
-						(__v4di)__M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskload_epi32 (int const *__X, __m128i __M )
-{
-  return (__m128i) __builtin_ia32_maskloadd ((const __v4si *)__X,
-					     (__v4si)__M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskload_epi64 (long long const *__X, __m128i __M )
-{
-  return (__m128i) __builtin_ia32_maskloadq ((const __v2di *)__X,
-					     (__v2di)__M);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskstore_epi32 (int *__X, __m256i __M, __m256i __Y )
-{
-  __builtin_ia32_maskstored256 ((__v8si *)__X, (__v8si)__M, (__v8si)__Y);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskstore_epi64 (long long *__X, __m256i __M, __m256i __Y )
-{
-  __builtin_ia32_maskstoreq256 ((__v4di *)__X, (__v4di)__M, (__v4di)__Y);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskstore_epi32 (int *__X, __m128i __M, __m128i __Y )
-{
-  __builtin_ia32_maskstored ((__v4si *)__X, (__v4si)__M, (__v4si)__Y);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskstore_epi64 (long long *__X, __m128i __M, __m128i __Y )
-{
-  __builtin_ia32_maskstoreq (( __v2di *)__X, (__v2di)__M, (__v2di)__Y);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_sllv_epi32 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_psllv8si ((__v8si)__X, (__v8si)__Y);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sllv_epi32 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_psllv4si ((__v4si)__X, (__v4si)__Y);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_sllv_epi64 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_psllv4di ((__v4di)__X, (__v4di)__Y);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sllv_epi64 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_psllv2di ((__v2di)__X, (__v2di)__Y);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_srav_epi32 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_psrav8si ((__v8si)__X, (__v8si)__Y);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srav_epi32 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_psrav4si ((__v4si)__X, (__v4si)__Y);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_srlv_epi32 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_psrlv8si ((__v8si)__X, (__v8si)__Y);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srlv_epi32 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_psrlv4si ((__v4si)__X, (__v4si)__Y);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_srlv_epi64 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_psrlv4di ((__v4di)__X, (__v4di)__Y);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srlv_epi64 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_psrlv2di ((__v2di)__X, (__v2di)__Y);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i32gather_pd (double const *base, __m128i index, const int scale)
-{
-  __v2df zero = _mm_setzero_pd ();
-  __v2df mask = _mm_cmpeq_pd (zero, zero);
-
-  return (__m128d) __builtin_ia32_gathersiv2df (_mm_undefined_pd (),
-						base,
-						(__v4si)index,
-						mask,
-						scale);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i32gather_pd (__m128d src, double const *base, __m128i index,
-		       __m128d mask, const int scale)
-{
-  return (__m128d) __builtin_ia32_gathersiv2df ((__v2df)src,
-						base,
-						(__v4si)index,
-						(__v2df)mask,
-						scale);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i32gather_pd (double const *base, __m128i index, const int scale)
-{
-  __v4df zero = _mm256_setzero_pd ();
-  __v4df mask = _mm256_cmp_pd (zero, zero, _CMP_EQ_OQ);
-
-  return (__m256d) __builtin_ia32_gathersiv4df (_mm256_undefined_pd (),
-						base,
-						(__v4si)index,
-						mask,
-						scale);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i32gather_pd (__m256d src, double const *base,
-			  __m128i index, __m256d mask, const int scale)
-{
-  return (__m256d) __builtin_ia32_gathersiv4df ((__v4df)src,
-						base,
-						(__v4si)index,
-						(__v4df)mask,
-						scale);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i64gather_pd (double const *base, __m128i index, const int scale)
-{
-  __v2df src = _mm_setzero_pd ();
-  __v2df mask = _mm_cmpeq_pd (src, src);
-
-  return (__m128d) __builtin_ia32_gatherdiv2df (src,
-						base,
-						(__v2di)index,
-						mask,
-						scale);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i64gather_pd (__m128d src, double const *base, __m128i index,
-		       __m128d mask, const int scale)
-{
-  return (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)src,
-						base,
-						(__v2di)index,
-						(__v2df)mask,
-						scale);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i64gather_pd (double const *base, __m256i index, const int scale)
-{
-  __v4df src = _mm256_setzero_pd ();
-  __v4df mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ);
-
-  return (__m256d) __builtin_ia32_gatherdiv4df (src,
-						base,
-						(__v4di)index,
-						mask,
-						scale);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i64gather_pd (__m256d src, double const *base,
-			  __m256i index, __m256d mask, const int scale)
-{
-  return (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)src,
-						base,
-						(__v4di)index,
-						(__v4df)mask,
-						scale);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i32gather_ps (float const *base, __m128i index, const int scale)
-{
-  __v4sf src = _mm_setzero_ps ();
-  __v4sf mask = _mm_cmpeq_ps (src, src);
-
-  return (__m128) __builtin_ia32_gathersiv4sf (src,
-					       base,
-					       (__v4si)index,
-					       mask,
-					       scale);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i32gather_ps (__m128 src, float const *base, __m128i index,
-		       __m128 mask, const int scale)
-{
-  return (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)src,
-					       base,
-					       (__v4si)index,
-					       (__v4sf)mask,
-					       scale);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i32gather_ps (float const *base, __m256i index, const int scale)
-{
-  __v8sf src = _mm256_setzero_ps ();
-  __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
-
-  return (__m256) __builtin_ia32_gathersiv8sf (src,
-					       base,
-					       (__v8si)index,
-					       mask,
-					       scale);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i32gather_ps (__m256 src, float const *base,
-			  __m256i index, __m256 mask, const int scale)
-{
-  return (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)src,
-					       base,
-					       (__v8si)index,
-					       (__v8sf)mask,
-					       scale);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i64gather_ps (float const *base, __m128i index, const int scale)
-{
-  __v4sf src = _mm_setzero_ps ();
-  __v4sf mask = _mm_cmpeq_ps (src, src);
-
-  return (__m128) __builtin_ia32_gatherdiv4sf (src,
-					       base,
-					       (__v2di)index,
-					       mask,
-					       scale);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i64gather_ps (__m128 src, float const *base, __m128i index,
-		       __m128 mask, const int scale)
-{
-  return (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)src,
-						base,
-						(__v2di)index,
-						(__v4sf)mask,
-						scale);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i64gather_ps (float const *base, __m256i index, const int scale)
-{
-  __v4sf src = _mm_setzero_ps ();
-  __v4sf mask = _mm_cmpeq_ps (src, src);
-
-  return (__m128) __builtin_ia32_gatherdiv4sf256 (src,
-						  base,
-						  (__v4di)index,
-						  mask,
-						  scale);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i64gather_ps (__m128 src, float const *base,
-			  __m256i index, __m128 mask, const int scale)
-{
-  return (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)src,
-						  base,
-						  (__v4di)index,
-						  (__v4sf)mask,
-						  scale);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i32gather_epi64 (long long int const *base,
-		     __m128i index, const int scale)
-{
-  __v2di src = __extension__ (__v2di){ 0, 0 };
-  __v2di mask = __extension__ (__v2di){ ~0, ~0 };
-
-  return (__m128i) __builtin_ia32_gathersiv2di (src,
-						base,
-						(__v4si)index,
-						mask,
-						scale);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i32gather_epi64 (__m128i src, long long int const *base,
-			  __m128i index, __m128i mask, const int scale)
-{
-  return (__m128i) __builtin_ia32_gathersiv2di ((__v2di)src,
-						base,
-						(__v4si)index,
-						(__v2di)mask,
-						scale);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i32gather_epi64 (long long int const *base,
-			__m128i index, const int scale)
-{
-  __v4di src = __extension__ (__v4di){ 0, 0, 0, 0 };
-  __v4di mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };
-
-  return (__m256i) __builtin_ia32_gathersiv4di (src,
-						base,
-						(__v4si)index,
-						mask,
-						scale);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i32gather_epi64 (__m256i src, long long int const *base,
-			     __m128i index, __m256i mask, const int scale)
-{
-  return (__m256i) __builtin_ia32_gathersiv4di ((__v4di)src,
-						base,
-						(__v4si)index,
-						(__v4di)mask,
-						scale);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i64gather_epi64 (long long int const *base,
-		     __m128i index, const int scale)
-{
-  __v2di src = __extension__ (__v2di){ 0, 0 };
-  __v2di mask = __extension__ (__v2di){ ~0, ~0 };
-
-  return (__m128i) __builtin_ia32_gatherdiv2di (src,
-						base,
-						(__v2di)index,
-						mask,
-						scale);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i64gather_epi64 (__m128i src, long long int const *base, __m128i index,
-			  __m128i mask, const int scale)
-{
-  return (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)src,
-						base,
-						(__v2di)index,
-						(__v2di)mask,
-						scale);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i64gather_epi64 (long long int const *base,
-			__m256i index, const int scale)
-{
-  __v4di src = __extension__ (__v4di){ 0, 0, 0, 0 };
-  __v4di mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };
-
-  return (__m256i) __builtin_ia32_gatherdiv4di (src,
-						base,
-						(__v4di)index,
-						mask,
-						scale);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i64gather_epi64 (__m256i src, long long int const *base,
-			     __m256i index, __m256i mask, const int scale)
-{
-  return (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)src,
-						base,
-						(__v4di)index,
-						(__v4di)mask,
-						scale);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i32gather_epi32 (int const *base, __m128i index, const int scale)
-{
-  __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 };
-  __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
-
-  return (__m128i) __builtin_ia32_gathersiv4si (src,
-					       base,
-					       (__v4si)index,
-					       mask,
-					       scale);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i32gather_epi32 (__m128i src, int const *base, __m128i index,
-			  __m128i mask, const int scale)
-{
-  return (__m128i) __builtin_ia32_gathersiv4si ((__v4si)src,
-						base,
-						(__v4si)index,
-						(__v4si)mask,
-						scale);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i32gather_epi32 (int const *base, __m256i index, const int scale)
-{
-  __v8si src = __extension__ (__v8si){ 0, 0, 0, 0, 0, 0, 0, 0 };
-  __v8si mask = __extension__ (__v8si){ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
-
-  return (__m256i) __builtin_ia32_gathersiv8si (src,
-						base,
-						(__v8si)index,
-						mask,
-						scale);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i32gather_epi32 (__m256i src, int const *base,
-			     __m256i index, __m256i mask, const int scale)
-{
-  return (__m256i) __builtin_ia32_gathersiv8si ((__v8si)src,
-						base,
-						(__v8si)index,
-						(__v8si)mask,
-						scale);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i64gather_epi32 (int const *base, __m128i index, const int scale)
-{
-  __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 };
-  __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
-
-  return (__m128i) __builtin_ia32_gatherdiv4si (src,
-						base,
-						(__v2di)index,
-						mask,
-						scale);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i64gather_epi32 (__m128i src, int const *base, __m128i index,
-			  __m128i mask, const int scale)
-{
-  return (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)src,
-						base,
-						(__v2di)index,
-						(__v4si)mask,
-						scale);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i64gather_epi32 (int const *base, __m256i index, const int scale)
-{
-  __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 };
-  __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
-
-  return (__m128i) __builtin_ia32_gatherdiv4si256 (src,
-						  base,
-						  (__v4di)index,
-						  mask,
-						  scale);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i64gather_epi32 (__m128i src, int const *base,
-			     __m256i index, __m128i mask, const int scale)
-{
-  return (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)src,
-						   base,
-						   (__v4di)index,
-						   (__v4si)mask,
-						   scale);
-}
-#else /* __OPTIMIZE__ */
-#define _mm_i32gather_pd(BASE, INDEX, SCALE)				\
-  (__m128d) __builtin_ia32_gathersiv2df ((__v2df) _mm_setzero_pd (),	\
-					 (double const *)BASE,		\
-					 (__v4si)(__m128i)INDEX,	\
-					 (__v2df)_mm_set1_pd(		\
-					   (double)(long long int) -1), \
-					 (int)SCALE)
-
-#define _mm_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE)	 \
-  (__m128d) __builtin_ia32_gathersiv2df ((__v2df)(__m128d)SRC,	 \
-					 (double const *)BASE,	 \
-					 (__v4si)(__m128i)INDEX, \
-					 (__v2df)(__m128d)MASK,	 \
-					 (int)SCALE)
-
-#define _mm256_i32gather_pd(BASE, INDEX, SCALE)				\
-  (__m256d) __builtin_ia32_gathersiv4df ((__v4df) _mm256_setzero_pd (),	\
-					 (double const *)BASE,		\
-					 (__v4si)(__m128i)INDEX,	\
-					 (__v4df)_mm256_set1_pd(	\
-					   (double)(long long int) -1), \
-					 (int)SCALE)
-
-#define _mm256_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE)	 \
-  (__m256d) __builtin_ia32_gathersiv4df ((__v4df)(__m256d)SRC,	 \
-					 (double const *)BASE,	 \
-					 (__v4si)(__m128i)INDEX, \
-					 (__v4df)(__m256d)MASK,	 \
-					 (int)SCALE)
-
-#define _mm_i64gather_pd(BASE, INDEX, SCALE)				\
-  (__m128d) __builtin_ia32_gatherdiv2df ((__v2df) _mm_setzero_pd (),	\
-					 (double const *)BASE,		\
-					 (__v2di)(__m128i)INDEX,	\
-					 (__v2df)_mm_set1_pd(		\
-					   (double)(long long int) -1), \
-					 (int)SCALE)
-
-#define _mm_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE)	 \
-  (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)(__m128d)SRC,	 \
-					 (double const *)BASE,	 \
-					 (__v2di)(__m128i)INDEX, \
-					 (__v2df)(__m128d)MASK,	 \
-					 (int)SCALE)
-
-#define _mm256_i64gather_pd(BASE, INDEX, SCALE)				\
-  (__m256d) __builtin_ia32_gatherdiv4df ((__v4df) _mm256_setzero_pd (),	\
-					 (double const *)BASE,		\
-					 (__v4di)(__m256i)INDEX,	\
-					 (__v4df)_mm256_set1_pd(	\
-					   (double)(long long int) -1), \
-					 (int)SCALE)
-
-#define _mm256_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE)	 \
-  (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)(__m256d)SRC,	 \
-					 (double const *)BASE,	 \
-					 (__v4di)(__m256i)INDEX, \
-					 (__v4df)(__m256d)MASK,	 \
-					 (int)SCALE)
-
-#define _mm_i32gather_ps(BASE, INDEX, SCALE)				\
-  (__m128) __builtin_ia32_gathersiv4sf ((__v4sf) _mm_setzero_ps (),	\
-					(float const *)BASE,		\
-					(__v4si)(__m128i)INDEX,		\
-					_mm_set1_ps ((float)(int) -1),	\
-					(int)SCALE)
-
-#define _mm_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE)	 \
-  (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)(__m128d)SRC,	 \
-					(float const *)BASE,	 \
-					(__v4si)(__m128i)INDEX,	 \
-					(__v4sf)(__m128d)MASK,	 \
-					(int)SCALE)
-
-#define _mm256_i32gather_ps(BASE, INDEX, SCALE)			       \
-  (__m256) __builtin_ia32_gathersiv8sf ((__v8sf) _mm256_setzero_ps (), \
-					(float const *)BASE,	       \
-					(__v8si)(__m256i)INDEX,	       \
-					(__v8sf)_mm256_set1_ps (       \
-					  (float)(int) -1),	       \
-					(int)SCALE)
-
-#define _mm256_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
-  (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)(__m256)SRC,	\
-					(float const *)BASE,	\
-					(__v8si)(__m256i)INDEX, \
-					(__v8sf)(__m256d)MASK,	\
-					(int)SCALE)
-
-#define _mm_i64gather_ps(BASE, INDEX, SCALE)				\
-  (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf) _mm_setzero_pd (),	\
-					(float const *)BASE,		\
-					(__v2di)(__m128i)INDEX,		\
-					(__v4sf)_mm_set1_ps (		\
-					  (float)(int) -1),		\
-					(int)SCALE)
-
-#define _mm_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE)	 \
-  (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)(__m128)SRC,	 \
-					(float const *)BASE,	 \
-					(__v2di)(__m128i)INDEX,	 \
-					(__v4sf)(__m128d)MASK,	 \
-					(int)SCALE)
-
-#define _mm256_i64gather_ps(BASE, INDEX, SCALE)				\
-  (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf) _mm_setzero_ps (),	\
-					   (float const *)BASE,		\
-					   (__v4di)(__m256i)INDEX,	\
-					   (__v4sf)_mm_set1_ps(		\
-					     (float)(int) -1),		\
-					   (int)SCALE)
-
-#define _mm256_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE)	   \
-  (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)(__m128)SRC,	   \
-					   (float const *)BASE,	   \
-					   (__v4di)(__m256i)INDEX, \
-					   (__v4sf)(__m128)MASK,   \
-					   (int)SCALE)
-
-#define _mm_i32gather_epi64(BASE, INDEX, SCALE)				\
-  (__m128i) __builtin_ia32_gathersiv2di ((__v2di) _mm_setzero_si128 (), \
-					 (long long const *)BASE,	\
-					 (__v4si)(__m128i)INDEX,	\
-					 (__v2di)_mm_set1_epi64x (-1),	\
-					 (int)SCALE)
-
-#define _mm_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE)	  \
-  (__m128i) __builtin_ia32_gathersiv2di ((__v2di)(__m128i)SRC,	  \
-					 (long long const *)BASE, \
-					 (__v4si)(__m128i)INDEX,  \
-					 (__v2di)(__m128i)MASK,	  \
-					 (int)SCALE)
-
-#define _mm256_i32gather_epi64(BASE, INDEX, SCALE)			   \
-  (__m256i) __builtin_ia32_gathersiv4di ((__v4di) _mm256_setzero_si256 (), \
-					 (long long const *)BASE,	   \
-					 (__v4si)(__m128i)INDEX,	   \
-					 (__v4di)_mm256_set1_epi64x (-1),  \
-					 (int)SCALE)
-
-#define _mm256_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
-  (__m256i) __builtin_ia32_gathersiv4di ((__v4di)(__m256i)SRC,	   \
-					 (long long const *)BASE,  \
-					 (__v4si)(__m128i)INDEX,   \
-					 (__v4di)(__m256i)MASK,	   \
-					 (int)SCALE)
-
-#define _mm_i64gather_epi64(BASE, INDEX, SCALE)				\
-  (__m128i) __builtin_ia32_gatherdiv2di ((__v2di) _mm_setzero_si128 (), \
-					 (long long const *)BASE,	\
-					 (__v2di)(__m128i)INDEX,	\
-					 (__v2di)_mm_set1_epi64x (-1),	\
-					 (int)SCALE)
-
-#define _mm_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE)	  \
-  (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)(__m128i)SRC,	  \
-					 (long long const *)BASE, \
-					 (__v2di)(__m128i)INDEX,  \
-					 (__v2di)(__m128i)MASK,	  \
-					 (int)SCALE)
-
-#define _mm256_i64gather_epi64(BASE, INDEX, SCALE)			   \
-  (__m256i) __builtin_ia32_gatherdiv4di ((__v4di) _mm256_setzero_si256 (), \
-					 (long long const *)BASE,	   \
-					 (__v4di)(__m256i)INDEX,	   \
-					 (__v4di)_mm256_set1_epi64x (-1),  \
-					 (int)SCALE)
-
-#define _mm256_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
-  (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)(__m256i)SRC,	   \
-					 (long long const *)BASE,  \
-					 (__v4di)(__m256i)INDEX,   \
-					 (__v4di)(__m256i)MASK,	   \
-					 (int)SCALE)
-
-#define _mm_i32gather_epi32(BASE, INDEX, SCALE)				\
-  (__m128i) __builtin_ia32_gathersiv4si ((__v4si) _mm_setzero_si128 (),	\
-					 (int const *)BASE,		\
-					 (__v4si)(__m128i)INDEX,	\
-					 (__v4si)_mm_set1_epi32 (-1),	\
-					 (int)SCALE)
-
-#define _mm_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
-  (__m128i) __builtin_ia32_gathersiv4si ((__v4si)(__m128i)SRC,	\
-					(int const *)BASE,	\
-					(__v4si)(__m128i)INDEX, \
-					(__v4si)(__m128i)MASK,	\
-					(int)SCALE)
-
-#define _mm256_i32gather_epi32(BASE, INDEX, SCALE)			   \
-  (__m256i) __builtin_ia32_gathersiv8si ((__v8si) _mm256_setzero_si256 (), \
-					 (int const *)BASE,		   \
-					 (__v8si)(__m256i)INDEX,	   \
-					 (__v8si)_mm256_set1_epi32 (-1),   \
-					 (int)SCALE)
-
-#define _mm256_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
-  (__m256i) __builtin_ia32_gathersiv8si ((__v8si)(__m256i)SRC,	   \
-					(int const *)BASE,	   \
-					(__v8si)(__m256i)INDEX,	   \
-					(__v8si)(__m256i)MASK,	   \
-					(int)SCALE)
-
-#define _mm_i64gather_epi32(BASE, INDEX, SCALE)				\
-  (__m128i) __builtin_ia32_gatherdiv4si ((__v4si) _mm_setzero_si128 (),	\
-					 (int const *)BASE,		\
-					 (__v2di)(__m128i)INDEX,	\
-					 (__v4si)_mm_set1_epi32 (-1),	\
-					 (int)SCALE)
-
-#define _mm_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
-  (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)(__m128i)SRC,	\
-					(int const *)BASE,	\
-					(__v2di)(__m128i)INDEX, \
-					(__v4si)(__m128i)MASK,	\
-					(int)SCALE)
-
-#define _mm256_i64gather_epi32(BASE, INDEX, SCALE)			   \
-  (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si) _mm_setzero_si128 (), \
-					    (int const *)BASE,		   \
-					    (__v4di)(__m256i)INDEX,	   \
-					    (__v4si)_mm_set1_epi32(-1),	   \
-					    (int)SCALE)
-
-#define _mm256_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
-  (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)(__m128i)SRC,  \
-					   (int const *)BASE,	   \
-					   (__v4di)(__m256i)INDEX, \
-					   (__v4si)(__m128i)MASK,  \
-					   (int)SCALE)
-#endif  /* __OPTIMIZE__ */
-
-#ifdef __DISABLE_AVX2__
-#undef __DISABLE_AVX2__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX2__ */
-
-#endif /* _AVX2INTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512bwintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512bwintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,2995 +1,0 @@
-/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX512BWINTRIN_H_INCLUDED
-#define _AVX512BWINTRIN_H_INCLUDED
-
-#ifndef __AVX512BW__
-#pragma GCC push_options
-#pragma GCC target("avx512bw")
-#define __DISABLE_AVX512BW__
-#endif /* __AVX512BW__ */
-
-/* Internal data types for implementing the intrinsics.  */
-typedef short __v32hi __attribute__ ((__vector_size__ (64)));
-typedef char __v64qi __attribute__ ((__vector_size__ (64)));
-
-typedef unsigned long long __mmask64;
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_setzero_qi (void)
-{
-  return __extension__ (__m512i)(__v64qi){ 0, 0, 0, 0, 0, 0, 0, 0,
-					   0, 0, 0, 0, 0, 0, 0, 0,
-					   0, 0, 0, 0, 0, 0, 0, 0,
-					   0, 0, 0, 0, 0, 0, 0, 0,
-					   0, 0, 0, 0, 0, 0, 0, 0,
-					   0, 0, 0, 0, 0, 0, 0, 0,
-					   0, 0, 0, 0, 0, 0, 0, 0,
-					   0, 0, 0, 0, 0, 0, 0, 0 };
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_setzero_hi (void)
-{
-  return __extension__ (__m512i)(__v32hi){ 0, 0, 0, 0, 0, 0, 0, 0,
-					   0, 0, 0, 0, 0, 0, 0, 0,
-					   0, 0, 0, 0, 0, 0, 0, 0,
-					   0, 0, 0, 0, 0, 0, 0, 0 };
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_movdquhi512_mask ((__v32hi) __A,
-						    (__v32hi) __W,
-						    (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_movdquhi512_mask ((__v32hi) __A,
-						    (__v32hi)
-						    _mm512_setzero_hi (),
-						    (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P)
-{
-  return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
-						     (__v32hi) __W,
-						     (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P)
-{
-  return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
-						     (__v32hi)
-						     _mm512_setzero_hi (),
-						     (__mmask32) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A)
-{
-  __builtin_ia32_storedquhi512_mask ((__v32hi *) __P,
-				     (__v32hi) __A,
-				     (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_movdquqi512_mask ((__v64qi) __A,
-						    (__v64qi) __W,
-						    (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_movdquqi512_mask ((__v64qi) __A,
-						    (__v64qi)
-						    _mm512_setzero_hi (),
-						    (__mmask64) __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
-{
-  return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
-					      (__mmask32) __B);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
-{
-  return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
-					      (__mmask64) __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P)
-{
-  return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
-						     (__v64qi) __W,
-						     (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P)
-{
-  return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
-						     (__v64qi)
-						     _mm512_setzero_hi (),
-						     (__mmask64) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A)
-{
-  __builtin_ia32_storedquqi512_mask ((__v64qi *) __P,
-				     (__v64qi) __A,
-				     (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sad_epu8 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A,
-					     (__v64qi) __B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepi16_epi8 (__m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
-						  (__v32qi) _mm256_undefined_si256(),
-						  (__mmask32) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
-						  (__v32qi) __O, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
-						  (__v32qi)
-						  _mm256_setzero_si256 (),
-						  __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtsepi16_epi8 (__m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
-						   (__v32qi)_mm256_undefined_si256(),
-						   (__mmask32) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
-						   (__v32qi)__O,
-						   __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
-						   (__v32qi)
-						   _mm256_setzero_si256 (),
-						   __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtusepi16_epi8 (__m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
-						    (__v32qi)_mm256_undefined_si256(),
-						    (__mmask32) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
-						    (__v32qi) __O,
-						    __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
-						    (__v32qi)
-						    _mm256_setzero_si256 (),
-						    __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_broadcastb_epi8 (__m128i __A)
-{
-  return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A,
-						       (__v64qi)_mm512_undefined_si512(),
-						       (__mmask64) -
-						       1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A,
-						       (__v64qi) __O,
-						       __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A,
-						       (__v64qi)
-						       _mm512_setzero_qi(),
-						       __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A)
-{
-  return (__m512i) __builtin_ia32_pbroadcastb512_gpr_mask (__A,
-							   (__v64qi) __O,
-							   __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
-{
-  return (__m512i) __builtin_ia32_pbroadcastb512_gpr_mask (__A,
-							   (__v64qi)
-							   _mm512_setzero_qi(),
-							   __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_broadcastw_epi16 (__m128i __A)
-{
-  return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A,
-						       (__v32hi)_mm512_undefined_si512(),
-						       (__mmask32)-1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A,
-						       (__v32hi) __O,
-						       __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A,
-						       (__v32hi)
-						       _mm512_setzero_hi(),
-						       __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A)
-{
-  return (__m512i) __builtin_ia32_pbroadcastw512_gpr_mask (__A,
-							   (__v32hi) __O,
-							   __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_set1_epi16 (__mmask32 __M, short __A)
-{
-  return (__m512i) __builtin_ia32_pbroadcastw512_gpr_mask (__A,
-							   (__v32hi)
-							   _mm512_setzero_hi(),
-							   __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mulhrs_epi16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A,
-						    (__v32hi) __B,
-						    (__v32hi)
-						    _mm512_setzero_hi (),
-						    (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mulhrs_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-			  __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A,
-						    (__v32hi) __B,
-						    (__v32hi) __W,
-						    (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mulhrs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A,
-						    (__v32hi) __B,
-						    (__v32hi)
-						    _mm512_setzero_hi (),
-						    (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mulhi_epi16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mulhi_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-			 __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi) __W,
-						  (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mulhi_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mulhi_epu16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmulhuw512_mask ((__v32hi) __A,
-						   (__v32hi) __B,
-						   (__v32hi)
-						   _mm512_setzero_hi (),
-						   (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mulhi_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
-			 __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmulhuw512_mask ((__v32hi) __A,
-						   (__v32hi) __B,
-						   (__v32hi) __W,
-						   (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmulhuw512_mask ((__v32hi) __A,
-						   (__v32hi) __B,
-						   (__v32hi)
-						   _mm512_setzero_hi (),
-						   (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mullo_epi16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) ((__v32hu) __A * (__v32hu) __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mullo_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-			 __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi) __W,
-						  (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepi8_epi16 (__m256i __A)
-{
-  return (__m512i) __builtin_ia32_pmovsxbw512_mask ((__v32qi) __A,
-						    (__v32hi)
-						    _mm512_setzero_hi (),
-						    (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepi8_epi16 (__m512i __W, __mmask32 __U, __m256i __A)
-{
-  return (__m512i) __builtin_ia32_pmovsxbw512_mask ((__v32qi) __A,
-						    (__v32hi) __W,
-						    (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepi8_epi16 (__mmask32 __U, __m256i __A)
-{
-  return (__m512i) __builtin_ia32_pmovsxbw512_mask ((__v32qi) __A,
-						    (__v32hi)
-						    _mm512_setzero_hi(),
-						    (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepu8_epi16 (__m256i __A)
-{
-  return (__m512i) __builtin_ia32_pmovzxbw512_mask ((__v32qi) __A,
-						    (__v32hi)
-						    _mm512_setzero_hi (),
-						    (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepu8_epi16 (__m512i __W, __mmask32 __U, __m256i __A)
-{
-  return (__m512i) __builtin_ia32_pmovzxbw512_mask ((__v32qi) __A,
-						    (__v32hi) __W,
-						    (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepu8_epi16 (__mmask32 __U, __m256i __A)
-{
-  return (__m512i) __builtin_ia32_pmovzxbw512_mask ((__v32qi) __A,
-						    (__v32hi)
-						    _mm512_setzero_hi(),
-						    (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_permutexvar_epi16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_permvarhi512_mask ((__v32hi) __B,
-						     (__v32hi) __A,
-						     (__v32hi)
-						     _mm512_setzero_hi (),
-						     (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A,
-				__m512i __B)
-{
-  return (__m512i) __builtin_ia32_permvarhi512_mask ((__v32hi) __B,
-						     (__v32hi) __A,
-						     (__v32hi)
-						     _mm512_setzero_hi(),
-						     (__mmask32) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
-			       __m512i __B)
-{
-  return (__m512i) __builtin_ia32_permvarhi512_mask ((__v32hi) __B,
-						     (__v32hi) __A,
-						     (__v32hi) __W,
-						     (__mmask32) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_permutex2var_epi16 (__m512i __A, __m512i __I, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_vpermt2varhi512_mask ((__v32hi) __I
-							/* idx */ ,
-							(__v32hi) __A,
-							(__v32hi) __B,
-							(__mmask32) -
-							1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_permutex2var_epi16 (__m512i __A, __mmask32 __U,
-				__m512i __I, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_vpermt2varhi512_mask ((__v32hi) __I
-							/* idx */ ,
-							(__v32hi) __A,
-							(__v32hi) __B,
-							(__mmask32)
-							__U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask2_permutex2var_epi16 (__m512i __A, __m512i __I,
-				 __mmask32 __U, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_vpermi2varhi512_mask ((__v32hi) __A,
-							(__v32hi) __I
-							/* idx */ ,
-							(__v32hi) __B,
-							(__mmask32)
-							__U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_permutex2var_epi16 (__mmask32 __U, __m512i __A,
-				 __m512i __I, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_vpermt2varhi512_maskz ((__v32hi) __I
-							 /* idx */ ,
-							 (__v32hi) __A,
-							 (__v32hi) __B,
-							 (__mmask32)
-							 __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_avg_epu8 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
-						 (__v64qi) __B,
-						 (__v64qi)
-						 _mm512_setzero_qi (),
-						 (__mmask64) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
-		      __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
-						 (__v64qi) __B,
-						 (__v64qi) __W,
-						 (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
-						 (__v64qi) __B,
-						 (__v64qi)
-						 _mm512_setzero_qi(),
-						 (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_add_epi8 (__m512i __A, __m512i __B)
-{
-  return (__m512i) ((__v64qu) __A + (__v64qu) __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_add_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
-		      __m512i __B)
-{
-  return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A,
-						 (__v64qi) __B,
-						 (__v64qi) __W,
-						 (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_add_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A,
-						 (__v64qi) __B,
-						 (__v64qi)
-						 _mm512_setzero_qi (),
-						 (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sub_epi8 (__m512i __A, __m512i __B)
-{
-  return (__m512i) ((__v64qu) __A - (__v64qu) __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sub_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
-		      __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A,
-						 (__v64qi) __B,
-						 (__v64qi) __W,
-						 (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sub_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A,
-						 (__v64qi) __B,
-						 (__v64qi)
-						 _mm512_setzero_qi (),
-						 (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_avg_epu16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
-						 (__v32hi) __B,
-						 (__v32hi)
-						 _mm512_setzero_hi (),
-						 (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
-		       __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
-						 (__v32hi) __B,
-						 (__v32hi) __W,
-						 (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
-						 (__v32hi) __B,
-						 (__v32hi)
-						 _mm512_setzero_hi(),
-						 (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_subs_epi8 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A,
-						  (__v64qi) __B,
-						  (__v64qi)
-						  _mm512_setzero_qi (),
-						  (__mmask64) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
-		       __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A,
-						  (__v64qi) __B,
-						  (__v64qi) __W,
-						  (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A,
-						  (__v64qi) __B,
-						  (__v64qi)
-						  _mm512_setzero_qi (),
-						  (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_subs_epu8 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A,
-						   (__v64qi) __B,
-						   (__v64qi)
-						   _mm512_setzero_qi (),
-						   (__mmask64) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
-		       __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A,
-						   (__v64qi) __B,
-						   (__v64qi) __W,
-						   (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A,
-						   (__v64qi) __B,
-						   (__v64qi)
-						   _mm512_setzero_qi (),
-						   (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_adds_epi8 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
-						  (__v64qi) __B,
-						  (__v64qi)
-						  _mm512_setzero_qi (),
-						  (__mmask64) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
-		       __m512i __B)
-{
-  return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
-						  (__v64qi) __B,
-						  (__v64qi) __W,
-						  (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
-						  (__v64qi) __B,
-						  (__v64qi)
-						  _mm512_setzero_qi (),
-						  (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_adds_epu8 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A,
-						   (__v64qi) __B,
-						   (__v64qi)
-						   _mm512_setzero_qi (),
-						   (__mmask64) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
-		       __m512i __B)
-{
-  return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A,
-						   (__v64qi) __B,
-						   (__v64qi) __W,
-						   (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A,
-						   (__v64qi) __B,
-						   (__v64qi)
-						   _mm512_setzero_qi (),
-						   (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sub_epi16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) ((__v32hu) __A - (__v32hu) __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sub_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-		       __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A,
-						 (__v32hi) __B,
-						 (__v32hi) __W,
-						 (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sub_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A,
-						 (__v32hi) __B,
-						 (__v32hi)
-						 _mm512_setzero_hi (),
-						 (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_subs_epi16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-			__m512i __B)
-{
-  return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi) __W,
-						  (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_subs_epu16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A,
-						   (__v32hi) __B,
-						   (__v32hi)
-						   _mm512_setzero_hi (),
-						   (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
-			__m512i __B)
-{
-  return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A,
-						   (__v32hi) __B,
-						   (__v32hi) __W,
-						   (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A,
-						   (__v32hi) __B,
-						   (__v32hi)
-						   _mm512_setzero_hi (),
-						   (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_add_epi16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) ((__v32hu) __A + (__v32hu) __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_add_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-		       __m512i __B)
-{
-  return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A,
-						 (__v32hi) __B,
-						 (__v32hi) __W,
-						 (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_add_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A,
-						 (__v32hi) __B,
-						 (__v32hi)
-						 _mm512_setzero_hi (),
-						 (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_adds_epi16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-			__m512i __B)
-{
-  return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi) __W,
-						  (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_adds_epu16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A,
-						   (__v32hi) __B,
-						   (__v32hi)
-						   _mm512_setzero_hi (),
-						   (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
-			__m512i __B)
-{
-  return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A,
-						   (__v32hi) __B,
-						   (__v32hi) __W,
-						   (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A,
-						   (__v32hi) __B,
-						   (__v32hi)
-						   _mm512_setzero_hi (),
-						   (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_srl_epi16 (__m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psrlw512_mask ((__v32hi) __A,
-						 (__v8hi) __B,
-						 (__v32hi)
-						 _mm512_setzero_hi (),
-						 (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_srl_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-		       __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psrlw512_mask ((__v32hi) __A,
-						 (__v8hi) __B,
-						 (__v32hi) __W,
-						 (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_srl_epi16 (__mmask32 __U, __m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psrlw512_mask ((__v32hi) __A,
-						 (__v8hi) __B,
-						 (__v32hi)
-						 _mm512_setzero_hi (),
-						 (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_packs_epi16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_packsswb512_mask ((__v32hi) __A,
-						    (__v32hi) __B,
-						    (__v64qi)
-						    _mm512_setzero_qi (),
-						    (__mmask64) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sll_epi16 (__m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psllw512_mask ((__v32hi) __A,
-						 (__v8hi) __B,
-						 (__v32hi)
-						 _mm512_setzero_hi (),
-						 (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sll_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-		       __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psllw512_mask ((__v32hi) __A,
-						 (__v8hi) __B,
-						 (__v32hi) __W,
-						 (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sll_epi16 (__mmask32 __U, __m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psllw512_mask ((__v32hi) __A,
-						 (__v8hi) __B,
-						 (__v32hi)
-						 _mm512_setzero_hi (),
-						 (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maddubs_epi16 (__m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_pmaddubsw512_mask ((__v64qi) __X,
-						     (__v64qi) __Y,
-						     (__v32hi)
-						     _mm512_setzero_hi (),
-						     (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_maddubs_epi16 (__m512i __W, __mmask32 __U, __m512i __X,
-			   __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_pmaddubsw512_mask ((__v64qi) __X,
-						     (__v64qi) __Y,
-						     (__v32hi) __W,
-						     (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_maddubs_epi16 (__mmask32 __U, __m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_pmaddubsw512_mask ((__v64qi) __X,
-						     (__v64qi) __Y,
-						     (__v32hi)
-						     _mm512_setzero_hi (),
-						     (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_madd_epi16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaddwd512_mask ((__v32hi) __A,
-						   (__v32hi) __B,
-						   (__v16si)
-						   _mm512_setzero_si512 (),
-						   (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_madd_epi16 (__m512i __W, __mmask16 __U, __m512i __A,
-			__m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaddwd512_mask ((__v32hi) __A,
-						   (__v32hi) __B,
-						   (__v16si) __W,
-						   (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_madd_epi16 (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaddwd512_mask ((__v32hi) __A,
-						   (__v32hi) __B,
-						   (__v16si)
-						   _mm512_setzero_si512 (),
-						   (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_unpackhi_epi8 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckhbw512_mask ((__v64qi) __A,
-						     (__v64qi) __B,
-						     (__v64qi)
-						     _mm512_setzero_qi (),
-						     (__mmask64) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_unpackhi_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
-			   __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckhbw512_mask ((__v64qi) __A,
-						     (__v64qi) __B,
-						     (__v64qi) __W,
-						     (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_unpackhi_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckhbw512_mask ((__v64qi) __A,
-						     (__v64qi) __B,
-						     (__v64qi)
-						     _mm512_setzero_qi(),
-						     (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_unpackhi_epi16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckhwd512_mask ((__v32hi) __A,
-						     (__v32hi) __B,
-						     (__v32hi)
-						     _mm512_setzero_hi (),
-						     (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_unpackhi_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-			    __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckhwd512_mask ((__v32hi) __A,
-						     (__v32hi) __B,
-						     (__v32hi) __W,
-						     (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_unpackhi_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckhwd512_mask ((__v32hi) __A,
-						     (__v32hi) __B,
-						     (__v32hi)
-						     _mm512_setzero_hi(),
-						     (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_unpacklo_epi8 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpcklbw512_mask ((__v64qi) __A,
-						     (__v64qi) __B,
-						     (__v64qi)
-						     _mm512_setzero_qi (),
-						     (__mmask64) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_unpacklo_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
-			   __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpcklbw512_mask ((__v64qi) __A,
-						     (__v64qi) __B,
-						     (__v64qi) __W,
-						     (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_unpacklo_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpcklbw512_mask ((__v64qi) __A,
-						     (__v64qi) __B,
-						     (__v64qi)
-						     _mm512_setzero_qi(),
-						     (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_unpacklo_epi16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpcklwd512_mask ((__v32hi) __A,
-						     (__v32hi) __B,
-						     (__v32hi)
-						     _mm512_setzero_hi (),
-						     (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_unpacklo_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-			    __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpcklwd512_mask ((__v32hi) __A,
-						     (__v32hi) __B,
-						     (__v32hi) __W,
-						     (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_unpacklo_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpcklwd512_mask ((__v32hi) __A,
-						     (__v32hi) __B,
-						     (__v32hi)
-						     _mm512_setzero_hi(),
-						     (__mmask32) __U);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpeq_epu8_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __A,
-						    (__v64qi) __B, 0,
-						    (__mmask64) -1);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpeq_epi8_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask64) __builtin_ia32_pcmpeqb512_mask ((__v64qi) __A,
-						     (__v64qi) __B,
-						     (__mmask64) -1);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpeq_epu8_mask (__mmask64 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __A,
-						    (__v64qi) __B, 0,
-						    __U);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpeq_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask64) __builtin_ia32_pcmpeqb512_mask ((__v64qi) __A,
-						     (__v64qi) __B,
-						     __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpeq_epu16_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __A,
-						    (__v32hi) __B, 0,
-						    (__mmask32) -1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpeq_epi16_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask32) __builtin_ia32_pcmpeqw512_mask ((__v32hi) __A,
-						     (__v32hi) __B,
-						     (__mmask32) -1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpeq_epu16_mask (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __A,
-						    (__v32hi) __B, 0,
-						    __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpeq_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask32) __builtin_ia32_pcmpeqw512_mask ((__v32hi) __A,
-						     (__v32hi) __B,
-						     __U);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpgt_epu8_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __A,
-						    (__v64qi) __B, 6,
-						    (__mmask64) -1);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpgt_epi8_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask64) __builtin_ia32_pcmpgtb512_mask ((__v64qi) __A,
-						     (__v64qi) __B,
-						     (__mmask64) -1);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpgt_epu8_mask (__mmask64 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __A,
-						    (__v64qi) __B, 6,
-						    __U);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpgt_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask64) __builtin_ia32_pcmpgtb512_mask ((__v64qi) __A,
-						     (__v64qi) __B,
-						     __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpgt_epu16_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __A,
-						    (__v32hi) __B, 6,
-						    (__mmask32) -1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpgt_epi16_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask32) __builtin_ia32_pcmpgtw512_mask ((__v32hi) __A,
-						     (__v32hi) __B,
-						     (__mmask32) -1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpgt_epu16_mask (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __A,
-						    (__v32hi) __B, 6,
-						    __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpgt_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask32) __builtin_ia32_pcmpgtw512_mask ((__v32hi) __A,
-						     (__v32hi) __B,
-						     __U);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_movepi8_mask (__m512i __A)
-{
-  return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_movepi16_mask (__m512i __A)
-{
-  return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_movm_epi8 (__mmask64 __A)
-{
-  return (__m512i) __builtin_ia32_cvtmask2b512 (__A);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_movm_epi16 (__mmask32 __A)
-{
-  return (__m512i) __builtin_ia32_cvtmask2w512 (__A);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_test_epi8_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask64) __builtin_ia32_ptestmb512 ((__v64qi) __A,
-						(__v64qi) __B,
-						(__mmask64) -1);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask64) __builtin_ia32_ptestmb512 ((__v64qi) __A,
-						(__v64qi) __B, __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_test_epi16_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask32) __builtin_ia32_ptestmw512 ((__v32hi) __A,
-						(__v32hi) __B,
-						(__mmask32) -1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask32) __builtin_ia32_ptestmw512 ((__v32hi) __A,
-						(__v32hi) __B, __U);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_testn_epi8_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask64) __builtin_ia32_ptestnmb512 ((__v64qi) __A,
-						 (__v64qi) __B,
-						 (__mmask64) -1);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask64) __builtin_ia32_ptestnmb512 ((__v64qi) __A,
-						 (__v64qi) __B, __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_testn_epi16_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask32) __builtin_ia32_ptestnmw512 ((__v32hi) __A,
-						 (__v32hi) __B,
-						 (__mmask32) -1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask32) __builtin_ia32_ptestnmw512 ((__v32hi) __A,
-						 (__v32hi) __B, __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_shuffle_epi8 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pshufb512_mask ((__v64qi) __A,
-						  (__v64qi) __B,
-						  (__v64qi)
-						  _mm512_setzero_qi (),
-						  (__mmask64) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_shuffle_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
-			  __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pshufb512_mask ((__v64qi) __A,
-						  (__v64qi) __B,
-						  (__v64qi) __W,
-						  (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_shuffle_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pshufb512_mask ((__v64qi) __A,
-						  (__v64qi) __B,
-						  (__v64qi)
-						  _mm512_setzero_qi (),
-						  (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_min_epu16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi)
-						  _mm512_setzero_hi(),
-						  (__mmask32) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A,
-		       __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi) __W,
-						  (__mmask32) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_min_epi16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi)
-						  _mm512_setzero_hi(),
-						  (__mmask32) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
-		       __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi) __W,
-						  (__mmask32) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_max_epu8 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A,
-						  (__v64qi) __B,
-						  (__v64qi)
-						  _mm512_setzero_qi (),
-						  (__mmask64) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A,
-						  (__v64qi) __B,
-						  (__v64qi)
-						  _mm512_setzero_qi(),
-						  (__mmask64) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A,
-		      __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A,
-						  (__v64qi) __B,
-						  (__v64qi) __W,
-						  (__mmask64) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_max_epi8 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A,
-						  (__v64qi) __B,
-						  (__v64qi)
-						  _mm512_setzero_qi (),
-						  (__mmask64) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A,
-						  (__v64qi) __B,
-						  (__v64qi)
-						  _mm512_setzero_qi(),
-						  (__mmask64) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
-		      __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A,
-						  (__v64qi) __B,
-						  (__v64qi) __W,
-						  (__mmask64) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_min_epu8 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A,
-						  (__v64qi) __B,
-						  (__v64qi)
-						  _mm512_setzero_qi (),
-						  (__mmask64) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A,
-						  (__v64qi) __B,
-						  (__v64qi)
-						  _mm512_setzero_qi(),
-						  (__mmask64) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A,
-		      __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A,
-						  (__v64qi) __B,
-						  (__v64qi) __W,
-						  (__mmask64) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_min_epi8 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A,
-						  (__v64qi) __B,
-						  (__v64qi)
-						  _mm512_setzero_qi (),
-						  (__mmask64) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A,
-						  (__v64qi) __B,
-						  (__v64qi)
-						  _mm512_setzero_qi(),
-						  (__mmask64) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
-		      __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A,
-						  (__v64qi) __B,
-						  (__v64qi) __W,
-						  (__mmask64) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_max_epi16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi)
-						  _mm512_setzero_hi(),
-						  (__mmask32) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
-		       __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi) __W,
-						  (__mmask32) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_max_epu16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi)
-						  _mm512_setzero_hi(),
-						  (__mmask32) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A,
-		       __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi) __W,
-						  (__mmask32) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sra_epi16 (__m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psraw512_mask ((__v32hi) __A,
-						 (__v8hi) __B,
-						 (__v32hi)
-						 _mm512_setzero_hi (),
-						 (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sra_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-		       __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psraw512_mask ((__v32hi) __A,
-						 (__v8hi) __B,
-						 (__v32hi) __W,
-						 (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sra_epi16 (__mmask32 __U, __m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psraw512_mask ((__v32hi) __A,
-						 (__v8hi) __B,
-						 (__v32hi)
-						 _mm512_setzero_hi (),
-						 (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_srav_epi16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psrav32hi_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_srav_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-			__m512i __B)
-{
-  return (__m512i) __builtin_ia32_psrav32hi_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi) __W,
-						  (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_srav_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psrav32hi_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_srlv_epi16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psrlv32hi_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_srlv_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-			__m512i __B)
-{
-  return (__m512i) __builtin_ia32_psrlv32hi_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi) __W,
-						  (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_srlv_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psrlv32hi_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sllv_epi16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psllv32hi_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-			__m512i __B)
-{
-  return (__m512i) __builtin_ia32_psllv32hi_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi) __W,
-						  (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sllv_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psllv32hi_mask ((__v32hi) __A,
-						  (__v32hi) __B,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_packs_epi16 (__m512i __W, __mmask64 __M, __m512i __A,
-			 __m512i __B)
-{
-  return (__m512i) __builtin_ia32_packsswb512_mask ((__v32hi) __A,
-						    (__v32hi) __B,
-						    (__v64qi) __W,
-						    (__mmask64) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_packs_epi16 (__mmask64 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_packsswb512_mask ((__v32hi) __A,
-						    (__v32hi) __B,
-						    (__v64qi)
-						    _mm512_setzero_qi(),
-						    __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_packus_epi16 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_packuswb512_mask ((__v32hi) __A,
-						    (__v32hi) __B,
-						    (__v64qi)
-						    _mm512_setzero_qi (),
-						    (__mmask64) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_packus_epi16 (__m512i __W, __mmask64 __M, __m512i __A,
-			  __m512i __B)
-{
-  return (__m512i) __builtin_ia32_packuswb512_mask ((__v32hi) __A,
-						    (__v32hi) __B,
-						    (__v64qi) __W,
-						    (__mmask64) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_packus_epi16 (__mmask64 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_packuswb512_mask ((__v32hi) __A,
-						    (__v32hi) __B,
-						    (__v64qi)
-						    _mm512_setzero_qi(),
-						    (__mmask64) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_abs_epi8 (__m512i __A)
-{
-  return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A,
-						 (__v64qi)
-						 _mm512_setzero_qi (),
-						 (__mmask64) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A,
-						 (__v64qi) __W,
-						 (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A,
-						 (__v64qi)
-						 _mm512_setzero_qi (),
-						 (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_abs_epi16 (__m512i __A)
-{
-  return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A,
-						 (__v32hi)
-						 _mm512_setzero_hi (),
-						 (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A,
-						 (__v32hi) __W,
-						 (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A,
-						 (__v32hi)
-						 _mm512_setzero_hi (),
-						 (__mmask32) __U);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpneq_epu8_mask (__mmask64 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X,
-						   (__v64qi) __Y, 4,
-						   (__mmask64) __M);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmplt_epu8_mask (__mmask64 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X,
-						   (__v64qi) __Y, 1,
-						   (__mmask64) __M);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpge_epu8_mask (__mmask64 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X,
-						   (__v64qi) __Y, 5,
-						   (__mmask64) __M);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmple_epu8_mask (__mmask64 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X,
-						   (__v64qi) __Y, 2,
-						   (__mmask64) __M);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpneq_epu16_mask (__mmask32 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __X,
-						   (__v32hi) __Y, 4,
-						   (__mmask32) __M);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmplt_epu16_mask (__mmask32 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __X,
-						   (__v32hi) __Y, 1,
-						   (__mmask32) __M);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpge_epu16_mask (__mmask32 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __X,
-						   (__v32hi) __Y, 5,
-						   (__mmask32) __M);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmple_epu16_mask (__mmask32 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __X,
-						   (__v32hi) __Y, 2,
-						   (__mmask32) __M);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpneq_epi8_mask (__mmask64 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X,
-						  (__v64qi) __Y, 4,
-						  (__mmask64) __M);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmplt_epi8_mask (__mmask64 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X,
-						  (__v64qi) __Y, 1,
-						  (__mmask64) __M);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpge_epi8_mask (__mmask64 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X,
-						  (__v64qi) __Y, 5,
-						  (__mmask64) __M);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmple_epi8_mask (__mmask64 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X,
-						  (__v64qi) __Y, 2,
-						  (__mmask64) __M);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpneq_epi16_mask (__mmask32 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi) __X,
-						  (__v32hi) __Y, 4,
-						  (__mmask32) __M);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmplt_epi16_mask (__mmask32 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi) __X,
-						  (__v32hi) __Y, 1,
-						  (__mmask32) __M);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpge_epi16_mask (__mmask32 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi) __X,
-						  (__v32hi) __Y, 5,
-						  (__mmask32) __M);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmple_epi16_mask (__mmask32 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi) __X,
-						  (__v32hi) __Y, 2,
-						  (__mmask32) __M);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpneq_epu8_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X,
-						   (__v64qi) __Y, 4,
-						   (__mmask64) - 1);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmplt_epu8_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X,
-						   (__v64qi) __Y, 1,
-						   (__mmask64) - 1);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpge_epu8_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X,
-						   (__v64qi) __Y, 5,
-						   (__mmask64) - 1);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmple_epu8_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X,
-						   (__v64qi) __Y, 2,
-						   (__mmask64) - 1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpneq_epu16_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __X,
-						   (__v32hi) __Y, 4,
-						   (__mmask32) - 1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmplt_epu16_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __X,
-						   (__v32hi) __Y, 1,
-						   (__mmask32) - 1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpge_epu16_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __X,
-						   (__v32hi) __Y, 5,
-						   (__mmask32) - 1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmple_epu16_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __X,
-						   (__v32hi) __Y, 2,
-						   (__mmask32) - 1);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpneq_epi8_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X,
-						  (__v64qi) __Y, 4,
-						  (__mmask64) - 1);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmplt_epi8_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X,
-						  (__v64qi) __Y, 1,
-						  (__mmask64) - 1);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpge_epi8_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X,
-						  (__v64qi) __Y, 5,
-						  (__mmask64) - 1);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmple_epi8_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X,
-						  (__v64qi) __Y, 2,
-						  (__mmask64) - 1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpneq_epi16_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi) __X,
-						  (__v32hi) __Y, 4,
-						  (__mmask32) - 1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmplt_epi16_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi) __X,
-						  (__v32hi) __Y, 1,
-						  (__mmask32) - 1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpge_epi16_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi) __X,
-						  (__v32hi) __Y, 5,
-						  (__mmask32) - 1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmple_epi16_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi) __X,
-						  (__v32hi) __Y, 2,
-						  (__mmask32) - 1);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_alignr_epi8 (__m512i __A, __m512i __B, const int __N)
-{
-  return (__m512i) __builtin_ia32_palignr512 ((__v8di) __A,
-					      (__v8di) __B, __N * 8);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_alignr_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
-			 __m512i __B, const int __N)
-{
-  return (__m512i) __builtin_ia32_palignr512_mask ((__v8di) __A,
-						   (__v8di) __B,
-						   __N * 8,
-						   (__v8di) __W,
-						   (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_alignr_epi8 (__mmask64 __U, __m512i __A, __m512i __B,
-			  const int __N)
-{
-  return (__m512i) __builtin_ia32_palignr512_mask ((__v8di) __A,
-						   (__v8di) __B,
-						   __N * 8,
-						   (__v8di)
-						   _mm512_setzero_si512 (),
-						   (__mmask64) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dbsad_epu8 (__m512i __A, __m512i __B, const int __imm)
-{
-  return (__m512i) __builtin_ia32_dbpsadbw512_mask ((__v64qi) __A,
-						    (__v64qi) __B,
-						    __imm,
-						    (__v32hi)
-						    _mm512_setzero_hi (),
-						    (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dbsad_epu8 (__m512i __W, __mmask32 __U, __m512i __A,
-			__m512i __B, const int __imm)
-{
-  return (__m512i) __builtin_ia32_dbpsadbw512_mask ((__v64qi) __A,
-						    (__v64qi) __B,
-						    __imm,
-						    (__v32hi) __W,
-						    (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dbsad_epu8 (__mmask32 __U, __m512i __A, __m512i __B,
-			 const int __imm)
-{
-  return (__m512i) __builtin_ia32_dbpsadbw512_mask ((__v64qi) __A,
-						    (__v64qi) __B,
-						    __imm,
-						    (__v32hi)
-						    _mm512_setzero_hi(),
-						    (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_srli_epi16 (__m512i __A, const int __imm)
-{
-  return (__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi) __A, __imm,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_srli_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-			const int __imm)
-{
-  return (__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi) __A, __imm,
-						  (__v32hi) __W,
-						  (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_srli_epi16 (__mmask32 __U, __m512i __A, const int __imm)
-{
-  return (__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi) __A, __imm,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_slli_epi16 (__m512i __A, const int __B)
-{
-  return (__m512i) __builtin_ia32_psllwi512_mask ((__v32hi) __A, __B,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_slli_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-			const int __B)
-{
-  return (__m512i) __builtin_ia32_psllwi512_mask ((__v32hi) __A, __B,
-						  (__v32hi) __W,
-						  (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_slli_epi16 (__mmask32 __U, __m512i __A, const int __B)
-{
-  return (__m512i) __builtin_ia32_psllwi512_mask ((__v32hi) __A, __B,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_shufflehi_epi16 (__m512i __A, const int __imm)
-{
-  return (__m512i) __builtin_ia32_pshufhw512_mask ((__v32hi) __A,
-						   __imm,
-						   (__v32hi)
-						   _mm512_setzero_hi (),
-						   (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_shufflehi_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-			     const int __imm)
-{
-  return (__m512i) __builtin_ia32_pshufhw512_mask ((__v32hi) __A,
-						   __imm,
-						   (__v32hi) __W,
-						   (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_shufflehi_epi16 (__mmask32 __U, __m512i __A,
-			      const int __imm)
-{
-  return (__m512i) __builtin_ia32_pshufhw512_mask ((__v32hi) __A,
-						   __imm,
-						   (__v32hi)
-						   _mm512_setzero_hi (),
-						   (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_shufflelo_epi16 (__m512i __A, const int __imm)
-{
-  return (__m512i) __builtin_ia32_pshuflw512_mask ((__v32hi) __A,
-						   __imm,
-						   (__v32hi)
-						   _mm512_setzero_hi (),
-						   (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_shufflelo_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-			     const int __imm)
-{
-  return (__m512i) __builtin_ia32_pshuflw512_mask ((__v32hi) __A,
-						   __imm,
-						   (__v32hi) __W,
-						   (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_shufflelo_epi16 (__mmask32 __U, __m512i __A,
-			      const int __imm)
-{
-  return (__m512i) __builtin_ia32_pshuflw512_mask ((__v32hi) __A,
-						   __imm,
-						   (__v32hi)
-						   _mm512_setzero_hi (),
-						   (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_srai_epi16 (__m512i __A, const int __imm)
-{
-  return (__m512i) __builtin_ia32_psrawi512_mask ((__v32hi) __A, __imm,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_srai_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-			const int __imm)
-{
-  return (__m512i) __builtin_ia32_psrawi512_mask ((__v32hi) __A, __imm,
-						  (__v32hi) __W,
-						  (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_srai_epi16 (__mmask32 __U, __m512i __A, const int __imm)
-{
-  return (__m512i) __builtin_ia32_psrawi512_mask ((__v32hi) __A, __imm,
-						  (__v32hi)
-						  _mm512_setzero_hi (),
-						  (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
-{
-  return (__m512i) __builtin_ia32_blendmw_512_mask ((__v32hi) __A,
-						    (__v32hi) __W,
-						    (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W)
-{
-  return (__m512i) __builtin_ia32_blendmb_512_mask ((__v64qi) __A,
-						    (__v64qi) __W,
-						    (__mmask64) __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_epi16_mask (__mmask32 __U, __m512i __X, __m512i __Y,
-			    const int __P)
-{
-  return (__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi) __X,
-						  (__v32hi) __Y, __P,
-						  (__mmask32) __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmp_epi16_mask (__m512i __X, __m512i __Y, const int __P)
-{
-  return (__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi) __X,
-						  (__v32hi) __Y, __P,
-						  (__mmask32) -1);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_epi8_mask (__mmask32 __U, __m512i __X, __m512i __Y,
-			   const int __P)
-{
-  return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X,
-						  (__v64qi) __Y, __P,
-						  (__mmask64) __U);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmp_epi8_mask (__m512i __X, __m512i __Y, const int __P)
-{
-  return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X,
-						  (__v64qi) __Y, __P,
-						  (__mmask64) -1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_epu16_mask (__mmask32 __U, __m512i __X, __m512i __Y,
-			    const int __P)
-{
-  return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __X,
-						   (__v32hi) __Y, __P,
-						   (__mmask32) __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmp_epu16_mask (__m512i __X, __m512i __Y, const int __P)
-{
-  return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __X,
-						   (__v32hi) __Y, __P,
-						   (__mmask32) -1);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_epu8_mask (__mmask32 __U, __m512i __X, __m512i __Y,
-			   const int __P)
-{
-  return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X,
-						   (__v64qi) __Y, __P,
-						   (__mmask64) __U);
-}
-
-extern __inline __mmask64
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmp_epu8_mask (__m512i __X, __m512i __Y, const int __P)
-{
-  return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X,
-						   (__v64qi) __Y, __P,
-						   (__mmask64) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_packs_epi32 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_packssdw512_mask ((__v16si) __A,
-						    (__v16si) __B,
-						    (__v32hi)
-						    _mm512_setzero_hi (),
-						    (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_packs_epi32 (__mmask32 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_packssdw512_mask ((__v16si) __A,
-						    (__v16si) __B,
-						    (__v32hi)
-						    _mm512_setzero_hi(),
-						    __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_packs_epi32 (__m512i __W, __mmask32 __M, __m512i __A,
-			 __m512i __B)
-{
-  return (__m512i) __builtin_ia32_packssdw512_mask ((__v16si) __A,
-						    (__v16si) __B,
-						    (__v32hi) __W,
-						    __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_packus_epi32 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_packusdw512_mask ((__v16si) __A,
-						    (__v16si) __B,
-						    (__v32hi)
-						    _mm512_setzero_hi (),
-						    (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_packus_epi32 (__mmask32 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_packusdw512_mask ((__v16si) __A,
-						    (__v16si) __B,
-						    (__v32hi)
-						    _mm512_setzero_hi(),
-						    __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_packus_epi32 (__m512i __W, __mmask32 __M, __m512i __A,
-			  __m512i __B)
-{
-  return (__m512i) __builtin_ia32_packusdw512_mask ((__v16si) __A,
-						    (__v16si) __B,
-						    (__v32hi) __W,
-						    __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_bslli_epi128 (__m512i __A, const int __N)
-{
-  return (__m512i) __builtin_ia32_pslldq512 (__A, __N * 8);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_bsrli_epi128 (__m512i __A, const int __N)
-{
-  return (__m512i) __builtin_ia32_psrldq512 (__A, __N * 8);
-}
-
-#else
-#define _mm512_alignr_epi8(X, Y, N)						    \
-  ((__m512i) __builtin_ia32_palignr512 ((__v8di)(__m512i)(X),			    \
-					(__v8di)(__m512i)(Y),			    \
-					(int)(N * 8)))
-
-#define _mm512_mask_alignr_epi8(W, U, X, Y, N)					    \
-  ((__m512i) __builtin_ia32_palignr512_mask ((__v8di)(__m512i)(X),		    \
-					    (__v8di)(__m512i)(Y), (int)(N * 8),	    \
-					    (__v8di)(__m512i)(W), (__mmask64)(U)))
-
-#define _mm512_maskz_alignr_epi8(U, X, Y, N)					    \
-  ((__m512i) __builtin_ia32_palignr512_mask ((__v8di)(__m512i)(X),		    \
-					    (__v8di)(__m512i)(Y), (int)(N * 8),	    \
-					    (__v8di)(__m512i)_mm512_setzero_si512 (),   \
-					    (__mmask64)(U)))
-
-#define _mm512_dbsad_epu8(X, Y, C)                                                  \
-  ((__m512i) __builtin_ia32_dbpsadbw512_mask ((__v64qi)(__m512i) (X),               \
-                                              (__v64qi)(__m512i) (Y), (int) (C),    \
-                                              (__v32hi)(__m512i)_mm512_setzero_si512 (),\
-                                              (__mmask32)-1))
-
-#define _mm512_mask_dbsad_epu8(W, U, X, Y, C)                                       \
-  ((__m512i) __builtin_ia32_dbpsadbw512_mask ((__v64qi)(__m512i) (X),               \
-                                              (__v64qi)(__m512i) (Y), (int) (C),    \
-                                              (__v32hi)(__m512i)(W),                \
-                                              (__mmask32)(U)))
-
-#define _mm512_maskz_dbsad_epu8(U, X, Y, C)                                         \
-  ((__m512i) __builtin_ia32_dbpsadbw512_mask ((__v64qi)(__m512i) (X),               \
-                                              (__v64qi)(__m512i) (Y), (int) (C),    \
-                                              (__v32hi)(__m512i)_mm512_setzero_si512 (),\
-                                              (__mmask32)(U)))
-
-#define _mm512_srli_epi16(A, B)                                         \
-  ((__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi)(__m512i)(A),      \
-    (int)(B), (__v32hi)_mm512_setzero_hi(), (__mmask32)-1))
-
-#define _mm512_mask_srli_epi16(W, U, A, B)                              \
-  ((__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi)(__m512i)(A),      \
-    (int)(B), (__v32hi)(__m512i)(W), (__mmask32)(U)))
-
-#define _mm512_maskz_srli_epi16(U, A, B)                                \
-  ((__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi)(__m512i)(A),      \
-    (int)(B), (__v32hi)_mm512_setzero_hi(), (__mmask32)(U)))
-
-#define _mm512_slli_epi16(X, C)						   \
-  ((__m512i)__builtin_ia32_psllwi512_mask ((__v32hi)(__m512i)(X), (int)(C),\
-    (__v32hi)(__m512i)_mm512_setzero_hi(),\
-    (__mmask32)-1))
-
-#define _mm512_mask_slli_epi16(W, U, X, C)                                 \
-  ((__m512i)__builtin_ia32_psllwi512_mask ((__v32hi)(__m512i)(X), (int)(C),\
-    (__v32hi)(__m512i)(W),\
-    (__mmask32)(U)))
-
-#define _mm512_maskz_slli_epi16(U, X, C)                                   \
-  ((__m512i)__builtin_ia32_psllwi512_mask ((__v32hi)(__m512i)(X), (int)(C),\
-    (__v32hi)(__m512i)_mm512_setzero_hi(),\
-    (__mmask32)(U)))
-
-#define _mm512_shufflehi_epi16(A, B)                                                \
-  ((__m512i) __builtin_ia32_pshufhw512_mask ((__v32hi)(__m512i)(A), (int)(B),       \
-                                             (__v32hi)(__m512i)_mm512_setzero_hi(), \
-                                             (__mmask32)-1))
-
-#define _mm512_mask_shufflehi_epi16(W, U, A, B)                                     \
-  ((__m512i) __builtin_ia32_pshufhw512_mask ((__v32hi)(__m512i)(A), (int)(B),       \
-                                             (__v32hi)(__m512i)(W),                 \
-                                             (__mmask32)(U)))
-
-#define _mm512_maskz_shufflehi_epi16(U, A, B)                                       \
-  ((__m512i) __builtin_ia32_pshufhw512_mask ((__v32hi)(__m512i)(A), (int)(B),       \
-                                             (__v32hi)(__m512i)_mm512_setzero_hi(), \
-                                             (__mmask32)(U)))
-
-#define _mm512_shufflelo_epi16(A, B)                                                \
-  ((__m512i) __builtin_ia32_pshuflw512_mask ((__v32hi)(__m512i)(A), (int)(B),       \
-                                             (__v32hi)(__m512i)_mm512_setzero_hi(), \
-                                             (__mmask32)-1))
-
-#define _mm512_mask_shufflelo_epi16(W, U, A, B)                                     \
-  ((__m512i) __builtin_ia32_pshuflw512_mask ((__v32hi)(__m512i)(A), (int)(B),       \
-                                             (__v32hi)(__m512i)(W),                 \
-                                             (__mmask32)(U)))
-
-#define _mm512_maskz_shufflelo_epi16(U, A, B)                                       \
-  ((__m512i) __builtin_ia32_pshuflw512_mask ((__v32hi)(__m512i)(A), (int)(B),       \
-                                             (__v32hi)(__m512i)_mm512_setzero_hi(), \
-                                             (__mmask32)(U)))
-
-#define _mm512_srai_epi16(A, B)                                         \
-  ((__m512i) __builtin_ia32_psrawi512_mask ((__v32hi)(__m512i)(A),      \
-    (int)(B), (__v32hi)_mm512_setzero_hi(), (__mmask32)-1))
-
-#define _mm512_mask_srai_epi16(W, U, A, B)                              \
-  ((__m512i) __builtin_ia32_psrawi512_mask ((__v32hi)(__m512i)(A),      \
-    (int)(B), (__v32hi)(__m512i)(W), (__mmask32)(U)))
-
-#define _mm512_maskz_srai_epi16(U, A, B)                                \
-  ((__m512i) __builtin_ia32_psrawi512_mask ((__v32hi)(__m512i)(A),      \
-    (int)(B), (__v32hi)_mm512_setzero_hi(), (__mmask32)(U)))
-
-#define _mm512_mask_blend_epi16(__U, __A, __W)			      \
-  ((__m512i) __builtin_ia32_blendmw_512_mask ((__v32hi) (__A),	      \
-						    (__v32hi) (__W),  \
-						    (__mmask32) (__U)))
-
-#define _mm512_mask_blend_epi8(__U, __A, __W)			      \
-  ((__m512i) __builtin_ia32_blendmb_512_mask ((__v64qi) (__A),	      \
-						    (__v64qi) (__W),  \
-						    (__mmask64) (__U)))
-
-#define _mm512_cmp_epi16_mask(X, Y, P)				\
-  ((__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi)(__m512i)(X),	\
-					    (__v32hi)(__m512i)(Y), (int)(P),\
-					    (__mmask32)(-1)))
-
-#define _mm512_cmp_epi8_mask(X, Y, P)				\
-  ((__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi)(__m512i)(X),	\
-					    (__v64qi)(__m512i)(Y), (int)(P),\
-					    (__mmask64)(-1)))
-
-#define _mm512_cmp_epu16_mask(X, Y, P)				\
-  ((__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi)(__m512i)(X),	\
-					    (__v32hi)(__m512i)(Y), (int)(P),\
-					    (__mmask32)(-1)))
-
-#define _mm512_cmp_epu8_mask(X, Y, P)				\
-  ((__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi)(__m512i)(X),	\
-					    (__v64qi)(__m512i)(Y), (int)(P),\
-					    (__mmask64)(-1)))
-
-#define _mm512_mask_cmp_epi16_mask(M, X, Y, P)				\
-  ((__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi)(__m512i)(X),	\
-					    (__v32hi)(__m512i)(Y), (int)(P),\
-					    (__mmask32)(M)))
-
-#define _mm512_mask_cmp_epi8_mask(M, X, Y, P)				\
-  ((__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi)(__m512i)(X),	\
-					    (__v64qi)(__m512i)(Y), (int)(P),\
-					    (__mmask64)(M)))
-
-#define _mm512_mask_cmp_epu16_mask(M, X, Y, P)				\
-  ((__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi)(__m512i)(X),	\
-					    (__v32hi)(__m512i)(Y), (int)(P),\
-					    (__mmask32)(M)))
-
-#define _mm512_mask_cmp_epu8_mask(M, X, Y, P)				\
-  ((__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi)(__m512i)(X),	\
-					    (__v64qi)(__m512i)(Y), (int)(P),\
-					    (__mmask64)(M)))
-
-#define _mm512_bslli_epi128(A, N)                                         \
-  ((__m512i)__builtin_ia32_pslldq512 ((__m512i)(A), (int)(N) * 8))
-
-#define _mm512_bsrli_epi128(A, N)                                         \
-  ((__m512i)__builtin_ia32_psrldq512 ((__m512i)(A), (int)(N) * 8))
-
-#endif
-
-#ifdef __DISABLE_AVX512BW__
-#undef __DISABLE_AVX512BW__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512BW__ */
-
-#endif /* _AVX512BWINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512cdintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512cdintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,184 +1,0 @@
-/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <avx512cdintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX512CDINTRIN_H_INCLUDED
-#define _AVX512CDINTRIN_H_INCLUDED
-
-#ifndef __AVX512CD__
-#pragma GCC push_options
-#pragma GCC target("avx512cd")
-#define __DISABLE_AVX512CD__
-#endif /* __AVX512CD__ */
-
-/* Internal data types for implementing the intrinsics.  */
-typedef long long __v8di __attribute__ ((__vector_size__ (64)));
-typedef int __v16si __attribute__ ((__vector_size__ (64)));
-
-/* The Intel API is flexible enough that we must allow aliasing with other
-   vector types, and their scalar components.  */
-typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
-typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
-
-typedef unsigned char  __mmask8;
-typedef unsigned short __mmask16;
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_conflict_epi32 (__m512i __A)
-{
-  return (__m512i)
-	 __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
-					       (__v16si) _mm512_setzero_si512 (),
-					       (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
-							 (__v16si) __W,
-							 (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A)
-{
-  return (__m512i)
-	 __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
-					       (__v16si) _mm512_setzero_si512 (),
-					       (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_conflict_epi64 (__m512i __A)
-{
-  return (__m512i)
-	 __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
-					       (__v8di) _mm512_setzero_si512 (),
-					       (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
-							 (__v8di) __W,
-							 (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A)
-{
-  return (__m512i)
-	 __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
-					       (__v8di) _mm512_setzero_si512 (),
-					       (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_lzcnt_epi64 (__m512i __A)
-{
-  return (__m512i)
-	 __builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
-					   (__v8di) _mm512_setzero_si512 (),
-					   (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_lzcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
-						     (__v8di) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A)
-{
-  return (__m512i)
-	 __builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
-					   (__v8di) _mm512_setzero_si512 (),
-					   (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_lzcnt_epi32 (__m512i __A)
-{
-  return (__m512i)
-	 __builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
-					   (__v16si) _mm512_setzero_si512 (),
-					   (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_lzcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
-						     (__v16si) __W,
-						     (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_lzcnt_epi32 (__mmask16 __U, __m512i __A)
-{
-  return (__m512i)
-	 __builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
-					   (__v16si) _mm512_setzero_si512 (),
-					   (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_broadcastmb_epi64 (__mmask8 __A)
-{
-  return (__m512i) __builtin_ia32_broadcastmb512 (__A);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_broadcastmw_epi32 (__mmask16 __A)
-{
-  return (__m512i) __builtin_ia32_broadcastmw512 (__A);
-}
-
-#ifdef __DISABLE_AVX512CD__
-#undef __DISABLE_AVX512CD__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512CD__ */
-
-#endif /* _AVX512CDINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512dqintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512dqintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,2301 +1,0 @@
-/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX512DQINTRIN_H_INCLUDED
-#define _AVX512DQINTRIN_H_INCLUDED
-
-#ifndef __AVX512DQ__
-#pragma GCC push_options
-#pragma GCC target("avx512dq")
-#define __DISABLE_AVX512DQ__
-#endif /* __AVX512DQ__ */
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_broadcast_f64x2 (__m128d __A)
-{
-  return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
-							   __A,
-							   _mm512_undefined_pd(),
-							   (__mmask8) -
-							   1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A)
-{
-  return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
-							   __A,
-							   (__v8df)
-							   __O, __M);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
-{
-  return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
-							   __A,
-							   (__v8df)
-							   _mm512_setzero_ps (),
-							   __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_broadcast_i64x2 (__m128i __A)
-{
-  return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
-							   __A,
-							   _mm512_undefined_si512(),
-							   (__mmask8) -
-							   1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
-							   __A,
-							   (__v8di)
-							   __O, __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
-							   __A,
-							   (__v8di)
-							   _mm512_setzero_si512 (),
-							   __M);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_broadcast_f32x2 (__m128 __A)
-{
-  return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
-							  (__v16sf)_mm512_undefined_ps(),
-							  (__mmask16) -
-							  1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
-{
-  return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
-							  (__v16sf)
-							  __O, __M);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
-{
-  return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
-							  (__v16sf)
-							  _mm512_setzero_ps (),
-							  __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_broadcast_i32x2 (__m128i __A)
-{
-  return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
-							   __A,
-							   (__v16si)_mm512_undefined_si512(),
-							   (__mmask16)
-							   -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
-							   __A,
-							   (__v16si)
-							   __O, __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
-							   __A,
-							   (__v16si)
-							   _mm512_setzero_si512 (),
-							   __M);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_broadcast_f32x8 (__m256 __A)
-{
-  return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
-							  _mm512_undefined_ps(),
-							  (__mmask16) -
-							  1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A)
-{
-  return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
-							  (__v16sf)__O,
-							  __M);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A)
-{
-  return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
-							  (__v16sf)
-							  _mm512_setzero_ps (),
-							  __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_broadcast_i32x8 (__m256i __A)
-{
-  return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
-							   __A,
-							   (__v16si)_mm512_undefined_si512(),
-							   (__mmask16)
-							   -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A)
-{
-  return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
-							   __A,
-							   (__v16si)__O,
-							   __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A)
-{
-  return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
-							   __A,
-							   (__v16si)
-							   _mm512_setzero_si512 (),
-							   __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mullo_epi64 (__m512i __A, __m512i __B)
-{
-  return (__m512i) ((__v8du) __A * (__v8du) __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
-			 __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di)
-						  _mm512_setzero_si512 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_xor_pd (__m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df)
-						 _mm512_setzero_pd (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A,
-		    __m512d __B)
-{
-  return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df)
-						 _mm512_setzero_pd (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_xor_ps (__m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_setzero_ps (),
-						(__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf) __W,
-						(__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_setzero_ps (),
-						(__mmask16) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_or_pd (__m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
-						(__v8df) __B,
-						(__v8df)
-						_mm512_setzero_pd (),
-						(__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
-						(__v8df) __B,
-						(__v8df) __W,
-						(__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
-						(__v8df) __B,
-						(__v8df)
-						_mm512_setzero_pd (),
-						(__mmask8) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_or_ps (__m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
-					       (__v16sf) __B,
-					       (__v16sf)
-					       _mm512_setzero_ps (),
-					       (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
-					       (__v16sf) __B,
-					       (__v16sf) __W,
-					       (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
-					       (__v16sf) __B,
-					       (__v16sf)
-					       _mm512_setzero_ps (),
-					       (__mmask16) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_and_pd (__m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df)
-						 _mm512_setzero_pd (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A,
-		    __m512d __B)
-{
-  return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df)
-						 _mm512_setzero_pd (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_and_ps (__m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_setzero_ps (),
-						(__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf) __W,
-						(__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_setzero_ps (),
-						(__mmask16) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_andnot_pd (__m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
-						  (__v8df) __B,
-						  (__v8df)
-						  _mm512_setzero_pd (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A,
-		       __m512d __B)
-{
-  return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
-						  (__v8df) __B,
-						  (__v8df) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
-						  (__v8df) __B,
-						  (__v8df)
-						  _mm512_setzero_pd (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_andnot_ps (__m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
-						 (__v16sf) __B,
-						 (__v16sf)
-						 _mm512_setzero_ps (),
-						 (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A,
-		       __m512 __B)
-{
-  return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
-						 (__v16sf) __B,
-						 (__v16sf) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
-						 (__v16sf) __B,
-						 (__v16sf)
-						 _mm512_setzero_ps (),
-						 (__mmask16) __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_movepi32_mask (__m512i __A)
-{
-  return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_movepi64_mask (__m512i __A)
-{
-  return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_movm_epi32 (__mmask16 __A)
-{
-  return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_movm_epi64 (__mmask8 __A)
-{
-  return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvttpd_epi64 (__m512d __A)
-{
-  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
-						     (__v8di)
-						     _mm512_setzero_si512 (),
-						     (__mmask8) -1,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
-{
-  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
-						     (__v8di) __W,
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A)
-{
-  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
-						     (__v8di)
-						     _mm512_setzero_si512 (),
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvttpd_epu64 (__m512d __A)
-{
-  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
-						      (__v8di)
-						      _mm512_setzero_si512 (),
-						      (__mmask8) -1,
-						      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
-{
-  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
-						      (__v8di) __W,
-						      (__mmask8) __U,
-						      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A)
-{
-  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
-						      (__v8di)
-						      _mm512_setzero_si512 (),
-						      (__mmask8) __U,
-						      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvttps_epi64 (__m256 __A)
-{
-  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
-						     (__v8di)
-						     _mm512_setzero_si512 (),
-						     (__mmask8) -1,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
-{
-  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
-						     (__v8di) __W,
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A)
-{
-  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
-						     (__v8di)
-						     _mm512_setzero_si512 (),
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvttps_epu64 (__m256 __A)
-{
-  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
-						      (__v8di)
-						      _mm512_setzero_si512 (),
-						      (__mmask8) -1,
-						      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
-{
-  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
-						      (__v8di) __W,
-						      (__mmask8) __U,
-						      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A)
-{
-  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
-						      (__v8di)
-						      _mm512_setzero_si512 (),
-						      (__mmask8) __U,
-						      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtpd_epi64 (__m512d __A)
-{
-  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
-						    (__v8di)
-						    _mm512_setzero_si512 (),
-						    (__mmask8) -1,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
-{
-  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
-						    (__v8di) __W,
-						    (__mmask8) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A)
-{
-  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
-						    (__v8di)
-						    _mm512_setzero_si512 (),
-						    (__mmask8) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtpd_epu64 (__m512d __A)
-{
-  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
-						     (__v8di)
-						     _mm512_setzero_si512 (),
-						     (__mmask8) -1,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
-{
-  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
-						     (__v8di) __W,
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A)
-{
-  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
-						     (__v8di)
-						     _mm512_setzero_si512 (),
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtps_epi64 (__m256 __A)
-{
-  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
-						    (__v8di)
-						    _mm512_setzero_si512 (),
-						    (__mmask8) -1,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
-{
-  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
-						    (__v8di) __W,
-						    (__mmask8) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A)
-{
-  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
-						    (__v8di)
-						    _mm512_setzero_si512 (),
-						    (__mmask8) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtps_epu64 (__m256 __A)
-{
-  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
-						     (__v8di)
-						     _mm512_setzero_si512 (),
-						     (__mmask8) -1,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
-{
-  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
-						     (__v8di) __W,
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A)
-{
-  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
-						     (__v8di)
-						     _mm512_setzero_si512 (),
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepi64_ps (__m512i __A)
-{
-  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
-						   (__v8sf)
-						   _mm256_setzero_ps (),
-						   (__mmask8) -1,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A)
-{
-  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
-						   (__v8sf) __W,
-						   (__mmask8) __U,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A)
-{
-  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
-						   (__v8sf)
-						   _mm256_setzero_ps (),
-						   (__mmask8) __U,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepu64_ps (__m512i __A)
-{
-  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
-						    (__v8sf)
-						    _mm256_setzero_ps (),
-						    (__mmask8) -1,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A)
-{
-  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
-						    (__v8sf) __W,
-						    (__mmask8) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A)
-{
-  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
-						    (__v8sf)
-						    _mm256_setzero_ps (),
-						    (__mmask8) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepi64_pd (__m512i __A)
-{
-  return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
-						    (__v8df)
-						    _mm512_setzero_pd (),
-						    (__mmask8) -1,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A)
-{
-  return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
-						    (__v8df) __W,
-						    (__mmask8) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A)
-{
-  return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
-						    (__v8df)
-						    _mm512_setzero_pd (),
-						    (__mmask8) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepu64_pd (__m512i __A)
-{
-  return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
-						     (__v8df)
-						     _mm512_setzero_pd (),
-						     (__mmask8) -1,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A)
-{
-  return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
-						     (__v8df) __W,
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A)
-{
-  return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
-						     (__v8df)
-						     _mm512_setzero_pd (),
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_range_pd (__m512d __A, __m512d __B, int __C)
-{
-  return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
-						   (__v8df) __B, __C,
-						   (__v8df)
-						   _mm512_setzero_pd (),
-						   (__mmask8) -1,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_range_pd (__m512d __W, __mmask8 __U,
-		      __m512d __A, __m512d __B, int __C)
-{
-  return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
-						   (__v8df) __B, __C,
-						   (__v8df) __W,
-						   (__mmask8) __U,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_range_pd (__mmask8 __U, __m512d __A, __m512d __B, int __C)
-{
-  return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
-						   (__v8df) __B, __C,
-						   (__v8df)
-						   _mm512_setzero_pd (),
-						   (__mmask8) __U,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_range_ps (__m512 __A, __m512 __B, int __C)
-{
-  return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
-						  (__v16sf) __B, __C,
-						  (__v16sf)
-						  _mm512_setzero_ps (),
-						  (__mmask16) -1,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_range_ps (__m512 __W, __mmask16 __U,
-		      __m512 __A, __m512 __B, int __C)
-{
-  return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
-						  (__v16sf) __B, __C,
-						  (__v16sf) __W,
-						  (__mmask16) __U,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_range_ps (__mmask16 __U, __m512 __A, __m512 __B, int __C)
-{
-  return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
-						  (__v16sf) __B, __C,
-						  (__v16sf)
-						  _mm512_setzero_ps (),
-						  (__mmask16) __U,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_reduce_sd (__m128d __A, __m128d __B, int __C)
-{
-  return (__m128d) __builtin_ia32_reducesd ((__v2df) __A,
-						 (__v2df) __B, __C);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_reduce_ss (__m128 __A, __m128 __B, int __C)
-{
-  return (__m128) __builtin_ia32_reducess ((__v4sf) __A,
-						(__v4sf) __B, __C);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_range_sd (__m128d __A, __m128d __B, int __C)
-{
-  return (__m128d) __builtin_ia32_rangesd128_round ((__v2df) __A,
-						   (__v2df) __B, __C,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_range_ss (__m128 __A, __m128 __B, int __C)
-{
-  return (__m128) __builtin_ia32_rangess128_round ((__v4sf) __A,
-						  (__v4sf) __B, __C,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_range_round_sd (__m128d __A, __m128d __B, int __C, const int __R)
-{
-  return (__m128d) __builtin_ia32_rangesd128_round ((__v2df) __A,
-						   (__v2df) __B, __C,
-						   __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_range_round_ss (__m128 __A, __m128 __B, int __C, const int __R)
-{
-  return (__m128) __builtin_ia32_rangess128_round ((__v4sf) __A,
-						  (__v4sf) __B, __C,
-						  __R);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fpclass_ss_mask (__m128 __A, const int __imm)
-{
-  return (__mmask8) __builtin_ia32_fpclassss ((__v4sf) __A, __imm);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fpclass_sd_mask (__m128d __A, const int __imm)
-{
-  return (__mmask8) __builtin_ia32_fpclasssd ((__v2df) __A, __imm);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtt_roundpd_epi64 (__m512d __A, const int __R)
-{
-  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
-						     (__v8di)
-						     _mm512_setzero_si512 (),
-						     (__mmask8) -1,
-						     __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
-				const int __R)
-{
-  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
-						     (__v8di) __W,
-						     (__mmask8) __U,
-						     __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtt_roundpd_epi64 (__mmask8 __U, __m512d __A,
-				 const int __R)
-{
-  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
-						     (__v8di)
-						     _mm512_setzero_si512 (),
-						     (__mmask8) __U,
-						     __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtt_roundpd_epu64 (__m512d __A, const int __R)
-{
-  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
-						      (__v8di)
-						      _mm512_setzero_si512 (),
-						      (__mmask8) -1,
-						      __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
-				const int __R)
-{
-  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
-						      (__v8di) __W,
-						      (__mmask8) __U,
-						      __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtt_roundpd_epu64 (__mmask8 __U, __m512d __A,
-				 const int __R)
-{
-  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
-						      (__v8di)
-						      _mm512_setzero_si512 (),
-						      (__mmask8) __U,
-						      __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtt_roundps_epi64 (__m256 __A, const int __R)
-{
-  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
-						     (__v8di)
-						     _mm512_setzero_si512 (),
-						     (__mmask8) -1,
-						     __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
-				const int __R)
-{
-  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
-						     (__v8di) __W,
-						     (__mmask8) __U,
-						     __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtt_roundps_epi64 (__mmask8 __U, __m256 __A,
-				 const int __R)
-{
-  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
-						     (__v8di)
-						     _mm512_setzero_si512 (),
-						     (__mmask8) __U,
-						     __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtt_roundps_epu64 (__m256 __A, const int __R)
-{
-  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
-						      (__v8di)
-						      _mm512_setzero_si512 (),
-						      (__mmask8) -1,
-						      __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
-				const int __R)
-{
-  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
-						      (__v8di) __W,
-						      (__mmask8) __U,
-						      __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtt_roundps_epu64 (__mmask8 __U, __m256 __A,
-				 const int __R)
-{
-  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
-						      (__v8di)
-						      _mm512_setzero_si512 (),
-						      (__mmask8) __U,
-						      __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt_roundpd_epi64 (__m512d __A, const int __R)
-{
-  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
-						    (__v8di)
-						    _mm512_setzero_si512 (),
-						    (__mmask8) -1,
-						    __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
-			       const int __R)
-{
-  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
-						    (__v8di) __W,
-						    (__mmask8) __U,
-						    __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt_roundpd_epi64 (__mmask8 __U, __m512d __A,
-				const int __R)
-{
-  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
-						    (__v8di)
-						    _mm512_setzero_si512 (),
-						    (__mmask8) __U,
-						    __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt_roundpd_epu64 (__m512d __A, const int __R)
-{
-  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
-						     (__v8di)
-						     _mm512_setzero_si512 (),
-						     (__mmask8) -1,
-						     __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
-			       const int __R)
-{
-  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
-						     (__v8di) __W,
-						     (__mmask8) __U,
-						     __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt_roundpd_epu64 (__mmask8 __U, __m512d __A,
-				const int __R)
-{
-  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
-						     (__v8di)
-						     _mm512_setzero_si512 (),
-						     (__mmask8) __U,
-						     __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt_roundps_epi64 (__m256 __A, const int __R)
-{
-  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
-						    (__v8di)
-						    _mm512_setzero_si512 (),
-						    (__mmask8) -1,
-						    __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
-			       const int __R)
-{
-  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
-						    (__v8di) __W,
-						    (__mmask8) __U,
-						    __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt_roundps_epi64 (__mmask8 __U, __m256 __A,
-				const int __R)
-{
-  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
-						    (__v8di)
-						    _mm512_setzero_si512 (),
-						    (__mmask8) __U,
-						    __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt_roundps_epu64 (__m256 __A, const int __R)
-{
-  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
-						     (__v8di)
-						     _mm512_setzero_si512 (),
-						     (__mmask8) -1,
-						     __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
-			       const int __R)
-{
-  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
-						     (__v8di) __W,
-						     (__mmask8) __U,
-						     __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt_roundps_epu64 (__mmask8 __U, __m256 __A,
-				const int __R)
-{
-  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
-						     (__v8di)
-						     _mm512_setzero_si512 (),
-						     (__mmask8) __U,
-						     __R);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt_roundepi64_ps (__m512i __A, const int __R)
-{
-  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
-						   (__v8sf)
-						   _mm256_setzero_ps (),
-						   (__mmask8) -1,
-						   __R);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt_roundepi64_ps (__m256 __W, __mmask8 __U, __m512i __A,
-			       const int __R)
-{
-  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
-						   (__v8sf) __W,
-						   (__mmask8) __U,
-						   __R);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt_roundepi64_ps (__mmask8 __U, __m512i __A,
-				const int __R)
-{
-  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
-						   (__v8sf)
-						   _mm256_setzero_ps (),
-						   (__mmask8) __U,
-						   __R);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt_roundepu64_ps (__m512i __A, const int __R)
-{
-  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
-						    (__v8sf)
-						    _mm256_setzero_ps (),
-						    (__mmask8) -1,
-						    __R);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt_roundepu64_ps (__m256 __W, __mmask8 __U, __m512i __A,
-			       const int __R)
-{
-  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
-						    (__v8sf) __W,
-						    (__mmask8) __U,
-						    __R);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt_roundepu64_ps (__mmask8 __U, __m512i __A,
-				const int __R)
-{
-  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
-						    (__v8sf)
-						    _mm256_setzero_ps (),
-						    (__mmask8) __U,
-						    __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt_roundepi64_pd (__m512i __A, const int __R)
-{
-  return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
-						    (__v8df)
-						    _mm512_setzero_pd (),
-						    (__mmask8) -1,
-						    __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt_roundepi64_pd (__m512d __W, __mmask8 __U, __m512i __A,
-			       const int __R)
-{
-  return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
-						    (__v8df) __W,
-						    (__mmask8) __U,
-						    __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt_roundepi64_pd (__mmask8 __U, __m512i __A,
-				const int __R)
-{
-  return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
-						    (__v8df)
-						    _mm512_setzero_pd (),
-						    (__mmask8) __U,
-						    __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt_roundepu64_pd (__m512i __A, const int __R)
-{
-  return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
-						     (__v8df)
-						     _mm512_setzero_pd (),
-						     (__mmask8) -1,
-						     __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt_roundepu64_pd (__m512d __W, __mmask8 __U, __m512i __A,
-			       const int __R)
-{
-  return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
-						     (__v8df) __W,
-						     (__mmask8) __U,
-						     __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt_roundepu64_pd (__mmask8 __U, __m512i __A,
-				const int __R)
-{
-  return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
-						     (__v8df)
-						     _mm512_setzero_pd (),
-						     (__mmask8) __U,
-						     __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_reduce_pd (__m512d __A, int __B)
-{
-  return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
-						    (__v8df)
-						    _mm512_setzero_pd (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_reduce_pd (__m512d __W, __mmask8 __U, __m512d __A, int __B)
-{
-  return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
-						    (__v8df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_reduce_pd (__mmask8 __U, __m512d __A, int __B)
-{
-  return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
-						    (__v8df)
-						    _mm512_setzero_pd (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_reduce_ps (__m512 __A, int __B)
-{
-  return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
-						   (__v16sf)
-						   _mm512_setzero_ps (),
-						   (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_reduce_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B)
-{
-  return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
-						   (__v16sf) __W,
-						   (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_reduce_ps (__mmask16 __U, __m512 __A, int __B)
-{
-  return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
-						   (__v16sf)
-						   _mm512_setzero_ps (),
-						   (__mmask16) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_extractf32x8_ps (__m512 __A, const int __imm)
-{
-  return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
-						    __imm,
-						    (__v8sf)
-						    _mm256_setzero_ps (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_extractf32x8_ps (__m256 __W, __mmask8 __U, __m512 __A,
-			     const int __imm)
-{
-  return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
-						    __imm,
-						    (__v8sf) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_extractf32x8_ps (__mmask8 __U, __m512 __A,
-			      const int __imm)
-{
-  return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
-						    __imm,
-						    (__v8sf)
-						    _mm256_setzero_ps (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_extractf64x2_pd (__m512d __A, const int __imm)
-{
-  return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
-							 __imm,
-							 (__v2df)
-							 _mm_setzero_pd (),
-							 (__mmask8) -
-							 1);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m512d __A,
-			     const int __imm)
-{
-  return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
-							 __imm,
-							 (__v2df) __W,
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_extractf64x2_pd (__mmask8 __U, __m512d __A,
-			      const int __imm)
-{
-  return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
-							 __imm,
-							 (__v2df)
-							 _mm_setzero_pd (),
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_extracti32x8_epi32 (__m512i __A, const int __imm)
-{
-  return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
-						     __imm,
-						     (__v8si)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_extracti32x8_epi32 (__m256i __W, __mmask8 __U, __m512i __A,
-				const int __imm)
-{
-  return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
-						     __imm,
-						     (__v8si) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_extracti32x8_epi32 (__mmask8 __U, __m512i __A,
-				 const int __imm)
-{
-  return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
-						     __imm,
-						     (__v8si)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_extracti64x2_epi64 (__m512i __A, const int __imm)
-{
-  return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
-							 __imm,
-							 (__v2di)
-							 _mm_setzero_di (),
-							 (__mmask8) -
-							 1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m512i __A,
-				const int __imm)
-{
-  return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
-							 __imm,
-							 (__v2di) __W,
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_extracti64x2_epi64 (__mmask8 __U, __m512i __A,
-				 const int __imm)
-{
-  return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
-							 __imm,
-							 (__v2di)
-							 _mm_setzero_di (),
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_range_round_pd (__m512d __A, __m512d __B, int __C,
-		       const int __R)
-{
-  return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
-						   (__v8df) __B, __C,
-						   (__v8df)
-						   _mm512_setzero_pd (),
-						   (__mmask8) -1,
-						   __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_range_round_pd (__m512d __W, __mmask8 __U,
-			    __m512d __A, __m512d __B, int __C,
-			    const int __R)
-{
-  return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
-						   (__v8df) __B, __C,
-						   (__v8df) __W,
-						   (__mmask8) __U,
-						   __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_range_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
-			     int __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
-						   (__v8df) __B, __C,
-						   (__v8df)
-						   _mm512_setzero_pd (),
-						   (__mmask8) __U,
-						   __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_range_round_ps (__m512 __A, __m512 __B, int __C, const int __R)
-{
-  return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
-						  (__v16sf) __B, __C,
-						  (__v16sf)
-						  _mm512_setzero_ps (),
-						  (__mmask16) -1,
-						  __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_range_round_ps (__m512 __W, __mmask16 __U,
-			    __m512 __A, __m512 __B, int __C,
-			    const int __R)
-{
-  return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
-						  (__v16sf) __B, __C,
-						  (__v16sf) __W,
-						  (__mmask16) __U,
-						  __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_range_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
-			     int __C, const int __R)
-{
-  return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
-						  (__v16sf) __B, __C,
-						  (__v16sf)
-						  _mm512_setzero_ps (),
-						  (__mmask16) __U,
-						  __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_inserti32x8 (__m512i __A, __m256i __B, const int __imm)
-{
-  return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
-						    (__v8si) __B,
-						    __imm,
-						    (__v16si)
-						    _mm512_setzero_si512 (),
-						    (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_inserti32x8 (__m512i __W, __mmask16 __U, __m512i __A,
-			 __m256i __B, const int __imm)
-{
-  return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
-						    (__v8si) __B,
-						    __imm,
-						    (__v16si) __W,
-						    (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_inserti32x8 (__mmask16 __U, __m512i __A, __m256i __B,
-			  const int __imm)
-{
-  return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
-						    (__v8si) __B,
-						    __imm,
-						    (__v16si)
-						    _mm512_setzero_si512 (),
-						    (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_insertf32x8 (__m512 __A, __m256 __B, const int __imm)
-{
-  return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
-						   (__v8sf) __B,
-						   __imm,
-						   (__v16sf)
-						   _mm512_setzero_ps (),
-						   (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_insertf32x8 (__m512 __W, __mmask16 __U, __m512 __A,
-			 __m256 __B, const int __imm)
-{
-  return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
-						   (__v8sf) __B,
-						   __imm,
-						   (__v16sf) __W,
-						   (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_insertf32x8 (__mmask16 __U, __m512 __A, __m256 __B,
-			  const int __imm)
-{
-  return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
-						   (__v8sf) __B,
-						   __imm,
-						   (__v16sf)
-						   _mm512_setzero_ps (),
-						   (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_inserti64x2 (__m512i __A, __m128i __B, const int __imm)
-{
-  return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
-							(__v2di) __B,
-							__imm,
-							(__v8di)
-							_mm512_setzero_si512 (),
-							(__mmask8) -
-							1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_inserti64x2 (__m512i __W, __mmask8 __U, __m512i __A,
-			 __m128i __B, const int __imm)
-{
-  return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
-							(__v2di) __B,
-							__imm,
-							(__v8di) __W,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_inserti64x2 (__mmask8 __U, __m512i __A, __m128i __B,
-			  const int __imm)
-{
-  return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
-							(__v2di) __B,
-							__imm,
-							(__v8di)
-							_mm512_setzero_si512 (),
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_insertf64x2 (__m512d __A, __m128d __B, const int __imm)
-{
-  return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
-							(__v2df) __B,
-							__imm,
-							(__v8df)
-							_mm512_setzero_pd (),
-							(__mmask8) -
-							1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_insertf64x2 (__m512d __W, __mmask8 __U, __m512d __A,
-			 __m128d __B, const int __imm)
-{
-  return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
-							(__v2df) __B,
-							__imm,
-							(__v8df) __W,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_insertf64x2 (__mmask8 __U, __m512d __A, __m128d __B,
-			  const int __imm)
-{
-  return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
-							(__v2df) __B,
-							__imm,
-							(__v8df)
-							_mm512_setzero_pd (),
-							(__mmask8)
-							__U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fpclass_pd_mask (__mmask8 __U, __m512d __A,
-			     const int __imm)
-{
-  return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A,
-						      __imm, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fpclass_pd_mask (__m512d __A, const int __imm)
-{
-  return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A,
-						      __imm,
-						      (__mmask8) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fpclass_ps_mask (__mmask16 __U, __m512 __A,
-			     const int __imm)
-{
-  return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A,
-						       __imm, __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fpclass_ps_mask (__m512 __A, const int __imm)
-{
-  return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A,
-						       __imm,
-						       (__mmask16) -
-						       1);
-}
-
-#else
-#define _mm_range_sd(A, B, C)						\
-  ((__m128d) __builtin_ia32_rangesd128_round ((__v2df)(__m128d)(A),	\
-    (__v2df)(__m128d)(B), (int)(C),					\
-    _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_range_ss(A, B, C)						\
-  ((__m128) __builtin_ia32_rangess128_round ((__v4sf)(__m128)(A),	\
-    (__v4sf)(__m128)(B), (int)(C),					\
-    _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_range_round_sd(A, B, C, R)					\
-  ((__m128d) __builtin_ia32_rangesd128_round ((__v2df)(__m128d)(A),	\
-    (__v2df)(__m128d)(B), (int)(C), (R)))
-
-#define _mm_range_round_ss(A, B, C, R)					\
-  ((__m128) __builtin_ia32_rangess128_round ((__v4sf)(__m128)(A),	\
-    (__v4sf)(__m128)(B), (int)(C), (R)))
-
-#define _mm512_cvtt_roundpd_epi64(A, B)		    \
-    ((__m512i)__builtin_ia32_cvttpd2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
-
-#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, B)  \
-    ((__m512i)__builtin_ia32_cvttpd2qq512_mask((A), (__v8di)(W), (U), (B)))
-
-#define _mm512_maskz_cvtt_roundpd_epi64(U, A, B)    \
-    ((__m512i)__builtin_ia32_cvttpd2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
-
-#define _mm512_cvtt_roundpd_epu64(A, B)		    \
-    ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
-
-#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, B)  \
-    ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((A), (__v8di)(W), (U), (B)))
-
-#define _mm512_maskz_cvtt_roundpd_epu64(U, A, B)    \
-    ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
-
-#define _mm512_cvtt_roundps_epi64(A, B)		    \
-    ((__m512i)__builtin_ia32_cvttps2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
-
-#define _mm512_mask_cvtt_roundps_epi64(W, U, A, B)  \
-    ((__m512i)__builtin_ia32_cvttps2qq512_mask((A), (__v8di)(W), (U), (B)))
-
-#define _mm512_maskz_cvtt_roundps_epi64(U, A, B)    \
-    ((__m512i)__builtin_ia32_cvttps2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
-
-#define _mm512_cvtt_roundps_epu64(A, B)		    \
-    ((__m512i)__builtin_ia32_cvttps2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
-
-#define _mm512_mask_cvtt_roundps_epu64(W, U, A, B)  \
-    ((__m512i)__builtin_ia32_cvttps2uqq512_mask((A), (__v8di)(W), (U), (B)))
-
-#define _mm512_maskz_cvtt_roundps_epu64(U, A, B)    \
-    ((__m512i)__builtin_ia32_cvttps2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
-
-#define _mm512_cvt_roundpd_epi64(A, B)		    \
-    ((__m512i)__builtin_ia32_cvtpd2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
-
-#define _mm512_mask_cvt_roundpd_epi64(W, U, A, B)   \
-    ((__m512i)__builtin_ia32_cvtpd2qq512_mask((A), (__v8di)(W), (U), (B)))
-
-#define _mm512_maskz_cvt_roundpd_epi64(U, A, B)     \
-    ((__m512i)__builtin_ia32_cvtpd2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
-
-#define _mm512_cvt_roundpd_epu64(A, B)		    \
-    ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
-
-#define _mm512_mask_cvt_roundpd_epu64(W, U, A, B)   \
-    ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((A), (__v8di)(W), (U), (B)))
-
-#define _mm512_maskz_cvt_roundpd_epu64(U, A, B)     \
-    ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
-
-#define _mm512_cvt_roundps_epi64(A, B)		    \
-    ((__m512i)__builtin_ia32_cvtps2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
-
-#define _mm512_mask_cvt_roundps_epi64(W, U, A, B)   \
-    ((__m512i)__builtin_ia32_cvtps2qq512_mask((A), (__v8di)(W), (U), (B)))
-
-#define _mm512_maskz_cvt_roundps_epi64(U, A, B)     \
-    ((__m512i)__builtin_ia32_cvtps2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
-
-#define _mm512_cvt_roundps_epu64(A, B)		    \
-    ((__m512i)__builtin_ia32_cvtps2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
-
-#define _mm512_mask_cvt_roundps_epu64(W, U, A, B)   \
-    ((__m512i)__builtin_ia32_cvtps2uqq512_mask((A), (__v8di)(W), (U), (B)))
-
-#define _mm512_maskz_cvt_roundps_epu64(U, A, B)     \
-    ((__m512i)__builtin_ia32_cvtps2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
-
-#define _mm512_cvt_roundepi64_ps(A, B)		    \
-    ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), -1, (B)))
-
-#define _mm512_mask_cvt_roundepi64_ps(W, U, A, B)   \
-    ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(A), (W), (U), (B)))
-
-#define _mm512_maskz_cvt_roundepi64_ps(U, A, B)     \
-    ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), (U), (B)))
-
-#define _mm512_cvt_roundepu64_ps(A, B)		    \
-    ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), -1, (B)))
-
-#define _mm512_mask_cvt_roundepu64_ps(W, U, A, B)   \
-    ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(A), (W), (U), (B)))
-
-#define _mm512_maskz_cvt_roundepu64_ps(U, A, B)     \
-    ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), (U), (B)))
-
-#define _mm512_cvt_roundepi64_pd(A, B)		    \
-    ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), -1, (B)))
-
-#define _mm512_mask_cvt_roundepi64_pd(W, U, A, B)   \
-    ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(A), (W), (U), (B)))
-
-#define _mm512_maskz_cvt_roundepi64_pd(U, A, B)     \
-    ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), (U), (B)))
-
-#define _mm512_cvt_roundepu64_pd(A, B)		    \
-    ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), -1, (B)))
-
-#define _mm512_mask_cvt_roundepu64_pd(W, U, A, B)   \
-    ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(A), (W), (U), (B)))
-
-#define _mm512_maskz_cvt_roundepu64_pd(U, A, B)     \
-    ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), (U), (B)))
-
-#define _mm512_reduce_pd(A, B)						\
-  ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A),	\
-    (int)(B), (__v8df)_mm512_setzero_pd(), (__mmask8)-1))
-
-#define _mm512_mask_reduce_pd(W, U, A, B)				\
-  ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A),	\
-    (int)(B), (__v8df)(__m512d)(W), (__mmask8)(U)))
-
-#define _mm512_maskz_reduce_pd(U, A, B)					\
-  ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A),	\
-    (int)(B), (__v8df)_mm512_setzero_pd(), (__mmask8)(U)))
-
-#define _mm512_reduce_ps(A, B)						\
-  ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A),	\
-    (int)(B), (__v16sf)_mm512_setzero_ps(), (__mmask16)-1))
-
-#define _mm512_mask_reduce_ps(W, U, A, B)				\
-  ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A),	\
-    (int)(B), (__v16sf)(__m512)(W), (__mmask16)(U)))
-
-#define _mm512_maskz_reduce_ps(U, A, B)					\
-  ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A),	\
-    (int)(B), (__v16sf)_mm512_setzero_ps(), (__mmask16)(U)))
-
-#define _mm512_extractf32x8_ps(X, C)                                    \
-  ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X),    \
-    (int) (C), (__v8sf)(__m256) _mm256_setzero_ps(), (__mmask8)-1))
-
-#define _mm512_mask_extractf32x8_ps(W, U, X, C)                         \
-  ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X),    \
-    (int) (C), (__v8sf)(__m256) (W), (__mmask8) (U)))
-
-#define _mm512_maskz_extractf32x8_ps(U, X, C)                           \
-  ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X),    \
-    (int) (C), (__v8sf)(__m256) _mm256_setzero_ps(), (__mmask8) (U)))
-
-#define _mm512_extractf64x2_pd(X, C)                                    \
-  ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\
-    (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8)-1))
-
-#define _mm512_mask_extractf64x2_pd(W, U, X, C)                         \
-  ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\
-    (int) (C), (__v2df)(__m128d) (W), (__mmask8) (U)))
-
-#define _mm512_maskz_extractf64x2_pd(U, X, C)                           \
-  ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\
-    (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8) (U)))
-
-#define _mm512_extracti32x8_epi32(X, C)                                 \
-  ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X),  \
-    (int) (C), (__v8si)(__m256i) _mm256_setzero_si256(), (__mmask8)-1))
-
-#define _mm512_mask_extracti32x8_epi32(W, U, X, C)                      \
-  ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X),  \
-    (int) (C), (__v8si)(__m256i) (W), (__mmask8) (U)))
-
-#define _mm512_maskz_extracti32x8_epi32(U, X, C)                        \
-  ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X),  \
-    (int) (C), (__v8si)(__m256i) _mm256_setzero_si256(), (__mmask8) (U)))
-
-#define _mm512_extracti64x2_epi64(X, C)                                 \
-  ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\
-    (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8)-1))
-
-#define _mm512_mask_extracti64x2_epi64(W, U, X, C)                      \
-  ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\
-    (int) (C), (__v2di)(__m128i) (W), (__mmask8) (U)))
-
-#define _mm512_maskz_extracti64x2_epi64(U, X, C)                        \
-  ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\
-    (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8) (U)))
-
-#define _mm512_range_pd(A, B, C)					\
-  ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A),	\
-    (__v8df)(__m512d)(B), (int)(C),					\
-    (__v8df)_mm512_setzero_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_range_pd(W, U, A, B, C)				\
-  ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A),	\
-    (__v8df)(__m512d)(B), (int)(C),					\
-    (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_maskz_range_pd(U, A, B, C)				\
-  ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A),	\
-    (__v8df)(__m512d)(B), (int)(C),					\
-    (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_range_ps(A, B, C)					\
-  ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A),	\
-    (__v16sf)(__m512)(B), (int)(C),					\
-    (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_range_ps(W, U, A, B, C)				\
-  ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A),	\
-    (__v16sf)(__m512)(B), (int)(C),					\
-    (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_maskz_range_ps(U, A, B, C)				\
-  ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A),	\
-    (__v16sf)(__m512)(B), (int)(C),					\
-    (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_range_round_pd(A, B, C, R)					\
-  ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A),	\
-    (__v8df)(__m512d)(B), (int)(C),					\
-    (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (R)))
-
-#define _mm512_mask_range_round_pd(W, U, A, B, C, R)				\
-  ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A),	\
-    (__v8df)(__m512d)(B), (int)(C),					\
-    (__v8df)(__m512d)(W), (__mmask8)(U), (R)))
-
-#define _mm512_maskz_range_round_pd(U, A, B, C, R)				\
-  ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A),	\
-    (__v8df)(__m512d)(B), (int)(C),					\
-    (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (R)))
-
-#define _mm512_range_round_ps(A, B, C, R)					\
-  ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A),	\
-    (__v16sf)(__m512)(B), (int)(C),					\
-    (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (R)))
-
-#define _mm512_mask_range_round_ps(W, U, A, B, C, R)				\
-  ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A),	\
-    (__v16sf)(__m512)(B), (int)(C),					\
-    (__v16sf)(__m512)(W), (__mmask16)(U), (R)))
-
-#define _mm512_maskz_range_round_ps(U, A, B, C, R)				\
-  ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A),	\
-    (__v16sf)(__m512)(B), (int)(C),					\
-    (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (R)))
-
-#define _mm512_insertf64x2(X, Y, C)                                     \
-  ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\
-    (__v2df)(__m128d) (Y), (int) (C), (__v8df)(__m512d) (X),            \
-    (__mmask8)-1))
-
-#define _mm512_mask_insertf64x2(W, U, X, Y, C)                          \
-  ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\
-    (__v2df)(__m128d) (Y), (int) (C), (__v8df)(__m512d) (W),            \
-    (__mmask8) (U)))
-
-#define _mm512_maskz_insertf64x2(U, X, Y, C)                            \
-  ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\
-    (__v2df)(__m128d) (Y), (int) (C),                                   \
-    (__v8df)(__m512d) _mm512_setzero_pd(), (__mmask8) (U)))
-
-#define _mm512_inserti64x2(X, Y, C)                                     \
-  ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\
-    (__v2di)(__m128i) (Y), (int) (C), (__v8di)(__m512i) (X), (__mmask8)-1))
-
-#define _mm512_mask_inserti64x2(W, U, X, Y, C)                          \
-  ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\
-    (__v2di)(__m128i) (Y), (int) (C), (__v8di)(__m512i) (W),            \
-    (__mmask8) (U)))
-
-#define _mm512_maskz_inserti64x2(U, X, Y, C)                            \
-  ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\
-    (__v2di)(__m128i) (Y), (int) (C),                                   \
-    (__v8di)(__m512i) _mm512_setzero_si512 (), (__mmask8) (U)))
-
-#define _mm512_insertf32x8(X, Y, C)                                     \
-  ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X),     \
-    (__v8sf)(__m256) (Y), (int) (C),\
-    (__v16sf)(__m512)_mm512_setzero_ps(),\
-    (__mmask16)-1))
-
-#define _mm512_mask_insertf32x8(W, U, X, Y, C)                          \
-  ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X),     \
-    (__v8sf)(__m256) (Y), (int) (C),\
-    (__v16sf)(__m512)(W),\
-    (__mmask16)(U)))
-
-#define _mm512_maskz_insertf32x8(U, X, Y, C)                            \
-  ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X),     \
-    (__v8sf)(__m256) (Y), (int) (C),\
-    (__v16sf)(__m512)_mm512_setzero_ps(),\
-    (__mmask16)(U)))
-
-#define _mm512_inserti32x8(X, Y, C)                                     \
-  ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X),   \
-    (__v8si)(__m256i) (Y), (int) (C),\
-    (__v16si)(__m512i)_mm512_setzero_si512 (),\
-    (__mmask16)-1))
-
-#define _mm512_mask_inserti32x8(W, U, X, Y, C)                          \
-  ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X),   \
-    (__v8si)(__m256i) (Y), (int) (C),\
-    (__v16si)(__m512i)(W),\
-    (__mmask16)(U)))
-
-#define _mm512_maskz_inserti32x8(U, X, Y, C)                            \
-  ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X),   \
-    (__v8si)(__m256i) (Y), (int) (C),\
-    (__v16si)(__m512i)_mm512_setzero_si512 (),\
-    (__mmask16)(U)))
-
-#define _mm_fpclass_ss_mask(X, C)						\
-  ((__mmask8) __builtin_ia32_fpclassss ((__v4sf) (__m128) (X), (int) (C)))  \
-
-#define _mm_fpclass_sd_mask(X, C)						\
-  ((__mmask8) __builtin_ia32_fpclasssd ((__v2df) (__m128d) (X), (int) (C))) \
-
-#define _mm512_mask_fpclass_pd_mask(u, X, C)                            \
-  ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \
-						(int) (C), (__mmask8)(u)))
-
-#define _mm512_mask_fpclass_ps_mask(u, x, c)				\
-  ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x),\
-						 (int) (c),(__mmask8)(u)))
-
-#define _mm512_fpclass_pd_mask(X, C)                                    \
-  ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \
-						(int) (C), (__mmask8)-1))
-
-#define _mm512_fpclass_ps_mask(x, c)                                    \
-  ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x),\
-						 (int) (c),(__mmask8)-1))
-
-#define _mm_reduce_sd(A, B, C)						\
-  ((__m128d) __builtin_ia32_reducesd ((__v2df)(__m128d)(A),	\
-    (__v2df)(__m128d)(B), (int)(C)))					\
-
-#define _mm_reduce_ss(A, B, C)						\
-  ((__m128) __builtin_ia32_reducess ((__v4sf)(__m128)(A),		\
-    (__v4sf)(__m128)(A), (int)(C)))					\
-
-#endif
-
-#ifdef __DISABLE_AVX512DQ__
-#undef __DISABLE_AVX512DQ__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512DQ__ */
-
-#endif /* _AVX512DQINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512erintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512erintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,394 +1,0 @@
-/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX512ERINTRIN_H_INCLUDED
-#define _AVX512ERINTRIN_H_INCLUDED
-
-#ifndef __AVX512ER__
-#pragma GCC push_options
-#pragma GCC target("avx512er")
-#define __DISABLE_AVX512ER__
-#endif /* __AVX512ER__ */
-
-/* Internal data types for implementing the intrinsics.  */
-typedef double __v8df __attribute__ ((__vector_size__ (64)));
-typedef float __v16sf __attribute__ ((__vector_size__ (64)));
-
-/* The Intel API is flexible enough that we must allow aliasing with other
-   vector types, and their scalar components.  */
-typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
-typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
-
-typedef unsigned char  __mmask8;
-typedef unsigned short __mmask16;
-
-#ifdef __OPTIMIZE__
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_exp2a23_round_pd (__m512d __A, int __R)
-{
-  __m512d __W;
-  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
-					       (__v8df) __W,
-					       (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
-{
-  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
-					       (__v8df) __W,
-					       (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
-{
-  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
-					       (__v8df) _mm512_setzero_pd (),
-					       (__mmask8) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_exp2a23_round_ps (__m512 __A, int __R)
-{
-  __m512 __W;
-  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
-					      (__v16sf) __W,
-					      (__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
-{
-  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
-					      (__v16sf) __W,
-					      (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
-{
-  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
-					      (__v16sf) _mm512_setzero_ps (),
-					      (__mmask16) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rcp28_round_pd (__m512d __A, int __R)
-{
-  __m512d __W;
-  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
-						(__v8df) __W,
-						(__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
-{
-  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
-						(__v8df) __W,
-						(__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
-{
-  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
-						(__v8df) _mm512_setzero_pd (),
-						(__mmask8) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rcp28_round_ps (__m512 __A, int __R)
-{
-  __m512 __W;
-  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
-					       (__v16sf) __W,
-					       (__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
-{
-  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
-					       (__v16sf) __W,
-					       (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
-{
-  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
-					       (__v16sf) _mm512_setzero_ps (),
-					       (__mmask16) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
-{
-  return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B,
-						 (__v2df) __A,
-						 __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
-{
-  return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B,
-						(__v4sf) __A,
-						__R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rsqrt28_round_pd (__m512d __A, int __R)
-{
-  __m512d __W;
-  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
-						  (__v8df) __W,
-						  (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
-{
-  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
-						  (__v8df) __W,
-						  (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
-{
-  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
-						  (__v8df) _mm512_setzero_pd (),
-						  (__mmask8) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rsqrt28_round_ps (__m512 __A, int __R)
-{
-  __m512 __W;
-  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
-						 (__v16sf) __W,
-						 (__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
-{
-  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
-						 (__v16sf) __W,
-						 (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
-{
-  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
-						 (__v16sf) _mm512_setzero_ps (),
-						 (__mmask16) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
-{
-  return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B,
-						   (__v2df) __A,
-						   __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
-{
-  return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B,
-						  (__v4sf) __A,
-						  __R);
-}
-
-#else
-#define _mm512_exp2a23_round_pd(A, C)            \
-    __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
-
-#define _mm512_mask_exp2a23_round_pd(W, U, A, C) \
-    __builtin_ia32_exp2pd_mask(A, W, U, C)
-
-#define _mm512_maskz_exp2a23_round_pd(U, A, C)   \
-    __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
-
-#define _mm512_exp2a23_round_ps(A, C)            \
-    __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
-
-#define _mm512_mask_exp2a23_round_ps(W, U, A, C) \
-    __builtin_ia32_exp2ps_mask(A, W, U, C)
-
-#define _mm512_maskz_exp2a23_round_ps(U, A, C)   \
-    __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
-
-#define _mm512_rcp28_round_pd(A, C)            \
-    __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
-
-#define _mm512_mask_rcp28_round_pd(W, U, A, C) \
-    __builtin_ia32_rcp28pd_mask(A, W, U, C)
-
-#define _mm512_maskz_rcp28_round_pd(U, A, C)   \
-    __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
-
-#define _mm512_rcp28_round_ps(A, C)            \
-    __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
-
-#define _mm512_mask_rcp28_round_ps(W, U, A, C) \
-    __builtin_ia32_rcp28ps_mask(A, W, U, C)
-
-#define _mm512_maskz_rcp28_round_ps(U, A, C)   \
-    __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
-
-#define _mm512_rsqrt28_round_pd(A, C)            \
-    __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
-
-#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
-    __builtin_ia32_rsqrt28pd_mask(A, W, U, C)
-
-#define _mm512_maskz_rsqrt28_round_pd(U, A, C)   \
-    __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
-
-#define _mm512_rsqrt28_round_ps(A, C)            \
-    __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
-
-#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
-    __builtin_ia32_rsqrt28ps_mask(A, W, U, C)
-
-#define _mm512_maskz_rsqrt28_round_ps(U, A, C)   \
-    __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
-
-#define _mm_rcp28_round_sd(A, B, R)	\
-    __builtin_ia32_rcp28sd_round(A, B, R)
-
-#define _mm_rcp28_round_ss(A, B, R)	\
-    __builtin_ia32_rcp28ss_round(A, B, R)
-
-#define _mm_rsqrt28_round_sd(A, B, R)	\
-    __builtin_ia32_rsqrt28sd_round(A, B, R)
-
-#define _mm_rsqrt28_round_ss(A, B, R)	\
-    __builtin_ia32_rsqrt28ss_round(A, B, R)
-
-#endif
-
-#define _mm512_exp2a23_pd(A)                    \
-    _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_exp2a23_pd(W, U, A)   \
-    _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_exp2a23_pd(U, A)     \
-    _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_exp2a23_ps(A)                    \
-    _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_exp2a23_ps(W, U, A)   \
-    _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_exp2a23_ps(U, A)     \
-    _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_rcp28_pd(A)                    \
-    _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_rcp28_pd(W, U, A)   \
-    _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_rcp28_pd(U, A)     \
-    _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_rcp28_ps(A)                    \
-    _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_rcp28_ps(W, U, A)   \
-    _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_rcp28_ps(U, A)     \
-    _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_rsqrt28_pd(A)                    \
-    _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_rsqrt28_pd(W, U, A)   \
-    _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_rsqrt28_pd(U, A)     \
-    _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_rsqrt28_ps(A)                    \
-    _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_rsqrt28_ps(W, U, A)   \
-    _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_rsqrt28_ps(U, A)     \
-    _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_rcp28_sd(A, B)	\
-    __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_rcp28_ss(A, B)	\
-    __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_rsqrt28_sd(A, B)	\
-    __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_rsqrt28_ss(A, B)	\
-    __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
-
-#ifdef __DISABLE_AVX512ER__
-#undef __DISABLE_AVX512ER__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512ER__ */
-
-#endif /* _AVX512ERINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512fintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512fintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,13142 +1,0 @@
-/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX512FINTRIN_H_INCLUDED
-#define _AVX512FINTRIN_H_INCLUDED
-
-#ifndef __AVX512F__
-#pragma GCC push_options
-#pragma GCC target("avx512f")
-#define __DISABLE_AVX512F__
-#endif /* __AVX512F__ */
-
-/* Internal data types for implementing the intrinsics.  */
-typedef double __v8df __attribute__ ((__vector_size__ (64)));
-typedef float __v16sf __attribute__ ((__vector_size__ (64)));
-typedef long long __v8di __attribute__ ((__vector_size__ (64)));
-typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
-typedef int __v16si __attribute__ ((__vector_size__ (64)));
-typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
-typedef short __v32hi __attribute__ ((__vector_size__ (64)));
-typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
-typedef char __v64qi __attribute__ ((__vector_size__ (64)));
-typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
-
-/* The Intel API is flexible enough that we must allow aliasing with other
-   vector types, and their scalar components.  */
-typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
-typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
-typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
-
-typedef unsigned char  __mmask8;
-typedef unsigned short __mmask16;
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_set_epi64 (long long __A, long long __B, long long __C,
-		  long long __D, long long __E, long long __F,
-		  long long __G, long long __H)
-{
-  return __extension__ (__m512i) (__v8di)
-	 { __H, __G, __F, __E, __D, __C, __B, __A };
-}
-
-/* Create the vector [A B C D E F G H I J K L M N O P].  */
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_set_epi32 (int __A, int __B, int __C, int __D,
-		  int __E, int __F, int __G, int __H,
-		  int __I, int __J, int __K, int __L,
-		  int __M, int __N, int __O, int __P)
-{
-  return __extension__ (__m512i)(__v16si)
-	 { __P, __O, __N, __M, __L, __K, __J, __I,
-	   __H, __G, __F, __E, __D, __C, __B, __A };
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_set_pd (double __A, double __B, double __C, double __D,
-	       double __E, double __F, double __G, double __H)
-{
-  return __extension__ (__m512d)
-	 { __H, __G, __F, __E, __D, __C, __B, __A };
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_set_ps (float __A, float __B, float __C, float __D,
-	       float __E, float __F, float __G, float __H,
-	       float __I, float __J, float __K, float __L,
-	       float __M, float __N, float __O, float __P)
-{
-  return __extension__ (__m512)
-	 { __P, __O, __N, __M, __L, __K, __J, __I,
-	   __H, __G, __F, __E, __D, __C, __B, __A };
-}
-
-#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)			      \
-  _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)
-
-#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,			      \
-			  e8,e9,e10,e11,e12,e13,e14,e15)		      \
-  _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
-
-#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)				      \
-  _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)
-
-#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
-  _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_undefined_ps (void)
-{
-  __m512 __Y = __Y;
-  return __Y;
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_undefined_pd (void)
-{
-  __m512d __Y = __Y;
-  return __Y;
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_undefined_si512 (void)
-{
-  __m512i __Y = __Y;
-  return __Y;
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_set1_epi8 (char __A)
-{
-  return __extension__ (__m512i)(__v64qi)
-	 { __A, __A, __A, __A, __A, __A, __A, __A,
-	   __A, __A, __A, __A, __A, __A, __A, __A,
-	   __A, __A, __A, __A, __A, __A, __A, __A,
-	   __A, __A, __A, __A, __A, __A, __A, __A,
-	   __A, __A, __A, __A, __A, __A, __A, __A,
-	   __A, __A, __A, __A, __A, __A, __A, __A,
-	   __A, __A, __A, __A, __A, __A, __A, __A,
-	   __A, __A, __A, __A, __A, __A, __A, __A };
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_set1_epi16 (short __A)
-{
-  return __extension__ (__m512i)(__v32hi)
-	 { __A, __A, __A, __A, __A, __A, __A, __A,
-	   __A, __A, __A, __A, __A, __A, __A, __A,
-	   __A, __A, __A, __A, __A, __A, __A, __A,
-	   __A, __A, __A, __A, __A, __A, __A, __A };
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_set1_pd (double __A)
-{
-  return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
-						  (__v2df) { __A, },
-						  (__v8df)
-						  _mm512_undefined_pd (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_set1_ps (float __A)
-{
-  return (__m512) __builtin_ia32_broadcastss512 (__extension__
-						 (__v4sf) { __A, },
-						 (__v16sf)
-						 _mm512_undefined_ps (),
-						 (__mmask16) -1);
-}
-
-/* Create the vector [A B C D A B C D A B C D A B C D].  */
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
-{
-  return __extension__ (__m512i)(__v16si)
-	 { __D, __C, __B, __A, __D, __C, __B, __A,
-	   __D, __C, __B, __A, __D, __C, __B, __A };
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_set4_epi64 (long long __A, long long __B, long long __C,
-		   long long __D)
-{
-  return __extension__ (__m512i) (__v8di)
-	 { __D, __C, __B, __A, __D, __C, __B, __A };
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_set4_pd (double __A, double __B, double __C, double __D)
-{
-  return __extension__ (__m512d)
-	 { __D, __C, __B, __A, __D, __C, __B, __A };
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_set4_ps (float __A, float __B, float __C, float __D)
-{
-  return __extension__ (__m512)
-	 { __D, __C, __B, __A, __D, __C, __B, __A,
-	   __D, __C, __B, __A, __D, __C, __B, __A };
-}
-
-#define _mm512_setr4_epi64(e0,e1,e2,e3)					      \
-  _mm512_set4_epi64(e3,e2,e1,e0)
-
-#define _mm512_setr4_epi32(e0,e1,e2,e3)					      \
-  _mm512_set4_epi32(e3,e2,e1,e0)
-
-#define _mm512_setr4_pd(e0,e1,e2,e3)					      \
-  _mm512_set4_pd(e3,e2,e1,e0)
-
-#define _mm512_setr4_ps(e0,e1,e2,e3)					      \
-  _mm512_set4_ps(e3,e2,e1,e0)
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_setzero_ps (void)
-{
-  return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-				 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_setzero_pd (void)
-{
-  return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_setzero_epi32 (void)
-{
-  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_setzero_si512 (void)
-{
-  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
-{
-  return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
-						  (__v8df) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
-{
-  return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
-						  (__v8df)
-						  _mm512_setzero_pd (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
-						 (__v16sf) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
-						 (__v16sf)
-						 _mm512_setzero_ps (),
-						 (__mmask16) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_load_pd (void const *__P)
-{
-  return *(__m512d *) __P;
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
-{
-  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
-						   (__v8df) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
-{
-  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
-						   (__v8df)
-						   _mm512_setzero_pd (),
-						   (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_store_pd (void *__P, __m512d __A)
-{
-  *(__m512d *) __P = __A;
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
-{
-  __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
-				   (__mmask8) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_load_ps (void const *__P)
-{
-  return *(__m512 *) __P;
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
-{
-  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
-						  (__v16sf) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
-{
-  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
-						  (__v16sf)
-						  _mm512_setzero_ps (),
-						  (__mmask16) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_store_ps (void *__P, __m512 __A)
-{
-  *(__m512 *) __P = __A;
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
-{
-  __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
-				   (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
-						     (__v8di) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
-						     (__v8di)
-						     _mm512_setzero_si512 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_load_epi64 (void const *__P)
-{
-  return *(__m512i *) __P;
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
-{
-  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
-							(__v8di) __W,
-							(__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
-{
-  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
-							(__v8di)
-							_mm512_setzero_si512 (),
-							(__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_store_epi64 (void *__P, __m512i __A)
-{
-  *(__m512i *) __P = __A;
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
-{
-  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
-					(__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
-						     (__v16si) __W,
-						     (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
-						     (__v16si)
-						     _mm512_setzero_si512 (),
-						     (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_load_si512 (void const *__P)
-{
-  return *(__m512i *) __P;
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_load_epi32 (void const *__P)
-{
-  return *(__m512i *) __P;
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
-{
-  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
-							(__v16si) __W,
-							(__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
-{
-  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
-							(__v16si)
-							_mm512_setzero_si512 (),
-							(__mmask16) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_store_si512 (void *__P, __m512i __A)
-{
-  *(__m512i *) __P = __A;
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_store_epi32 (void *__P, __m512i __A)
-{
-  *(__m512i *) __P = __A;
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
-{
-  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
-					(__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mullo_epi32 (__m512i __A, __m512i __B)
-{
-  return (__m512i) ((__v16su) __A * (__v16su) __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si)
-						  _mm512_setzero_si512 (),
-						  __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si) __W, __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
-						  (__v16si) __Y,
-						  (__v16si)
-						  _mm512_undefined_si512 (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
-						  (__v16si) __Y,
-						  (__v16si) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
-						  (__v16si) __Y,
-						  (__v16si)
-						  _mm512_setzero_si512 (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_srav_epi32 (__m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
-						  (__v16si) __Y,
-						  (__v16si)
-						  _mm512_undefined_si512 (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
-						  (__v16si) __Y,
-						  (__v16si) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
-						  (__v16si) __Y,
-						  (__v16si)
-						  _mm512_setzero_si512 (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
-						  (__v16si) __Y,
-						  (__v16si)
-						  _mm512_undefined_si512 (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
-						  (__v16si) __Y,
-						  (__v16si) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
-						  (__v16si) __Y,
-						  (__v16si)
-						  _mm512_setzero_si512 (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_add_epi64 (__m512i __A, __m512i __B)
-{
-  return (__m512i) ((__v8du) __A + (__v8du) __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
-						 (__v8di) __B,
-						 (__v8di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
-						 (__v8di) __B,
-						 (__v8di)
-						 _mm512_setzero_si512 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sub_epi64 (__m512i __A, __m512i __B)
-{
-  return (__m512i) ((__v8du) __A - (__v8du) __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
-						 (__v8di) __B,
-						 (__v8di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
-						 (__v8di) __B,
-						 (__v8di)
-						 _mm512_setzero_si512 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
-						 (__v8di) __Y,
-						 (__v8di)
-						 _mm512_undefined_pd (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
-						 (__v8di) __Y,
-						 (__v8di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
-						 (__v8di) __Y,
-						 (__v8di)
-						 _mm512_setzero_si512 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_srav_epi64 (__m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
-						 (__v8di) __Y,
-						 (__v8di)
-						 _mm512_undefined_si512 (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
-						 (__v8di) __Y,
-						 (__v8di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
-						 (__v8di) __Y,
-						 (__v8di)
-						 _mm512_setzero_si512 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
-						 (__v8di) __Y,
-						 (__v8di)
-						 _mm512_undefined_si512 (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
-						 (__v8di) __Y,
-						 (__v8di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
-						 (__v8di) __Y,
-						 (__v8di)
-						 _mm512_setzero_si512 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_add_epi32 (__m512i __A, __m512i __B)
-{
-  return (__m512i) ((__v16su) __A + (__v16su) __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
-						 (__v16si) __B,
-						 (__v16si) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
-						 (__v16si) __B,
-						 (__v16si)
-						 _mm512_setzero_si512 (),
-						 (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mul_epi32 (__m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
-						  (__v16si) __Y,
-						  (__v8di)
-						  _mm512_undefined_si512 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
-						  (__v16si) __Y,
-						  (__v8di) __W, __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
-						  (__v16si) __Y,
-						  (__v8di)
-						  _mm512_setzero_si512 (),
-						  __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sub_epi32 (__m512i __A, __m512i __B)
-{
-  return (__m512i) ((__v16su) __A - (__v16su) __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
-						 (__v16si) __B,
-						 (__v16si) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
-						 (__v16si) __B,
-						 (__v16si)
-						 _mm512_setzero_si512 (),
-						 (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mul_epu32 (__m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
-						   (__v16si) __Y,
-						   (__v8di)
-						   _mm512_undefined_si512 (),
-						   (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
-						   (__v16si) __Y,
-						   (__v8di) __W, __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
-						   (__v16si) __Y,
-						   (__v8di)
-						   _mm512_setzero_si512 (),
-						   __M);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_slli_epi64 (__m512i __A, unsigned int __B)
-{
-  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
-						  (__v8di)
-						  _mm512_undefined_si512 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
-			unsigned int __B)
-{
-  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
-						  (__v8di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
-{
-  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
-						  (__v8di)
-						  _mm512_setzero_si512 (),
-						  (__mmask8) __U);
-}
-#else
-#define _mm512_slli_epi64(X, C)						   \
-  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
-    (__v8di)(__m512i)_mm512_undefined_si512 (),\
-    (__mmask8)-1))
-
-#define _mm512_mask_slli_epi64(W, U, X, C)				   \
-  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
-    (__v8di)(__m512i)(W),\
-    (__mmask8)(U)))
-
-#define _mm512_maskz_slli_epi64(U, X, C)                                   \
-  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
-    (__v8di)(__m512i)_mm512_setzero_si512 (),\
-    (__mmask8)(U)))
-#endif
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sll_epi64 (__m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
-						 (__v2di) __B,
-						 (__v8di)
-						 _mm512_undefined_si512 (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
-						 (__v2di) __B,
-						 (__v8di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
-						 (__v2di) __B,
-						 (__v8di)
-						 _mm512_setzero_si512 (),
-						 (__mmask8) __U);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_srli_epi64 (__m512i __A, unsigned int __B)
-{
-  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
-						  (__v8di)
-						  _mm512_undefined_si512 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
-			__m512i __A, unsigned int __B)
-{
-  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
-						  (__v8di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
-{
-  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
-						  (__v8di)
-						  _mm512_setzero_si512 (),
-						  (__mmask8) __U);
-}
-#else
-#define _mm512_srli_epi64(X, C)						   \
-  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
-    (__v8di)(__m512i)_mm512_undefined_si512 (),\
-    (__mmask8)-1))
-
-#define _mm512_mask_srli_epi64(W, U, X, C)				   \
-  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
-    (__v8di)(__m512i)(W),\
-    (__mmask8)(U)))
-
-#define _mm512_maskz_srli_epi64(U, X, C)                                   \
-  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
-    (__v8di)(__m512i)_mm512_setzero_si512 (),\
-    (__mmask8)(U)))
-#endif
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_srl_epi64 (__m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
-						 (__v2di) __B,
-						 (__v8di)
-						 _mm512_undefined_si512 (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
-						 (__v2di) __B,
-						 (__v8di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
-						 (__v2di) __B,
-						 (__v8di)
-						 _mm512_setzero_si512 (),
-						 (__mmask8) __U);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_srai_epi64 (__m512i __A, unsigned int __B)
-{
-  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
-						  (__v8di)
-						  _mm512_undefined_si512 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
-			unsigned int __B)
-{
-  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
-						  (__v8di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
-{
-  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
-						  (__v8di)
-						  _mm512_setzero_si512 (),
-						  (__mmask8) __U);
-}
-#else
-#define _mm512_srai_epi64(X, C)						   \
-  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
-    (__v8di)(__m512i)_mm512_undefined_si512 (),\
-    (__mmask8)-1))
-
-#define _mm512_mask_srai_epi64(W, U, X, C)				   \
-  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
-    (__v8di)(__m512i)(W),\
-    (__mmask8)(U)))
-
-#define _mm512_maskz_srai_epi64(U, X, C)				   \
-  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
-    (__v8di)(__m512i)_mm512_setzero_si512 (),\
-    (__mmask8)(U)))
-#endif
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sra_epi64 (__m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
-						 (__v2di) __B,
-						 (__v8di)
-						 _mm512_undefined_si512 (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
-						 (__v2di) __B,
-						 (__v8di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
-						 (__v2di) __B,
-						 (__v8di)
-						 _mm512_setzero_si512 (),
-						 (__mmask8) __U);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_slli_epi32 (__m512i __A, unsigned int __B)
-{
-  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
-						  (__v16si)
-						  _mm512_undefined_si512 (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
-			unsigned int __B)
-{
-  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
-						  (__v16si) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
-{
-  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
-						  (__v16si)
-						  _mm512_setzero_si512 (),
-						  (__mmask16) __U);
-}
-#else
-#define _mm512_slli_epi32(X, C)						    \
-  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
-    (__v16si)(__m512i)_mm512_undefined_si512 (),\
-    (__mmask16)-1))
-
-#define _mm512_mask_slli_epi32(W, U, X, C)                                  \
-  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
-    (__v16si)(__m512i)(W),\
-    (__mmask16)(U)))
-
-#define _mm512_maskz_slli_epi32(U, X, C)                                    \
-  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
-    (__v16si)(__m512i)_mm512_setzero_si512 (),\
-    (__mmask16)(U)))
-#endif
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sll_epi32 (__m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
-						 (__v4si) __B,
-						 (__v16si)
-						 _mm512_undefined_si512 (),
-						 (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
-						 (__v4si) __B,
-						 (__v16si) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
-						 (__v4si) __B,
-						 (__v16si)
-						 _mm512_setzero_si512 (),
-						 (__mmask16) __U);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_srli_epi32 (__m512i __A, unsigned int __B)
-{
-  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
-						  (__v16si)
-						  _mm512_undefined_si512 (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
-			__m512i __A, unsigned int __B)
-{
-  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
-						  (__v16si) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
-{
-  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
-						  (__v16si)
-						  _mm512_setzero_si512 (),
-						  (__mmask16) __U);
-}
-#else
-#define _mm512_srli_epi32(X, C)						    \
-  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
-    (__v16si)(__m512i)_mm512_undefined_si512 (),\
-    (__mmask16)-1))
-
-#define _mm512_mask_srli_epi32(W, U, X, C)                                  \
-  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
-    (__v16si)(__m512i)(W),\
-    (__mmask16)(U)))
-
-#define _mm512_maskz_srli_epi32(U, X, C)				    \
-  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
-    (__v16si)(__m512i)_mm512_setzero_si512 (),\
-    (__mmask16)(U)))
-#endif
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_srl_epi32 (__m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
-						 (__v4si) __B,
-						 (__v16si)
-						 _mm512_undefined_si512 (),
-						 (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
-						 (__v4si) __B,
-						 (__v16si) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
-						 (__v4si) __B,
-						 (__v16si)
-						 _mm512_setzero_si512 (),
-						 (__mmask16) __U);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_srai_epi32 (__m512i __A, unsigned int __B)
-{
-  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
-						  (__v16si)
-						  _mm512_undefined_si512 (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
-			unsigned int __B)
-{
-  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
-						  (__v16si) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
-{
-  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
-						  (__v16si)
-						  _mm512_setzero_si512 (),
-						  (__mmask16) __U);
-}
-#else
-#define _mm512_srai_epi32(X, C)						    \
-  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
-    (__v16si)(__m512i)_mm512_undefined_si512 (),\
-    (__mmask16)-1))
-
-#define _mm512_mask_srai_epi32(W, U, X, C)				    \
-  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
-    (__v16si)(__m512i)(W),\
-    (__mmask16)(U)))
-
-#define _mm512_maskz_srai_epi32(U, X, C)				    \
-  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
-    (__v16si)(__m512i)_mm512_setzero_si512 (),\
-    (__mmask16)(U)))
-#endif
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sra_epi32 (__m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
-						 (__v4si) __B,
-						 (__v16si)
-						 _mm512_undefined_si512 (),
-						 (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
-						 (__v4si) __B,
-						 (__v16si) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
-{
-  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
-						 (__v4si) __B,
-						 (__v16si)
-						 _mm512_setzero_si512 (),
-						 (__mmask16) __U);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
-{
-  return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
-					       (__v2df) __B,
-					       __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
-{
-  return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
-					      (__v4sf) __B,
-					      __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
-{
-  return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
-					       (__v2df) __B,
-					       __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
-{
-  return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
-					      (__v4sf) __B,
-					      __R);
-}
-
-#else
-#define _mm_add_round_sd(A, B, C)            \
-    (__m128d)__builtin_ia32_addsd_round(A, B, C)
-
-#define _mm_add_round_ss(A, B, C)            \
-    (__m128)__builtin_ia32_addss_round(A, B, C)
-
-#define _mm_sub_round_sd(A, B, C)            \
-    (__m128d)__builtin_ia32_subsd_round(A, B, C)
-
-#define _mm_sub_round_ss(A, B, C)            \
-    (__m128)__builtin_ia32_subss_round(A, B, C)
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int imm)
-{
-  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
-						     (__v8di) __B,
-						     (__v8di) __C, imm,
-						     (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
-				__m512i __C, const int imm)
-{
-  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
-						     (__v8di) __B,
-						     (__v8di) __C, imm,
-						     (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
-				 __m512i __C, const int imm)
-{
-  return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
-						      (__v8di) __B,
-						      (__v8di) __C,
-						      imm, (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int imm)
-{
-  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
-						     (__v16si) __B,
-						     (__v16si) __C,
-						     imm, (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
-				__m512i __C, const int imm)
-{
-  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
-						     (__v16si) __B,
-						     (__v16si) __C,
-						     imm, (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
-				 __m512i __C, const int imm)
-{
-  return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
-						      (__v16si) __B,
-						      (__v16si) __C,
-						      imm, (__mmask16) __U);
-}
-#else
-#define _mm512_ternarylogic_epi64(A, B, C, I)				\
-  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A),	\
-    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1))
-#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I)			\
-  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A),	\
-    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
-#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I)			\
-  ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A),	\
-    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
-#define _mm512_ternarylogic_epi32(A, B, C, I)				\
-  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A),	\
-    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I),		\
-    (__mmask16)-1))
-#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I)			\
-  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A),	\
-    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I),		\
-    (__mmask16)(U)))
-#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I)			\
-  ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A),	\
-    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I),		\
-    (__mmask16)(U)))
-#endif
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rcp14_pd (__m512d __A)
-{
-  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
-						   (__v8df)
-						   _mm512_undefined_pd (),
-						   (__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
-{
-  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
-						   (__v8df) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
-{
-  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
-						   (__v8df)
-						   _mm512_setzero_pd (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rcp14_ps (__m512 __A)
-{
-  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
-						  (__v16sf)
-						  _mm512_undefined_ps (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
-						  (__v16sf) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
-						  (__v16sf)
-						  _mm512_setzero_ps (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rcp14_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
-					   (__v2df) __A);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rcp14_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
-					  (__v4sf) __A);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rsqrt14_pd (__m512d __A)
-{
-  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
-						     (__v8df)
-						     _mm512_undefined_pd (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
-{
-  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
-						     (__v8df) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
-{
-  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
-						     (__v8df)
-						     _mm512_setzero_pd (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rsqrt14_ps (__m512 __A)
-{
-  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
-						    (__v16sf)
-						    _mm512_undefined_ps (),
-						    (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
-						    (__v16sf) __W,
-						    (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
-						    (__v16sf)
-						    _mm512_setzero_ps (),
-						    (__mmask16) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rsqrt14_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
-					     (__v2df) __A);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rsqrt14_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
-					    (__v4sf) __A);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sqrt_round_pd (__m512d __A, const int __R)
-{
-  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
-						  (__v8df)
-						  _mm512_undefined_pd (),
-						  (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
-			   const int __R)
-{
-  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
-						  (__v8df) __W,
-						  (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
-{
-  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
-						  (__v8df)
-						  _mm512_setzero_pd (),
-						  (__mmask8) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sqrt_round_ps (__m512 __A, const int __R)
-{
-  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
-						 (__v16sf)
-						 _mm512_undefined_ps (),
-						 (__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
-{
-  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
-						 (__v16sf) __W,
-						 (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
-{
-  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
-						 (__v16sf)
-						 _mm512_setzero_ps (),
-						 (__mmask16) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
-{
-  return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
-						(__v2df) __A,
-						__R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
-{
-  return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
-					       (__v4sf) __A,
-					       __R);
-}
-#else
-#define _mm512_sqrt_round_pd(A, C)            \
-    (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C)
-
-#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
-    (__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C)
-
-#define _mm512_maskz_sqrt_round_pd(U, A, C)   \
-    (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
-
-#define _mm512_sqrt_round_ps(A, C)            \
-    (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C)
-
-#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
-    (__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C)
-
-#define _mm512_maskz_sqrt_round_ps(U, A, C)   \
-    (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
-
-#define _mm_sqrt_round_sd(A, B, C)            \
-    (__m128d)__builtin_ia32_sqrtsd_round(A, B, C)
-
-#define _mm_sqrt_round_ss(A, B, C)            \
-    (__m128)__builtin_ia32_sqrtss_round(A, B, C)
-#endif
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepi8_epi32 (__m128i __A)
-{
-  return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
-						    (__v16si)
-						    _mm512_undefined_si512 (),
-						    (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
-						    (__v16si) __W,
-						    (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
-						    (__v16si)
-						    _mm512_setzero_si512 (),
-						    (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepi8_epi64 (__m128i __A)
-{
-  return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
-						    (__v8di)
-						    _mm512_undefined_si512 (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
-						    (__v8di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
-						    (__v8di)
-						    _mm512_setzero_si512 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepi16_epi32 (__m256i __A)
-{
-  return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
-						    (__v16si)
-						    _mm512_undefined_si512 (),
-						    (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
-{
-  return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
-						    (__v16si) __W,
-						    (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
-{
-  return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
-						    (__v16si)
-						    _mm512_setzero_si512 (),
-						    (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepi16_epi64 (__m128i __A)
-{
-  return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
-						    (__v8di)
-						    _mm512_undefined_si512 (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
-						    (__v8di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
-						    (__v8di)
-						    _mm512_setzero_si512 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepi32_epi64 (__m256i __X)
-{
-  return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
-						    (__v8di)
-						    _mm512_undefined_si512 (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
-{
-  return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
-						    (__v8di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
-{
-  return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
-						    (__v8di)
-						    _mm512_setzero_si512 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepu8_epi32 (__m128i __A)
-{
-  return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
-						    (__v16si)
-						    _mm512_undefined_si512 (),
-						    (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
-						    (__v16si) __W,
-						    (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
-						    (__v16si)
-						    _mm512_setzero_si512 (),
-						    (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepu8_epi64 (__m128i __A)
-{
-  return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
-						    (__v8di)
-						    _mm512_undefined_si512 (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
-						    (__v8di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
-						    (__v8di)
-						    _mm512_setzero_si512 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepu16_epi32 (__m256i __A)
-{
-  return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
-						    (__v16si)
-						    _mm512_undefined_si512 (),
-						    (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
-{
-  return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
-						    (__v16si) __W,
-						    (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
-{
-  return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
-						    (__v16si)
-						    _mm512_setzero_si512 (),
-						    (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepu16_epi64 (__m128i __A)
-{
-  return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
-						    (__v8di)
-						    _mm512_undefined_si512 (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
-						    (__v8di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
-						    (__v8di)
-						    _mm512_setzero_si512 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepu32_epi64 (__m256i __X)
-{
-  return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
-						    (__v8di)
-						    _mm512_undefined_si512 (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
-{
-  return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
-						    (__v8di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
-{
-  return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
-						    (__v8di)
-						    _mm512_setzero_si512 (),
-						    (__mmask8) __U);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
-{
-  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df)
-						 _mm512_undefined_pd (),
-						 (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
-			  __m512d __B, const int __R)
-{
-  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df) __W,
-						 (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
-			   const int __R)
-{
-  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df)
-						 _mm512_setzero_pd (),
-						 (__mmask8) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
-{
-  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_undefined_ps (),
-						(__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
-			  __m512 __B, const int __R)
-{
-  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf) __W,
-						(__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
-{
-  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_setzero_ps (),
-						(__mmask16) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
-{
-  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df)
-						 _mm512_undefined_pd (),
-						 (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
-			  __m512d __B, const int __R)
-{
-  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df) __W,
-						 (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
-			   const int __R)
-{
-  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df)
-						 _mm512_setzero_pd (),
-						 (__mmask8) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
-{
-  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_undefined_ps (),
-						(__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
-			  __m512 __B, const int __R)
-{
-  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf) __W,
-						(__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
-{
-  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_setzero_ps (),
-						(__mmask16) __U, __R);
-}
-#else
-#define _mm512_add_round_pd(A, B, C)            \
-    (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
-
-#define _mm512_mask_add_round_pd(W, U, A, B, C) \
-    (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C)
-
-#define _mm512_maskz_add_round_pd(U, A, B, C)   \
-    (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
-
-#define _mm512_add_round_ps(A, B, C)            \
-    (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
-
-#define _mm512_mask_add_round_ps(W, U, A, B, C) \
-    (__m512)__builtin_ia32_addps512_mask(A, B, W, U, C)
-
-#define _mm512_maskz_add_round_ps(U, A, B, C)   \
-    (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
-
-#define _mm512_sub_round_pd(A, B, C)            \
-    (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
-
-#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
-    (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C)
-
-#define _mm512_maskz_sub_round_pd(U, A, B, C)   \
-    (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
-
-#define _mm512_sub_round_ps(A, B, C)            \
-    (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
-
-#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
-    (__m512)__builtin_ia32_subps512_mask(A, B, W, U, C)
-
-#define _mm512_maskz_sub_round_ps(U, A, B, C)   \
-    (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
-{
-  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df)
-						 _mm512_undefined_pd (),
-						 (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
-			  __m512d __B, const int __R)
-{
-  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df) __W,
-						 (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
-			   const int __R)
-{
-  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df)
-						 _mm512_setzero_pd (),
-						 (__mmask8) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
-{
-  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_undefined_ps (),
-						(__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
-			  __m512 __B, const int __R)
-{
-  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf) __W,
-						(__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
-{
-  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_setzero_ps (),
-						(__mmask16) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
-{
-  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
-						 (__v8df) __V,
-						 (__v8df)
-						 _mm512_undefined_pd (),
-						 (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
-			  __m512d __V, const int __R)
-{
-  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
-						 (__v8df) __V,
-						 (__v8df) __W,
-						 (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
-			   const int __R)
-{
-  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
-						 (__v8df) __V,
-						 (__v8df)
-						 _mm512_setzero_pd (),
-						 (__mmask8) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
-{
-  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_undefined_ps (),
-						(__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
-			  __m512 __B, const int __R)
-{
-  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf) __W,
-						(__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
-{
-  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_setzero_ps (),
-						(__mmask16) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
-{
-  return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
-					       (__v2df) __B,
-					       __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
-{
-  return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
-					      (__v4sf) __B,
-					      __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
-{
-  return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
-					       (__v2df) __B,
-					       __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
-{
-  return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
-					      (__v4sf) __B,
-					      __R);
-}
-
-#else
-#define _mm512_mul_round_pd(A, B, C)            \
-    (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
-
-#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
-    (__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C)
-
-#define _mm512_maskz_mul_round_pd(U, A, B, C)   \
-    (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
-
-#define _mm512_mul_round_ps(A, B, C)            \
-    (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
-
-#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
-    (__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C)
-
-#define _mm512_maskz_mul_round_ps(U, A, B, C)   \
-    (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
-
-#define _mm512_div_round_pd(A, B, C)            \
-    (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
-
-#define _mm512_mask_div_round_pd(W, U, A, B, C) \
-    (__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C)
-
-#define _mm512_maskz_div_round_pd(U, A, B, C)   \
-    (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
-
-#define _mm512_div_round_ps(A, B, C)            \
-    (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
-
-#define _mm512_mask_div_round_ps(W, U, A, B, C) \
-    (__m512)__builtin_ia32_divps512_mask(A, B, W, U, C)
-
-#define _mm512_maskz_div_round_ps(U, A, B, C)   \
-    (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
-
-#define _mm_mul_round_sd(A, B, C)            \
-    (__m128d)__builtin_ia32_mulsd_round(A, B, C)
-
-#define _mm_mul_round_ss(A, B, C)            \
-    (__m128)__builtin_ia32_mulss_round(A, B, C)
-
-#define _mm_div_round_sd(A, B, C)            \
-    (__m128d)__builtin_ia32_divsd_round(A, B, C)
-
-#define _mm_div_round_ss(A, B, C)            \
-    (__m128)__builtin_ia32_divss_round(A, B, C)
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
-{
-  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df)
-						 _mm512_undefined_pd (),
-						 (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
-			  __m512d __B, const int __R)
-{
-  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df) __W,
-						 (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
-			   const int __R)
-{
-  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df)
-						 _mm512_setzero_pd (),
-						 (__mmask8) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
-{
-  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_undefined_ps (),
-						(__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
-			  __m512 __B, const int __R)
-{
-  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf) __W,
-						(__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
-{
-  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_setzero_ps (),
-						(__mmask16) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
-{
-  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df)
-						 _mm512_undefined_pd (),
-						 (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
-			  __m512d __B, const int __R)
-{
-  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df) __W,
-						 (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
-			   const int __R)
-{
-  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df)
-						 _mm512_setzero_pd (),
-						 (__mmask8) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
-{
-  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_undefined_ps (),
-						(__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
-			  __m512 __B, const int __R)
-{
-  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf) __W,
-						(__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
-{
-  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_setzero_ps (),
-						(__mmask16) __U, __R);
-}
-#else
-#define _mm512_max_round_pd(A, B,  R) \
-    (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
-
-#define _mm512_mask_max_round_pd(W, U,  A, B, R) \
-    (__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R)
-
-#define _mm512_maskz_max_round_pd(U, A,  B, R) \
-    (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
-
-#define _mm512_max_round_ps(A, B,  R) \
-    (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_pd(), -1, R)
-
-#define _mm512_mask_max_round_ps(W, U,  A, B, R) \
-    (__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R)
-
-#define _mm512_maskz_max_round_ps(U, A,  B, R) \
-    (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
-
-#define _mm512_min_round_pd(A, B,  R) \
-    (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
-
-#define _mm512_mask_min_round_pd(W, U,  A, B, R) \
-    (__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R)
-
-#define _mm512_maskz_min_round_pd(U, A,  B, R) \
-    (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
-
-#define _mm512_min_round_ps(A, B, R) \
-    (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
-
-#define _mm512_mask_min_round_ps(W, U,  A, B, R) \
-    (__m512)__builtin_ia32_minps512_mask(A, B, W, U, R)
-
-#define _mm512_maskz_min_round_ps(U, A,  B, R) \
-    (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
-{
-  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
-						    (__v8df) __B,
-						    (__v8df)
-						    _mm512_undefined_pd (),
-						    (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
-			     __m512d __B, const int __R)
-{
-  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
-						    (__v8df) __B,
-						    (__v8df) __W,
-						    (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
-			      const int __R)
-{
-  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
-						    (__v8df) __B,
-						    (__v8df)
-						    _mm512_setzero_pd (),
-						    (__mmask8) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
-{
-  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
-						   (__v16sf) __B,
-						   (__v16sf)
-						   _mm512_undefined_ps (),
-						   (__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
-			     __m512 __B, const int __R)
-{
-  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
-						   (__v16sf) __B,
-						   (__v16sf) __W,
-						   (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
-			      const int __R)
-{
-  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
-						   (__v16sf) __B,
-						   (__v16sf)
-						   _mm512_setzero_ps (),
-						   (__mmask16) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
-{
-  return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
-						  (__v2df) __B,
-						  __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
-{
-  return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
-						 (__v4sf) __B,
-						 __R);
-}
-#else
-#define _mm512_scalef_round_pd(A, B, C)            \
-    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
-
-#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
-    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
-
-#define _mm512_maskz_scalef_round_pd(U, A, B, C)   \
-    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
-
-#define _mm512_scalef_round_ps(A, B, C)            \
-    (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
-
-#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
-    (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
-
-#define _mm512_maskz_scalef_round_ps(U, A, B, C)   \
-    (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
-
-#define _mm_scalef_round_sd(A, B, C)            \
-    (__m128d)__builtin_ia32_scalefsd_round(A, B, C)
-
-#define _mm_scalef_round_ss(A, B, C)            \
-    (__m128)__builtin_ia32_scalefss_round(A, B, C)
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-						    (__v8df) __B,
-						    (__v8df) __C,
-						    (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
-			    __m512d __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-						    (__v8df) __B,
-						    (__v8df) __C,
-						    (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
-			     __mmask8 __U, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
-						     (__v8df) __B,
-						     (__v8df) __C,
-						     (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
-			     __m512d __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
-						     (__v8df) __B,
-						     (__v8df) __C,
-						     (__mmask8) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-						   (__v16sf) __B,
-						   (__v16sf) __C,
-						   (__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
-			    __m512 __C, const int __R)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-						   (__v16sf) __B,
-						   (__v16sf) __C,
-						   (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
-			     __mmask16 __U, const int __R)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
-						    (__v16sf) __B,
-						    (__v16sf) __C,
-						    (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
-			     __m512 __C, const int __R)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
-						    (__v16sf) __B,
-						    (__v16sf) __C,
-						    (__mmask16) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-						    (__v8df) __B,
-						    -(__v8df) __C,
-						    (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
-			    __m512d __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-						    (__v8df) __B,
-						    -(__v8df) __C,
-						    (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
-			     __mmask8 __U, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
-						     (__v8df) __B,
-						     (__v8df) __C,
-						     (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
-			     __m512d __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
-						     (__v8df) __B,
-						     -(__v8df) __C,
-						     (__mmask8) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-						   (__v16sf) __B,
-						   -(__v16sf) __C,
-						   (__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
-			    __m512 __C, const int __R)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-						   (__v16sf) __B,
-						   -(__v16sf) __C,
-						   (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
-			     __mmask16 __U, const int __R)
-{
-  return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
-						    (__v16sf) __B,
-						    (__v16sf) __C,
-						    (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
-			     __m512 __C, const int __R)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
-						    (__v16sf) __B,
-						    -(__v16sf) __C,
-						    (__mmask16) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
-						       (__v8df) __B,
-						       (__v8df) __C,
-						       (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
-			       __m512d __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
-						       (__v8df) __B,
-						       (__v8df) __C,
-						       (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
-				__mmask8 __U, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
-							(__v8df) __B,
-							(__v8df) __C,
-							(__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
-				__m512d __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
-							(__v8df) __B,
-							(__v8df) __C,
-							(__mmask8) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
-{
-  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
-						      (__v16sf) __B,
-						      (__v16sf) __C,
-						      (__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
-			       __m512 __C, const int __R)
-{
-  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
-						      (__v16sf) __B,
-						      (__v16sf) __C,
-						      (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
-				__mmask16 __U, const int __R)
-{
-  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
-						       (__v16sf) __B,
-						       (__v16sf) __C,
-						       (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
-				__m512 __C, const int __R)
-{
-  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
-						       (__v16sf) __B,
-						       (__v16sf) __C,
-						       (__mmask16) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
-						       (__v8df) __B,
-						       -(__v8df) __C,
-						       (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
-			       __m512d __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
-						       (__v8df) __B,
-						       -(__v8df) __C,
-						       (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
-				__mmask8 __U, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
-							(__v8df) __B,
-							(__v8df) __C,
-							(__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
-				__m512d __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
-							(__v8df) __B,
-							-(__v8df) __C,
-							(__mmask8) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
-{
-  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
-						      (__v16sf) __B,
-						      -(__v16sf) __C,
-						      (__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
-			       __m512 __C, const int __R)
-{
-  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
-						      (__v16sf) __B,
-						      -(__v16sf) __C,
-						      (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
-				__mmask16 __U, const int __R)
-{
-  return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
-						       (__v16sf) __B,
-						       (__v16sf) __C,
-						       (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
-				__m512 __C, const int __R)
-{
-  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
-						       (__v16sf) __B,
-						       -(__v16sf) __C,
-						       (__mmask16) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
-						    (__v8df) __B,
-						    (__v8df) __C,
-						    (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
-			     __m512d __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
-						     (__v8df) __B,
-						     (__v8df) __C,
-						     (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
-			      __mmask8 __U, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
-						     (__v8df) __B,
-						     (__v8df) __C,
-						     (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
-			      __m512d __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
-						     (__v8df) __B,
-						     (__v8df) __C,
-						     (__mmask8) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
-						   (__v16sf) __B,
-						   (__v16sf) __C,
-						   (__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
-			     __m512 __C, const int __R)
-{
-  return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
-						    (__v16sf) __B,
-						    (__v16sf) __C,
-						    (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
-			      __mmask16 __U, const int __R)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
-						    (__v16sf) __B,
-						    (__v16sf) __C,
-						    (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
-			      __m512 __C, const int __R)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
-						    (__v16sf) __B,
-						    (__v16sf) __C,
-						    (__mmask16) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
-						    (__v8df) __B,
-						    -(__v8df) __C,
-						    (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
-			     __m512d __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
-						     (__v8df) __B,
-						     (__v8df) __C,
-						     (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
-			      __mmask8 __U, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
-						      (__v8df) __B,
-						      (__v8df) __C,
-						      (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
-			      __m512d __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
-						     (__v8df) __B,
-						     -(__v8df) __C,
-						     (__mmask8) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
-						   (__v16sf) __B,
-						   -(__v16sf) __C,
-						   (__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
-			     __m512 __C, const int __R)
-{
-  return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
-						    (__v16sf) __B,
-						    (__v16sf) __C,
-						    (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
-			      __mmask16 __U, const int __R)
-{
-  return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
-						     (__v16sf) __B,
-						     (__v16sf) __C,
-						     (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
-			      __m512 __C, const int __R)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
-						    (__v16sf) __B,
-						    -(__v16sf) __C,
-						    (__mmask16) __U, __R);
-}
-#else
-#define _mm512_fmadd_round_pd(A, B, C, R)            \
-    (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R)
-
-#define _mm512_mask_fmadd_round_pd(A, U, B, C, R)    \
-    (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R)
-
-#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R)   \
-    (__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R)
-
-#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R)   \
-    (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R)
-
-#define _mm512_fmadd_round_ps(A, B, C, R)            \
-    (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R)
-
-#define _mm512_mask_fmadd_round_ps(A, U, B, C, R)    \
-    (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R)
-
-#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R)   \
-    (__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R)
-
-#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R)   \
-    (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R)
-
-#define _mm512_fmsub_round_pd(A, B, C, R)            \
-    (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), -1, R)
-
-#define _mm512_mask_fmsub_round_pd(A, U, B, C, R)    \
-    (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), U, R)
-
-#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R)   \
-    (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R)
-
-#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R)   \
-    (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, -(C), U, R)
-
-#define _mm512_fmsub_round_ps(A, B, C, R)            \
-    (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), -1, R)
-
-#define _mm512_mask_fmsub_round_ps(A, U, B, C, R)    \
-    (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), U, R)
-
-#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R)   \
-    (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R)
-
-#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R)   \
-    (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, -(C), U, R)
-
-#define _mm512_fmaddsub_round_pd(A, B, C, R)            \
-    (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R)
-
-#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R)    \
-    (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R)
-
-#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R)   \
-    (__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R)
-
-#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R)   \
-    (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R)
-
-#define _mm512_fmaddsub_round_ps(A, B, C, R)            \
-    (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R)
-
-#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R)    \
-    (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R)
-
-#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R)   \
-    (__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R)
-
-#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R)   \
-    (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R)
-
-#define _mm512_fmsubadd_round_pd(A, B, C, R)            \
-    (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R)
-
-#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R)    \
-    (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R)
-
-#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R)   \
-    (__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R)
-
-#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R)   \
-    (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R)
-
-#define _mm512_fmsubadd_round_ps(A, B, C, R)            \
-    (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R)
-
-#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R)    \
-    (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R)
-
-#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R)   \
-    (__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R)
-
-#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R)   \
-    (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R)
-
-#define _mm512_fnmadd_round_pd(A, B, C, R)            \
-    (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, C, -1, R)
-
-#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R)    \
-    (__m512d)__builtin_ia32_vfnmaddpd512_mask(-(A), B, C, U, R)
-
-#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R)   \
-    (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), B, C, U, R)
-
-#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R)   \
-    (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, C, U, R)
-
-#define _mm512_fnmadd_round_ps(A, B, C, R)            \
-    (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, C, -1, R)
-
-#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R)    \
-    (__m512)__builtin_ia32_vfnmaddps512_mask(-(A), B, C, U, R)
-
-#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R)   \
-    (__m512)__builtin_ia32_vfmaddps512_mask3(-(A), B, C, U, R)
-
-#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R)   \
-    (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, C, U, R)
-
-#define _mm512_fnmsub_round_pd(A, B, C, R)            \
-    (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R)
-
-#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R)    \
-    (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R)
-
-#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R)   \
-    (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R)
-
-#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R)   \
-    (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R)
-
-#define _mm512_fnmsub_round_ps(A, B, C, R)            \
-    (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R)
-
-#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R)    \
-    (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R)
-
-#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R)   \
-    (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R)
-
-#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R)   \
-    (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R)
-#endif
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_abs_epi64 (__m512i __A)
-{
-  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
-						 (__v8di)
-						 _mm512_undefined_si512 (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
-						 (__v8di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
-						 (__v8di)
-						 _mm512_setzero_si512 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_abs_epi32 (__m512i __A)
-{
-  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
-						 (__v16si)
-						 _mm512_undefined_si512 (),
-						 (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
-						 (__v16si) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
-						 (__v16si)
-						 _mm512_setzero_si512 (),
-						 (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_broadcastss_ps (__m128 __A)
-{
-  return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
-						 (__v16sf)
-						 _mm512_undefined_ps (),
-						 (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
-{
-  return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
-						 (__v16sf) __O, __M);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
-{
-  return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
-						 (__v16sf)
-						 _mm512_setzero_ps (),
-						 __M);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_broadcastsd_pd (__m128d __A)
-{
-  return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
-						  (__v8df)
-						  _mm512_undefined_pd (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
-{
-  return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
-						  (__v8df) __O, __M);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
-{
-  return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
-						  (__v8df)
-						  _mm512_setzero_pd (),
-						  __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_broadcastd_epi32 (__m128i __A)
-{
-  return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
-						  (__v16si)
-						  _mm512_undefined_si512 (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
-						  (__v16si) __O, __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
-						  (__v16si)
-						  _mm512_setzero_si512 (),
-						  __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_set1_epi32 (int __A)
-{
-  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
-							   (__v16si)
-							   _mm512_undefined_si512 (),
-							   (__mmask16)(-1));
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
-{
-  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
-							   __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
-{
-  return (__m512i)
-	 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
-						 (__v16si) _mm512_setzero_si512 (),
-						 __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_broadcastq_epi64 (__m128i __A)
-{
-  return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
-						  (__v8di)
-						  _mm512_undefined_si512 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
-						  (__v8di) __O, __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
-						  (__v8di)
-						  _mm512_setzero_si512 (),
-						  __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_set1_epi64 (long long __A)
-{
-  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
-							   (__v8di)
-							   _mm512_undefined_si512 (),
-							   (__mmask8)(-1));
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
-{
-  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
-							   __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
-{
-  return (__m512i)
-	 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
-						 (__v8di) _mm512_setzero_si512 (),
-						 __M);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_broadcast_f32x4 (__m128 __A)
-{
-  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
-						     (__v16sf)
-						     _mm512_undefined_ps (),
-						     (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
-{
-  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
-						     (__v16sf) __O,
-						     __M);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
-{
-  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
-						     (__v16sf)
-						     _mm512_setzero_ps (),
-						     __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_broadcast_i32x4 (__m128i __A)
-{
-  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
-						      (__v16si)
-						      _mm512_undefined_si512 (),
-						      (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
-						      (__v16si) __O,
-						      __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
-{
-  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
-						      (__v16si)
-						      _mm512_setzero_si512 (),
-						      __M);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_broadcast_f64x4 (__m256d __A)
-{
-  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
-						      (__v8df)
-						      _mm512_undefined_pd (),
-						      (__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
-{
-  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
-						      (__v8df) __O,
-						      __M);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
-{
-  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
-						      (__v8df)
-						      _mm512_setzero_pd (),
-						      __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_broadcast_i64x4 (__m256i __A)
-{
-  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
-						      (__v8di)
-						      _mm512_undefined_si512 (),
-						      (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
-{
-  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
-						      (__v8di) __O,
-						      __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
-{
-  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
-						      (__v8di)
-						      _mm512_setzero_si512 (),
-						      __M);
-}
-
-typedef enum
-{
-  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
-  _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
-  _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
-  _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
-  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
-  _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
-  _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
-  _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
-  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
-  _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
-  _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
-  _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
-  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
-  _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
-  _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
-  _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
-  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
-  _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
-  _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
-  _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
-  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
-  _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
-  _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
-  _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
-  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
-  _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
-  _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
-  _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
-  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
-  _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
-  _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
-  _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
-  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
-  _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
-  _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
-  _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
-  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
-  _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
-  _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
-  _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
-  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
-  _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
-  _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
-  _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
-  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
-  _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
-  _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
-  _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
-  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
-  _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
-  _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
-  _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
-  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
-  _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
-  _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
-  _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
-  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
-  _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
-  _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
-  _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
-  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
-  _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
-  _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
-  _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
-  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
-  _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
-  _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
-  _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
-  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
-  _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
-  _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
-  _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
-  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
-  _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
-  _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
-  _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
-  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
-  _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
-  _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
-  _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
-  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
-  _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
-  _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
-  _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
-  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
-  _MM_PERM_DDDD = 0xFF
-} _MM_PERM_ENUM;
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
-{
-  return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
-						  __mask,
-						  (__v16si)
-						  _mm512_undefined_si512 (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
-			   _MM_PERM_ENUM __mask)
-{
-  return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
-						  __mask,
-						  (__v16si) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
-{
-  return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
-						  __mask,
-						  (__v16si)
-						  _mm512_setzero_si512 (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
-{
-  return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
-						   (__v8di) __B, __imm,
-						   (__v8di)
-						   _mm512_undefined_si512 (),
-						   (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
-			   __m512i __B, const int __imm)
-{
-  return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
-						   (__v8di) __B, __imm,
-						   (__v8di) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
-			    const int __imm)
-{
-  return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
-						   (__v8di) __B, __imm,
-						   (__v8di)
-						   _mm512_setzero_si512 (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
-{
-  return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
-						   (__v16si) __B,
-						   __imm,
-						   (__v16si)
-						   _mm512_undefined_si512 (),
-						   (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
-			   __m512i __B, const int __imm)
-{
-  return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
-						   (__v16si) __B,
-						   __imm,
-						   (__v16si) __W,
-						   (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
-			    const int __imm)
-{
-  return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
-						   (__v16si) __B,
-						   __imm,
-						   (__v16si)
-						   _mm512_setzero_si512 (),
-						   (__mmask16) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
-{
-  return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
-						   (__v8df) __B, __imm,
-						   (__v8df)
-						   _mm512_undefined_pd (),
-						   (__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
-			   __m512d __B, const int __imm)
-{
-  return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
-						   (__v8df) __B, __imm,
-						   (__v8df) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
-			    const int __imm)
-{
-  return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
-						   (__v8df) __B, __imm,
-						   (__v8df)
-						   _mm512_setzero_pd (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
-{
-  return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
-						  (__v16sf) __B, __imm,
-						  (__v16sf)
-						  _mm512_undefined_ps (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
-			   __m512 __B, const int __imm)
-{
-  return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
-						  (__v16sf) __B, __imm,
-						  (__v16sf) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
-			    const int __imm)
-{
-  return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
-						  (__v16sf) __B, __imm,
-						  (__v16sf)
-						  _mm512_setzero_ps (),
-						  (__mmask16) __U);
-}
-
-#else
-#define _mm512_shuffle_epi32(X, C)                                      \
-  ((__m512i)  __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
-    (__v16si)(__m512i)_mm512_undefined_si512 (),\
-    (__mmask16)-1))
-
-#define _mm512_mask_shuffle_epi32(W, U, X, C)                           \
-  ((__m512i)  __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
-    (__v16si)(__m512i)(W),\
-    (__mmask16)(U)))
-
-#define _mm512_maskz_shuffle_epi32(U, X, C)                             \
-  ((__m512i)  __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
-    (__v16si)(__m512i)_mm512_setzero_si512 (),\
-    (__mmask16)(U)))
-
-#define _mm512_shuffle_i64x2(X, Y, C)                                   \
-  ((__m512i)  __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X),     \
-      (__v8di)(__m512i)(Y), (int)(C),\
-    (__v8di)(__m512i)_mm512_undefined_si512 (),\
-    (__mmask8)-1))
-
-#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C)                        \
-  ((__m512i)  __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X),     \
-      (__v8di)(__m512i)(Y), (int)(C),\
-    (__v8di)(__m512i)(W),\
-    (__mmask8)(U)))
-
-#define _mm512_maskz_shuffle_i64x2(U, X, Y, C)                          \
-  ((__m512i)  __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X),     \
-      (__v8di)(__m512i)(Y), (int)(C),\
-    (__v8di)(__m512i)_mm512_setzero_si512 (),\
-    (__mmask8)(U)))
-
-#define _mm512_shuffle_i32x4(X, Y, C)                                   \
-  ((__m512i)  __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X),    \
-      (__v16si)(__m512i)(Y), (int)(C),\
-    (__v16si)(__m512i)_mm512_undefined_si512 (),\
-    (__mmask16)-1))
-
-#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C)                        \
-  ((__m512i)  __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X),    \
-      (__v16si)(__m512i)(Y), (int)(C),\
-    (__v16si)(__m512i)(W),\
-    (__mmask16)(U)))
-
-#define _mm512_maskz_shuffle_i32x4(U, X, Y, C)                          \
-  ((__m512i)  __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X),    \
-      (__v16si)(__m512i)(Y), (int)(C),\
-    (__v16si)(__m512i)_mm512_setzero_si512 (),\
-    (__mmask16)(U)))
-
-#define _mm512_shuffle_f64x2(X, Y, C)                                   \
-  ((__m512d)  __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X),     \
-      (__v8df)(__m512d)(Y), (int)(C),\
-    (__v8df)(__m512d)_mm512_undefined_pd(),\
-    (__mmask8)-1))
-
-#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C)                        \
-  ((__m512d)  __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X),     \
-      (__v8df)(__m512d)(Y), (int)(C),\
-    (__v8df)(__m512d)(W),\
-    (__mmask8)(U)))
-
-#define _mm512_maskz_shuffle_f64x2(U, X, Y, C)                         \
-  ((__m512d)  __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X),    \
-      (__v8df)(__m512d)(Y), (int)(C),\
-    (__v8df)(__m512d)_mm512_setzero_pd(),\
-    (__mmask8)(U)))
-
-#define _mm512_shuffle_f32x4(X, Y, C)                                  \
-  ((__m512)  __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X),     \
-      (__v16sf)(__m512)(Y), (int)(C),\
-    (__v16sf)(__m512)_mm512_undefined_ps(),\
-    (__mmask16)-1))
-
-#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C)                       \
-  ((__m512)  __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X),     \
-      (__v16sf)(__m512)(Y), (int)(C),\
-    (__v16sf)(__m512)(W),\
-    (__mmask16)(U)))
-
-#define _mm512_maskz_shuffle_f32x4(U, X, Y, C)                         \
-  ((__m512)  __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X),     \
-      (__v16sf)(__m512)(Y), (int)(C),\
-    (__v16sf)(__m512)_mm512_setzero_ps(),\
-    (__mmask16)(U)))
-#endif
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rolv_epi32 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si)
-						  _mm512_undefined_si512 (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si)
-						  _mm512_setzero_si512 (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rorv_epi32 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si)
-						  _mm512_undefined_si512 (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si)
-						  _mm512_setzero_si512 (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rolv_epi64 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di)
-						  _mm512_undefined_si512 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di)
-						  _mm512_setzero_si512 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rorv_epi64 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di)
-						  _mm512_undefined_si512 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di)
-						  _mm512_setzero_si512 (),
-						  (__mmask8) __U);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
-{
-  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
-						     (__v8si)
-						     _mm256_undefined_si256 (),
-						     (__mmask8) -1, __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
-				const int __R)
-{
-  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
-						     (__v8si) __W,
-						     (__mmask8) __U, __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
-{
-  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
-						     (__v8si)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U, __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
-{
-  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
-						      (__v8si)
-						      _mm256_undefined_si256 (),
-						      (__mmask8) -1, __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
-				const int __R)
-{
-  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
-						      (__v8si) __W,
-						      (__mmask8) __U, __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
-{
-  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
-						      (__v8si)
-						      _mm256_setzero_si256 (),
-						      (__mmask8) __U, __R);
-}
-#else
-#define _mm512_cvtt_roundpd_epi32(A, B)		     \
-    ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
-
-#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B)   \
-    ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B))
-
-#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B)     \
-    ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
-
-#define _mm512_cvtt_roundpd_epu32(A, B)		     \
-    ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
-
-#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B)   \
-    ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B))
-
-#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B)     \
-    ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
-{
-  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
-						    (__v8si)
-						    _mm256_undefined_si256 (),
-						    (__mmask8) -1, __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
-			       const int __R)
-{
-  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
-						    (__v8si) __W,
-						    (__mmask8) __U, __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
-{
-  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
-						    (__v8si)
-						    _mm256_setzero_si256 (),
-						    (__mmask8) __U, __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
-{
-  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
-						     (__v8si)
-						     _mm256_undefined_si256 (),
-						     (__mmask8) -1, __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
-			       const int __R)
-{
-  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
-						     (__v8si) __W,
-						     (__mmask8) __U, __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
-{
-  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
-						     (__v8si)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U, __R);
-}
-#else
-#define _mm512_cvt_roundpd_epi32(A, B)		    \
-    ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
-
-#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B)   \
-    ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)(W), U, B))
-
-#define _mm512_maskz_cvt_roundpd_epi32(U, A, B)     \
-    ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
-
-#define _mm512_cvt_roundpd_epu32(A, B)		    \
-    ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
-
-#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B)   \
-    ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)(W), U, B))
-
-#define _mm512_maskz_cvt_roundpd_epu32(U, A, B)     \
-    ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
-{
-  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
-						     (__v16si)
-						     _mm512_undefined_si512 (),
-						     (__mmask16) -1, __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
-				const int __R)
-{
-  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
-						     (__v16si) __W,
-						     (__mmask16) __U, __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
-{
-  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
-						     (__v16si)
-						     _mm512_setzero_si512 (),
-						     (__mmask16) __U, __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
-{
-  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
-						      (__v16si)
-						      _mm512_undefined_si512 (),
-						      (__mmask16) -1, __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
-				const int __R)
-{
-  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
-						      (__v16si) __W,
-						      (__mmask16) __U, __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
-{
-  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
-						      (__v16si)
-						      _mm512_setzero_si512 (),
-						      (__mmask16) __U, __R);
-}
-#else
-#define _mm512_cvtt_roundps_epi32(A, B)		     \
-    ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
-
-#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B)   \
-    ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B))
-
-#define _mm512_maskz_cvtt_roundps_epi32(U, A, B)     \
-    ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
-
-#define _mm512_cvtt_roundps_epu32(A, B)		     \
-    ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
-
-#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B)   \
-    ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B))
-
-#define _mm512_maskz_cvtt_roundps_epu32(U, A, B)     \
-    ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
-{
-  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
-						    (__v16si)
-						    _mm512_undefined_si512 (),
-						    (__mmask16) -1, __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
-			       const int __R)
-{
-  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
-						    (__v16si) __W,
-						    (__mmask16) __U, __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
-{
-  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
-						    (__v16si)
-						    _mm512_setzero_si512 (),
-						    (__mmask16) __U, __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
-{
-  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
-						     (__v16si)
-						     _mm512_undefined_si512 (),
-						     (__mmask16) -1, __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
-			       const int __R)
-{
-  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
-						     (__v16si) __W,
-						     (__mmask16) __U, __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
-{
-  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
-						     (__v16si)
-						     _mm512_setzero_si512 (),
-						     (__mmask16) __U, __R);
-}
-#else
-#define _mm512_cvt_roundps_epi32(A, B)		    \
-    ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
-
-#define _mm512_mask_cvt_roundps_epi32(W, U, A, B)   \
-    ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B))
-
-#define _mm512_maskz_cvt_roundps_epi32(U, A, B)     \
-    ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
-
-#define _mm512_cvt_roundps_epu32(A, B)		    \
-    ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
-
-#define _mm512_mask_cvt_roundps_epu32(W, U, A, B)   \
-    ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B))
-
-#define _mm512_maskz_cvt_roundps_epu32(U, A, B)     \
-    ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
-#endif
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtu32_sd (__m128d __A, unsigned __B)
-{
-  return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
-}
-
-#ifdef __x86_64__
-#ifdef __OPTIMIZE__
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
-{
-  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
-{
-  return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
-{
-  return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
-}
-#else
-#define _mm_cvt_roundu64_sd(A, B, C)   \
-    (__m128d)__builtin_ia32_cvtusi2sd64(A, B, C)
-
-#define _mm_cvt_roundi64_sd(A, B, C)   \
-    (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
-
-#define _mm_cvt_roundsi64_sd(A, B, C)   \
-    (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
-#endif
-
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
-{
-  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
-{
-  return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
-{
-  return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
-}
-#else
-#define _mm_cvt_roundu32_ss(A, B, C)   \
-    (__m128)__builtin_ia32_cvtusi2ss32(A, B, C)
-
-#define _mm_cvt_roundi32_ss(A, B, C)   \
-    (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
-
-#define _mm_cvt_roundsi32_ss(A, B, C)   \
-    (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
-#endif
-
-#ifdef __x86_64__
-#ifdef __OPTIMIZE__
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
-{
-  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
-{
-  return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
-{
-  return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
-}
-#else
-#define _mm_cvt_roundu64_ss(A, B, C)   \
-    (__m128)__builtin_ia32_cvtusi2ss64(A, B, C)
-
-#define _mm_cvt_roundi64_ss(A, B, C)   \
-    (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
-
-#define _mm_cvt_roundsi64_ss(A, B, C)   \
-    (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
-#endif
-
-#endif
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepi32_epi8 (__m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
-						  (__v16qi)
-						  _mm_undefined_si128 (),
-						  (__mmask16) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
-{
-  __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
-						  (__v16qi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
-						  (__v16qi)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtsepi32_epi8 (__m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
-						   (__v16qi)
-						   _mm_undefined_si128 (),
-						   (__mmask16) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
-{
-  __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
-						   (__v16qi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
-						   (__v16qi)
-						   _mm_setzero_si128 (),
-						   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtusepi32_epi8 (__m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
-						    (__v16qi)
-						    _mm_undefined_si128 (),
-						    (__mmask16) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
-{
-  __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
-						    (__v16qi) __O,
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
-						    (__v16qi)
-						    _mm_setzero_si128 (),
-						    __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepi32_epi16 (__m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
-						  (__v16hi)
-						  _mm256_undefined_si256 (),
-						  (__mmask16) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
-{
-  __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
-						  (__v16hi) __O, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
-						  (__v16hi)
-						  _mm256_setzero_si256 (),
-						  __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtsepi32_epi16 (__m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
-						   (__v16hi)
-						   _mm256_undefined_si256 (),
-						   (__mmask16) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
-{
-  __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
-						   (__v16hi) __O, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
-						   (__v16hi)
-						   _mm256_setzero_si256 (),
-						   __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtusepi32_epi16 (__m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
-						    (__v16hi)
-						    _mm256_undefined_si256 (),
-						    (__mmask16) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
-{
-  __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
-						    (__v16hi) __O,
-						    __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
-						    (__v16hi)
-						    _mm256_setzero_si256 (),
-						    __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepi64_epi32 (__m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
-						  (__v8si)
-						  _mm256_undefined_si256 (),
-						  (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
-{
-  __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
-						  (__v8si) __O, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
-						  (__v8si)
-						  _mm256_setzero_si256 (),
-						  __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtsepi64_epi32 (__m512i __A)
-{
-  __v8si __O;
-  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
-						   (__v8si)
-						   _mm256_undefined_si256 (),
-						   (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
-{
-  __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
-						   (__v8si) __O, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
-						   (__v8si)
-						   _mm256_setzero_si256 (),
-						   __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtusepi64_epi32 (__m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
-						    (__v8si)
-						    _mm256_undefined_si256 (),
-						    (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
-{
-  __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
-						    (__v8si) __O, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
-{
-  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
-						    (__v8si)
-						    _mm256_setzero_si256 (),
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepi64_epi16 (__m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
-						  (__v8hi)
-						  _mm_undefined_si128 (),
-						  (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
-{
-  __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
-						  (__v8hi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
-						  (__v8hi)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtsepi64_epi16 (__m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
-						   (__v8hi)
-						   _mm_undefined_si128 (),
-						   (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
-{
-  __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
-						   (__v8hi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
-						   (__v8hi)
-						   _mm_setzero_si128 (),
-						   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtusepi64_epi16 (__m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
-						    (__v8hi)
-						    _mm_undefined_si128 (),
-						    (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
-{
-  __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
-						    (__v8hi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
-						    (__v8hi)
-						    _mm_setzero_si128 (),
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepi64_epi8 (__m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
-						  (__v16qi)
-						  _mm_undefined_si128 (),
-						  (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
-{
-  __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
-						  (__v16qi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
-						  (__v16qi)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtsepi64_epi8 (__m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
-						   (__v16qi)
-						   _mm_undefined_si128 (),
-						   (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
-{
-  __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
-						   (__v16qi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
-						   (__v16qi)
-						   _mm_setzero_si128 (),
-						   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtusepi64_epi8 (__m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
-						    (__v16qi)
-						    _mm_undefined_si128 (),
-						    (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
-{
-  __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
-						    (__v16qi) __O,
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
-						    (__v16qi)
-						    _mm_setzero_si128 (),
-						    __M);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepi32_pd (__m256i __A)
-{
-  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
-						    (__v8df)
-						    _mm512_undefined_pd (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
-{
-  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
-						    (__v8df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
-{
-  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
-						    (__v8df)
-						    _mm512_setzero_pd (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepu32_pd (__m256i __A)
-{
-  return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
-						     (__v8df)
-						     _mm512_undefined_pd (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
-{
-  return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
-						     (__v8df) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
-{
-  return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
-						     (__v8df)
-						     _mm512_setzero_pd (),
-						     (__mmask8) __U);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
-{
-  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
-						   (__v16sf)
-						   _mm512_undefined_ps (),
-						   (__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
-			       const int __R)
-{
-  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
-						   (__v16sf) __W,
-						   (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
-{
-  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
-						   (__v16sf)
-						   _mm512_setzero_ps (),
-						   (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
-{
-  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
-						    (__v16sf)
-						    _mm512_undefined_ps (),
-						    (__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
-			       const int __R)
-{
-  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
-						    (__v16sf) __W,
-						    (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
-{
-  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
-						    (__v16sf)
-						    _mm512_setzero_ps (),
-						    (__mmask16) __U, __R);
-}
-
-#else
-#define _mm512_cvt_roundepi32_ps(A, B)        \
-    (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
-
-#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B)   \
-    (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), W, U, B)
-
-#define _mm512_maskz_cvt_roundepi32_ps(U, A, B)      \
-    (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
-
-#define _mm512_cvt_roundepu32_ps(A, B)        \
-    (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
-
-#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B)   \
-    (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), W, U, B)
-
-#define _mm512_maskz_cvt_roundepu32_ps(U, A, B)      \
-    (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_extractf64x4_pd (__m512d __A, const int __imm)
-{
-  return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
-						     __imm,
-						     (__v4df)
-						     _mm256_undefined_pd (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
-			     const int __imm)
-{
-  return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
-						     __imm,
-						     (__v4df) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
-{
-  return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
-						     __imm,
-						     (__v4df)
-						     _mm256_setzero_pd (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_extractf32x4_ps (__m512 __A, const int __imm)
-{
-  return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
-						    __imm,
-						    (__v4sf)
-						    _mm_undefined_ps (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
-			     const int __imm)
-{
-  return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
-						    __imm,
-						    (__v4sf) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
-{
-  return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
-						    __imm,
-						    (__v4sf)
-						    _mm_setzero_ps (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
-{
-  return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
-						     __imm,
-						     (__v4di)
-						     _mm256_undefined_si256 (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
-				const int __imm)
-{
-  return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
-						     __imm,
-						     (__v4di) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
-{
-  return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
-						     __imm,
-						     (__v4di)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
-{
-  return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
-						     __imm,
-						     (__v4si)
-						     _mm_undefined_si128 (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
-				const int __imm)
-{
-  return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
-						     __imm,
-						     (__v4si) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
-{
-  return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
-						     __imm,
-						     (__v4si)
-						     _mm_setzero_si128 (),
-						     (__mmask8) __U);
-}
-#else
-
-#define _mm512_extractf64x4_pd(X, C)                                    \
-  ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X),   \
-    (int) (C),\
-    (__v4df)(__m256d)_mm256_undefined_pd(),\
-    (__mmask8)-1))
-
-#define _mm512_mask_extractf64x4_pd(W, U, X, C)                         \
-  ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X),   \
-    (int) (C),\
-    (__v4df)(__m256d)(W),\
-    (__mmask8)(U)))
-
-#define _mm512_maskz_extractf64x4_pd(U, X, C)                           \
-  ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X),   \
-    (int) (C),\
-    (__v4df)(__m256d)_mm256_setzero_pd(),\
-    (__mmask8)(U)))
-
-#define _mm512_extractf32x4_ps(X, C)                                    \
-  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X),    \
-    (int) (C),\
-    (__v4sf)(__m128)_mm_undefined_ps(),\
-    (__mmask8)-1))
-
-#define _mm512_mask_extractf32x4_ps(W, U, X, C)                         \
-  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X),    \
-    (int) (C),\
-    (__v4sf)(__m128)(W),\
-    (__mmask8)(U)))
-
-#define _mm512_maskz_extractf32x4_ps(U, X, C)                           \
-  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X),    \
-    (int) (C),\
-    (__v4sf)(__m128)_mm_setzero_ps(),\
-    (__mmask8)(U)))
-
-#define _mm512_extracti64x4_epi64(X, C)                                 \
-  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X),   \
-    (int) (C),\
-    (__v4di)(__m256i)_mm256_undefined_si256 (),\
-    (__mmask8)-1))
-
-#define _mm512_mask_extracti64x4_epi64(W, U, X, C)                      \
-  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X),   \
-    (int) (C),\
-    (__v4di)(__m256i)(W),\
-    (__mmask8)(U)))
-
-#define _mm512_maskz_extracti64x4_epi64(U, X, C)                        \
-  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X),   \
-    (int) (C),\
-    (__v4di)(__m256i)_mm256_setzero_si256 (),\
-    (__mmask8)(U)))
-
-#define _mm512_extracti32x4_epi32(X, C)                                 \
-  ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X),  \
-    (int) (C),\
-    (__v4si)(__m128i)_mm_undefined_si128 (),\
-    (__mmask8)-1))
-
-#define _mm512_mask_extracti32x4_epi32(W, U, X, C)                      \
-  ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X),  \
-    (int) (C),\
-    (__v4si)(__m128i)(W),\
-    (__mmask8)(U)))
-
-#define _mm512_maskz_extracti32x4_epi32(U, X, C)                        \
-  ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X),  \
-    (int) (C),\
-    (__v4si)(__m128i)_mm_setzero_si128 (),\
-    (__mmask8)(U)))
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
-{
-  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
-						    (__v4si) __B,
-						    __imm,
-						    (__v16si) __A, -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
-{
-  return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
-						   (__v4sf) __B,
-						   __imm,
-						   (__v16sf) __A, -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
-{
-  return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
-						    (__v4di) __B,
-						    __imm,
-						    (__v8di)
-						    _mm512_undefined_si512 (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
-			 __m256i __B, const int __imm)
-{
-  return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
-						    (__v4di) __B,
-						    __imm,
-						    (__v8di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
-			  const int __imm)
-{
-  return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
-						    (__v4di) __B,
-						    __imm,
-						    (__v8di)
-						    _mm512_setzero_si512 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
-{
-  return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
-						    (__v4df) __B,
-						    __imm,
-						    (__v8df)
-						    _mm512_undefined_pd (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
-			 __m256d __B, const int __imm)
-{
-  return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
-						    (__v4df) __B,
-						    __imm,
-						    (__v8df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
-			  const int __imm)
-{
-  return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
-						    (__v4df) __B,
-						    __imm,
-						    (__v8df)
-						    _mm512_setzero_pd (),
-						    (__mmask8) __U);
-}
-#else
-#define _mm512_insertf32x4(X, Y, C)                                     \
-  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
-    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1)))
-
-#define _mm512_inserti32x4(X, Y, C)                                     \
-  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
-    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1)))
-
-#define _mm512_insertf64x4(X, Y, C)                                     \
-  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X),    \
-    (__v4df)(__m256d) (Y), (int) (C),					\
-    (__v8df)(__m512d)_mm512_undefined_pd(),				\
-    (__mmask8)-1))
-
-#define _mm512_mask_insertf64x4(W, U, X, Y, C)                          \
-  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X),    \
-    (__v4df)(__m256d) (Y), (int) (C),					\
-    (__v8df)(__m512d)(W),						\
-    (__mmask8)(U)))
-
-#define _mm512_maskz_insertf64x4(U, X, Y, C)                            \
-  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X),    \
-    (__v4df)(__m256d) (Y), (int) (C),					\
-    (__v8df)(__m512d)_mm512_setzero_pd(),				\
-    (__mmask8)(U)))
-
-#define _mm512_inserti64x4(X, Y, C)                                     \
-  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X),    \
-    (__v4di)(__m256i) (Y), (int) (C),					\
-    (__v8di)(__m512i)_mm512_undefined_si512 (),				\
-    (__mmask8)-1))
-
-#define _mm512_mask_inserti64x4(W, U, X, Y, C)                          \
-  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X),    \
-    (__v4di)(__m256i) (Y), (int) (C),\
-    (__v8di)(__m512i)(W),\
-    (__mmask8)(U)))
-
-#define _mm512_maskz_inserti64x4(U, X, Y, C)                            \
-  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X),    \
-    (__v4di)(__m256i) (Y), (int) (C),					\
-    (__v8di)(__m512i)_mm512_setzero_si512 (),				\
-    (__mmask8)(U)))
-#endif
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_loadu_pd (void const *__P)
-{
-  return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
-						   (__v8df)
-						   _mm512_undefined_pd (),
-						   (__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
-{
-  return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
-						   (__v8df) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
-{
-  return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
-						   (__v8df)
-						   _mm512_setzero_pd (),
-						   (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_storeu_pd (void *__P, __m512d __A)
-{
-  __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
-				   (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
-{
-  __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
-				   (__mmask8) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_loadu_ps (void const *__P)
-{
-  return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
-						  (__v16sf)
-						  _mm512_undefined_ps (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
-{
-  return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
-						  (__v16sf) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
-{
-  return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
-						  (__v16sf)
-						  _mm512_setzero_ps (),
-						  (__mmask16) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_storeu_ps (void *__P, __m512 __A)
-{
-  __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
-				   (__mmask16) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
-{
-  __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
-				   (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
-{
-  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
-						     (__v8di) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
-{
-  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
-						     (__v8di)
-						     _mm512_setzero_si512 (),
-						     (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
-{
-  __builtin_ia32_storedqudi512_mask ((__v8di *) __P, (__v8di) __A,
-				     (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_loadu_si512 (void const *__P)
-{
-  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
-						     (__v16si)
-						     _mm512_setzero_si512 (),
-						     (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
-{
-  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
-						     (__v16si) __W,
-						     (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
-{
-  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
-						     (__v16si)
-						     _mm512_setzero_si512 (),
-						     (__mmask16) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_storeu_si512 (void *__P, __m512i __A)
-{
-  __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
-				     (__mmask16) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
-{
-  __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
-				     (__mmask16) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_permutevar_pd (__m512d __A, __m512i __C)
-{
-  return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
-							(__v8di) __C,
-							(__v8df)
-							_mm512_undefined_pd (),
-							(__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
-{
-  return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
-							(__v8di) __C,
-							(__v8df) __W,
-							(__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
-{
-  return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
-							(__v8di) __C,
-							(__v8df)
-							_mm512_setzero_pd (),
-							(__mmask8) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_permutevar_ps (__m512 __A, __m512i __C)
-{
-  return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
-						       (__v16si) __C,
-						       (__v16sf)
-						       _mm512_undefined_ps (),
-						       (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
-{
-  return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
-						       (__v16si) __C,
-						       (__v16sf) __W,
-						       (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
-{
-  return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
-						       (__v16si) __C,
-						       (__v16sf)
-						       _mm512_setzero_ps (),
-						       (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
-						       /* idx */ ,
-						       (__v8di) __A,
-						       (__v8di) __B,
-						       (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
-				__m512i __B)
-{
-  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
-						       /* idx */ ,
-						       (__v8di) __A,
-						       (__v8di) __B,
-						       (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
-				 __mmask8 __U, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
-						       (__v8di) __I
-						       /* idx */ ,
-						       (__v8di) __B,
-						       (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
-				 __m512i __I, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
-							/* idx */ ,
-							(__v8di) __A,
-							(__v8di) __B,
-							(__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
-						       /* idx */ ,
-						       (__v16si) __A,
-						       (__v16si) __B,
-						       (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
-				__m512i __I, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
-						       /* idx */ ,
-						       (__v16si) __A,
-						       (__v16si) __B,
-						       (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
-				 __mmask16 __U, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
-						       (__v16si) __I
-						       /* idx */ ,
-						       (__v16si) __B,
-						       (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
-				 __m512i __I, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
-							/* idx */ ,
-							(__v16si) __A,
-							(__v16si) __B,
-							(__mmask16) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
-							/* idx */ ,
-							(__v8df) __A,
-							(__v8df) __B,
-							(__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
-			     __m512d __B)
-{
-  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
-							/* idx */ ,
-							(__v8df) __A,
-							(__v8df) __B,
-							(__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
-			      __m512d __B)
-{
-  return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
-							(__v8di) __I
-							/* idx */ ,
-							(__v8df) __B,
-							(__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
-			      __m512d __B)
-{
-  return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
-							 /* idx */ ,
-							 (__v8df) __A,
-							 (__v8df) __B,
-							 (__mmask8) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
-{
-  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
-						       /* idx */ ,
-						       (__v16sf) __A,
-						       (__v16sf) __B,
-						       (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
-{
-  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
-						       /* idx */ ,
-						       (__v16sf) __A,
-						       (__v16sf) __B,
-						       (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
-			      __m512 __B)
-{
-  return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
-						       (__v16si) __I
-						       /* idx */ ,
-						       (__v16sf) __B,
-						       (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
-			      __m512 __B)
-{
-  return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
-							/* idx */ ,
-							(__v16sf) __A,
-							(__v16sf) __B,
-							(__mmask16) __U);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_permute_pd (__m512d __X, const int __C)
-{
-  return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
-						     (__v8df)
-						     _mm512_undefined_pd (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
-{
-  return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
-						     (__v8df) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
-{
-  return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
-						     (__v8df)
-						     _mm512_setzero_pd (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_permute_ps (__m512 __X, const int __C)
-{
-  return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
-						    (__v16sf)
-						    _mm512_undefined_ps (),
-						    (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
-{
-  return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
-						    (__v16sf) __W,
-						    (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
-{
-  return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
-						    (__v16sf)
-						    _mm512_setzero_ps (),
-						    (__mmask16) __U);
-}
-#else
-#define _mm512_permute_pd(X, C)							    \
-  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C),	    \
-					      (__v8df)(__m512d)_mm512_undefined_pd(),\
-					      (__mmask8)(-1)))
-
-#define _mm512_mask_permute_pd(W, U, X, C)					    \
-  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C),	    \
-					      (__v8df)(__m512d)(W),		    \
-					      (__mmask8)(U)))
-
-#define _mm512_maskz_permute_pd(U, X, C)					    \
-  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C),	    \
-					      (__v8df)(__m512d)_mm512_setzero_pd(), \
-					      (__mmask8)(U)))
-
-#define _mm512_permute_ps(X, C)							    \
-  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C),	    \
-					      (__v16sf)(__m512)_mm512_undefined_ps(),\
-					      (__mmask16)(-1)))
-
-#define _mm512_mask_permute_ps(W, U, X, C)					    \
-  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C),	    \
-					      (__v16sf)(__m512)(W),		    \
-					      (__mmask16)(U)))
-
-#define _mm512_maskz_permute_ps(U, X, C)					    \
-  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C),	    \
-					      (__v16sf)(__m512)_mm512_setzero_ps(), \
-					      (__mmask16)(U)))
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_permutex_epi64 (__m512i __X, const int __I)
-{
-  return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
-						  (__v8di)
-						  _mm512_undefined_si512 (),
-						  (__mmask8) (-1));
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
-			    __m512i __X, const int __I)
-{
-  return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
-						  (__v8di) __W,
-						  (__mmask8) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
-{
-  return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
-						  (__v8di)
-						  _mm512_setzero_si512 (),
-						  (__mmask8) __M);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_permutex_pd (__m512d __X, const int __M)
-{
-  return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
-						  (__v8df)
-						  _mm512_undefined_pd (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
-{
-  return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
-						  (__v8df) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
-{
-  return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
-						  (__v8df)
-						  _mm512_setzero_pd (),
-						  (__mmask8) __U);
-}
-#else
-#define _mm512_permutex_pd(X, M)						\
-  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M),	\
-					    (__v8df)(__m512d)_mm512_undefined_pd(),\
-					    (__mmask8)-1))
-
-#define _mm512_mask_permutex_pd(W, U, X, M)					\
-  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M),	\
-					    (__v8df)(__m512d)(W), (__mmask8)(U)))
-
-#define _mm512_maskz_permutex_pd(U, X, M)					\
-  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M),	\
-					    (__v8df)(__m512d)_mm512_setzero_pd(),\
-					    (__mmask8)(U)))
-
-#define _mm512_permutex_epi64(X, I)			          \
-  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
-					    (int)(I),             \
-					    (__v8di)(__m512i)	  \
-					    (_mm512_undefined_si512 ()),\
-					    (__mmask8)(-1)))
-
-#define _mm512_maskz_permutex_epi64(M, X, I)                 \
-  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
-					    (int)(I),             \
-					    (__v8di)(__m512i)     \
-					    (_mm512_setzero_si512 ()),\
-					    (__mmask8)(M)))
-
-#define _mm512_mask_permutex_epi64(W, M, X, I)               \
-  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
-					    (int)(I),             \
-					    (__v8di)(__m512i)(W), \
-					    (__mmask8)(M)))
-#endif
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
-						     (__v8di) __X,
-						     (__v8di)
-						     _mm512_setzero_si512 (),
-						     __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
-						     (__v8di) __X,
-						     (__v8di)
-						     _mm512_undefined_si512 (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
-			       __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
-						     (__v8di) __X,
-						     (__v8di) __W,
-						     __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
-						     (__v16si) __X,
-						     (__v16si)
-						     _mm512_setzero_si512 (),
-						     __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
-						     (__v16si) __X,
-						     (__v16si)
-						     _mm512_undefined_si512 (),
-						     (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
-			       __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
-						     (__v16si) __X,
-						     (__v16si) __W,
-						     __M);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
-{
-  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
-						     (__v8di) __X,
-						     (__v8df)
-						     _mm512_undefined_pd (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
-{
-  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
-						     (__v8di) __X,
-						     (__v8df) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
-{
-  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
-						     (__v8di) __X,
-						     (__v8df)
-						     _mm512_setzero_pd (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
-{
-  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
-						    (__v16si) __X,
-						    (__v16sf)
-						    _mm512_undefined_ps (),
-						    (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
-{
-  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
-						    (__v16si) __X,
-						    (__v16sf) __W,
-						    (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
-{
-  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
-						    (__v16si) __X,
-						    (__v16sf)
-						    _mm512_setzero_ps (),
-						    (__mmask16) __U);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
-{
-  return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
-						 (__v16sf) __V, __imm,
-						 (__v16sf)
-						 _mm512_undefined_ps (),
-						 (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
-			__m512 __V, const int __imm)
-{
-  return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
-						 (__v16sf) __V, __imm,
-						 (__v16sf) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
-{
-  return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
-						 (__v16sf) __V, __imm,
-						 (__v16sf)
-						 _mm512_setzero_ps (),
-						 (__mmask16) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
-{
-  return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
-						  (__v8df) __V, __imm,
-						  (__v8df)
-						  _mm512_undefined_pd (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
-			__m512d __V, const int __imm)
-{
-  return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
-						  (__v8df) __V, __imm,
-						  (__v8df) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
-			 const int __imm)
-{
-  return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
-						  (__v8df) __V, __imm,
-						  (__v8df)
-						  _mm512_setzero_pd (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
-			  const int __imm, const int __R)
-{
-  return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
-						      (__v8df) __B,
-						      (__v8di) __C,
-						      __imm,
-						      (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
-			       __m512i __C, const int __imm, const int __R)
-{
-  return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
-						      (__v8df) __B,
-						      (__v8di) __C,
-						      __imm,
-						      (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
-				__m512i __C, const int __imm, const int __R)
-{
-  return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
-						       (__v8df) __B,
-						       (__v8di) __C,
-						       __imm,
-						       (__mmask8) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
-			  const int __imm, const int __R)
-{
-  return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
-						     (__v16sf) __B,
-						     (__v16si) __C,
-						     __imm,
-						     (__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
-			       __m512i __C, const int __imm, const int __R)
-{
-  return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
-						     (__v16sf) __B,
-						     (__v16si) __C,
-						     __imm,
-						     (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
-				__m512i __C, const int __imm, const int __R)
-{
-  return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
-						      (__v16sf) __B,
-						      (__v16si) __C,
-						      __imm,
-						      (__mmask16) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
-		       const int __imm, const int __R)
-{
-  return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
-						   (__v2df) __B,
-						   (__v2di) __C, __imm,
-						   (__mmask8) -1, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
-			    __m128i __C, const int __imm, const int __R)
-{
-  return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
-						   (__v2df) __B,
-						   (__v2di) __C, __imm,
-						   (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
-			     __m128i __C, const int __imm, const int __R)
-{
-  return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
-						    (__v2df) __B,
-						    (__v2di) __C,
-						    __imm,
-						    (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
-		       const int __imm, const int __R)
-{
-  return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
-						  (__v4sf) __B,
-						  (__v4si) __C, __imm,
-						  (__mmask8) -1, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
-			    __m128i __C, const int __imm, const int __R)
-{
-  return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
-						  (__v4sf) __B,
-						  (__v4si) __C, __imm,
-						  (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
-			     __m128i __C, const int __imm, const int __R)
-{
-  return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
-						   (__v4sf) __B,
-						   (__v4si) __C, __imm,
-						   (__mmask8) __U, __R);
-}
-
-#else
-#define _mm512_shuffle_pd(X, Y, C)                                      \
-    ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X),           \
-        (__v8df)(__m512d)(Y), (int)(C),\
-    (__v8df)(__m512d)_mm512_undefined_pd(),\
-    (__mmask8)-1))
-
-#define _mm512_mask_shuffle_pd(W, U, X, Y, C)                           \
-    ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X),           \
-        (__v8df)(__m512d)(Y), (int)(C),\
-    (__v8df)(__m512d)(W),\
-    (__mmask8)(U)))
-
-#define _mm512_maskz_shuffle_pd(U, X, Y, C)                             \
-    ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X),           \
-        (__v8df)(__m512d)(Y), (int)(C),\
-    (__v8df)(__m512d)_mm512_setzero_pd(),\
-    (__mmask8)(U)))
-
-#define _mm512_shuffle_ps(X, Y, C)                                      \
-    ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X),            \
-        (__v16sf)(__m512)(Y), (int)(C),\
-    (__v16sf)(__m512)_mm512_undefined_ps(),\
-    (__mmask16)-1))
-
-#define _mm512_mask_shuffle_ps(W, U, X, Y, C)                           \
-    ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X),            \
-        (__v16sf)(__m512)(Y), (int)(C),\
-    (__v16sf)(__m512)(W),\
-    (__mmask16)(U)))
-
-#define _mm512_maskz_shuffle_ps(U, X, Y, C)                             \
-    ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X),            \
-        (__v16sf)(__m512)(Y), (int)(C),\
-    (__v16sf)(__m512)_mm512_setzero_ps(),\
-    (__mmask16)(U)))
-
-#define _mm512_fixupimm_round_pd(X, Y, Z, C, R)					\
-  ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X),	\
-      (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C),		\
-      (__mmask8)(-1), (R)))
-
-#define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R)                          \
-  ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X),    \
-      (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C),             \
-      (__mmask8)(U), (R)))
-
-#define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R)                         \
-  ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X),   \
-      (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C),             \
-      (__mmask8)(U), (R)))
-
-#define _mm512_fixupimm_round_ps(X, Y, Z, C, R)					\
-  ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X),	\
-    (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C),		\
-    (__mmask16)(-1), (R)))
-
-#define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R)                          \
-  ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X),     \
-    (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C),              \
-    (__mmask16)(U), (R)))
-
-#define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R)                         \
-  ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X),    \
-    (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C),              \
-    (__mmask16)(U), (R)))
-
-#define _mm_fixupimm_round_sd(X, Y, Z, C, R)					\
-    ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X),	\
-      (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C),		\
-      (__mmask8)(-1), (R)))
-
-#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R)				\
-    ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X),	\
-      (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C),		\
-      (__mmask8)(U), (R)))
-
-#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R)				\
-    ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X),	\
-      (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C),		\
-      (__mmask8)(U), (R)))
-
-#define _mm_fixupimm_round_ss(X, Y, Z, C, R)					\
-    ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X),	\
-      (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C),		\
-      (__mmask8)(-1), (R)))
-
-#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R)				\
-    ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X),	\
-      (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C),		\
-      (__mmask8)(U), (R)))
-
-#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R)				\
-    ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X),	\
-      (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C),		\
-      (__mmask8)(U), (R)))
-#endif
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_movehdup_ps (__m512 __A)
-{
-  return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
-						   (__v16sf)
-						   _mm512_undefined_ps (),
-						   (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
-						   (__v16sf) __W,
-						   (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
-						   (__v16sf)
-						   _mm512_setzero_ps (),
-						   (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_moveldup_ps (__m512 __A)
-{
-  return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
-						   (__v16sf)
-						   _mm512_undefined_ps (),
-						   (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
-						   (__v16sf) __W,
-						   (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
-						   (__v16sf)
-						   _mm512_setzero_ps (),
-						   (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_or_si512 (__m512i __A, __m512i __B)
-{
-  return (__m512i) ((__v16su) __A | (__v16su) __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_or_epi32 (__m512i __A, __m512i __B)
-{
-  return (__m512i) ((__v16su) __A | (__v16su) __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
-						(__v16si) __B,
-						(__v16si) __W,
-						(__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
-						(__v16si) __B,
-						(__v16si)
-						_mm512_setzero_si512 (),
-						(__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_or_epi64 (__m512i __A, __m512i __B)
-{
-  return (__m512i) ((__v8du) __A | (__v8du) __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
-						(__v8di) __B,
-						(__v8di) __W,
-						(__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
-						(__v8di) __B,
-						(__v8di)
-						_mm512_setzero_si512 (),
-						(__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_xor_si512 (__m512i __A, __m512i __B)
-{
-  return (__m512i) ((__v16su) __A ^ (__v16su) __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_xor_epi32 (__m512i __A, __m512i __B)
-{
-  return (__m512i) ((__v16su) __A ^ (__v16su) __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
-						 (__v16si) __B,
-						 (__v16si) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
-						 (__v16si) __B,
-						 (__v16si)
-						 _mm512_setzero_si512 (),
-						 (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_xor_epi64 (__m512i __A, __m512i __B)
-{
-  return (__m512i) ((__v8du) __A ^ (__v8du) __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
-						 (__v8di) __B,
-						 (__v8di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
-						 (__v8di) __B,
-						 (__v8di)
-						 _mm512_setzero_si512 (),
-						 (__mmask8) __U);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rol_epi32 (__m512i __A, const int __B)
-{
-  return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
-						 (__v16si)
-						 _mm512_undefined_si512 (),
-						 (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
-{
-  return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
-						 (__v16si) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
-{
-  return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
-						 (__v16si)
-						 _mm512_setzero_si512 (),
-						 (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ror_epi32 (__m512i __A, int __B)
-{
-  return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
-						 (__v16si)
-						 _mm512_undefined_si512 (),
-						 (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
-{
-  return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
-						 (__v16si) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
-{
-  return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
-						 (__v16si)
-						 _mm512_setzero_si512 (),
-						 (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rol_epi64 (__m512i __A, const int __B)
-{
-  return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
-						 (__v8di)
-						 _mm512_undefined_si512 (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
-{
-  return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
-						 (__v8di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
-{
-  return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
-						 (__v8di)
-						 _mm512_setzero_si512 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ror_epi64 (__m512i __A, int __B)
-{
-  return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
-						 (__v8di)
-						 _mm512_undefined_si512 (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
-{
-  return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
-						 (__v8di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
-{
-  return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
-						 (__v8di)
-						 _mm512_setzero_si512 (),
-						 (__mmask8) __U);
-}
-
-#else
-#define _mm512_rol_epi32(A, B)						  \
-    ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A),	  \
-					    (int)(B),			  \
-					    (__v16si)_mm512_undefined_si512 (), \
-					    (__mmask16)(-1)))
-#define _mm512_mask_rol_epi32(W, U, A, B)				  \
-    ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A),	  \
-					    (int)(B),			  \
-					    (__v16si)(__m512i)(W),	  \
-					    (__mmask16)(U)))
-#define _mm512_maskz_rol_epi32(U, A, B)					  \
-    ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A),	  \
-					    (int)(B),			  \
-					    (__v16si)_mm512_setzero_si512 (), \
-					    (__mmask16)(U)))
-#define _mm512_ror_epi32(A, B)						  \
-    ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A),	  \
-					    (int)(B),			  \
-					    (__v16si)_mm512_undefined_si512 (), \
-					    (__mmask16)(-1)))
-#define _mm512_mask_ror_epi32(W, U, A, B)				  \
-    ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A),	  \
-					    (int)(B),			  \
-					    (__v16si)(__m512i)(W),	  \
-					    (__mmask16)(U)))
-#define _mm512_maskz_ror_epi32(U, A, B)					  \
-    ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A),	  \
-					    (int)(B),			  \
-					    (__v16si)_mm512_setzero_si512 (), \
-					    (__mmask16)(U)))
-#define _mm512_rol_epi64(A, B)						  \
-    ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A),	  \
-					    (int)(B),			  \
-					    (__v8di)_mm512_undefined_si512 (),  \
-					    (__mmask8)(-1)))
-#define _mm512_mask_rol_epi64(W, U, A, B)				  \
-    ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A),	  \
-					    (int)(B),			  \
-					    (__v8di)(__m512i)(W),	  \
-					    (__mmask8)(U)))
-#define _mm512_maskz_rol_epi64(U, A, B)					  \
-    ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A),	  \
-					    (int)(B),			  \
-					    (__v8di)_mm512_setzero_si512 (),  \
-					    (__mmask8)(U)))
-
-#define _mm512_ror_epi64(A, B)						  \
-    ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A),	  \
-					    (int)(B),			  \
-					    (__v8di)_mm512_undefined_si512 (),  \
-					    (__mmask8)(-1)))
-#define _mm512_mask_ror_epi64(W, U, A, B)				  \
-    ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A),	  \
-					    (int)(B),			  \
-					    (__v8di)(__m512i)(W),	  \
-					    (__mmask8)(U)))
-#define _mm512_maskz_ror_epi64(U, A, B)					  \
-    ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A),	  \
-					    (int)(B),			  \
-					    (__v8di)_mm512_setzero_si512 (),  \
-					    (__mmask8)(U)))
-#endif
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_and_si512 (__m512i __A, __m512i __B)
-{
-  return (__m512i) ((__v16su) __A & (__v16su) __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_and_epi32 (__m512i __A, __m512i __B)
-{
-  return (__m512i) ((__v16su) __A & (__v16su) __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
-						 (__v16si) __B,
-						 (__v16si) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
-						 (__v16si) __B,
-						 (__v16si)
-						 _mm512_setzero_si512 (),
-						 (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_and_epi64 (__m512i __A, __m512i __B)
-{
-  return (__m512i) ((__v8du) __A & (__v8du) __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
-						 (__v8di) __B,
-						 (__v8di) __W, __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
-						 (__v8di) __B,
-						 (__v8di)
-						 _mm512_setzero_pd (),
-						 __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_andnot_si512 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si)
-						  _mm512_undefined_si512 (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_andnot_epi32 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si)
-						  _mm512_undefined_si512 (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si)
-						  _mm512_setzero_si512 (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_andnot_epi64 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di)
-						  _mm512_undefined_si512 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di) __W, __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di)
-						  _mm512_setzero_pd (),
-						  __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_test_epi32_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
-						(__v16si) __B,
-						(__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
-						(__v16si) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_test_epi64_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
-					       (__v8di) __B,
-					       (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
-						 (__v16si) __B,
-						 (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
-						 (__v16si) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
-						(__v8di) __B,
-						(__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
-						(__v8di) __B, __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
-						     (__v16si) __B,
-						     (__v16si)
-						     _mm512_undefined_si512 (),
-						     (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
-			    __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
-						     (__v16si) __B,
-						     (__v16si) __W,
-						     (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
-						     (__v16si) __B,
-						     (__v16si)
-						     _mm512_setzero_si512 (),
-						     (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
-						      (__v8di) __B,
-						      (__v8di)
-						      _mm512_undefined_si512 (),
-						      (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
-						      (__v8di) __B,
-						      (__v8di) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
-						      (__v8di) __B,
-						      (__v8di)
-						      _mm512_setzero_si512 (),
-						      (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
-						     (__v16si) __B,
-						     (__v16si)
-						     _mm512_undefined_si512 (),
-						     (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
-			    __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
-						     (__v16si) __B,
-						     (__v16si) __W,
-						     (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
-						     (__v16si) __B,
-						     (__v16si)
-						     _mm512_setzero_si512 (),
-						     (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
-						      (__v8di) __B,
-						      (__v8di)
-						      _mm512_undefined_si512 (),
-						      (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
-						      (__v8di) __B,
-						      (__v8di) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
-						      (__v8di) __B,
-						      (__v8di)
-						      _mm512_setzero_si512 (),
-						      (__mmask8) __U);
-}
-
-#ifdef __x86_64__
-#ifdef __OPTIMIZE__
-extern __inline unsigned long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundss_u64 (__m128 __A, const int __R)
-{
-  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundss_si64 (__m128 __A, const int __R)
-{
-  return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundss_i64 (__m128 __A, const int __R)
-{
-  return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
-}
-
-extern __inline unsigned long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundss_u64 (__m128 __A, const int __R)
-{
-  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundss_i64 (__m128 __A, const int __R)
-{
-  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundss_si64 (__m128 __A, const int __R)
-{
-  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
-}
-#else
-#define _mm_cvt_roundss_u64(A, B)   \
-    ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B))
-
-#define _mm_cvt_roundss_si64(A, B)   \
-    ((long long)__builtin_ia32_vcvtss2si64(A, B))
-
-#define _mm_cvt_roundss_i64(A, B)   \
-    ((long long)__builtin_ia32_vcvtss2si64(A, B))
-
-#define _mm_cvtt_roundss_u64(A, B)  \
-    ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B))
-
-#define _mm_cvtt_roundss_i64(A, B)  \
-    ((long long)__builtin_ia32_vcvttss2si64(A, B))
-
-#define _mm_cvtt_roundss_si64(A, B)  \
-    ((long long)__builtin_ia32_vcvttss2si64(A, B))
-#endif
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline unsigned
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundss_u32 (__m128 __A, const int __R)
-{
-  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundss_si32 (__m128 __A, const int __R)
-{
-  return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundss_i32 (__m128 __A, const int __R)
-{
-  return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
-}
-
-extern __inline unsigned
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundss_u32 (__m128 __A, const int __R)
-{
-  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundss_i32 (__m128 __A, const int __R)
-{
-  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundss_si32 (__m128 __A, const int __R)
-{
-  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
-}
-#else
-#define _mm_cvt_roundss_u32(A, B)   \
-    ((unsigned)__builtin_ia32_vcvtss2usi32(A, B))
-
-#define _mm_cvt_roundss_si32(A, B)   \
-    ((int)__builtin_ia32_vcvtss2si32(A, B))
-
-#define _mm_cvt_roundss_i32(A, B)   \
-    ((int)__builtin_ia32_vcvtss2si32(A, B))
-
-#define _mm_cvtt_roundss_u32(A, B)  \
-    ((unsigned)__builtin_ia32_vcvttss2usi32(A, B))
-
-#define _mm_cvtt_roundss_si32(A, B)  \
-    ((int)__builtin_ia32_vcvttss2si32(A, B))
-
-#define _mm_cvtt_roundss_i32(A, B)  \
-    ((int)__builtin_ia32_vcvttss2si32(A, B))
-#endif
-
-#ifdef __x86_64__
-#ifdef __OPTIMIZE__
-extern __inline unsigned long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsd_u64 (__m128d __A, const int __R)
-{
-  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsd_si64 (__m128d __A, const int __R)
-{
-  return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsd_i64 (__m128d __A, const int __R)
-{
-  return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
-}
-
-extern __inline unsigned long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
-{
-  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
-{
-  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
-{
-  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
-}
-#else
-#define _mm_cvt_roundsd_u64(A, B)   \
-    ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B))
-
-#define _mm_cvt_roundsd_si64(A, B)   \
-    ((long long)__builtin_ia32_vcvtsd2si64(A, B))
-
-#define _mm_cvt_roundsd_i64(A, B)   \
-    ((long long)__builtin_ia32_vcvtsd2si64(A, B))
-
-#define _mm_cvtt_roundsd_u64(A, B)   \
-    ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B))
-
-#define _mm_cvtt_roundsd_si64(A, B)   \
-    ((long long)__builtin_ia32_vcvttsd2si64(A, B))
-
-#define _mm_cvtt_roundsd_i64(A, B)   \
-    ((long long)__builtin_ia32_vcvttsd2si64(A, B))
-#endif
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline unsigned
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsd_u32 (__m128d __A, const int __R)
-{
-  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsd_si32 (__m128d __A, const int __R)
-{
-  return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsd_i32 (__m128d __A, const int __R)
-{
-  return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
-}
-
-extern __inline unsigned
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
-{
-  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
-{
-  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
-{
-  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
-}
-#else
-#define _mm_cvt_roundsd_u32(A, B)   \
-    ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B))
-
-#define _mm_cvt_roundsd_si32(A, B)   \
-    ((int)__builtin_ia32_vcvtsd2si32(A, B))
-
-#define _mm_cvt_roundsd_i32(A, B)   \
-    ((int)__builtin_ia32_vcvtsd2si32(A, B))
-
-#define _mm_cvtt_roundsd_u32(A, B)   \
-    ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B))
-
-#define _mm_cvtt_roundsd_si32(A, B)   \
-    ((int)__builtin_ia32_vcvttsd2si32(A, B))
-
-#define _mm_cvtt_roundsd_i32(A, B)   \
-    ((int)__builtin_ia32_vcvttsd2si32(A, B))
-#endif
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_movedup_pd (__m512d __A)
-{
-  return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
-						   (__v8df)
-						   _mm512_undefined_pd (),
-						   (__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
-{
-  return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
-						   (__v8df) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
-{
-  return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
-						   (__v8df)
-						   _mm512_setzero_pd (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_unpacklo_pd (__m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
-						    (__v8df) __B,
-						    (__v8df)
-						    _mm512_undefined_pd (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
-						    (__v8df) __B,
-						    (__v8df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
-						    (__v8df) __B,
-						    (__v8df)
-						    _mm512_setzero_pd (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_unpackhi_pd (__m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
-						    (__v8df) __B,
-						    (__v8df)
-						    _mm512_undefined_pd (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
-						    (__v8df) __B,
-						    (__v8df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
-						    (__v8df) __B,
-						    (__v8df)
-						    _mm512_setzero_pd (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_unpackhi_ps (__m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
-						   (__v16sf) __B,
-						   (__v16sf)
-						   _mm512_undefined_ps (),
-						   (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
-						   (__v16sf) __B,
-						   (__v16sf) __W,
-						   (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
-						   (__v16sf) __B,
-						   (__v16sf)
-						   _mm512_setzero_ps (),
-						   (__mmask16) __U);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt_roundps_pd (__m256 __A, const int __R)
-{
-  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
-						    (__v8df)
-						    _mm512_undefined_pd (),
-						    (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
-			    const int __R)
-{
-  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
-						    (__v8df) __W,
-						    (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
-{
-  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
-						    (__v8df)
-						    _mm512_setzero_pd (),
-						    (__mmask8) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt_roundph_ps (__m256i __A, const int __R)
-{
-  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
-						    (__v16sf)
-						    _mm512_undefined_ps (),
-						    (__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
-			    const int __R)
-{
-  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
-						    (__v16sf) __W,
-						    (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
-{
-  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
-						    (__v16sf)
-						    _mm512_setzero_ps (),
-						    (__mmask16) __U, __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt_roundps_ph (__m512 __A, const int __I)
-{
-  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
-						     __I,
-						     (__v16hi)
-						     _mm256_undefined_si256 (),
-						     -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtps_ph (__m512 __A, const int __I)
-{
-  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
-						     __I,
-						     (__v16hi)
-						     _mm256_undefined_si256 (),
-						     -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
-			    const int __I)
-{
-  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
-						     __I,
-						     (__v16hi) __U,
-						     (__mmask16) __W);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
-{
-  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
-						     __I,
-						     (__v16hi) __U,
-						     (__mmask16) __W);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
-{
-  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
-						     __I,
-						     (__v16hi)
-						     _mm256_setzero_si256 (),
-						     (__mmask16) __W);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
-{
-  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
-						     __I,
-						     (__v16hi)
-						     _mm256_setzero_si256 (),
-						     (__mmask16) __W);
-}
-#else
-#define _mm512_cvt_roundps_pd(A, B)		 \
-    (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, B)
-
-#define _mm512_mask_cvt_roundps_pd(W, U, A, B)   \
-    (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B)
-
-#define _mm512_maskz_cvt_roundps_pd(U, A, B)     \
-    (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B)
-
-#define _mm512_cvt_roundph_ps(A, B)		 \
-    (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
-
-#define _mm512_mask_cvt_roundph_ps(W, U, A, B)   \
-    (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B)
-
-#define _mm512_maskz_cvt_roundph_ps(U, A, B)     \
-    (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), U, B)
-
-#define _mm512_cvt_roundps_ph(A, I)						 \
-  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
-    (__v16hi)_mm256_undefined_si256 (), -1))
-#define _mm512_cvtps_ph(A, I)						 \
-  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
-    (__v16hi)_mm256_undefined_si256 (), -1))
-#define _mm512_mask_cvt_roundps_ph(U, W, A, I)				 \
-  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
-    (__v16hi)(__m256i)(U), (__mmask16) (W)))
-#define _mm512_mask_cvtps_ph(U, W, A, I)				 \
-  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
-    (__v16hi)(__m256i)(U), (__mmask16) (W)))
-#define _mm512_maskz_cvt_roundps_ph(W, A, I)					 \
-  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
-    (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
-#define _mm512_maskz_cvtps_ph(W, A, I)					 \
-  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
-    (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt_roundpd_ps (__m512d __A, const int __R)
-{
-  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
-						   (__v8sf)
-						   _mm256_undefined_ps (),
-						   (__mmask8) -1, __R);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
-			    const int __R)
-{
-  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
-						   (__v8sf) __W,
-						   (__mmask8) __U, __R);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
-{
-  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
-						   (__v8sf)
-						   _mm256_setzero_ps (),
-						   (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
-{
-  return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
-						 (__v2df) __B,
-						 __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
-{
-  return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
-						  (__v4sf) __B,
-						  __R);
-}
-#else
-#define _mm512_cvt_roundpd_ps(A, B)		 \
-    (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B)
-
-#define _mm512_mask_cvt_roundpd_ps(W, U, A, B)   \
-    (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B)
-
-#define _mm512_maskz_cvt_roundpd_ps(U, A, B)     \
-    (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B)
-
-#define _mm_cvt_roundsd_ss(A, B, C)		 \
-    (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C)
-
-#define _mm_cvt_roundss_sd(A, B, C)		 \
-    (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C)
-#endif
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_stream_si512 (__m512i * __P, __m512i __A)
-{
-  __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_stream_ps (float *__P, __m512 __A)
-{
-  __builtin_ia32_movntps512 (__P, (__v16sf) __A);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_stream_pd (double *__P, __m512d __A)
-{
-  __builtin_ia32_movntpd512 (__P, (__v8df) __A);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_stream_load_si512 (void *__P)
-{
-  return __builtin_ia32_movntdqa512 ((__v8di *)__P);
-}
-
-/* Constants for mantissa extraction */
-typedef enum
-{
-  _MM_MANT_NORM_1_2,		/* interval [1, 2)      */
-  _MM_MANT_NORM_p5_2,		/* interval [0.5, 2)    */
-  _MM_MANT_NORM_p5_1,		/* interval [0.5, 1)    */
-  _MM_MANT_NORM_p75_1p5		/* interval [0.75, 1.5) */
-} _MM_MANTISSA_NORM_ENUM;
-
-typedef enum
-{
-  _MM_MANT_SIGN_src,		/* sign = sign(SRC)     */
-  _MM_MANT_SIGN_zero,		/* sign = 0             */
-  _MM_MANT_SIGN_nan		/* DEST = NaN if sign(SRC) = 1 */
-} _MM_MANTISSA_SIGN_ENUM;
-
-#ifdef __OPTIMIZE__
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
-{
-  return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
-						    (__v4sf) __B,
-						    __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
-{
-  return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
-						     (__v2df) __B,
-						     __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_getexp_round_ps (__m512 __A, const int __R)
-{
-  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
-						   (__v16sf)
-						   _mm512_undefined_ps (),
-						   (__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
-			     const int __R)
-{
-  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
-						   (__v16sf) __W,
-						   (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
-{
-  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
-						   (__v16sf)
-						   _mm512_setzero_ps (),
-						   (__mmask16) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_getexp_round_pd (__m512d __A, const int __R)
-{
-  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
-						    (__v8df)
-						    _mm512_undefined_pd (),
-						    (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
-			     const int __R)
-{
-  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
-						    (__v8df) __W,
-						    (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
-{
-  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
-						    (__v8df)
-						    _mm512_setzero_pd (),
-						    (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
-			 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
-						     (__C << 2) | __B,
-						     _mm512_undefined_pd (),
-						     (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
-			      _MM_MANTISSA_NORM_ENUM __B,
-			      _MM_MANTISSA_SIGN_ENUM __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
-						     (__C << 2) | __B,
-						     (__v8df) __W, __U,
-						     __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
-			       _MM_MANTISSA_NORM_ENUM __B,
-			       _MM_MANTISSA_SIGN_ENUM __C, const int __R)
-{
-  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
-						     (__C << 2) | __B,
-						     (__v8df)
-						     _mm512_setzero_pd (),
-						     __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
-			 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
-{
-  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
-						    (__C << 2) | __B,
-						    _mm512_undefined_ps (),
-						    (__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
-			      _MM_MANTISSA_NORM_ENUM __B,
-			      _MM_MANTISSA_SIGN_ENUM __C, const int __R)
-{
-  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
-						    (__C << 2) | __B,
-						    (__v16sf) __W, __U,
-						    __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
-			       _MM_MANTISSA_NORM_ENUM __B,
-			       _MM_MANTISSA_SIGN_ENUM __C, const int __R)
-{
-  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
-						    (__C << 2) | __B,
-						    (__v16sf)
-						    _mm512_setzero_ps (),
-						    __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_getmant_round_sd (__m128d __A, __m128d __B,
-		      _MM_MANTISSA_NORM_ENUM __C,
-		      _MM_MANTISSA_SIGN_ENUM __D, const int __R)
-{
-  return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
-						  (__v2df) __B,
-						  (__D << 2) | __C,
-						   __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_getmant_round_ss (__m128 __A, __m128 __B,
-		      _MM_MANTISSA_NORM_ENUM __C,
-		      _MM_MANTISSA_SIGN_ENUM __D, const int __R)
-{
-  return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
-						  (__v4sf) __B,
-						  (__D << 2) | __C,
-						  __R);
-}
-
-#else
-#define _mm512_getmant_round_pd(X, B, C, R)                                                  \
-  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),                 \
-                                              (int)(((C)<<2) | (B)),                \
-                                              (__v8df)(__m512d)_mm512_undefined_pd(), \
-                                              (__mmask8)-1,\
-					      (R)))
-
-#define _mm512_mask_getmant_round_pd(W, U, X, B, C, R)                                       \
-  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),                 \
-                                              (int)(((C)<<2) | (B)),                \
-                                              (__v8df)(__m512d)(W),                 \
-                                              (__mmask8)(U),\
-					      (R)))
-
-#define _mm512_maskz_getmant_round_pd(U, X, B, C, R)                                         \
-  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),                 \
-                                              (int)(((C)<<2) | (B)),                \
-                                              (__v8df)(__m512d)_mm512_setzero_pd(), \
-                                              (__mmask8)(U),\
-					      (R)))
-#define _mm512_getmant_round_ps(X, B, C, R)                                                  \
-  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),                  \
-                                             (int)(((C)<<2) | (B)),                 \
-                                             (__v16sf)(__m512)_mm512_undefined_ps(), \
-                                             (__mmask16)-1,\
-					     (R)))
-
-#define _mm512_mask_getmant_round_ps(W, U, X, B, C, R)                                       \
-  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),                  \
-                                             (int)(((C)<<2) | (B)),                 \
-                                             (__v16sf)(__m512)(W),                  \
-                                             (__mmask16)(U),\
-					     (R)))
-
-#define _mm512_maskz_getmant_round_ps(U, X, B, C, R)                                         \
-  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),                  \
-                                             (int)(((C)<<2) | (B)),                 \
-                                             (__v16sf)(__m512)_mm512_setzero_ps(),  \
-                                             (__mmask16)(U),\
-					     (R)))
-#define _mm_getmant_round_sd(X, Y, C, D, R)                                                  \
-  ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X),                    \
-					    (__v2df)(__m128d)(Y),	\
-					    (int)(((D)<<2) | (C)),	\
-					    (R)))
-
-#define _mm_getmant_round_ss(X, Y, C, D, R)                                                  \
-  ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X),                      \
-					   (__v4sf)(__m128)(Y),		\
-					   (int)(((D)<<2) | (C)),	\
-					   (R)))
-
-#define _mm_getexp_round_ss(A, B, R)						      \
-  ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))
-
-#define _mm_getexp_round_sd(A, B, R)						       \
-  ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
-
-#define _mm512_getexp_round_ps(A, R)						\
-  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),		\
-  (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R))
-
-#define _mm512_mask_getexp_round_ps(W, U, A, R)					\
-  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),		\
-  (__v16sf)(__m512)(W), (__mmask16)(U), R))
-
-#define _mm512_maskz_getexp_round_ps(U, A, R)					\
-  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),		\
-  (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R))
-
-#define _mm512_getexp_round_pd(A, R)						\
-  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),		\
-  (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R))
-
-#define _mm512_mask_getexp_round_pd(W, U, A, R)					\
-  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),		\
-  (__v8df)(__m512d)(W), (__mmask8)(U), R))
-
-#define _mm512_maskz_getexp_round_pd(U, A, R)					\
-  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),		\
-  (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R))
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
-{
-  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
-						  (__v16sf)
-						  _mm512_undefined_ps (),
-						  -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
-				 const int __imm, const int __R)
-{
-  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
-						  (__v16sf) __A,
-						  (__mmask16) __B, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
-				  const int __imm, const int __R)
-{
-  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
-						  __imm,
-						  (__v16sf)
-						  _mm512_setzero_ps (),
-						  (__mmask16) __A, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
-{
-  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
-						   (__v8df)
-						   _mm512_undefined_pd (),
-						   -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
-				 __m512d __C, const int __imm, const int __R)
-{
-  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
-						   (__v8df) __A,
-						   (__mmask8) __B, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
-				  const int __imm, const int __R)
-{
-  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
-						   __imm,
-						   (__v8df)
-						   _mm512_setzero_pd (),
-						   (__mmask8) __A, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
-{
-  return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
-						   (__v4sf) __B, __imm, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
-			 const int __R)
-{
-  return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
-						    (__v2df) __B, __imm, __R);
-}
-
-#else
-#define _mm512_roundscale_round_ps(A, B, R) \
-  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
-    (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R))
-#define _mm512_mask_roundscale_round_ps(A, B, C, D, R)				\
-  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C),	\
-					    (int)(D),			\
-					    (__v16sf)(__m512)(A),	\
-					    (__mmask16)(B), R))
-#define _mm512_maskz_roundscale_round_ps(A, B, C, R)				\
-  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B),	\
-					    (int)(C),			\
-					    (__v16sf)_mm512_setzero_ps(),\
-					    (__mmask16)(A), R))
-#define _mm512_roundscale_round_pd(A, B, R) \
-  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
-    (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R))
-#define _mm512_mask_roundscale_round_pd(A, B, C, D, R)				\
-  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C),	\
-					     (int)(D),			\
-					     (__v8df)(__m512d)(A),	\
-					     (__mmask8)(B), R))
-#define _mm512_maskz_roundscale_round_pd(A, B, C, R)				\
-  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B),	\
-					     (int)(C),			\
-					     (__v8df)_mm512_setzero_pd(),\
-					     (__mmask8)(A), R))
-#define _mm_roundscale_round_ss(A, B, C, R)					\
-  ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A),	\
-    (__v4sf)(__m128)(B), (int)(C), R))
-#define _mm_roundscale_round_sd(A, B, C, R)					\
-  ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A),	\
-    (__v2df)(__m128d)(B), (int)(C), R))
-#endif
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_floor_ps (__m512 __A)
-{
-  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
-						  _MM_FROUND_FLOOR,
-						  (__v16sf) __A, -1,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_floor_pd (__m512d __A)
-{
-  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
-						   _MM_FROUND_FLOOR,
-						   (__v8df) __A, -1,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ceil_ps (__m512 __A)
-{
-  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
-						  _MM_FROUND_CEIL,
-						  (__v16sf) __A, -1,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ceil_pd (__m512d __A)
-{
-  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
-						   _MM_FROUND_CEIL,
-						   (__v8df) __A, -1,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
-						  _MM_FROUND_FLOOR,
-						  (__v16sf) __W, __U,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
-{
-  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
-						   _MM_FROUND_FLOOR,
-						   (__v8df) __W, __U,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
-						  _MM_FROUND_CEIL,
-						  (__v16sf) __W, __U,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
-{
-  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
-						   _MM_FROUND_CEIL,
-						   (__v8df) __W, __U,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
-{
-  return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
-						  (__v16si) __B, __imm,
-						  (__v16si)
-						  _mm512_undefined_si512 (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
-			  __m512i __B, const int __imm)
-{
-  return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
-						  (__v16si) __B, __imm,
-						  (__v16si) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
-			   const int __imm)
-{
-  return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
-						  (__v16si) __B, __imm,
-						  (__v16si)
-						  _mm512_setzero_si512 (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
-{
-  return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
-						  (__v8di) __B, __imm,
-						  (__v8di)
-						  _mm512_undefined_si512 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
-			  __m512i __B, const int __imm)
-{
-  return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
-						  (__v8di) __B, __imm,
-						  (__v8di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
-			   const int __imm)
-{
-  return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
-						  (__v8di) __B, __imm,
-						  (__v8di)
-						  _mm512_setzero_si512 (),
-						  (__mmask8) __U);
-}
-#else
-#define _mm512_alignr_epi32(X, Y, C)                                        \
-    ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),         \
-        (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_si512 (),\
-        (__mmask16)-1))
-
-#define _mm512_mask_alignr_epi32(W, U, X, Y, C)                             \
-    ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),         \
-        (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W),             \
-        (__mmask16)(U)))
-
-#define _mm512_maskz_alignr_epi32(U, X, Y, C)                               \
-    ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),         \
-        (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),\
-        (__mmask16)(U)))
-
-#define _mm512_alignr_epi64(X, Y, C)                                        \
-    ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),          \
-        (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_si512 (),  \
-	(__mmask8)-1))
-
-#define _mm512_mask_alignr_epi64(W, U, X, Y, C)                             \
-    ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),          \
-        (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U)))
-
-#define _mm512_maskz_alignr_epi64(U, X, Y, C)                               \
-    ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),          \
-        (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),\
-        (__mmask8)(U)))
-#endif
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
-						     (__v16si) __B,
-						     (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
-						     (__v16si) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
-						    (__v8di) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
-						    (__v8di) __B,
-						    (__mmask8) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
-						     (__v16si) __B,
-						     (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
-						     (__v16si) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
-						    (__v8di) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
-						    (__v8di) __B,
-						    (__mmask8) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
-						    (__v16si) __Y, 5,
-						    (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
-						    (__v16si) __Y, 5,
-						    (__mmask16) __M);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
-						    (__v16si) __Y, 5,
-						    (__mmask16) __M);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
-						    (__v16si) __Y, 5,
-						    (__mmask16) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
-						    (__v8di) __Y, 5,
-						    (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
-						    (__v8di) __Y, 5,
-						    (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
-						    (__v8di) __Y, 5,
-						    (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
-						    (__v8di) __Y, 5,
-						    (__mmask8) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
-						    (__v16si) __Y, 2,
-						    (__mmask16) __M);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
-						    (__v16si) __Y, 2,
-						    (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
-						    (__v16si) __Y, 2,
-						    (__mmask16) __M);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
-						    (__v16si) __Y, 2,
-						    (__mmask16) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
-						    (__v8di) __Y, 2,
-						    (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
-						    (__v8di) __Y, 2,
-						    (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
-						    (__v8di) __Y, 2,
-						    (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
-						    (__v8di) __Y, 2,
-						    (__mmask8) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
-						    (__v16si) __Y, 1,
-						    (__mmask16) __M);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
-						    (__v16si) __Y, 1,
-						    (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
-						    (__v16si) __Y, 1,
-						    (__mmask16) __M);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
-						    (__v16si) __Y, 1,
-						    (__mmask16) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
-						    (__v8di) __Y, 1,
-						    (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
-						    (__v8di) __Y, 1,
-						    (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
-						    (__v8di) __Y, 1,
-						    (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
-						    (__v8di) __Y, 1,
-						    (__mmask8) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
-						    (__v16si) __Y, 4,
-						    (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
-						    (__v16si) __Y, 4,
-						    (__mmask16) __M);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
-						    (__v16si) __Y, 4,
-						    (__mmask16) __M);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
-						    (__v16si) __Y, 4,
-						    (__mmask16) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
-						    (__v8di) __Y, 4,
-						    (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
-						    (__v8di) __Y, 4,
-						    (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
-						    (__v8di) __Y, 4,
-						    (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
-						    (__v8di) __Y, 4,
-						    (__mmask8) -1);
-}
-
-#define _MM_CMPINT_EQ	    0x0
-#define _MM_CMPINT_LT	    0x1
-#define _MM_CMPINT_LE	    0x2
-#define _MM_CMPINT_UNUSED   0x3
-#define _MM_CMPINT_NE	    0x4
-#define _MM_CMPINT_NLT	    0x5
-#define _MM_CMPINT_GE	    0x5
-#define _MM_CMPINT_NLE	    0x6
-#define _MM_CMPINT_GT	    0x6
-
-#ifdef __OPTIMIZE__
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
-						 (__v8di) __Y, __P,
-						 (__mmask8) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
-{
-  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
-						  (__v16si) __Y, __P,
-						  (__mmask16) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
-						  (__v8di) __Y, __P,
-						  (__mmask8) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
-{
-  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
-						   (__v16si) __Y, __P,
-						   (__mmask16) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
-			  const int __R)
-{
-  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
-						  (__v8df) __Y, __P,
-						  (__mmask8) -1, __R);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
-{
-  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
-						   (__v16sf) __Y, __P,
-						   (__mmask16) -1, __R);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
-			    const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
-						 (__v8di) __Y, __P,
-						 (__mmask8) __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
-			    const int __P)
-{
-  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
-						  (__v16si) __Y, __P,
-						  (__mmask16) __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
-			    const int __P)
-{
-  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
-						  (__v8di) __Y, __P,
-						  (__mmask8) __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
-			    const int __P)
-{
-  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
-						   (__v16si) __Y, __P,
-						   (__mmask16) __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
-			       const int __P, const int __R)
-{
-  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
-						  (__v8df) __Y, __P,
-						  (__mmask8) __U, __R);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
-			       const int __P, const int __R)
-{
-  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
-						   (__v16sf) __Y, __P,
-						   (__mmask16) __U, __R);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
-{
-  return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
-					       (__v2df) __Y, __P,
-					       (__mmask8) -1, __R);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
-			    const int __P, const int __R)
-{
-  return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
-					       (__v2df) __Y, __P,
-					       (__mmask8) __M, __R);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
-{
-  return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
-					       (__v4sf) __Y, __P,
-					       (__mmask8) -1, __R);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
-			    const int __P, const int __R)
-{
-  return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
-					       (__v4sf) __Y, __P,
-					       (__mmask8) __M, __R);
-}
-
-#else
-#define _mm512_cmp_epi64_mask(X, Y, P)					\
-  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X),	\
-					   (__v8di)(__m512i)(Y), (int)(P),\
-					   (__mmask8)-1))
-
-#define _mm512_cmp_epi32_mask(X, Y, P)					\
-  ((__mmask8) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X),	\
-					   (__v16si)(__m512i)(Y), (int)(P),\
-					   (__mmask16)-1))
-
-#define _mm512_cmp_epu64_mask(X, Y, P)					\
-  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X),	\
-					    (__v8di)(__m512i)(Y), (int)(P),\
-					    (__mmask8)-1))
-
-#define _mm512_cmp_epu32_mask(X, Y, P)					\
-  ((__mmask8) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X),	\
-					    (__v16si)(__m512i)(Y), (int)(P),\
-					    (__mmask16)-1))
-
-#define _mm512_cmp_round_pd_mask(X, Y, P, R)					\
-  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),	\
-					    (__v8df)(__m512d)(Y), (int)(P),\
-					    (__mmask8)-1, R))
-
-#define _mm512_cmp_round_ps_mask(X, Y, P, R)					\
-  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),	\
-					     (__v16sf)(__m512)(Y), (int)(P),\
-					     (__mmask16)-1, R))
-
-#define _mm512_mask_cmp_epi64_mask(M, X, Y, P)					\
-  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X),	\
-					   (__v8di)(__m512i)(Y), (int)(P),\
-					   (__mmask8)M))
-
-#define _mm512_mask_cmp_epi32_mask(M, X, Y, P)					\
-  ((__mmask8) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X),	\
-					   (__v16si)(__m512i)(Y), (int)(P),\
-					   (__mmask16)M))
-
-#define _mm512_mask_cmp_epu64_mask(M, X, Y, P)					\
-  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X),	\
-					    (__v8di)(__m512i)(Y), (int)(P),\
-					    (__mmask8)M))
-
-#define _mm512_mask_cmp_epu32_mask(M, X, Y, P)					\
-  ((__mmask8) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X),	\
-					    (__v16si)(__m512i)(Y), (int)(P),\
-					    (__mmask16)M))
-
-#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R)					\
-  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),	\
-					    (__v8df)(__m512d)(Y), (int)(P),\
-					    (__mmask8)M, R))
-
-#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R)					\
-  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),	\
-					     (__v16sf)(__m512)(Y), (int)(P),\
-					     (__mmask16)M, R))
-
-#define _mm_cmp_round_sd_mask(X, Y, P, R)					\
-  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),		\
-					 (__v2df)(__m128d)(Y), (int)(P),\
-					 (__mmask8)-1, R))
-
-#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R)					\
-  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),		\
-					 (__v2df)(__m128d)(Y), (int)(P),\
-					 (M), R))
-
-#define _mm_cmp_round_ss_mask(X, Y, P, R)					\
-  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),		\
-					 (__v4sf)(__m128)(Y), (int)(P), \
-					 (__mmask8)-1, R))
-
-#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R)					\
-  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),		\
-					 (__v4sf)(__m128)(Y), (int)(P), \
-					 (M), R))
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
-{
-  __m512 v1_old = _mm512_undefined_ps ();
-  __mmask16 mask = 0xFFFF;
-
-  return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
-						__addr,
-						(__v16si) __index,
-						mask, __scale);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_i32gather_ps (__m512 v1_old, __mmask16 __mask,
-			  __m512i __index, float const *__addr, int __scale)
-{
-  return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
-						__addr,
-						(__v16si) __index,
-						__mask, __scale);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale)
-{
-  __m512d v1_old = _mm512_undefined_pd ();
-  __mmask8 mask = 0xFF;
-
-  return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old,
-						__addr,
-						(__v8si) __index, mask,
-						__scale);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
-			  __m256i __index, double const *__addr, int __scale)
-{
-  return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
-						__addr,
-						(__v8si) __index,
-						__mask, __scale);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
-{
-  __m256 v1_old = _mm256_undefined_ps ();
-  __mmask8 mask = 0xFF;
-
-  return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old,
-						__addr,
-						(__v8di) __index, mask,
-						__scale);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
-			  __m512i __index, float const *__addr, int __scale)
-{
-  return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
-						__addr,
-						(__v8di) __index,
-						__mask, __scale);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale)
-{
-  __m512d v1_old = _mm512_undefined_pd ();
-  __mmask8 mask = 0xFF;
-
-  return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old,
-						__addr,
-						(__v8di) __index, mask,
-						__scale);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
-			  __m512i __index, double const *__addr, int __scale)
-{
-  return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
-						__addr,
-						(__v8di) __index,
-						__mask, __scale);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale)
-{
-  __m512i v1_old = _mm512_undefined_si512 ();
-  __mmask16 mask = 0xFFFF;
-
-  return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old,
-						 __addr,
-						 (__v16si) __index,
-						 mask, __scale);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
-			     __m512i __index, int const *__addr, int __scale)
-{
-  return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
-						 __addr,
-						 (__v16si) __index,
-						 __mask, __scale);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale)
-{
-  __m512i v1_old = _mm512_undefined_si512 ();
-  __mmask8 mask = 0xFF;
-
-  return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old,
-						__addr,
-						(__v8si) __index, mask,
-						__scale);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
-			     __m256i __index, long long const *__addr,
-			     int __scale)
-{
-  return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
-						__addr,
-						(__v8si) __index,
-						__mask, __scale);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale)
-{
-  __m256i v1_old = _mm256_undefined_si256 ();
-  __mmask8 mask = 0xFF;
-
-  return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old,
-						 __addr,
-						 (__v8di) __index,
-						 mask, __scale);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
-			     __m512i __index, int const *__addr, int __scale)
-{
-  return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
-						 __addr,
-						 (__v8di) __index,
-						 __mask, __scale);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale)
-{
-  __m512i v1_old = _mm512_undefined_si512 ();
-  __mmask8 mask = 0xFF;
-
-  return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old,
-						__addr,
-						(__v8di) __index, mask,
-						__scale);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
-			     __m512i __index, long long const *__addr,
-			     int __scale)
-{
-  return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
-						__addr,
-						(__v8di) __index,
-						__mask, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_i32scatter_ps (float *__addr, __m512i __index, __m512 __v1, int __scale)
-{
-  __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
-				 (__v16si) __index, (__v16sf) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_i32scatter_ps (float *__addr, __mmask16 __mask,
-			   __m512i __index, __m512 __v1, int __scale)
-{
-  __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
-				 (__v16sf) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_i32scatter_pd (double *__addr, __m256i __index, __m512d __v1,
-		      int __scale)
-{
-  __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
-				(__v8si) __index, (__v8df) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
-			   __m256i __index, __m512d __v1, int __scale)
-{
-  __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
-				(__v8df) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_i64scatter_ps (float *__addr, __m512i __index, __m256 __v1, int __scale)
-{
-  __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
-				 (__v8di) __index, (__v8sf) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
-			   __m512i __index, __m256 __v1, int __scale)
-{
-  __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
-				 (__v8sf) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_i64scatter_pd (double *__addr, __m512i __index, __m512d __v1,
-		      int __scale)
-{
-  __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
-				(__v8di) __index, (__v8df) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
-			   __m512i __index, __m512d __v1, int __scale)
-{
-  __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
-				(__v8df) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_i32scatter_epi32 (int *__addr, __m512i __index,
-			 __m512i __v1, int __scale)
-{
-  __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
-				 (__v16si) __index, (__v16si) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_i32scatter_epi32 (int *__addr, __mmask16 __mask,
-			      __m512i __index, __m512i __v1, int __scale)
-{
-  __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
-				 (__v16si) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_i32scatter_epi64 (long long *__addr, __m256i __index,
-			 __m512i __v1, int __scale)
-{
-  __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
-				(__v8si) __index, (__v8di) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
-			      __m256i __index, __m512i __v1, int __scale)
-{
-  __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
-				(__v8di) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_i64scatter_epi32 (int *__addr, __m512i __index,
-			 __m256i __v1, int __scale)
-{
-  __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
-				 (__v8di) __index, (__v8si) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
-			      __m512i __index, __m256i __v1, int __scale)
-{
-  __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
-				 (__v8si) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_i64scatter_epi64 (long long *__addr, __m512i __index,
-			 __m512i __v1, int __scale)
-{
-  __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
-				(__v8di) __index, (__v8di) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
-			      __m512i __index, __m512i __v1, int __scale)
-{
-  __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
-				(__v8di) __v1, __scale);
-}
-#else
-#define _mm512_i32gather_ps(INDEX, ADDR, SCALE)				\
-  (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
-					 (float const *)ADDR,		\
-					 (__v16si)(__m512i)INDEX,	\
-					 (__mmask16)0xFFFF, (int)SCALE)
-
-#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)V1OLD,	\
-					 (float const *)ADDR,		\
-					 (__v16si)(__m512i)INDEX,	\
-					 (__mmask16)MASK, (int)SCALE)
-
-#define _mm512_i32gather_pd(INDEX, ADDR, SCALE)				\
-  (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(),	\
-					 (double const *)ADDR,		\
-					 (__v8si)(__m256i)INDEX,	\
-					 (__mmask8)0xFF, (int)SCALE)
-
-#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)V1OLD,	\
-					 (double const *)ADDR,		\
-					 (__v8si)(__m256i)INDEX,	\
-					 (__mmask8)MASK, (int)SCALE)
-
-#define _mm512_i64gather_ps(INDEX, ADDR, SCALE)				\
-  (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(),	\
-					 (float const *)ADDR,		\
-					 (__v8di)(__m512i)INDEX,	\
-					 (__mmask8)0xFF, (int)SCALE)
-
-#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)V1OLD,		\
-					 (float const *)ADDR,		\
-					 (__v8di)(__m512i)INDEX,	\
-					 (__mmask8)MASK, (int)SCALE)
-
-#define _mm512_i64gather_pd(INDEX, ADDR, SCALE)				\
-  (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(),	\
-					 (double const *)ADDR,		\
-					 (__v8di)(__m512i)INDEX,	\
-					 (__mmask8)0xFF, (int)SCALE)
-
-#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)V1OLD,	\
-					 (double const *)ADDR,		\
-					 (__v8di)(__m512i)INDEX,	\
-					 (__mmask8)MASK, (int)SCALE)
-
-#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE)			\
-  (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_si512 (),	\
-					  (int const *)ADDR,		\
-					  (__v16si)(__m512i)INDEX,	\
-					  (__mmask16)0xFFFF, (int)SCALE)
-
-#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)V1OLD,	\
-					  (int const *)ADDR,		\
-					  (__v16si)(__m512i)INDEX,	\
-					  (__mmask16)MASK, (int)SCALE)
-
-#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE)			\
-  (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_si512 (),	\
-					 (long long const *)ADDR,	\
-					 (__v8si)(__m256i)INDEX,	\
-					 (__mmask8)0xFF, (int)SCALE)
-
-#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)V1OLD,	\
-					 (long long const *)ADDR,	\
-					 (__v8si)(__m256i)INDEX,	\
-					 (__mmask8)MASK, (int)SCALE)
-
-#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE)			  \
-  (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
-					  (int const *)ADDR,		  \
-					  (__v8di)(__m512i)INDEX,	  \
-					  (__mmask8)0xFF, (int)SCALE)
-
-#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)V1OLD,	\
-					  (int const *)ADDR,		\
-					  (__v8di)(__m512i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
-
-#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE)			\
-  (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_si512 (),	\
-					 (long long const *)ADDR,	\
-					 (__v8di)(__m512i)INDEX,	\
-					 (__mmask8)0xFF, (int)SCALE)
-
-#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)V1OLD,	\
-					 (long long const *)ADDR,	\
-					 (__v8di)(__m512i)INDEX,	\
-					 (__mmask8)MASK, (int)SCALE)
-
-#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv16sf ((float *)ADDR, (__mmask16)0xFFFF,	\
-				 (__v16si)(__m512i)INDEX,		\
-				 (__v16sf)(__m512)V1, (int)SCALE)
-
-#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scattersiv16sf ((float *)ADDR, (__mmask16)MASK,		\
-				 (__v16si)(__m512i)INDEX,		\
-				 (__v16sf)(__m512)V1, (int)SCALE)
-
-#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv8df ((double *)ADDR, (__mmask8)0xFF,		\
-				(__v8si)(__m256i)INDEX,			\
-				(__v8df)(__m512d)V1, (int)SCALE)
-
-#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scattersiv8df ((double *)ADDR, (__mmask8)MASK,		\
-				(__v8si)(__m256i)INDEX,			\
-				(__v8df)(__m512d)V1, (int)SCALE)
-
-#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv16sf ((float *)ADDR, (__mmask8)0xFF,		\
-				 (__v8di)(__m512i)INDEX,		\
-				 (__v8sf)(__m256)V1, (int)SCALE)
-
-#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scatterdiv16sf ((float *)ADDR, (__mmask16)MASK,		\
-				 (__v8di)(__m512i)INDEX,		\
-				 (__v8sf)(__m256)V1, (int)SCALE)
-
-#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv8df ((double *)ADDR, (__mmask8)0xFF,		\
-				(__v8di)(__m512i)INDEX,			\
-				(__v8df)(__m512d)V1, (int)SCALE)
-
-#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scatterdiv8df ((double *)ADDR, (__mmask8)MASK,		\
-				(__v8di)(__m512i)INDEX,			\
-				(__v8df)(__m512d)V1, (int)SCALE)
-
-#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv16si ((int *)ADDR, (__mmask16)0xFFFF,	\
-				 (__v16si)(__m512i)INDEX,		\
-				 (__v16si)(__m512i)V1, (int)SCALE)
-
-#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)	\
-  __builtin_ia32_scattersiv16si ((int *)ADDR, (__mmask16)MASK,		\
-				 (__v16si)(__m512i)INDEX,		\
-				 (__v16si)(__m512i)V1, (int)SCALE)
-
-#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv8di ((long long *)ADDR, (__mmask8)0xFF,	\
-				(__v8si)(__m256i)INDEX,			\
-				(__v8di)(__m512i)V1, (int)SCALE)
-
-#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)	\
-  __builtin_ia32_scattersiv8di ((long long *)ADDR, (__mmask8)MASK,	\
-				(__v8si)(__m256i)INDEX,			\
-				(__v8di)(__m512i)V1, (int)SCALE)
-
-#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv16si ((int *)ADDR, (__mmask8)0xFF,		\
-				 (__v8di)(__m512i)INDEX,		\
-				 (__v8si)(__m256i)V1, (int)SCALE)
-
-#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)	\
-  __builtin_ia32_scatterdiv16si ((int *)ADDR, (__mmask8)MASK,		\
-				 (__v8di)(__m512i)INDEX,		\
-				 (__v8si)(__m256i)V1, (int)SCALE)
-
-#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv8di ((long long *)ADDR, (__mmask8)0xFF,	\
-				(__v8di)(__m512i)INDEX,			\
-				(__v8di)(__m512i)V1, (int)SCALE)
-
-#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)	\
-  __builtin_ia32_scatterdiv8di ((long long *)ADDR, (__mmask8)MASK,	\
-				(__v8di)(__m512i)INDEX,			\
-				(__v8di)(__m512i)V1, (int)SCALE)
-#endif
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
-{
-  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
-						      (__v8df) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
-{
-  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
-						      (__v8df)
-						      _mm512_setzero_pd (),
-						      (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
-{
-  __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
-					  (__mmask8) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
-						     (__v16sf) __W,
-						     (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
-						     (__v16sf)
-						     _mm512_setzero_ps (),
-						     (__mmask16) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
-{
-  __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
-					  (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
-						      (__v8di) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
-						      (__v8di)
-						      _mm512_setzero_si512 (),
-						      (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
-{
-  __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
-					  (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
-						      (__v16si) __W,
-						      (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
-						      (__v16si)
-						      _mm512_setzero_si512 (),
-						      (__mmask16) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
-{
-  __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
-					  (__mmask16) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
-{
-  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
-						    (__v8df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
-{
-  return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
-						     (__v8df)
-						     _mm512_setzero_pd (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
-{
-  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
-							(__v8df) __W,
-							(__mmask8) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
-{
-  return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
-							 (__v8df)
-							 _mm512_setzero_pd (),
-							 (__mmask8) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
-						   (__v16sf) __W,
-						   (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
-						    (__v16sf)
-						    _mm512_setzero_ps (),
-						    (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
-{
-  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
-						       (__v16sf) __W,
-						       (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
-{
-  return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
-							(__v16sf)
-							_mm512_setzero_ps (),
-							(__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
-						    (__v8di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
-						     (__v8di)
-						     _mm512_setzero_si512 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
-{
-  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
-							(__v8di) __W,
-							(__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
-{
-  return (__m512i)
-	 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
-					       (__v8di)
-					       _mm512_setzero_si512 (),
-					       (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
-						    (__v16si) __W,
-						    (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
-{
-  return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
-						     (__v16si)
-						     _mm512_setzero_si512 (),
-						     (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
-{
-  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
-							(__v16si) __W,
-							(__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
-{
-  return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
-							 (__v16si)
-							 _mm512_setzero_si512
-							 (), (__mmask16) __U);
-}
-
-/* Mask arithmetic operations */
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_kand (__mmask16 __A, __mmask16 __B)
-{
-  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_kandn (__mmask16 __A, __mmask16 __B)
-{
-  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_kor (__mmask16 __A, __mmask16 __B)
-{
-  return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_kortestz (__mmask16 __A, __mmask16 __B)
-{
-  return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
-						(__mmask16) __B);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_kortestc (__mmask16 __A, __mmask16 __B)
-{
-  return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
-						(__mmask16) __B);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_kxnor (__mmask16 __A, __mmask16 __B)
-{
-  return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_kxor (__mmask16 __A, __mmask16 __B)
-{
-  return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_knot (__mmask16 __A)
-{
-  return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
-{
-  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
-			  const int __imm)
-{
-  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
-						    (__v4si) __D,
-						    __imm,
-						    (__v16si)
-						    _mm512_setzero_si512 (),
-						    __B);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
-			  const int __imm)
-{
-  return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
-						   (__v4sf) __D,
-						   __imm,
-						   (__v16sf)
-						   _mm512_setzero_ps (), __B);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
-			 __m128i __D, const int __imm)
-{
-  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
-						    (__v4si) __D,
-						    __imm,
-						    (__v16si) __A,
-						    __B);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
-			 __m128 __D, const int __imm)
-{
-  return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
-						   (__v4sf) __D,
-						   __imm,
-						   (__v16sf) __A, __B);
-}
-#else
-#define _mm512_maskz_insertf32x4(A, X, Y, C)                            \
-  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
-    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(),      \
-    (__mmask8)(A)))
-
-#define _mm512_maskz_inserti32x4(A, X, Y, C)                            \
-  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
-    (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (),     \
-    (__mmask8)(A)))
-
-#define _mm512_mask_insertf32x4(A, B, X, Y, C)                          \
-  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
-    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A),             \
-					     (__mmask8)(B)))
-
-#define _mm512_mask_inserti32x4(A, B, X, Y, C)                          \
-  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
-    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A),           \
-					      (__mmask8)(B)))
-#endif
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_max_epi64 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di)
-						  _mm512_undefined_si512 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di)
-						  _mm512_setzero_si512 (),
-						  __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di) __W, __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_min_epi64 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di)
-						  _mm512_undefined_si512 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di) __W, __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di)
-						  _mm512_setzero_si512 (),
-						  __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_max_epu64 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di)
-						  _mm512_undefined_si512 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di)
-						  _mm512_setzero_si512 (),
-						  __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di) __W, __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_min_epu64 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di)
-						  _mm512_undefined_si512 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di) __W, __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
-						  (__v8di) __B,
-						  (__v8di)
-						  _mm512_setzero_si512 (),
-						  __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_max_epi32 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si)
-						  _mm512_undefined_si512 (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si)
-						  _mm512_setzero_si512 (),
-						  __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si) __W, __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_min_epi32 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si)
-						  _mm512_undefined_si512 (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si)
-						  _mm512_setzero_si512 (),
-						  __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si) __W, __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_max_epu32 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si)
-						  _mm512_undefined_si512 (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si)
-						  _mm512_setzero_si512 (),
-						  __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si) __W, __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_min_epu32 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si)
-						  _mm512_undefined_si512 (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si)
-						  _mm512_setzero_si512 (),
-						  __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
-						  (__v16si) __B,
-						  (__v16si) __W, __M);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_unpacklo_ps (__m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
-						   (__v16sf) __B,
-						   (__v16sf)
-						   _mm512_undefined_ps (),
-						   (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
-						   (__v16sf) __B,
-						   (__v16sf) __W,
-						   (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
-						   (__v16sf) __B,
-						   (__v16sf)
-						   _mm512_setzero_ps (),
-						   (__mmask16) __U);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
-{
-  return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
-					       (__v2df) __B,
-					       __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
-{
-  return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
-					      (__v4sf) __B,
-					      __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
-{
-  return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
-					       (__v2df) __B,
-					       __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
-{
-  return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
-					      (__v4sf) __B,
-					      __R);
-}
-
-#else
-#define _mm_max_round_sd(A, B, C)            \
-    (__m128d)__builtin_ia32_addsd_round(A, B, C)
-
-#define _mm_max_round_ss(A, B, C)            \
-    (__m128)__builtin_ia32_addss_round(A, B, C)
-
-#define _mm_min_round_sd(A, B, C)            \
-    (__m128d)__builtin_ia32_subsd_round(A, B, C)
-
-#define _mm_min_round_ss(A, B, C)            \
-    (__m128)__builtin_ia32_subss_round(A, B, C)
-#endif
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
-{
-  return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
-						     (__v8df) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
-{
-  return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
-						    (__v16sf) __W,
-						    (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
-{
-  return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
-						    (__v8di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
-{
-  return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
-						    (__v16si) __W,
-						    (__mmask16) __U);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
-{
-  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
-						   (__v2df) __A,
-						   (__v2df) __B,
-						   __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
-{
-  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
-						  (__v4sf) __A,
-						  (__v4sf) __B,
-						  __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
-{
-  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
-						   (__v2df) __A,
-						   -(__v2df) __B,
-						   __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
-{
-  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
-						  (__v4sf) __A,
-						  -(__v4sf) __B,
-						  __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
-{
-  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
-						   -(__v2df) __A,
-						   (__v2df) __B,
-						   __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
-{
-  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
-						  -(__v4sf) __A,
-						  (__v4sf) __B,
-						  __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
-{
-  return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
-						   -(__v2df) __A,
-						   -(__v2df) __B,
-						   __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
-{
-  return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
-						  -(__v4sf) __A,
-						  -(__v4sf) __B,
-						  __R);
-}
-#else
-#define _mm_fmadd_round_sd(A, B, C, R)            \
-    (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R)
-
-#define _mm_fmadd_round_ss(A, B, C, R)            \
-    (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R)
-
-#define _mm_fmsub_round_sd(A, B, C, R)            \
-    (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R)
-
-#define _mm_fmsub_round_ss(A, B, C, R)            \
-    (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R)
-
-#define _mm_fnmadd_round_sd(A, B, C, R)            \
-    (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R)
-
-#define _mm_fnmadd_round_ss(A, B, C, R)            \
-   (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R)
-
-#define _mm_fnmsub_round_sd(A, B, C, R)            \
-    (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R)
-
-#define _mm_fnmsub_round_ss(A, B, C, R)            \
-    (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R)
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
-{
-  return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
-{
-  return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
-}
-#else
-#define _mm_comi_round_ss(A, B, C, D)\
-__builtin_ia32_vcomiss(A, B, C, D)
-#define _mm_comi_round_sd(A, B, C, D)\
-__builtin_ia32_vcomisd(A, B, C, D)
-#endif
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sqrt_pd (__m512d __A)
-{
-  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
-						  (__v8df)
-						  _mm512_undefined_pd (),
-						  (__mmask8) -1,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
-{
-  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
-						  (__v8df) __W,
-						  (__mmask8) __U,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
-{
-  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
-						  (__v8df)
-						  _mm512_setzero_pd (),
-						  (__mmask8) __U,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sqrt_ps (__m512 __A)
-{
-  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
-						 (__v16sf)
-						 _mm512_undefined_ps (),
-						 (__mmask16) -1,
-						 _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
-						 (__v16sf) __W,
-						 (__mmask16) __U,
-						 _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
-						 (__v16sf)
-						 _mm512_setzero_ps (),
-						 (__mmask16) __U,
-						 _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_add_pd (__m512d __A, __m512d __B)
-{
-  return (__m512d) ((__v8df)__A + (__v8df)__B);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df) __W,
-						 (__mmask8) __U,
-						 _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df)
-						 _mm512_setzero_pd (),
-						 (__mmask8) __U,
-						 _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_add_ps (__m512 __A, __m512 __B)
-{
-  return (__m512) ((__v16sf)__A + (__v16sf)__B);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf) __W,
-						(__mmask16) __U,
-						_MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_setzero_ps (),
-						(__mmask16) __U,
-						_MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sub_pd (__m512d __A, __m512d __B)
-{
-  return (__m512d) ((__v8df)__A - (__v8df)__B);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df) __W,
-						 (__mmask8) __U,
-						 _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df)
-						 _mm512_setzero_pd (),
-						 (__mmask8) __U,
-						 _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sub_ps (__m512 __A, __m512 __B)
-{
-  return (__m512) ((__v16sf)__A - (__v16sf)__B);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf) __W,
-						(__mmask16) __U,
-						_MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_setzero_ps (),
-						(__mmask16) __U,
-						_MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mul_pd (__m512d __A, __m512d __B)
-{
-  return (__m512d) ((__v8df)__A * (__v8df)__B);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df) __W,
-						 (__mmask8) __U,
-						 _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df)
-						 _mm512_setzero_pd (),
-						 (__mmask8) __U,
-						 _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mul_ps (__m512 __A, __m512 __B)
-{
-  return (__m512) ((__v16sf)__A * (__v16sf)__B);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf) __W,
-						(__mmask16) __U,
-						_MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_setzero_ps (),
-						(__mmask16) __U,
-						_MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_div_pd (__m512d __M, __m512d __V)
-{
-  return (__m512d) ((__v8df)__M / (__v8df)__V);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
-{
-  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
-						 (__v8df) __V,
-						 (__v8df) __W,
-						 (__mmask8) __U,
-						 _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
-{
-  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
-						 (__v8df) __V,
-						 (__v8df)
-						 _mm512_setzero_pd (),
-						 (__mmask8) __U,
-						 _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_div_ps (__m512 __A, __m512 __B)
-{
-  return (__m512) ((__v16sf)__A / (__v16sf)__B);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf) __W,
-						(__mmask16) __U,
-						_MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_setzero_ps (),
-						(__mmask16) __U,
-						_MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_max_pd (__m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df)
-						 _mm512_undefined_pd (),
-						 (__mmask8) -1,
-						 _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df) __W,
-						 (__mmask8) __U,
-						 _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df)
-						 _mm512_setzero_pd (),
-						 (__mmask8) __U,
-						 _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_max_ps (__m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_undefined_ps (),
-						(__mmask16) -1,
-						_MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf) __W,
-						(__mmask16) __U,
-						_MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_setzero_ps (),
-						(__mmask16) __U,
-						_MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_min_pd (__m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df)
-						 _mm512_undefined_pd (),
-						 (__mmask8) -1,
-						 _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df) __W,
-						 (__mmask8) __U,
-						 _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
-						 (__v8df) __B,
-						 (__v8df)
-						 _mm512_setzero_pd (),
-						 (__mmask8) __U,
-						 _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_min_ps (__m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_undefined_ps (),
-						(__mmask16) -1,
-						_MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf) __W,
-						(__mmask16) __U,
-						_MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
-						(__v16sf) __B,
-						(__v16sf)
-						_mm512_setzero_ps (),
-						(__mmask16) __U,
-						_MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_scalef_pd (__m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
-						    (__v8df) __B,
-						    (__v8df)
-						    _mm512_undefined_pd (),
-						    (__mmask8) -1,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
-						    (__v8df) __B,
-						    (__v8df) __W,
-						    (__mmask8) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
-{
-  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
-						    (__v8df) __B,
-						    (__v8df)
-						    _mm512_setzero_pd (),
-						    (__mmask8) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_scalef_ps (__m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
-						   (__v16sf) __B,
-						   (__v16sf)
-						   _mm512_undefined_ps (),
-						   (__mmask16) -1,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
-						   (__v16sf) __B,
-						   (__v16sf) __W,
-						   (__mmask16) __U,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
-{
-  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
-						   (__v16sf) __B,
-						   (__v16sf)
-						   _mm512_setzero_ps (),
-						   (__mmask16) __U,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_scalef_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
-						  (__v2df) __B,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_scalef_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
-						 (__v4sf) __B,
-						 _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-						    (__v8df) __B,
-						    (__v8df) __C,
-						    (__mmask8) -1,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-						    (__v8df) __B,
-						    (__v8df) __C,
-						    (__mmask8) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
-						     (__v8df) __B,
-						     (__v8df) __C,
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
-						     (__v8df) __B,
-						     (__v8df) __C,
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-						   (__v16sf) __B,
-						   (__v16sf) __C,
-						   (__mmask16) -1,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-						   (__v16sf) __B,
-						   (__v16sf) __C,
-						   (__mmask16) __U,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
-						    (__v16sf) __B,
-						    (__v16sf) __C,
-						    (__mmask16) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
-						    (__v16sf) __B,
-						    (__v16sf) __C,
-						    (__mmask16) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-						    (__v8df) __B,
-						    -(__v8df) __C,
-						    (__mmask8) -1,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-						    (__v8df) __B,
-						    -(__v8df) __C,
-						    (__mmask8) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
-{
-  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
-						     (__v8df) __B,
-						     (__v8df) __C,
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
-						     (__v8df) __B,
-						     -(__v8df) __C,
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-						   (__v16sf) __B,
-						   -(__v16sf) __C,
-						   (__mmask16) -1,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-						   (__v16sf) __B,
-						   -(__v16sf) __C,
-						   (__mmask16) __U,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
-{
-  return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
-						    (__v16sf) __B,
-						    (__v16sf) __C,
-						    (__mmask16) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
-						    (__v16sf) __B,
-						    -(__v16sf) __C,
-						    (__mmask16) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
-						       (__v8df) __B,
-						       (__v8df) __C,
-						       (__mmask8) -1,
-						       _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
-						       (__v8df) __B,
-						       (__v8df) __C,
-						       (__mmask8) __U,
-						       _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
-{
-  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
-							(__v8df) __B,
-							(__v8df) __C,
-							(__mmask8) __U,
-							_MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
-							(__v8df) __B,
-							(__v8df) __C,
-							(__mmask8) __U,
-							_MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
-						      (__v16sf) __B,
-						      (__v16sf) __C,
-						      (__mmask16) -1,
-						      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
-						      (__v16sf) __B,
-						      (__v16sf) __C,
-						      (__mmask16) __U,
-						      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
-{
-  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
-						       (__v16sf) __B,
-						       (__v16sf) __C,
-						       (__mmask16) __U,
-						       _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
-						       (__v16sf) __B,
-						       (__v16sf) __C,
-						       (__mmask16) __U,
-						       _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
-						       (__v8df) __B,
-						       -(__v8df) __C,
-						       (__mmask8) -1,
-						       _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
-						       (__v8df) __B,
-						       -(__v8df) __C,
-						       (__mmask8) __U,
-						       _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
-{
-  return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
-							(__v8df) __B,
-							(__v8df) __C,
-							(__mmask8) __U,
-							_MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
-							(__v8df) __B,
-							-(__v8df) __C,
-							(__mmask8) __U,
-							_MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
-						      (__v16sf) __B,
-						      -(__v16sf) __C,
-						      (__mmask16) -1,
-						      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
-						      (__v16sf) __B,
-						      -(__v16sf) __C,
-						      (__mmask16) __U,
-						      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
-{
-  return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
-						       (__v16sf) __B,
-						       (__v16sf) __C,
-						       (__mmask16) __U,
-						       _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
-						       (__v16sf) __B,
-						       -(__v16sf) __C,
-						       (__mmask16) __U,
-						       _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
-						    (__v8df) __B,
-						    (__v8df) __C,
-						    (__mmask8) -1,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
-						     (__v8df) __B,
-						     (__v8df) __C,
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
-						     (__v8df) __B,
-						     (__v8df) __C,
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
-						     (__v8df) __B,
-						     (__v8df) __C,
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
-						   (__v16sf) __B,
-						   (__v16sf) __C,
-						   (__mmask16) -1,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
-						    (__v16sf) __B,
-						    (__v16sf) __C,
-						    (__mmask16) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
-						    (__v16sf) __B,
-						    (__v16sf) __C,
-						    (__mmask16) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
-						    (__v16sf) __B,
-						    (__v16sf) __C,
-						    (__mmask16) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
-						    (__v8df) __B,
-						    -(__v8df) __C,
-						    (__mmask8) -1,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
-						     (__v8df) __B,
-						     (__v8df) __C,
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
-{
-  return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
-						      (__v8df) __B,
-						      (__v8df) __C,
-						      (__mmask8) __U,
-						      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
-						     (__v8df) __B,
-						     -(__v8df) __C,
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
-						   (__v16sf) __B,
-						   -(__v16sf) __C,
-						   (__mmask16) -1,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
-						    (__v16sf) __B,
-						    (__v16sf) __C,
-						    (__mmask16) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
-{
-  return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
-						     (__v16sf) __B,
-						     (__v16sf) __C,
-						     (__mmask16) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
-						    (__v16sf) __B,
-						    -(__v16sf) __C,
-						    (__mmask16) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvttpd_epi32 (__m512d __A)
-{
-  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
-						     (__v8si)
-						     _mm256_undefined_si256 (),
-						     (__mmask8) -1,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
-{
-  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
-						     (__v8si) __W,
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
-{
-  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
-						     (__v8si)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvttpd_epu32 (__m512d __A)
-{
-  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
-						      (__v8si)
-						      _mm256_undefined_si256 (),
-						      (__mmask8) -1,
-						      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
-{
-  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
-						      (__v8si) __W,
-						      (__mmask8) __U,
-						      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
-{
-  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
-						      (__v8si)
-						      _mm256_setzero_si256 (),
-						      (__mmask8) __U,
-						      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtpd_epi32 (__m512d __A)
-{
-  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
-						    (__v8si)
-						    _mm256_undefined_si256 (),
-						    (__mmask8) -1,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
-{
-  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
-						    (__v8si) __W,
-						    (__mmask8) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
-{
-  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
-						    (__v8si)
-						    _mm256_setzero_si256 (),
-						    (__mmask8) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtpd_epu32 (__m512d __A)
-{
-  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
-						     (__v8si)
-						     _mm256_undefined_si256 (),
-						     (__mmask8) -1,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
-{
-  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
-						     (__v8si) __W,
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
-{
-  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
-						     (__v8si)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvttps_epi32 (__m512 __A)
-{
-  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
-						     (__v16si)
-						     _mm512_undefined_si512 (),
-						     (__mmask16) -1,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
-{
-  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
-						     (__v16si) __W,
-						     (__mmask16) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
-{
-  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
-						     (__v16si)
-						     _mm512_setzero_si512 (),
-						     (__mmask16) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvttps_epu32 (__m512 __A)
-{
-  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
-						      (__v16si)
-						      _mm512_undefined_si512 (),
-						      (__mmask16) -1,
-						      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
-{
-  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
-						      (__v16si) __W,
-						      (__mmask16) __U,
-						      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
-{
-  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
-						      (__v16si)
-						      _mm512_setzero_si512 (),
-						      (__mmask16) __U,
-						      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtps_epi32 (__m512 __A)
-{
-  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
-						    (__v16si)
-						    _mm512_undefined_si512 (),
-						    (__mmask16) -1,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
-{
-  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
-						    (__v16si) __W,
-						    (__mmask16) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
-{
-  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
-						    (__v16si)
-						    _mm512_setzero_si512 (),
-						    (__mmask16) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtps_epu32 (__m512 __A)
-{
-  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
-						     (__v16si)
-						     _mm512_undefined_si512 (),
-						     (__mmask16) -1,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
-{
-  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
-						     (__v16si) __W,
-						     (__mmask16) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
-{
-  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
-						     (__v16si)
-						     _mm512_setzero_si512 (),
-						     (__mmask16) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __x86_64__
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
-{
-  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
-					      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
-{
-  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
-					       _MM_FROUND_CUR_DIRECTION);
-}
-#endif
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtu32_ss (__m128 __A, unsigned __B)
-{
-  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
-					      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepi32_ps (__m512i __A)
-{
-  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
-						   (__v16sf)
-						   _mm512_undefined_ps (),
-						   (__mmask16) -1,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
-{
-  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
-						   (__v16sf) __W,
-						   (__mmask16) __U,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
-{
-  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
-						   (__v16sf)
-						   _mm512_setzero_ps (),
-						   (__mmask16) __U,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtepu32_ps (__m512i __A)
-{
-  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
-						    (__v16sf)
-						    _mm512_undefined_ps (),
-						    (__mmask16) -1,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
-{
-  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
-						    (__v16sf) __W,
-						    (__mmask16) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
-{
-  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
-						    (__v16sf)
-						    _mm512_setzero_ps (),
-						    (__mmask16) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
-{
-  return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
-						      (__v8df) __B,
-						      (__v8di) __C,
-						      __imm,
-						      (__mmask8) -1,
-						      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
-			 __m512i __C, const int __imm)
-{
-  return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
-						      (__v8df) __B,
-						      (__v8di) __C,
-						      __imm,
-						      (__mmask8) __U,
-						      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
-			  __m512i __C, const int __imm)
-{
-  return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
-						       (__v8df) __B,
-						       (__v8di) __C,
-						       __imm,
-						       (__mmask8) __U,
-						       _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
-{
-  return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
-						     (__v16sf) __B,
-						     (__v16si) __C,
-						     __imm,
-						     (__mmask16) -1,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
-			 __m512i __C, const int __imm)
-{
-  return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
-						     (__v16sf) __B,
-						     (__v16si) __C,
-						     __imm,
-						     (__mmask16) __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
-			  __m512i __C, const int __imm)
-{
-  return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
-						      (__v16sf) __B,
-						      (__v16si) __C,
-						      __imm,
-						      (__mmask16) __U,
-						      _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
-{
-  return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
-						   (__v2df) __B,
-						   (__v2di) __C, __imm,
-						   (__mmask8) -1,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
-		      __m128i __C, const int __imm)
-{
-  return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
-						   (__v2df) __B,
-						   (__v2di) __C, __imm,
-						   (__mmask8) __U,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
-		       __m128i __C, const int __imm)
-{
-  return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
-						    (__v2df) __B,
-						    (__v2di) __C,
-						    __imm,
-						    (__mmask8) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
-{
-  return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
-						  (__v4sf) __B,
-						  (__v4si) __C, __imm,
-						  (__mmask8) -1,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
-		      __m128i __C, const int __imm)
-{
-  return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
-						  (__v4sf) __B,
-						  (__v4si) __C, __imm,
-						  (__mmask8) __U,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
-		       __m128i __C, const int __imm)
-{
-  return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
-						   (__v4sf) __B,
-						   (__v4si) __C, __imm,
-						   (__mmask8) __U,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-#else
-#define _mm512_fixupimm_pd(X, Y, Z, C)					\
-  ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X),	\
-      (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C),		\
-      (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C)                          \
-  ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X),    \
-      (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C),             \
-      (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C)                         \
-  ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X),   \
-      (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C),             \
-      (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_fixupimm_ps(X, Y, Z, C)					\
-  ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X),	\
-    (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C),		\
-    (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_fixupimm_ps(X, U, Y, Z, C)                          \
-  ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X),     \
-    (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C),              \
-    (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C)                         \
-  ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X),    \
-    (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C),              \
-    (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_fixupimm_sd(X, Y, Z, C)					\
-    ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X),	\
-      (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C),		\
-      (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_mask_fixupimm_sd(X, U, Y, Z, C)				\
-    ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X),	\
-      (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C),		\
-      (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C)				\
-    ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X),	\
-      (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C),		\
-      (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_fixupimm_ss(X, Y, Z, C)					\
-    ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X),	\
-      (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C),		\
-      (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_mask_fixupimm_ss(X, U, Y, Z, C)				\
-    ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X),	\
-      (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C),		\
-      (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C)				\
-    ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X),	\
-      (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C),		\
-      (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
-#endif
-
-#ifdef __x86_64__
-extern __inline unsigned long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtss_u64 (__m128 __A)
-{
-  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
-							   __A,
-							   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline unsigned long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttss_u64 (__m128 __A)
-{
-  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
-							    __A,
-							    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttss_i64 (__m128 __A)
-{
-  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-#endif /* __x86_64__ */
-
-extern __inline unsigned
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtss_u32 (__m128 __A)
-{
-  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
-						 _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline unsigned
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttss_u32 (__m128 __A)
-{
-  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttss_i32 (__m128 __A)
-{
-  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
-					    _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __x86_64__
-extern __inline unsigned long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsd_u64 (__m128d __A)
-{
-  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
-							   __A,
-							   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline unsigned long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttsd_u64 (__m128d __A)
-{
-  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
-							    __A,
-							    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttsd_i64 (__m128d __A)
-{
-  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-#endif /* __x86_64__ */
-
-extern __inline unsigned
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsd_u32 (__m128d __A)
-{
-  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
-						 _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline unsigned
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttsd_u32 (__m128d __A)
-{
-  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttsd_i32 (__m128d __A)
-{
-  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
-					    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtps_pd (__m256 __A)
-{
-  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
-						    (__v8df)
-						    _mm512_undefined_pd (),
-						    (__mmask8) -1,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
-{
-  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
-						    (__v8df) __W,
-						    (__mmask8) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
-{
-  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
-						    (__v8df)
-						    _mm512_setzero_pd (),
-						    (__mmask8) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtph_ps (__m256i __A)
-{
-  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
-						    (__v16sf)
-						    _mm512_undefined_ps (),
-						    (__mmask16) -1,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
-{
-  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
-						    (__v16sf) __W,
-						    (__mmask16) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
-{
-  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
-						    (__v16sf)
-						    _mm512_setzero_ps (),
-						    (__mmask16) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtpd_ps (__m512d __A)
-{
-  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
-						   (__v8sf)
-						   _mm256_undefined_ps (),
-						   (__mmask8) -1,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
-{
-  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
-						   (__v8sf) __W,
-						   (__mmask8) __U,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
-{
-  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
-						   (__v8sf)
-						   _mm256_setzero_ps (),
-						   (__mmask8) __U,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_getexp_ps (__m512 __A)
-{
-  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
-						   (__v16sf)
-						   _mm512_undefined_ps (),
-						   (__mmask16) -1,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
-						   (__v16sf) __W,
-						   (__mmask16) __U,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
-						   (__v16sf)
-						   _mm512_setzero_ps (),
-						   (__mmask16) __U,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_getexp_pd (__m512d __A)
-{
-  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
-						    (__v8df)
-						    _mm512_undefined_pd (),
-						    (__mmask8) -1,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
-{
-  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
-						    (__v8df) __W,
-						    (__mmask8) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
-{
-  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
-						    (__v8df)
-						    _mm512_setzero_pd (),
-						    (__mmask8) __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_getexp_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
-						    (__v4sf) __B,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_getexp_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
-						     (__v2df) __B,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
-		   _MM_MANTISSA_SIGN_ENUM __C)
-{
-  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
-						     (__C << 2) | __B,
-						     _mm512_undefined_pd (),
-						     (__mmask8) -1,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
-			_MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
-{
-  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
-						     (__C << 2) | __B,
-						     (__v8df) __W, __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
-			 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
-{
-  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
-						     (__C << 2) | __B,
-						     (__v8df)
-						     _mm512_setzero_pd (),
-						     __U,
-						     _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
-		   _MM_MANTISSA_SIGN_ENUM __C)
-{
-  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
-						    (__C << 2) | __B,
-						    _mm512_undefined_ps (),
-						    (__mmask16) -1,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
-			_MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
-{
-  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
-						    (__C << 2) | __B,
-						    (__v16sf) __W, __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
-			 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
-{
-  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
-						    (__C << 2) | __B,
-						    (__v16sf)
-						    _mm512_setzero_ps (),
-						    __U,
-						    _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
-		_MM_MANTISSA_SIGN_ENUM __D)
-{
-  return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
-						   (__v2df) __B,
-						   (__D << 2) | __C,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
-		_MM_MANTISSA_SIGN_ENUM __D)
-{
-  return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
-						  (__v4sf) __B,
-						  (__D << 2) | __C,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-#else
-#define _mm512_getmant_pd(X, B, C)                                                  \
-  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),                 \
-                                              (int)(((C)<<2) | (B)),                \
-                                              (__v8df)_mm512_undefined_pd(),        \
-                                              (__mmask8)-1,\
-					      _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_getmant_pd(W, U, X, B, C)                                       \
-  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),                 \
-                                              (int)(((C)<<2) | (B)),                \
-                                              (__v8df)(__m512d)(W),                 \
-                                              (__mmask8)(U),\
-					      _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_maskz_getmant_pd(U, X, B, C)                                         \
-  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),                 \
-                                              (int)(((C)<<2) | (B)),                \
-                                              (__v8df)_mm512_setzero_pd(),          \
-                                              (__mmask8)(U),\
-					      _MM_FROUND_CUR_DIRECTION))
-#define _mm512_getmant_ps(X, B, C)                                                  \
-  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),                  \
-                                             (int)(((C)<<2) | (B)),                 \
-                                             (__v16sf)_mm512_undefined_ps(),        \
-                                             (__mmask16)-1,\
-					     _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_getmant_ps(W, U, X, B, C)                                       \
-  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),                  \
-                                             (int)(((C)<<2) | (B)),                 \
-                                             (__v16sf)(__m512)(W),                  \
-                                             (__mmask16)(U),\
-					     _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_maskz_getmant_ps(U, X, B, C)                                         \
-  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),                  \
-                                             (int)(((C)<<2) | (B)),                 \
-                                             (__v16sf)_mm512_setzero_ps(),          \
-                                             (__mmask16)(U),\
-					     _MM_FROUND_CUR_DIRECTION))
-#define _mm_getmant_sd(X, Y, C, D)                                                  \
-  ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X),                    \
-                                           (__v2df)(__m128d)(Y),                    \
-                                           (int)(((D)<<2) | (C)),                   \
-					   _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_getmant_ss(X, Y, C, D)                                                  \
-  ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X),                      \
-                                          (__v4sf)(__m128)(Y),                      \
-                                          (int)(((D)<<2) | (C)),                    \
-					  _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_getexp_ss(A, B)						      \
-  ((__m128)__builtin_ia32_getexpss128_mask((__v4sf)(__m128)(A), (__v4sf)(__m128)(B),  \
-					   _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_getexp_sd(A, B)						       \
-  ((__m128d)__builtin_ia32_getexpsd128_mask((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
-					    _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_getexp_ps(A)						\
-  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),		\
-  (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_getexp_ps(W, U, A)					\
-  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),		\
-  (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_maskz_getexp_ps(U, A)					\
-  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),		\
-  (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_getexp_pd(A)						\
-  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),		\
-  (__v8df)_mm512_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_getexp_pd(W, U, A)					\
-  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),		\
-  (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_maskz_getexp_pd(U, A)					\
-  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),		\
-  (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_roundscale_ps (__m512 __A, const int __imm)
-{
-  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
-						  (__v16sf)
-						  _mm512_undefined_ps (),
-						  -1,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
-			   const int __imm)
-{
-  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
-						  (__v16sf) __A,
-						  (__mmask16) __B,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
-{
-  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
-						  __imm,
-						  (__v16sf)
-						  _mm512_setzero_ps (),
-						  (__mmask16) __A,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_roundscale_pd (__m512d __A, const int __imm)
-{
-  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
-						   (__v8df)
-						   _mm512_undefined_pd (),
-						   -1,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
-			   const int __imm)
-{
-  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
-						   (__v8df) __A,
-						   (__mmask8) __B,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
-{
-  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
-						   __imm,
-						   (__v8df)
-						   _mm512_setzero_pd (),
-						   (__mmask8) __A,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
-{
-  return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
-						   (__v4sf) __B, __imm,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
-{
-  return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
-						    (__v2df) __B, __imm,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-#else
-#define _mm512_roundscale_ps(A, B) \
-  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
-    (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
-#define _mm512_mask_roundscale_ps(A, B, C, D)				\
-  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C),	\
-					    (int)(D),			\
-					    (__v16sf)(__m512)(A),	\
-					    (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))
-#define _mm512_maskz_roundscale_ps(A, B, C)				\
-  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B),	\
-					    (int)(C),			\
-					    (__v16sf)_mm512_setzero_ps(),\
-					    (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
-#define _mm512_roundscale_pd(A, B) \
-  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
-    (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
-#define _mm512_mask_roundscale_pd(A, B, C, D)				\
-  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C),	\
-					     (int)(D),			\
-					     (__v8df)(__m512d)(A),	\
-					     (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))
-#define _mm512_maskz_roundscale_pd(A, B, C)				\
-  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B),	\
-					     (int)(C),			\
-					     (__v8df)_mm512_setzero_pd(),\
-					     (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
-#define _mm_roundscale_ss(A, B, C)					\
-  ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A),	\
-  (__v4sf)(__m128)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
-#define _mm_roundscale_sd(A, B, C)					\
-  ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A),	\
-    (__v2df)(__m128d)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
-						  (__v8df) __Y, __P,
-						  (__mmask8) -1,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
-{
-  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
-						   (__v16sf) __Y, __P,
-						   (__mmask16) -1,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
-{
-  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
-						   (__v16sf) __Y, __P,
-						   (__mmask16) __U,
-						   _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
-						  (__v8df) __Y, __P,
-						  (__mmask8) __U,
-						  _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
-					       (__v2df) __Y, __P,
-					       (__mmask8) -1,
-					       _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
-					       (__v2df) __Y, __P,
-					       (__mmask8) __M,
-					       _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
-					       (__v4sf) __Y, __P,
-					       (__mmask8) -1,
-					       _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
-					       (__v4sf) __Y, __P,
-					       (__mmask8) __M,
-					       _MM_FROUND_CUR_DIRECTION);
-}
-
-#else
-#define _mm512_cmp_pd_mask(X, Y, P)					\
-  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),	\
-					    (__v8df)(__m512d)(Y), (int)(P),\
-					    (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_cmp_ps_mask(X, Y, P)					\
-  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),	\
-					     (__v16sf)(__m512)(Y), (int)(P),\
-					     (__mmask16)-1,_MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_cmp_pd_mask(M, X, Y, P)					\
-  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),	\
-					    (__v8df)(__m512d)(Y), (int)(P),\
-					    (__mmask8)M, _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_cmp_ps_mask(M, X, Y, P)					\
-  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),	\
-					     (__v16sf)(__m512)(Y), (int)(P),\
-					     (__mmask16)M,_MM_FROUND_CUR_DIRECTION))
-
-#define _mm_cmp_sd_mask(X, Y, P)					\
-  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),		\
-					 (__v2df)(__m128d)(Y), (int)(P),\
-					 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
-
-#define _mm_mask_cmp_sd_mask(M, X, Y, P)					\
-  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),		\
-					 (__v2df)(__m128d)(Y), (int)(P),\
-					 M,_MM_FROUND_CUR_DIRECTION))
-
-#define _mm_cmp_ss_mask(X, Y, P)					\
-  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),		\
-					 (__v4sf)(__m128)(Y), (int)(P), \
-					 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
-
-#define _mm_mask_cmp_ss_mask(M, X, Y, P)					\
-  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),		\
-					 (__v4sf)(__m128)(Y), (int)(P), \
-					 M,_MM_FROUND_CUR_DIRECTION))
-#endif
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_kmov (__mmask16 __A)
-{
-  return __builtin_ia32_kmov16 (__A);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castpd_ps (__m512d __A)
-{
-  return (__m512) (__A);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castpd_si512 (__m512d __A)
-{
-  return (__m512i) (__A);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castps_pd (__m512 __A)
-{
-  return (__m512d) (__A);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castps_si512 (__m512 __A)
-{
-  return (__m512i) (__A);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castsi512_ps (__m512i __A)
-{
-  return (__m512) (__A);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castsi512_pd (__m512i __A)
-{
-  return (__m512d) (__A);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castpd512_pd128 (__m512d __A)
-{
-  return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castps512_ps128 (__m512 __A)
-{
-  return _mm512_extractf32x4_ps(__A, 0);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castsi512_si128 (__m512i __A)
-{
-  return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castpd512_pd256 (__m512d __A)
-{
-  return _mm512_extractf64x4_pd(__A, 0);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castps512_ps256 (__m512 __A)
-{
-  return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castsi512_si256 (__m512i __A)
-{
-  return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castpd128_pd512 (__m128d __A)
-{
-  return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castps128_ps512 (__m128 __A)
-{
-  return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castsi128_si512 (__m128i __A)
-{
-  return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castpd256_pd512 (__m256d __A)
-{
-  return __builtin_ia32_pd512_256pd (__A);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castps256_ps512 (__m256 __A)
-{
-  return __builtin_ia32_ps512_256ps (__A);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castsi256_si512 (__m256i __A)
-{
-  return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
-						     (__v16si) __B, 0,
-						     (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
-						     (__v16si) __B, 0, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
-						    (__v8di) __B, 0, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
-						    (__v8di) __B, 0,
-						    (__mmask8) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
-						     (__v16si) __B, 6,
-						     (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
-						     (__v16si) __B, 6,  __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
-						    (__v8di) __B, 6, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
-						    (__v8di) __B, 6,
-						    (__mmask8) -1);
-}
-
-#ifdef __DISABLE_AVX512F__
-#undef __DISABLE_AVX512F__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512F__ */
-
-#endif /* _AVX512FINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512ifmaintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512ifmaintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,104 +1,0 @@
-/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <avx512ifmaintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX512IFMAINTRIN_H_INCLUDED
-#define _AVX512IFMAINTRIN_H_INCLUDED
-
-#ifndef __AVX512IFMA__
-#pragma GCC push_options
-#pragma GCC target("avx512ifma")
-#define __DISABLE_AVX512IFMA__
-#endif /* __AVX512IFMA__ */
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
-{
-  return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __X,
-						       (__v8di) __Y,
-						       (__v8di) __Z,
-						       (__mmask8) - 1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
-{
-  return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __X,
-						       (__v8di) __Y,
-						       (__v8di) __Z,
-						       (__mmask8) - 1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X,
-			    __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __W,
-						       (__v8di) __X,
-						       (__v8di) __Y,
-						       (__mmask8) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X,
-			    __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __W,
-						       (__v8di) __X,
-						       (__v8di) __Y,
-						       (__mmask8) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_madd52lo_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
-{
-  return (__m512i) __builtin_ia32_vpmadd52luq512_maskz ((__v8di) __X,
-							(__v8di) __Y,
-							(__v8di) __Z,
-							(__mmask8) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
-{
-  return (__m512i) __builtin_ia32_vpmadd52huq512_maskz ((__v8di) __X,
-							(__v8di) __Y,
-							(__v8di) __Z,
-							(__mmask8) __M);
-}
-
-#ifdef __DISABLE_AVX512IFMA__
-#undef __DISABLE_AVX512IFMA__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512IFMA__ */
-
-#endif /* _AVX512IFMAINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512ifmavlintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512ifmavlintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,164 +1,0 @@
-/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX512IFMAVLINTRIN_H_INCLUDED
-#define _AVX512IFMAVLINTRIN_H_INCLUDED
-
-#if !defined(__AVX512VL__) || !defined(__AVX512IFMA__)
-#pragma GCC push_options
-#pragma GCC target("avx512ifma,avx512vl")
-#define __DISABLE_AVX512IFMAVL__
-#endif /* __AVX512IFMAVL__ */
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
-{
-  return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __X,
-						       (__v2di) __Y,
-						       (__v2di) __Z,
-						       (__mmask8) - 1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
-{
-  return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __X,
-						       (__v2di) __Y,
-						       (__v2di) __Z,
-						       (__mmask8) - 1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
-{
-  return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __X,
-						       (__v4di) __Y,
-						       (__v4di) __Z,
-						       (__mmask8) - 1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
-{
-  return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __X,
-						       (__v4di) __Y,
-						       (__v4di) __Z,
-						       (__mmask8) - 1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __W,
-						       (__v2di) __X,
-						       (__v2di) __Y,
-						       (__mmask8) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __W,
-						       (__v2di) __X,
-						       (__v2di) __Y,
-						       (__mmask8) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X,
-			    __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __W,
-						       (__v4di) __X,
-						       (__v4di) __Y,
-						       (__mmask8) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X,
-			    __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __W,
-						       (__v4di) __X,
-						       (__v4di) __Y,
-						       (__mmask8) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
-{
-  return (__m128i) __builtin_ia32_vpmadd52luq128_maskz ((__v2di) __X,
-							(__v2di) __Y,
-							(__v2di) __Z,
-							(__mmask8) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
-{
-  return (__m128i) __builtin_ia32_vpmadd52huq128_maskz ((__v2di) __X,
-							(__v2di) __Y,
-							(__v2di) __Z,
-							(__mmask8) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
-{
-  return (__m256i) __builtin_ia32_vpmadd52luq256_maskz ((__v4di) __X,
-							(__v4di) __Y,
-							(__v4di) __Z,
-							(__mmask8) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
-{
-  return (__m256i) __builtin_ia32_vpmadd52huq256_maskz ((__v4di) __X,
-							(__v4di) __Y,
-							(__v4di) __Z,
-							(__mmask8) __M);
-}
-
-#ifdef __DISABLE_AVX512IFMAVL__
-#undef __DISABLE_AVX512IFMAVL__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512IFMAVL__ */
-
-#endif /* _AVX512IFMAVLINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512pfintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512pfintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,212 +1,0 @@
-/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <avx512pfintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX512PFINTRIN_H_INCLUDED
-#define _AVX512PFINTRIN_H_INCLUDED
-
-#ifndef __AVX512PF__
-#pragma GCC push_options
-#pragma GCC target("avx512pf")
-#define __DISABLE_AVX512PF__
-#endif /* __AVX512PF__ */
-
-/* Internal data types for implementing the intrinsics.  */
-typedef long long __v8di __attribute__ ((__vector_size__ (64)));
-typedef int __v16si __attribute__ ((__vector_size__ (64)));
-
-/* The Intel API is flexible enough that we must allow aliasing with other
-   vector types, and their scalar components.  */
-typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
-
-typedef unsigned char  __mmask8;
-typedef unsigned short __mmask16;
-
-#ifdef __OPTIMIZE__
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_prefetch_i32gather_pd (__m256i index, __mmask8 mask,
-				   void *addr, int scale, int hint)
-{
-  __builtin_ia32_gatherpfdpd (mask, (__v8si) index, (long long const *) addr,
-			      scale, hint);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_prefetch_i32gather_ps (__m512i index, __mmask16 mask,
-				   void *addr, int scale, int hint)
-{
-  __builtin_ia32_gatherpfdps (mask, (__v16si) index, (int const *) addr,
-			      scale, hint);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_prefetch_i64gather_pd (__m512i index, __mmask8 mask,
-				   void *addr, int scale, int hint)
-{
-  __builtin_ia32_gatherpfqpd (mask, (__v8di) index, (long long const *) addr,
-			      scale, hint);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_prefetch_i64gather_ps (__m512i index, __mmask8 mask,
-				   void *addr, int scale, int hint)
-{
-  __builtin_ia32_gatherpfqps (mask, (__v8di) index, (int const *) addr,
-			      scale, hint);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_prefetch_i32scatter_pd (void *addr, __m256i index, int scale,
-			       int hint)
-{
-  __builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, (__v8si) index, 
-			       (long long const *)addr, scale, hint);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_prefetch_i32scatter_ps (void *addr, __m512i index, int scale,
-			       int hint)
-{
-  __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) index, (int const *) addr,
-			       scale, hint);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_prefetch_i32scatter_pd (void *addr, __mmask8 mask,
-				    __m256i index, int scale, int hint)
-{
-  __builtin_ia32_scatterpfdpd (mask, (__v8si) index, (long long const *) addr,
-			       scale, hint);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_prefetch_i32scatter_ps (void *addr, __mmask16 mask,
-				    __m512i index, int scale, int hint)
-{
-  __builtin_ia32_scatterpfdps (mask, (__v16si) index, (int const *) addr,
-			       scale, hint);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_prefetch_i64scatter_pd (void *addr, __m512i index, int scale,
-			       int hint)
-{
-  __builtin_ia32_scatterpfqpd ((__mmask8) 0xFF, (__v8di) index, (long long const *) addr,
-			       scale, hint);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_prefetch_i64scatter_ps (void *addr, __m512i index, int scale,
-			       int hint)
-{
-  __builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) index, (int const *) addr,
-			       scale, hint);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_prefetch_i64scatter_pd (void *addr, __mmask16 mask,
-				    __m512i index, int scale, int hint)
-{
-  __builtin_ia32_scatterpfqpd (mask, (__v8di) index, (long long const *) addr,
-			       scale, hint);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_prefetch_i64scatter_ps (void *addr, __mmask16 mask,
-				    __m512i index, int scale, int hint)
-{
-  __builtin_ia32_scatterpfqps (mask, (__v8di) index, (int const *) addr,
-			       scale, hint);
-}
-
-#else
-#define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT)    \
-  __builtin_ia32_gatherpfdpd ((__mmask8)MASK, (__v8si)(__m256i)INDEX,	     \
-			      (long long const *)ADDR, (int)SCALE, (int)HINT)
-
-#define _mm512_mask_prefetch_i32gather_ps(INDEX, MASK, ADDR, SCALE, HINT)    \
-  __builtin_ia32_gatherpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX,      \
-			      (int const *)ADDR, (int)SCALE, (int)HINT)
-
-#define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT)    \
-  __builtin_ia32_gatherpfqpd ((__mmask8)MASK, (__v8di)(__m512i)INDEX,	     \
-			      (long long const *)ADDR, (int)SCALE, (int)HINT)
-
-#define _mm512_mask_prefetch_i64gather_ps(INDEX, MASK, ADDR, SCALE, HINT)    \
-  __builtin_ia32_gatherpfqps ((__mmask8)MASK, (__v8di)(__m512i)INDEX,	     \
-			      (int const *)ADDR, (int)SCALE, (int)HINT)
-
-#define _mm512_prefetch_i32scatter_pd(ADDR, INDEX, SCALE, HINT)              \
-  __builtin_ia32_scatterpfdpd ((__mmask8)0xFF, (__v8si)(__m256i)INDEX,       \
-			       (long long const *)ADDR, (int)SCALE, (int)HINT)
-
-#define _mm512_prefetch_i32scatter_ps(ADDR, INDEX, SCALE, HINT)              \
-  __builtin_ia32_scatterpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX,   \
-			       (int const *)ADDR, (int)SCALE, (int)HINT)
-
-#define _mm512_mask_prefetch_i32scatter_pd(ADDR, MASK, INDEX, SCALE, HINT)   \
-  __builtin_ia32_scatterpfdpd ((__mmask8)MASK, (__v8si)(__m256i)INDEX,       \
-			       (long long const *)ADDR, (int)SCALE, (int)HINT)
-
-#define _mm512_mask_prefetch_i32scatter_ps(ADDR, MASK, INDEX, SCALE, HINT)   \
-  __builtin_ia32_scatterpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX,     \
-			       (int const *)ADDR, (int)SCALE, (int)HINT)
-
-#define _mm512_prefetch_i64scatter_pd(ADDR, INDEX, SCALE, HINT)              \
-  __builtin_ia32_scatterpfqpd ((__mmask8)0xFF, (__v8di)(__m512i)INDEX,	     \
-			       (long long const *)ADDR, (int)SCALE, (int)HINT)
-
-#define _mm512_prefetch_i64scatter_ps(ADDR, INDEX, SCALE, HINT)              \
-  __builtin_ia32_scatterpfqps ((__mmask8)0xFF, (__v8di)(__m512i)INDEX,	     \
-			       (int const *)ADDR, (int)SCALE, (int)HINT)
-
-#define _mm512_mask_prefetch_i64scatter_pd(ADDR, MASK, INDEX, SCALE, HINT)   \
-  __builtin_ia32_scatterpfqpd ((__mmask8)MASK, (__v8di)(__m512i)INDEX,	     \
-			       (long long const *)ADDR, (int)SCALE, (int)HINT)
-
-#define _mm512_mask_prefetch_i64scatter_ps(ADDR, MASK, INDEX, SCALE, HINT)   \
-  __builtin_ia32_scatterpfqps ((__mmask8)MASK, (__v8di)(__m512i)INDEX,	     \
-			       (int const *)ADDR, (int)SCALE, (int)HINT)
-#endif
-
-#ifdef __DISABLE_AVX512PF__
-#undef __DISABLE_AVX512PF__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512PF__ */
-
-#endif /* _AVX512PFINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512vbmiintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512vbmiintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,159 +1,0 @@
-/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <avx512vbmiintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX512VBMIINTRIN_H_INCLUDED
-#define _AVX512VBMIINTRIN_H_INCLUDED
-
-#ifndef __AVX512VBMI__
-#pragma GCC push_options
-#pragma GCC target("avx512vbmi")
-#define __DISABLE_AVX512VBMI__
-#endif /* __AVX512VBMI__ */
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_multishift_epi64_epi8 (__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
-							  (__v64qi) __Y,
-							  (__v64qi) __W,
-							  (__mmask64) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_multishift_epi64_epi8 (__mmask64 __M, __m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
-							  (__v64qi) __Y,
-							  (__v64qi)
-							  _mm512_setzero_si512 (),
-							  (__mmask64) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_multishift_epi64_epi8 (__m512i __X, __m512i __Y)
-{
-  return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
-							  (__v64qi) __Y,
-							  (__v64qi)
-							  _mm512_undefined_si512 (),
-							  (__mmask64) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_permutexvar_epi8 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
-						     (__v64qi) __A,
-						     (__v64qi)
-						     _mm512_undefined_si512 (),
-						     (__mmask64) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_permutexvar_epi8 (__mmask64 __M, __m512i __A,
-				__m512i __B)
-{
-  return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
-						     (__v64qi) __A,
-						     (__v64qi)
-						     _mm512_setzero_si512(),
-						     (__mmask64) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
-			       __m512i __B)
-{
-  return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
-						     (__v64qi) __A,
-						     (__v64qi) __W,
-						     (__mmask64) __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_permutex2var_epi8 (__m512i __A, __m512i __I, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I
-							/* idx */ ,
-							(__v64qi) __A,
-							(__v64qi) __B,
-							(__mmask64) -
-							1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_permutex2var_epi8 (__m512i __A, __mmask64 __U,
-				__m512i __I, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I
-							/* idx */ ,
-							(__v64qi) __A,
-							(__v64qi) __B,
-							(__mmask64)
-							__U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask2_permutex2var_epi8 (__m512i __A, __m512i __I,
-				 __mmask64 __U, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_vpermi2varqi512_mask ((__v64qi) __A,
-							(__v64qi) __I
-							/* idx */ ,
-							(__v64qi) __B,
-							(__mmask64)
-							__U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_permutex2var_epi8 (__mmask64 __U, __m512i __A,
-				 __m512i __I, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_vpermt2varqi512_maskz ((__v64qi) __I
-							 /* idx */ ,
-							 (__v64qi) __A,
-							 (__v64qi) __B,
-							 (__mmask64)
-							 __U);
-}
-
-#ifdef __DISABLE_AVX512VBMI__
-#undef __DISABLE_AVX512VBMI__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512VBMI__ */
-
-#endif /* _AVX512VBMIINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512vbmivlintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512vbmivlintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,275 +1,0 @@
-/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX512VBMIVLINTRIN_H_INCLUDED
-#define _AVX512VBMIVLINTRIN_H_INCLUDED
-
-#if !defined(__AVX512VL__) || !defined(__AVX512VBMI__)
-#pragma GCC push_options
-#pragma GCC target("avx512vbmi,avx512vl")
-#define __DISABLE_AVX512VBMIVL__
-#endif /* __AVX512VBMIVL__ */
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
-							  (__v32qi) __Y,
-							  (__v32qi) __W,
-							  (__mmask32) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
-							  (__v32qi) __Y,
-							  (__v32qi)
-							  _mm256_setzero_si256 (),
-							  (__mmask32) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
-							  (__v32qi) __Y,
-							  (__v32qi)
-							  _mm256_undefined_si256 (),
-							  (__mmask32) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
-							  (__v16qi) __Y,
-							  (__v16qi) __W,
-							  (__mmask16) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
-							  (__v16qi) __Y,
-							  (__v16qi)
-							  _mm_setzero_si128 (),
-							  (__mmask16) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
-							  (__v16qi) __Y,
-							  (__v16qi)
-							  _mm_undefined_si128 (),
-							  (__mmask16) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
-						     (__v32qi) __A,
-						     (__v32qi)
-						     _mm256_undefined_si256 (),
-						     (__mmask32) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
-				__m256i __B)
-{
-  return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
-						     (__v32qi) __A,
-						     (__v32qi)
-						     _mm256_setzero_si256 (),
-						     (__mmask32) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
-			       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
-						     (__v32qi) __A,
-						     (__v32qi) __W,
-						     (__mmask32) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_permutexvar_epi8 (__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
-						     (__v16qi) __A,
-						     (__v16qi)
-						     _mm_undefined_si128 (),
-						     (__mmask16) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
-						     (__v16qi) __A,
-						     (__v16qi)
-						     _mm_setzero_si128 (),
-						     (__mmask16) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
-			    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
-						     (__v16qi) __A,
-						     (__v16qi) __W,
-						     (__mmask16) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permutex2var_epi8 (__m256i __A, __m256i __I, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
-							/* idx */ ,
-							(__v32qi) __A,
-							(__v32qi) __B,
-							(__mmask32) -
-							1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_permutex2var_epi8 (__m256i __A, __mmask32 __U,
-				__m256i __I, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
-							/* idx */ ,
-							(__v32qi) __A,
-							(__v32qi) __B,
-							(__mmask32)
-							__U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask2_permutex2var_epi8 (__m256i __A, __m256i __I,
-				 __mmask32 __U, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_vpermi2varqi256_mask ((__v32qi) __A,
-							(__v32qi) __I
-							/* idx */ ,
-							(__v32qi) __B,
-							(__mmask32)
-							__U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_permutex2var_epi8 (__mmask32 __U, __m256i __A,
-				 __m256i __I, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_vpermt2varqi256_maskz ((__v32qi) __I
-							 /* idx */ ,
-							 (__v32qi) __A,
-							 (__v32qi) __B,
-							 (__mmask32)
-							 __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_permutex2var_epi8 (__m128i __A, __m128i __I, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
-							/* idx */ ,
-							(__v16qi) __A,
-							(__v16qi) __B,
-							(__mmask16) -
-							1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_permutex2var_epi8 (__m128i __A, __mmask16 __U, __m128i __I,
-			     __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
-							/* idx */ ,
-							(__v16qi) __A,
-							(__v16qi) __B,
-							(__mmask16)
-							__U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask2_permutex2var_epi8 (__m128i __A, __m128i __I, __mmask16 __U,
-			      __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpermi2varqi128_mask ((__v16qi) __A,
-							(__v16qi) __I
-							/* idx */ ,
-							(__v16qi) __B,
-							(__mmask16)
-							__U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_permutex2var_epi8 (__mmask16 __U, __m128i __A, __m128i __I,
-			      __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpermt2varqi128_maskz ((__v16qi) __I
-							 /* idx */ ,
-							 (__v16qi) __A,
-							 (__v16qi) __B,
-							 (__mmask16)
-							 __U);
-}
-
-#ifdef __DISABLE_AVX512VBMIVL__
-#undef __DISABLE_AVX512VBMIVL__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512VBMIVL__ */
-
-#endif /* _AVX512VBMIVLINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512vlbwintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512vlbwintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,4653 +1,0 @@
-/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX512VLBWINTRIN_H_INCLUDED
-#define _AVX512VLBWINTRIN_H_INCLUDED
-
-#if !defined(__AVX512VL__) || !defined(__AVX512BW__)
-#pragma GCC push_options
-#pragma GCC target("avx512vl,avx512bw")
-#define __DISABLE_AVX512VLBW__
-#endif /* __AVX512VLBW__ */
-
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_movdquqi256_mask ((__v32qi) __A,
-						    (__v32qi) __W,
-						    (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_movdquqi256_mask ((__v32qi) __A,
-						    (__v32qi)
-						    _mm256_setzero_si256 (),
-						    (__mmask32) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_movdquqi128_mask ((__v16qi) __A,
-						    (__v16qi) __W,
-						    (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_movdquqi128_mask ((__v16qi) __A,
-						    (__v16qi)
-						    _mm_setzero_hi (),
-						    (__mmask16) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
-{
-  __builtin_ia32_storedquqi256_mask ((__v32qi *) __P,
-				     (__v32qi) __A,
-				     (__mmask32) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
-{
-  __builtin_ia32_storedquqi128_mask ((__v16qi *) __P,
-				     (__v16qi) __A,
-				     (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
-{
-  return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
-						     (__v16hi) __W,
-						     (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P)
-{
-  return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
-						     (__v16hi)
-						     _mm256_setzero_si256 (),
-						     (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
-{
-  return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
-						     (__v8hi) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P)
-{
-  return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
-						     (__v8hi)
-						     _mm_setzero_hi (),
-						     (__mmask8) __U);
-}
-
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_movdquhi256_mask ((__v16hi) __A,
-						    (__v16hi) __W,
-						    (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_movdquhi256_mask ((__v16hi) __A,
-						    (__v16hi)
-						    _mm256_setzero_si256 (),
-						    (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_movdquhi128_mask ((__v8hi) __A,
-						    (__v8hi) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_movdquhi128_mask ((__v8hi) __A,
-						    (__v8hi)
-						    _mm_setzero_hi (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
-{
-  return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
-						     (__v32qi) __W,
-						     (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
-{
-  return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
-						     (__v32qi)
-						     _mm256_setzero_si256 (),
-						     (__mmask32) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
-{
-  return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
-						     (__v16qi) __W,
-						     (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
-{
-  return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
-						     (__v16qi)
-						     _mm_setzero_hi (),
-						     (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepi16_epi8 (__m256i __A)
-{
-
-  return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
-						  (__v16qi)_mm_undefined_si128(),
-						  (__mmask16) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
-						  (__v16qi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
-						  (__v16qi)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsepi16_epi8 (__m128i __A)
-{
-
-  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
-						   (__v16qi)_mm_undefined_si128(),
-						   (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
-						   (__v16qi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtsepi16_epi8 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
-						   (__v16qi)
-						   _mm_setzero_si128 (),
-						   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtsepi16_epi8 (__m256i __A)
-{
-
-  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
-						   (__v16qi)_mm_undefined_si128(),
-						   (__mmask16) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
-						   (__v16qi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtsepi16_epi8 (__mmask16 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
-						   (__v16qi)
-						   _mm_setzero_si128 (),
-						   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtusepi16_epi8 (__m128i __A)
-{
-
-  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
-						    (__v16qi)_mm_undefined_si128(),
-						    (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
-						    (__v16qi) __O,
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtusepi16_epi8 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
-						    (__v16qi)
-						    _mm_setzero_si128 (),
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtusepi16_epi8 (__m256i __A)
-{
-
-  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
-						    (__v16qi)_mm_undefined_si128(),
-						    (__mmask16) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
-						    (__v16qi) __O,
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
-						    (__v16qi)
-						    _mm_setzero_si128 (),
-						    __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pbroadcastb256_mask ((__v16qi) __A,
-						       (__v32qi) __O,
-						       __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pbroadcastb256_mask ((__v16qi) __A,
-						       (__v32qi)
-						       _mm256_setzero_si256 (),
-						       __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A)
-{
-  return (__m256i) __builtin_ia32_pbroadcastb256_gpr_mask (__A,
-							   (__v32qi) __O,
-							   __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_set1_epi8 (__mmask32 __M, char __A)
-{
-  return (__m256i) __builtin_ia32_pbroadcastb256_gpr_mask (__A,
-							   (__v32qi)
-							   _mm256_setzero_si256 (),
-							   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pbroadcastb128_mask ((__v16qi) __A,
-						       (__v16qi) __O,
-						       __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pbroadcastb128_mask ((__v16qi) __A,
-						       (__v16qi)
-						       _mm_setzero_si128 (),
-						       __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A)
-{
-  return (__m128i) __builtin_ia32_pbroadcastb128_gpr_mask (__A,
-							   (__v16qi) __O,
-							   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_set1_epi8 (__mmask16 __M, char __A)
-{
-  return (__m128i) __builtin_ia32_pbroadcastb128_gpr_mask (__A,
-							   (__v16qi)
-							   _mm_setzero_si128 (),
-							   __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pbroadcastw256_mask ((__v8hi) __A,
-						       (__v16hi) __O,
-						       __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pbroadcastw256_mask ((__v8hi) __A,
-						       (__v16hi)
-						       _mm256_setzero_si256 (),
-						       __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A)
-{
-  return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A,
-							   (__v16hi) __O,
-							   __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_set1_epi16 (__mmask16 __M, short __A)
-{
-  return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A,
-							   (__v16hi)
-							   _mm256_setzero_si256 (),
-							   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pbroadcastw128_mask ((__v8hi) __A,
-						       (__v8hi) __O,
-						       __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pbroadcastw128_mask ((__v8hi) __A,
-						       (__v8hi)
-						       _mm_setzero_si128 (),
-						       __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A)
-{
-  return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A,
-							   (__v8hi) __O,
-							   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_set1_epi16 (__mmask8 __M, short __A)
-{
-  return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A,
-							   (__v8hi)
-							   _mm_setzero_si128 (),
-							   __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permutexvar_epi16 (__m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
-						     (__v16hi) __A,
-						     (__v16hi)
-						     _mm256_setzero_si256 (),
-						     (__mmask16) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A,
-				__m256i __B)
-{
-  return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
-						     (__v16hi) __A,
-						     (__v16hi)
-						     _mm256_setzero_si256 (),
-						     (__mmask16) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
-			       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
-						     (__v16hi) __A,
-						     (__v16hi) __W,
-						     (__mmask16) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_permutexvar_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
-						     (__v8hi) __A,
-						     (__v8hi)
-						     _mm_setzero_hi (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
-						     (__v8hi) __A,
-						     (__v8hi)
-						     _mm_setzero_si128 (),
-						     (__mmask8) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
-			    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
-						     (__v8hi) __A,
-						     (__v8hi) __W,
-						     (__mmask8) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permutex2var_epi16 (__m256i __A, __m256i __I, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I
-							/* idx */ ,
-							(__v16hi) __A,
-							(__v16hi) __B,
-							(__mmask16) -
-							1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_permutex2var_epi16 (__m256i __A, __mmask16 __U,
-				__m256i __I, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I
-							/* idx */ ,
-							(__v16hi) __A,
-							(__v16hi) __B,
-							(__mmask16)
-							__U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask2_permutex2var_epi16 (__m256i __A, __m256i __I,
-				 __mmask16 __U, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_vpermi2varhi256_mask ((__v16hi) __A,
-							(__v16hi) __I
-							/* idx */ ,
-							(__v16hi) __B,
-							(__mmask16)
-							__U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A,
-				 __m256i __I, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_vpermt2varhi256_maskz ((__v16hi) __I
-							 /* idx */ ,
-							 (__v16hi) __A,
-							 (__v16hi) __B,
-							 (__mmask16)
-							 __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_permutex2var_epi16 (__m128i __A, __m128i __I, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I
-							/* idx */ ,
-							(__v8hi) __A,
-							(__v8hi) __B,
-							(__mmask8) -
-							1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_permutex2var_epi16 (__m128i __A, __mmask8 __U, __m128i __I,
-			     __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I
-							/* idx */ ,
-							(__v8hi) __A,
-							(__v8hi) __B,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask2_permutex2var_epi16 (__m128i __A, __m128i __I, __mmask8 __U,
-			      __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpermi2varhi128_mask ((__v8hi) __A,
-							(__v8hi) __I
-							/* idx */ ,
-							(__v8hi) __B,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I,
-			      __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpermt2varhi128_maskz ((__v8hi) __I
-							 /* idx */ ,
-							 (__v8hi) __A,
-							 (__v8hi) __B,
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_maddubs_epi16 (__m256i __W, __mmask16 __U, __m256i __X,
-			   __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_pmaddubsw256_mask ((__v32qi) __X,
-						     (__v32qi) __Y,
-						     (__v16hi) __W,
-						     (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_maddubs_epi16 (__mmask16 __U, __m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_pmaddubsw256_mask ((__v32qi) __X,
-						     (__v32qi) __Y,
-						     (__v16hi)
-						     _mm256_setzero_si256 (),
-						     (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_maddubs_epi16 (__m128i __W, __mmask8 __U, __m128i __X,
-			__m128i __Y)
-{
-  return (__m128i) __builtin_ia32_pmaddubsw128_mask ((__v16qi) __X,
-						     (__v16qi) __Y,
-						     (__v8hi) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_maddubs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_pmaddubsw128_mask ((__v16qi) __X,
-						     (__v16qi) __Y,
-						     (__v8hi)
-						     _mm_setzero_hi (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_madd_epi16 (__m256i __W, __mmask8 __U, __m256i __A,
-			__m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmaddwd256_mask ((__v16hi) __A,
-						   (__v16hi) __B,
-						   (__v8si) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_madd_epi16 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmaddwd256_mask ((__v16hi) __A,
-						   (__v16hi) __B,
-						   (__v8si)
-						   _mm256_setzero_si256 (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_madd_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
-		     __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmaddwd128_mask ((__v8hi) __A,
-						   (__v8hi) __B,
-						   (__v4si) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_madd_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmaddwd128_mask ((__v8hi) __A,
-						   (__v8hi) __B,
-						   (__v4si)
-						   _mm_setzero_si128 (),
-						   (__mmask8) __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movepi8_mask (__m128i __A)
-{
-  return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_movepi8_mask (__m256i __A)
-{
-  return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movepi16_mask (__m128i __A)
-{
-  return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_movepi16_mask (__m256i __A)
-{
-  return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movm_epi8 (__mmask16 __A)
-{
-  return (__m128i) __builtin_ia32_cvtmask2b128 (__A);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_movm_epi8 (__mmask32 __A)
-{
-  return (__m256i) __builtin_ia32_cvtmask2b256 (__A);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movm_epi16 (__mmask8 __A)
-{
-  return (__m128i) __builtin_ia32_cvtmask2w128 (__A);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_movm_epi16 (__mmask16 __A)
-{
-  return (__m256i) __builtin_ia32_cvtmask2w256 (__A);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_test_epi8_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask16) __builtin_ia32_ptestmb128 ((__v16qi) __A,
-						(__v16qi) __B,
-						(__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask16) __builtin_ia32_ptestmb128 ((__v16qi) __A,
-						(__v16qi) __B, __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_test_epi8_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask32) __builtin_ia32_ptestmb256 ((__v32qi) __A,
-						(__v32qi) __B,
-						(__mmask32) -1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask32) __builtin_ia32_ptestmb256 ((__v32qi) __A,
-						(__v32qi) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_test_epi16_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestmw128 ((__v8hi) __A,
-					       (__v8hi) __B,
-					       (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestmw128 ((__v8hi) __A,
-					       (__v8hi) __B, __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_test_epi16_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask16) __builtin_ia32_ptestmw256 ((__v16hi) __A,
-						(__v16hi) __B,
-						(__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask16) __builtin_ia32_ptestmw256 ((__v16hi) __A,
-						(__v16hi) __B, __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_min_epu16 (__mmask16 __M, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pminuw256_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi)
-						  _mm256_setzero_si256 (),
-						  (__mmask16) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_min_epu16 (__m256i __W, __mmask16 __M, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pminuw256_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi) __W,
-						  (__mmask16) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_min_epu16 (__mmask8 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A,
-						  (__v8hi) __B,
-						  (__v8hi)
-						  _mm_setzero_di (),
-						  (__mmask8) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_min_epu16 (__m128i __W, __mmask8 __M, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A,
-						  (__v8hi) __B,
-						  (__v8hi) __W,
-						  (__mmask8) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_min_epi16 (__mmask16 __M, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pminsw256_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi)
-						  _mm256_setzero_si256 (),
-						  (__mmask16) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_min_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pminsw256_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi) __W,
-						  (__mmask16) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmaxub256_mask ((__v32qi) __A,
-						  (__v32qi) __B,
-						  (__v32qi)
-						  _mm256_setzero_si256 (),
-						  (__mmask32) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_max_epu8 (__m256i __W, __mmask32 __M, __m256i __A,
-		      __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmaxub256_mask ((__v32qi) __A,
-						  (__v32qi) __B,
-						  (__v32qi) __W,
-						  (__mmask32) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_max_epu8 (__mmask16 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A,
-						  (__v16qi) __B,
-						  (__v16qi)
-						  _mm_setzero_di (),
-						  (__mmask16) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_max_epu8 (__m128i __W, __mmask16 __M, __m128i __A,
-		   __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A,
-						  (__v16qi) __B,
-						  (__v16qi) __W,
-						  (__mmask16) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_max_epi8 (__mmask32 __M, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmaxsb256_mask ((__v32qi) __A,
-						  (__v32qi) __B,
-						  (__v32qi)
-						  _mm256_setzero_si256 (),
-						  (__mmask32) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_max_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
-		      __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmaxsb256_mask ((__v32qi) __A,
-						  (__v32qi) __B,
-						  (__v32qi) __W,
-						  (__mmask32) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_max_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A,
-						  (__v16qi) __B,
-						  (__v16qi)
-						  _mm_setzero_di (),
-						  (__mmask16) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_max_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
-		   __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A,
-						  (__v16qi) __B,
-						  (__v16qi) __W,
-						  (__mmask16) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pminub256_mask ((__v32qi) __A,
-						  (__v32qi) __B,
-						  (__v32qi)
-						  _mm256_setzero_si256 (),
-						  (__mmask32) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_min_epu8 (__m256i __W, __mmask32 __M, __m256i __A,
-		      __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pminub256_mask ((__v32qi) __A,
-						  (__v32qi) __B,
-						  (__v32qi) __W,
-						  (__mmask32) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_min_epu8 (__mmask16 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A,
-						  (__v16qi) __B,
-						  (__v16qi)
-						  _mm_setzero_di (),
-						  (__mmask16) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_min_epu8 (__m128i __W, __mmask16 __M, __m128i __A,
-		   __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A,
-						  (__v16qi) __B,
-						  (__v16qi) __W,
-						  (__mmask16) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_min_epi8 (__mmask32 __M, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) __A,
-						  (__v32qi) __B,
-						  (__v32qi)
-						  _mm256_setzero_si256 (),
-						  (__mmask32) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_min_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
-		      __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) __A,
-						  (__v32qi) __B,
-						  (__v32qi) __W,
-						  (__mmask32) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_min_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A,
-						  (__v16qi) __B,
-						  (__v16qi)
-						  _mm_setzero_di (),
-						  (__mmask16) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_min_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
-		   __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A,
-						  (__v16qi) __B,
-						  (__v16qi) __W,
-						  (__mmask16) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_max_epi16 (__mmask16 __M, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmaxsw256_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi)
-						  _mm256_setzero_si256 (),
-						  (__mmask16) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_max_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmaxsw256_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi) __W,
-						  (__mmask16) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_max_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A,
-						  (__v8hi) __B,
-						  (__v8hi)
-						  _mm_setzero_di (),
-						  (__mmask8) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_max_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A,
-						  (__v8hi) __B,
-						  (__v8hi) __W,
-						  (__mmask8) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_max_epu16 (__mmask16 __M, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmaxuw256_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi)
-						  _mm256_setzero_si256 (),
-						  (__mmask16) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_max_epu16 (__m256i __W, __mmask16 __M, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmaxuw256_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi) __W,
-						  (__mmask16) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_max_epu16 (__mmask8 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A,
-						  (__v8hi) __B,
-						  (__v8hi)
-						  _mm_setzero_di (),
-						  (__mmask8) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_max_epu16 (__m128i __W, __mmask8 __M, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A,
-						  (__v8hi) __B,
-						  (__v8hi) __W,
-						  (__mmask8) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_min_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A,
-						  (__v8hi) __B,
-						  (__v8hi)
-						  _mm_setzero_di (),
-						  (__mmask8) __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_min_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A,
-						  (__v8hi) __B,
-						  (__v8hi) __W,
-						  (__mmask8) __M);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_alignr_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
-			 __m256i __B, const int __N)
-{
-  return (__m256i) __builtin_ia32_palignr256_mask ((__v4di) __A,
-						   (__v4di) __B,
-						   __N * 8,
-						   (__v4di) __W,
-						   (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_alignr_epi8 (__mmask32 __U, __m256i __A, __m256i __B,
-			  const int __N)
-{
-  return (__m256i) __builtin_ia32_palignr256_mask ((__v4di) __A,
-						   (__v4di) __B,
-						   __N * 8,
-						   (__v4di)
-						   _mm256_setzero_si256 (),
-						   (__mmask32) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_alignr_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
-		      __m128i __B, const int __N)
-{
-  return (__m128i) __builtin_ia32_palignr128_mask ((__v2di) __A,
-						   (__v2di) __B,
-						   __N * 8,
-						   (__v2di) __W,
-						   (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_alignr_epi8 (__mmask16 __U, __m128i __A, __m128i __B,
-		       const int __N)
-{
-  return (__m128i) __builtin_ia32_palignr128_mask ((__v2di) __A,
-						   (__v2di) __B,
-						   __N * 8,
-						   (__v2di)
-						   _mm_setzero_si128 (),
-						   (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_dbsad_epu8 (__m256i __A, __m256i __B, const int __imm)
-{
-  return (__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi) __A,
-						    (__v32qi) __B,
-						    __imm,
-						    (__v16hi)
-						    _mm256_setzero_si256 (),
-						    (__mmask16) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_dbsad_epu8 (__m256i __W, __mmask16 __U, __m256i __A,
-			__m256i __B, const int __imm)
-{
-  return (__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi) __A,
-						    (__v32qi) __B,
-						    __imm,
-						    (__v16hi) __W,
-						    (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_dbsad_epu8 (__mmask16 __U, __m256i __A, __m256i __B,
-			 const int __imm)
-{
-  return (__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi) __A,
-						    (__v32qi) __B,
-						    __imm,
-						    (__v16hi)
-						    _mm256_setzero_si256 (),
-						    (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_dbsad_epu8 (__m128i __A, __m128i __B, const int __imm)
-{
-  return (__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi) __A,
-						    (__v16qi) __B,
-						    __imm,
-						    (__v8hi)
-						    _mm_setzero_hi (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_dbsad_epu8 (__m128i __W, __mmask8 __U, __m128i __A,
-		     __m128i __B, const int __imm)
-{
-  return (__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi) __A,
-						    (__v16qi) __B,
-						    __imm,
-						    (__v8hi) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_dbsad_epu8 (__mmask8 __U, __m128i __A, __m128i __B,
-		      const int __imm)
-{
-  return (__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi) __A,
-						    (__v16qi) __B,
-						    __imm,
-						    (__v8hi)
-						    _mm_setzero_si128 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
-{
-  return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A,
-						    (__v8hi) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
-{
-  return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A,
-						    (__v16qi) __W,
-						    (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
-{
-  return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A,
-						    (__v16hi) __W,
-						    (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
-{
-  return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A,
-						    (__v32qi) __W,
-						    (__mmask32) __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_epi16_mask (__mmask8 __U, __m128i __X, __m128i __Y,
-			 const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
-						 (__v8hi) __Y, __P,
-						 (__mmask8) __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_epi16_mask (__m128i __X, __m128i __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
-						 (__v8hi) __Y, __P,
-						 (__mmask8) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmp_epi16_mask (__mmask16 __U, __m256i __X, __m256i __Y,
-			    const int __P)
-{
-  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, __P,
-						  (__mmask16) __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmp_epi16_mask (__m256i __X, __m256i __Y, const int __P)
-{
-  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, __P,
-						  (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_epi8_mask (__mmask8 __U, __m128i __X, __m128i __Y,
-			const int __P)
-{
-  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
-						  (__v16qi) __Y, __P,
-						  (__mmask16) __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_epi8_mask (__m128i __X, __m128i __Y, const int __P)
-{
-  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
-						  (__v16qi) __Y, __P,
-						  (__mmask16) -1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmp_epi8_mask (__mmask16 __U, __m256i __X, __m256i __Y,
-			   const int __P)
-{
-  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, __P,
-						  (__mmask32) __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmp_epi8_mask (__m256i __X, __m256i __Y, const int __P)
-{
-  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, __P,
-						  (__mmask32) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_epu16_mask (__mmask8 __U, __m128i __X, __m128i __Y,
-			 const int __P)
-{
-  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
-						  (__v8hi) __Y, __P,
-						  (__mmask8) __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_epu16_mask (__m128i __X, __m128i __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
-						  (__v8hi) __Y, __P,
-						  (__mmask8) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmp_epu16_mask (__mmask16 __U, __m256i __X, __m256i __Y,
-			    const int __P)
-{
-  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						   (__v16hi) __Y, __P,
-						   (__mmask16) __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmp_epu16_mask (__m256i __X, __m256i __Y, const int __P)
-{
-  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						   (__v16hi) __Y, __P,
-						   (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_epu8_mask (__mmask8 __U, __m128i __X, __m128i __Y,
-			const int __P)
-{
-  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
-						   (__v16qi) __Y, __P,
-						   (__mmask16) __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_epu8_mask (__m128i __X, __m128i __Y, const int __P)
-{
-  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
-						   (__v16qi) __Y, __P,
-						   (__mmask16) -1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmp_epu8_mask (__mmask16 __U, __m256i __X, __m256i __Y,
-			   const int __P)
-{
-  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						   (__v32qi) __Y, __P,
-						   (__mmask32) __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmp_epu8_mask (__m256i __X, __m256i __Y, const int __P)
-{
-  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						   (__v32qi) __Y, __P,
-						   (__mmask32) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_srli_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
-			const int __imm)
-{
-  return (__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi) __A, __imm,
-						  (__v16hi) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_srli_epi16 (__mmask16 __U, __m256i __A, const int __imm)
-{
-  return (__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi) __A, __imm,
-						  (__v16hi)
-						  _mm256_setzero_si256 (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_srli_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
-		     const int __imm)
-{
-  return (__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi) __A, __imm,
-						  (__v8hi) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, const int __imm)
-{
-  return (__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi) __A, __imm,
-						  (__v8hi)
-						  _mm_setzero_si128 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_shufflehi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
-			     const int __imm)
-{
-  return (__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi) __A,
-						   __imm,
-						   (__v16hi) __W,
-						   (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_shufflehi_epi16 (__mmask16 __U, __m256i __A,
-			      const int __imm)
-{
-  return (__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi) __A,
-						   __imm,
-						   (__v16hi)
-						   _mm256_setzero_si256 (),
-						   (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_shufflehi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
-			  const int __imm)
-{
-  return (__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi) __A, __imm,
-						   (__v8hi) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_shufflehi_epi16 (__mmask8 __U, __m128i __A, const int __imm)
-{
-  return (__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi) __A, __imm,
-						   (__v8hi)
-						   _mm_setzero_hi (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_shufflelo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
-			     const int __imm)
-{
-  return (__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi) __A,
-						   __imm,
-						   (__v16hi) __W,
-						   (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_shufflelo_epi16 (__mmask16 __U, __m256i __A,
-			      const int __imm)
-{
-  return (__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi) __A,
-						   __imm,
-						   (__v16hi)
-						   _mm256_setzero_si256 (),
-						   (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_shufflelo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
-			  const int __imm)
-{
-  return (__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi) __A, __imm,
-						   (__v8hi) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_shufflelo_epi16 (__mmask8 __U, __m128i __A, const int __imm)
-{
-  return (__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi) __A, __imm,
-						   (__v8hi)
-						   _mm_setzero_hi (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_srai_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
-			const int __imm)
-{
-  return (__m256i) __builtin_ia32_psrawi256_mask ((__v16hi) __A, __imm,
-						  (__v16hi) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_srai_epi16 (__mmask16 __U, __m256i __A, const int __imm)
-{
-  return (__m256i) __builtin_ia32_psrawi256_mask ((__v16hi) __A, __imm,
-						  (__v16hi)
-						  _mm256_setzero_si256 (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_srai_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
-		     const int __imm)
-{
-  return (__m128i) __builtin_ia32_psrawi128_mask ((__v8hi) __A, __imm,
-						  (__v8hi) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_srai_epi16 (__mmask8 __U, __m128i __A, const int __imm)
-{
-  return (__m128i) __builtin_ia32_psrawi128_mask ((__v8hi) __A, __imm,
-						  (__v8hi)
-						  _mm_setzero_si128 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_slli_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
-			int __B)
-{
-  return (__m256i) __builtin_ia32_psllwi256_mask ((__v16hi) __A, __B,
-						  (__v16hi) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_slli_epi16 (__mmask16 __U, __m256i __A, int __B)
-{
-  return (__m256i) __builtin_ia32_psllwi256_mask ((__v16hi) __A, __B,
-						  (__v16hi)
-						  _mm256_setzero_si256 (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_slli_epi16 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
-{
-  return (__m128i) __builtin_ia32_psllwi128_mask ((__v8hi) __A, __B,
-						  (__v8hi) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B)
-{
-  return (__m128i) __builtin_ia32_psllwi128_mask ((__v8hi) __A, __B,
-						  (__v8hi)
-						  _mm_setzero_si128 (),
-						  (__mmask8) __U);
-}
-
-#else
-#define _mm256_mask_alignr_epi8(W, U, X, Y, N)					    \
-  ((__m256i) __builtin_ia32_palignr256_mask ((__v4di)(__m256i)(X),		    \
-					    (__v4di)(__m256i)(Y), (int)(N * 8),	    \
-					    (__v4di)(__m256i)(X), (__mmask32)(U)))
-
-#define _mm256_mask_srli_epi16(W, U, A, B)                              \
-  ((__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi)(__m256i)(A),      \
-    (int)(B), (__v16hi)(__m256i)(W), (__mmask16)(U)))
-
-#define _mm256_maskz_srli_epi16(U, A, B)                                \
-  ((__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi)(__m256i)(A),      \
-    (int)(B), (__v16hi)_mm256_setzero_si256 (), (__mmask16)(U)))
-
-#define _mm_mask_srli_epi16(W, U, A, B)                                 \
-  ((__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi)(__m128i)(A),       \
-    (int)(B), (__v8hi)(__m128i)(W), (__mmask8)(U)))
-
-#define _mm_maskz_srli_epi16(U, A, B)                                   \
-  ((__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi)(__m128i)(A),       \
-    (int)(B), (__v8hi)_mm_setzero_si128(), (__mmask8)(U)))
-
-#define _mm256_mask_srai_epi16(W, U, A, B)                              \
-  ((__m256i) __builtin_ia32_psrawi256_mask ((__v16hi)(__m256i)(A),      \
-    (int)(B), (__v16hi)(__m256i)(W), (__mmask16)(U)))
-
-#define _mm256_maskz_srai_epi16(U, A, B)                                \
-  ((__m256i) __builtin_ia32_psrawi256_mask ((__v16hi)(__m256i)(A),      \
-    (int)(B), (__v16hi)_mm256_setzero_si256 (), (__mmask16)(U)))
-
-#define _mm_mask_srai_epi16(W, U, A, B)                                 \
-  ((__m128i) __builtin_ia32_psrawi128_mask ((__v8hi)(__m128i)(A),       \
-    (int)(B), (__v8hi)(__m128i)(W), (__mmask8)(U)))
-
-#define _mm_maskz_srai_epi16(U, A, B)                                   \
-  ((__m128i) __builtin_ia32_psrawi128_mask ((__v8hi)(__m128i)(A),       \
-    (int)(B), (__v8hi)_mm_setzero_si128(), (__mmask8)(U)))
-
-#define _mm256_mask_shufflehi_epi16(W, U, A, B)                                     \
-  ((__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi)(__m256i)(A), (int)(B),       \
-                                             (__v16hi)(__m256i)(W),                 \
-                                             (__mmask16)(U)))
-
-#define _mm256_maskz_shufflehi_epi16(U, A, B)                                       \
-  ((__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi)(__m256i)(A), (int)(B),       \
-                                             (__v16hi)(__m256i)_mm256_setzero_si256 (), \
-                                             (__mmask16)(U)))
-
-#define _mm_mask_shufflehi_epi16(W, U, A, B)                                        \
-  ((__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi)(__m128i)(A), (int)(B),        \
-                                             (__v8hi)(__m128i)(W),                  \
-                                             (__mmask8)(U)))
-
-#define _mm_maskz_shufflehi_epi16(U, A, B)                                          \
-  ((__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi)(__m128i)(A), (int)(B),        \
-                                             (__v8hi)(__m128i)_mm_setzero_hi(),     \
-                                             (__mmask8)(U)))
-
-#define _mm256_mask_shufflelo_epi16(W, U, A, B)                                     \
-  ((__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi)(__m256i)(A), (int)(B),       \
-                                             (__v16hi)(__m256i)(W),                 \
-                                             (__mmask16)(U)))
-
-#define _mm256_maskz_shufflelo_epi16(U, A, B)                                       \
-  ((__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi)(__m256i)(A), (int)(B),       \
-                                             (__v16hi)(__m256i)_mm256_setzero_si256 (), \
-                                             (__mmask16)(U)))
-
-#define _mm_mask_shufflelo_epi16(W, U, A, B)                                        \
-  ((__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi)(__m128i)(A), (int)(B),        \
-                                             (__v8hi)(__m128i)(W),                  \
-                                             (__mmask8)(U)))
-
-#define _mm_maskz_shufflelo_epi16(U, A, B)                                          \
-  ((__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi)(__m128i)(A), (int)(B),        \
-                                             (__v8hi)(__m128i)_mm_setzero_hi(),     \
-                                             (__mmask8)(U)))
-
-#define _mm256_maskz_alignr_epi8(U, X, Y, N)					    \
-  ((__m256i) __builtin_ia32_palignr256_mask ((__v4di)(__m256i)(X),		    \
-					    (__v4di)(__m256i)(Y), (int)(N * 8),	    \
-					    (__v4di)(__m256i)_mm256_setzero_si256 (),   \
-					    (__mmask32)(U)))
-
-#define _mm_mask_alignr_epi8(W, U, X, Y, N)					    \
-  ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X),		    \
-					    (__v2di)(__m128i)(Y), (int)(N * 8),	    \
-					    (__v2di)(__m128i)(X), (__mmask16)(U)))
-
-#define _mm_maskz_alignr_epi8(U, X, Y, N)					    \
-  ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X),		    \
-					    (__v2di)(__m128i)(Y), (int)(N * 8),	    \
-					    (__v2di)(__m128i)_mm_setzero_di(),	    \
-					    (__mmask16)(U)))
-
-#define _mm_mask_slli_epi16(W, U, X, C)					  \
-  ((__m128i)__builtin_ia32_psllwi128_mask ((__v8hi)(__m128i)(X), (int)(C),\
-    (__v8hi)(__m128i)(W),\
-    (__mmask8)(U)))
-
-#define _mm_maskz_slli_epi16(U, X, C)					  \
-  ((__m128i)__builtin_ia32_psllwi128_mask ((__v8hi)(__m128i)(X), (int)(C),\
-    (__v8hi)(__m128i)_mm_setzero_hi(),\
-    (__mmask8)(U)))
-
-#define _mm256_dbsad_epu8(X, Y, C)                                                  \
-  ((__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi)(__m256i) (X),               \
-                                              (__v32qi)(__m256i) (Y), (int) (C),    \
-                                              (__v16hi)(__m256i)_mm256_setzero_si256(),\
-                                              (__mmask16)-1))
-
-#define _mm256_mask_slli_epi16(W, U, X, C)                                 \
-  ((__m256i)__builtin_ia32_psllwi256_mask ((__v16hi)(__m256i)(X), (int)(C),\
-    (__v16hi)(__m256i)(W),\
-    (__mmask16)(U)))
-
-#define _mm256_maskz_slli_epi16(U, X, C)                                   \
-  ((__m256i)__builtin_ia32_psllwi256_mask ((__v16hi)(__m256i)(X), (int)(C),\
-    (__v16hi)(__m256i)_mm256_setzero_si256 (),\
-    (__mmask16)(U)))
-
-#define _mm256_mask_dbsad_epu8(W, U, X, Y, C)                                       \
-  ((__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi)(__m256i) (X),               \
-                                              (__v32qi)(__m256i) (Y), (int) (C),    \
-                                              (__v16hi)(__m256i)(W),                \
-                                              (__mmask16)(U)))
-
-#define _mm256_maskz_dbsad_epu8(U, X, Y, C)                                         \
-  ((__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi)(__m256i) (X),               \
-                                              (__v32qi)(__m256i) (Y), (int) (C),    \
-                                              (__v16hi)(__m256i)_mm256_setzero_si256(),\
-                                              (__mmask16)(U)))
-
-#define _mm_dbsad_epu8(X, Y, C)                                                     \
-  ((__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi)(__m128i) (X),               \
-                                              (__v16qi)(__m128i) (Y), (int) (C),    \
-                                              (__v8hi)(__m128i)_mm_setzero_si128(), \
-                                              (__mmask8)-1))
-
-#define _mm_mask_dbsad_epu8(W, U, X, Y, C)                                          \
-  ((__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi)(__m128i) (X),               \
-                                              (__v16qi)(__m128i) (Y), (int) (C),    \
-                                              (__v8hi)(__m128i)(W),                 \
-                                              (__mmask8)(U)))
-
-#define _mm_maskz_dbsad_epu8(U, X, Y, C)                                            \
-  ((__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi)(__m128i) (X),               \
-                                              (__v16qi)(__m128i) (Y), (int) (C),    \
-                                              (__v8hi)(__m128i)_mm_setzero_si128(), \
-                                              (__mmask8)(U)))
-
-#define _mm_mask_blend_epi16(__U, __A, __W)			      \
-  ((__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) (__A),	      \
-						    (__v8hi) (__W),   \
-						    (__mmask8) (__U)))
-
-#define _mm_mask_blend_epi8(__U, __A, __W)			      \
-  ((__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) (__A),	      \
-						    (__v16qi) (__W),  \
-						    (__mmask16) (__U)))
-
-#define _mm256_mask_blend_epi16(__U, __A, __W)			      \
-  ((__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) (__A),	      \
-						    (__v16hi) (__W),  \
-						    (__mmask16) (__U)))
-
-#define _mm256_mask_blend_epi8(__U, __A, __W)			      \
-  ((__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) (__A),	      \
-						    (__v32qi) (__W),  \
-						    (__mmask32) (__U)))
-
-#define _mm_cmp_epi16_mask(X, Y, P)				\
-  ((__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi)(__m128i)(X),	\
-					    (__v8hi)(__m128i)(Y), (int)(P),\
-					    (__mmask8)(-1)))
-
-#define _mm_cmp_epi8_mask(X, Y, P)				\
-  ((__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi)(__m128i)(X),	\
-					    (__v16qi)(__m128i)(Y), (int)(P),\
-					    (__mmask16)(-1)))
-
-#define _mm256_cmp_epi16_mask(X, Y, P)				\
-  ((__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi)(__m256i)(X),	\
-					    (__v16hi)(__m256i)(Y), (int)(P),\
-					    (__mmask16)(-1)))
-
-#define _mm256_cmp_epi8_mask(X, Y, P)				\
-  ((__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi)(__m256i)(X),	\
-					    (__v32qi)(__m256i)(Y), (int)(P),\
-					    (__mmask32)(-1)))
-
-#define _mm_cmp_epu16_mask(X, Y, P)				\
-  ((__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi)(__m128i)(X),	\
-					    (__v8hi)(__m128i)(Y), (int)(P),\
-					    (__mmask8)(-1)))
-
-#define _mm_cmp_epu8_mask(X, Y, P)				\
-  ((__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi)(__m128i)(X),	\
-					    (__v16qi)(__m128i)(Y), (int)(P),\
-					    (__mmask16)(-1)))
-
-#define _mm256_cmp_epu16_mask(X, Y, P)				\
-  ((__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi)(__m256i)(X),	\
-					    (__v16hi)(__m256i)(Y), (int)(P),\
-					    (__mmask16)(-1)))
-
-#define _mm256_cmp_epu8_mask(X, Y, P)				\
-  ((__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi)(__m256i)(X),	\
-					    (__v32qi)(__m256i)(Y), (int)(P),\
-					    (__mmask32)-1))
-
-#define _mm_mask_cmp_epi16_mask(M, X, Y, P)				\
-  ((__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi)(__m128i)(X),	\
-					    (__v8hi)(__m128i)(Y), (int)(P),\
-					    (__mmask16)(M)))
-
-#define _mm_mask_cmp_epi8_mask(M, X, Y, P)				\
-  ((__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi)(__m128i)(X),	\
-					    (__v16qi)(__m128i)(Y), (int)(P),\
-					    (__mmask16)(M)))
-
-#define _mm256_mask_cmp_epi16_mask(M, X, Y, P)				\
-  ((__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi)(__m256i)(X),	\
-					    (__v16hi)(__m256i)(Y), (int)(P),\
-					    (__mmask16)(M)))
-
-#define _mm256_mask_cmp_epi8_mask(M, X, Y, P)				\
-  ((__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi)(__m256i)(X),	\
-					    (__v32qi)(__m256i)(Y), (int)(P),\
-					    (__mmask32)(M)))
-
-#define _mm_mask_cmp_epu16_mask(M, X, Y, P)				\
-  ((__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi)(__m128i)(X),	\
-					    (__v8hi)(__m128i)(Y), (int)(P),\
-					    (__mmask8)(M)))
-
-#define _mm_mask_cmp_epu8_mask(M, X, Y, P)				\
-  ((__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi)(__m128i)(X),	\
-					    (__v16qi)(__m128i)(Y), (int)(P),\
-					    (__mmask16)(M)))
-
-#define _mm256_mask_cmp_epu16_mask(M, X, Y, P)				\
-  ((__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi)(__m256i)(X),	\
-					    (__v16hi)(__m256i)(Y), (int)(P),\
-					    (__mmask16)(M)))
-
-#define _mm256_mask_cmp_epu8_mask(M, X, Y, P)				\
-  ((__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi)(__m256i)(X),	\
-					    (__v32qi)(__m256i)(Y), (int)(P),\
-					    (__mmask32)M))
-#endif
-
-extern __inline __mmask32
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpneq_epi8_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, 4,
-						  (__mmask32) - 1);
-}
-
-extern __inline __mmask32
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmplt_epi8_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, 1,
-						  (__mmask32) - 1);
-}
-
-extern __inline __mmask32
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpge_epi8_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, 5,
-						  (__mmask32) - 1);
-}
-
-extern __inline __mmask32
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmple_epi8_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, 2,
-						  (__mmask32) - 1);
-}
-
-extern __inline __mmask16
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpneq_epi16_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, 4,
-						  (__mmask16) - 1);
-}
-
-extern __inline __mmask16
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmplt_epi16_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, 1,
-						  (__mmask16) - 1);
-}
-
-extern __inline __mmask16
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpge_epi16_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, 5,
-						  (__mmask16) - 1);
-}
-
-extern __inline __mmask16
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmple_epi16_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, 2,
-						  (__mmask16) - 1);
-}
-
-extern __inline __mmask16
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpneq_epu8_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
-						   (__v16qi) __Y, 4,
-						   (__mmask16) - 1);
-}
-
-extern __inline __mmask16
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmplt_epu8_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
-						   (__v16qi) __Y, 1,
-						   (__mmask16) - 1);
-}
-
-extern __inline __mmask16
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpge_epu8_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
-						   (__v16qi) __Y, 5,
-						   (__mmask16) - 1);
-}
-
-extern __inline __mmask16
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmple_epu8_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
-						   (__v16qi) __Y, 2,
-						   (__mmask16) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpneq_epu16_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
-						  (__v8hi) __Y, 4,
-						  (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmplt_epu16_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
-						  (__v8hi) __Y, 1,
-						  (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpge_epu16_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
-						  (__v8hi) __Y, 5,
-						  (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmple_epu16_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
-						  (__v8hi) __Y, 2,
-						  (__mmask8) - 1);
-}
-
-extern __inline __mmask16
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpneq_epi8_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
-						  (__v16qi) __Y, 4,
-						  (__mmask16) - 1);
-}
-
-extern __inline __mmask16
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmplt_epi8_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
-						  (__v16qi) __Y, 1,
-						  (__mmask16) - 1);
-}
-
-extern __inline __mmask16
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpge_epi8_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
-						  (__v16qi) __Y, 5,
-						  (__mmask16) - 1);
-}
-
-extern __inline __mmask16
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmple_epi8_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
-						  (__v16qi) __Y, 2,
-						  (__mmask16) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpneq_epi16_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
-						 (__v8hi) __Y, 4,
-						 (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmplt_epi16_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
-						 (__v8hi) __Y, 1,
-						 (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpge_epi16_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
-						 (__v8hi) __Y, 5,
-						 (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmple_epi16_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
-						 (__v8hi) __Y, 2,
-						 (__mmask8) - 1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_mulhrs_epi16 (__m256i __W, __mmask16 __U, __m256i __X,
-			  __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_pmulhrsw256_mask ((__v16hi) __X,
-						    (__v16hi) __Y,
-						    (__v16hi) __W,
-						    (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_mulhrs_epi16 (__mmask16 __U, __m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_pmulhrsw256_mask ((__v16hi) __X,
-						    (__v16hi) __Y,
-						    (__v16hi)
-						    _mm256_setzero_si256 (),
-						    (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_mulhi_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
-			 __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmulhuw256_mask ((__v16hi) __A,
-						   (__v16hi) __B,
-						   (__v16hi) __W,
-						   (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_mulhi_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmulhuw256_mask ((__v16hi) __A,
-						   (__v16hi) __B,
-						   (__v16hi)
-						   _mm256_setzero_si256 (),
-						   (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_mulhi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
-			 __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmulhw256_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_mulhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmulhw256_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi)
-						  _mm256_setzero_si256 (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_mulhi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
-		      __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmulhw128_mask ((__v8hi) __A,
-						  (__v8hi) __B,
-						  (__v8hi) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_mulhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmulhw128_mask ((__v8hi) __A,
-						  (__v8hi) __B,
-						  (__v8hi)
-						  _mm_setzero_hi (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_mulhi_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
-		      __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmulhuw128_mask ((__v8hi) __A,
-						   (__v8hi) __B,
-						   (__v8hi) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_mulhi_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmulhuw128_mask ((__v8hi) __A,
-						   (__v8hi) __B,
-						   (__v8hi)
-						   _mm_setzero_hi (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_mulhrs_epi16 (__m128i __W, __mmask8 __U, __m128i __X,
-		       __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X,
-						    (__v8hi) __Y,
-						    (__v8hi) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_mulhrs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X,
-						    (__v8hi) __Y,
-						    (__v8hi)
-						    _mm_setzero_hi (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_mullo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
-			 __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi)
-						  _mm256_setzero_si256 (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_mullo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
-		      __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
-						  (__v8hi) __B,
-						  (__v8hi) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
-						  (__v8hi) __B,
-						  (__v8hi)
-						  _mm_setzero_hi (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi8_epi16 (__m256i __W, __mmask32 __U, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
-						    (__v16hi) __W,
-						    (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi8_epi16 (__mmask16 __U, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
-						    (__v16hi)
-						    _mm256_setzero_si256 (),
-						    (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi8_epi16 (__m128i __W, __mmask32 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
-						    (__v8hi) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi8_epi16 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
-						    (__v8hi)
-						    _mm_setzero_si128 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepu8_epi16 (__m256i __W, __mmask32 __U, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
-						    (__v16hi) __W,
-						    (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
-						    (__v16hi)
-						    _mm256_setzero_si256 (),
-						    (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepu8_epi16 (__m128i __W, __mmask32 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
-						    (__v8hi) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepu8_epi16 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
-						    (__v8hi)
-						    _mm_setzero_si128 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_avg_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
-		      __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pavgb256_mask ((__v32qi) __A,
-						 (__v32qi) __B,
-						 (__v32qi) __W,
-						 (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_avg_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pavgb256_mask ((__v32qi) __A,
-						 (__v32qi) __B,
-						 (__v32qi)
-						 _mm256_setzero_si256 (),
-						 (__mmask32) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_avg_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
-		   __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pavgb128_mask ((__v16qi) __A,
-						 (__v16qi) __B,
-						 (__v16qi) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_avg_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pavgb128_mask ((__v16qi) __A,
-						 (__v16qi) __B,
-						 (__v16qi)
-						 _mm_setzero_si128 (),
-						 (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_avg_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pavgw256_mask ((__v16hi) __A,
-						 (__v16hi) __B,
-						 (__v16hi) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_avg_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pavgw256_mask ((__v16hi) __A,
-						 (__v16hi) __B,
-						 (__v16hi)
-						 _mm256_setzero_si256 (),
-						 (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_avg_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pavgw128_mask ((__v8hi) __A,
-						 (__v8hi) __B,
-						 (__v8hi) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_avg_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pavgw128_mask ((__v8hi) __A,
-						 (__v8hi) __B,
-						 (__v8hi)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_add_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
-		      __m256i __B)
-{
-  return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A,
-						 (__v32qi) __B,
-						 (__v32qi) __W,
-						 (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_add_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A,
-						 (__v32qi) __B,
-						 (__v32qi)
-						 _mm256_setzero_si256 (),
-						 (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_add_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A,
-						 (__v16hi) __B,
-						 (__v16hi) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_add_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A,
-						 (__v16hi) __B,
-						 (__v16hi)
-						 _mm256_setzero_si256 (),
-						 (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_adds_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_paddsb256_mask ((__v32qi) __A,
-						  (__v32qi) __B,
-						  (__v32qi) __W,
-						  (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_adds_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_paddsb256_mask ((__v32qi) __A,
-						  (__v32qi) __B,
-						  (__v32qi)
-						  _mm256_setzero_si256 (),
-						  (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_adds_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
-			__m256i __B)
-{
-  return (__m256i) __builtin_ia32_paddsw256_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_adds_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_paddsw256_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi)
-						  _mm256_setzero_si256 (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_adds_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_paddusb256_mask ((__v32qi) __A,
-						   (__v32qi) __B,
-						   (__v32qi) __W,
-						   (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_adds_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_paddusb256_mask ((__v32qi) __A,
-						   (__v32qi) __B,
-						   (__v32qi)
-						   _mm256_setzero_si256 (),
-						   (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_adds_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
-			__m256i __B)
-{
-  return (__m256i) __builtin_ia32_paddusw256_mask ((__v16hi) __A,
-						   (__v16hi) __B,
-						   (__v16hi) __W,
-						   (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_adds_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_paddusw256_mask ((__v16hi) __A,
-						   (__v16hi) __B,
-						   (__v16hi)
-						   _mm256_setzero_si256 (),
-						   (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sub_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
-		      __m256i __B)
-{
-  return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A,
-						 (__v32qi) __B,
-						 (__v32qi) __W,
-						 (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sub_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A,
-						 (__v32qi) __B,
-						 (__v32qi)
-						 _mm256_setzero_si256 (),
-						 (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sub_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A,
-						 (__v16hi) __B,
-						 (__v16hi) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sub_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A,
-						 (__v16hi) __B,
-						 (__v16hi)
-						 _mm256_setzero_si256 (),
-						 (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_subs_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_psubsb256_mask ((__v32qi) __A,
-						  (__v32qi) __B,
-						  (__v32qi) __W,
-						  (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_subs_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_psubsb256_mask ((__v32qi) __A,
-						  (__v32qi) __B,
-						  (__v32qi)
-						  _mm256_setzero_si256 (),
-						  (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_subs_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
-			__m256i __B)
-{
-  return (__m256i) __builtin_ia32_psubsw256_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_subs_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_psubsw256_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi)
-						  _mm256_setzero_si256 (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_subs_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_psubusb256_mask ((__v32qi) __A,
-						   (__v32qi) __B,
-						   (__v32qi) __W,
-						   (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_subs_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_psubusb256_mask ((__v32qi) __A,
-						   (__v32qi) __B,
-						   (__v32qi)
-						   _mm256_setzero_si256 (),
-						   (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_subs_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
-			__m256i __B)
-{
-  return (__m256i) __builtin_ia32_psubusw256_mask ((__v16hi) __A,
-						   (__v16hi) __B,
-						   (__v16hi) __W,
-						   (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_subs_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_psubusw256_mask ((__v16hi) __A,
-						   (__v16hi) __B,
-						   (__v16hi)
-						   _mm256_setzero_si256 (),
-						   (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_add_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
-		   __m128i __B)
-{
-  return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A,
-						 (__v16qi) __B,
-						 (__v16qi) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_add_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A,
-						 (__v16qi) __B,
-						 (__v16qi)
-						 _mm_setzero_si128 (),
-						 (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_add_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A,
-						 (__v8hi) __B,
-						 (__v8hi) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_add_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A,
-						 (__v8hi) __B,
-						 (__v8hi)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_unpackhi_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
-			   __m256i __B)
-{
-  return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A,
-						     (__v32qi) __B,
-						     (__v32qi) __W,
-						     (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_unpackhi_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A,
-						     (__v32qi) __B,
-						     (__v32qi)
-						     _mm256_setzero_si256 (),
-						     (__mmask32) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_unpackhi_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
-			__m128i __B)
-{
-  return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A,
-						     (__v16qi) __B,
-						     (__v16qi) __W,
-						     (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_unpackhi_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A,
-						     (__v16qi) __B,
-						     (__v16qi)
-						     _mm_setzero_si128 (),
-						     (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_unpackhi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
-			    __m256i __B)
-{
-  return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A,
-						     (__v16hi) __B,
-						     (__v16hi) __W,
-						     (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_unpackhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A,
-						     (__v16hi) __B,
-						     (__v16hi)
-						     _mm256_setzero_si256 (),
-						     (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_unpackhi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
-			 __m128i __B)
-{
-  return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A,
-						     (__v8hi) __B,
-						     (__v8hi) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_unpackhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A,
-						     (__v8hi) __B,
-						     (__v8hi)
-						     _mm_setzero_si128 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_unpacklo_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
-			   __m256i __B)
-{
-  return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A,
-						     (__v32qi) __B,
-						     (__v32qi) __W,
-						     (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_unpacklo_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A,
-						     (__v32qi) __B,
-						     (__v32qi)
-						     _mm256_setzero_si256 (),
-						     (__mmask32) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_unpacklo_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
-			__m128i __B)
-{
-  return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A,
-						     (__v16qi) __B,
-						     (__v16qi) __W,
-						     (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_unpacklo_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A,
-						     (__v16qi) __B,
-						     (__v16qi)
-						     _mm_setzero_si128 (),
-						     (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_unpacklo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
-			    __m256i __B)
-{
-  return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A,
-						     (__v16hi) __B,
-						     (__v16hi) __W,
-						     (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_unpacklo_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A,
-						     (__v16hi) __B,
-						     (__v16hi)
-						     _mm256_setzero_si256 (),
-						     (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_unpacklo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
-			 __m128i __B)
-{
-  return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A,
-						     (__v8hi) __B,
-						     (__v8hi) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_unpacklo_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A,
-						     (__v8hi) __B,
-						     (__v8hi)
-						     _mm_setzero_si128 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_epi8_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask16) __builtin_ia32_pcmpeqb128_mask ((__v16qi) __A,
-						     (__v16qi) __B,
-						     (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_epu8_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __A,
-						    (__v16qi) __B, 0,
-						    (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpeq_epu8_mask (__mmask16 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __A,
-						    (__v16qi) __B, 0,
-						    __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpeq_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask16) __builtin_ia32_pcmpeqb128_mask ((__v16qi) __A,
-						     (__v16qi) __B,
-						     __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpeq_epu8_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __A,
-						    (__v32qi) __B, 0,
-						    (__mmask32) -1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpeq_epi8_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask32) __builtin_ia32_pcmpeqb256_mask ((__v32qi) __A,
-						     (__v32qi) __B,
-						     (__mmask32) -1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpeq_epu8_mask (__mmask32 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __A,
-						    (__v32qi) __B, 0,
-						    __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpeq_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask32) __builtin_ia32_pcmpeqb256_mask ((__v32qi) __A,
-						     (__v32qi) __B,
-						     __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_epu16_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __A,
-						   (__v8hi) __B, 0,
-						   (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_epi16_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpeqw128_mask ((__v8hi) __A,
-						    (__v8hi) __B,
-						    (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpeq_epu16_mask (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __A,
-						   (__v8hi) __B, 0, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpeq_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpeqw128_mask ((__v8hi) __A,
-						    (__v8hi) __B, __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpeq_epu16_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __A,
-						    (__v16hi) __B, 0,
-						    (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpeq_epi16_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask16) __builtin_ia32_pcmpeqw256_mask ((__v16hi) __A,
-						     (__v16hi) __B,
-						     (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpeq_epu16_mask (__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __A,
-						    (__v16hi) __B, 0,
-						    __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpeq_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask16) __builtin_ia32_pcmpeqw256_mask ((__v16hi) __A,
-						     (__v16hi) __B,
-						     __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpgt_epu8_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __A,
-						    (__v16qi) __B, 6,
-						    (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpgt_epi8_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask16) __builtin_ia32_pcmpgtb128_mask ((__v16qi) __A,
-						     (__v16qi) __B,
-						     (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpgt_epu8_mask (__mmask16 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __A,
-						    (__v16qi) __B, 6,
-						    __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpgt_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask16) __builtin_ia32_pcmpgtb128_mask ((__v16qi) __A,
-						     (__v16qi) __B,
-						     __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpgt_epu8_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __A,
-						    (__v32qi) __B, 6,
-						    (__mmask32) -1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpgt_epi8_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask32) __builtin_ia32_pcmpgtb256_mask ((__v32qi) __A,
-						     (__v32qi) __B,
-						     (__mmask32) -1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpgt_epu8_mask (__mmask32 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __A,
-						    (__v32qi) __B, 6,
-						    __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpgt_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask32) __builtin_ia32_pcmpgtb256_mask ((__v32qi) __A,
-						     (__v32qi) __B,
-						     __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpgt_epu16_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __A,
-						   (__v8hi) __B, 6,
-						   (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpgt_epi16_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpgtw128_mask ((__v8hi) __A,
-						    (__v8hi) __B,
-						    (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpgt_epu16_mask (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __A,
-						   (__v8hi) __B, 6, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpgt_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpgtw128_mask ((__v8hi) __A,
-						    (__v8hi) __B, __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpgt_epu16_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __A,
-						    (__v16hi) __B, 6,
-						    (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpgt_epi16_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask16) __builtin_ia32_pcmpgtw256_mask ((__v16hi) __A,
-						     (__v16hi) __B,
-						     (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpgt_epu16_mask (__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __A,
-						    (__v16hi) __B, 6,
-						    __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpgt_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask16) __builtin_ia32_pcmpgtw256_mask ((__v16hi) __A,
-						     (__v16hi) __B,
-						     __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_testn_epi8_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask16) __builtin_ia32_ptestnmb128 ((__v16qi) __A,
-						 (__v16qi) __B,
-						 (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask16) __builtin_ia32_ptestnmb128 ((__v16qi) __A,
-						 (__v16qi) __B, __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_testn_epi8_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask32) __builtin_ia32_ptestnmb256 ((__v32qi) __A,
-						 (__v32qi) __B,
-						 (__mmask32) -1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask32) __builtin_ia32_ptestnmb256 ((__v32qi) __A,
-						 (__v32qi) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_testn_epi16_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestnmw128 ((__v8hi) __A,
-						(__v8hi) __B,
-						(__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestnmw128 ((__v8hi) __A,
-						(__v8hi) __B, __U);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_testn_epi16_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask16) __builtin_ia32_ptestnmw256 ((__v16hi) __A,
-						 (__v16hi) __B,
-						 (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask16) __builtin_ia32_ptestnmw256 ((__v16hi) __A,
-						 (__v16hi) __B, __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_shuffle_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
-			  __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pshufb256_mask ((__v32qi) __A,
-						  (__v32qi) __B,
-						  (__v32qi) __W,
-						  (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_shuffle_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pshufb256_mask ((__v32qi) __A,
-						  (__v32qi) __B,
-						  (__v32qi)
-						  _mm256_setzero_si256 (),
-						  (__mmask32) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_shuffle_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
-		       __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pshufb128_mask ((__v16qi) __A,
-						  (__v16qi) __B,
-						  (__v16qi) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_shuffle_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pshufb128_mask ((__v16qi) __A,
-						  (__v16qi) __B,
-						  (__v16qi)
-						  _mm_setzero_si128 (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_packs_epi16 (__mmask32 __M, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_packsswb256_mask ((__v16hi) __A,
-						    (__v16hi) __B,
-						    (__v32qi)
-						    _mm256_setzero_si256 (),
-						    __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_packs_epi16 (__m256i __W, __mmask32 __M, __m256i __A,
-			 __m256i __B)
-{
-  return (__m256i) __builtin_ia32_packsswb256_mask ((__v16hi) __A,
-						    (__v16hi) __B,
-						    (__v32qi) __W,
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_packs_epi16 (__mmask16 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_packsswb128_mask ((__v8hi) __A,
-						    (__v8hi) __B,
-						    (__v16qi)
-						    _mm_setzero_si128 (),
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_packs_epi16 (__m128i __W, __mmask16 __M, __m128i __A,
-		      __m128i __B)
-{
-  return (__m128i) __builtin_ia32_packsswb128_mask ((__v8hi) __A,
-						    (__v8hi) __B,
-						    (__v16qi) __W,
-						    __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_packus_epi16 (__mmask32 __M, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_packuswb256_mask ((__v16hi) __A,
-						    (__v16hi) __B,
-						    (__v32qi)
-						    _mm256_setzero_si256 (),
-						    __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_packus_epi16 (__m256i __W, __mmask32 __M, __m256i __A,
-			  __m256i __B)
-{
-  return (__m256i) __builtin_ia32_packuswb256_mask ((__v16hi) __A,
-						    (__v16hi) __B,
-						    (__v32qi) __W,
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_packus_epi16 (__mmask16 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_packuswb128_mask ((__v8hi) __A,
-						    (__v8hi) __B,
-						    (__v16qi)
-						    _mm_setzero_si128 (),
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_packus_epi16 (__m128i __W, __mmask16 __M, __m128i __A,
-		       __m128i __B)
-{
-  return (__m128i) __builtin_ia32_packuswb128_mask ((__v8hi) __A,
-						    (__v8hi) __B,
-						    (__v16qi) __W,
-						    __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_abs_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_pabsb256_mask ((__v32qi) __A,
-						 (__v32qi) __W,
-						 (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_pabsb256_mask ((__v32qi) __A,
-						 (__v32qi)
-						 _mm256_setzero_si256 (),
-						 (__mmask32) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_abs_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pabsb128_mask ((__v16qi) __A,
-						 (__v16qi) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_abs_epi8 (__mmask16 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pabsb128_mask ((__v16qi) __A,
-						 (__v16qi)
-						 _mm_setzero_si128 (),
-						 (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_abs_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_pabsw256_mask ((__v16hi) __A,
-						 (__v16hi) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_abs_epi16 (__mmask16 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_pabsw256_mask ((__v16hi) __A,
-						 (__v16hi)
-						 _mm256_setzero_si256 (),
-						 (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_abs_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pabsw128_mask ((__v8hi) __A,
-						 (__v8hi) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_abs_epi16 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pabsw128_mask ((__v8hi) __A,
-						 (__v8hi)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __mmask32
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpneq_epu8_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						   (__v32qi) __Y, 4,
-						   (__mmask32) - 1);
-}
-
-extern __inline __mmask32
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmplt_epu8_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						   (__v32qi) __Y, 1,
-						   (__mmask32) - 1);
-}
-
-extern __inline __mmask32
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpge_epu8_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						   (__v32qi) __Y, 5,
-						   (__mmask32) - 1);
-}
-
-extern __inline __mmask32
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmple_epu8_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						   (__v32qi) __Y, 2,
-						   (__mmask32) - 1);
-}
-
-extern __inline __mmask16
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpneq_epu16_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						   (__v16hi) __Y, 4,
-						   (__mmask16) - 1);
-}
-
-extern __inline __mmask16
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmplt_epu16_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						   (__v16hi) __Y, 1,
-						   (__mmask16) - 1);
-}
-
-extern __inline __mmask16
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpge_epu16_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						   (__v16hi) __Y, 5,
-						   (__mmask16) - 1);
-}
-
-extern __inline __mmask16
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmple_epu16_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						   (__v16hi) __Y, 2,
-						   (__mmask16) - 1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
-{
-  __builtin_ia32_storedquhi256_mask ((__v16hi *) __P,
-				     (__v16hi) __A,
-				     (__mmask16) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
-{
-  __builtin_ia32_storedquhi128_mask ((__v8hi *) __P,
-				     (__v8hi) __A,
-				     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_adds_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
-		     __m128i __B)
-{
-  return (__m128i) __builtin_ia32_paddsw128_mask ((__v8hi) __A,
-						  (__v8hi) __B,
-						  (__v8hi) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_subs_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psubsb128_mask ((__v16qi) __A,
-						  (__v16qi) __B,
-						  (__v16qi) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_subs_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psubsb128_mask ((__v16qi) __A,
-						  (__v16qi) __B,
-						  (__v16qi)
-						  _mm_setzero_si128 (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_subs_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
-		     __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psubsw128_mask ((__v8hi) __A,
-						  (__v8hi) __B,
-						  (__v8hi) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_subs_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psubsw128_mask ((__v8hi) __A,
-						  (__v8hi) __B,
-						  (__v8hi)
-						  _mm_setzero_si128 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_subs_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psubusb128_mask ((__v16qi) __A,
-						   (__v16qi) __B,
-						   (__v16qi) __W,
-						   (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_subs_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psubusb128_mask ((__v16qi) __A,
-						   (__v16qi) __B,
-						   (__v16qi)
-						   _mm_setzero_si128 (),
-						   (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_subs_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
-		     __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psubusw128_mask ((__v8hi) __A,
-						   (__v8hi) __B,
-						   (__v8hi) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_subs_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psubusw128_mask ((__v8hi) __A,
-						   (__v8hi) __B,
-						   (__v8hi)
-						   _mm_setzero_si128 (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_srl_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
-		       __m128i __B)
-{
-  return (__m256i) __builtin_ia32_psrlw256_mask ((__v16hi) __A,
-						 (__v8hi) __B,
-						 (__v16hi) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_srl_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
-{
-  return (__m256i) __builtin_ia32_psrlw256_mask ((__v16hi) __A,
-						 (__v8hi) __B,
-						 (__v16hi)
-						 _mm256_setzero_si256 (),
-						 (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_srl_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psrlw128_mask ((__v8hi) __A,
-						 (__v8hi) __B,
-						 (__v8hi) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psrlw128_mask ((__v8hi) __A,
-						 (__v8hi) __B,
-						 (__v8hi)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sra_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
-		       __m128i __B)
-{
-  return (__m256i) __builtin_ia32_psraw256_mask ((__v16hi) __A,
-						 (__v8hi) __B,
-						 (__v16hi) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sra_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
-{
-  return (__m256i) __builtin_ia32_psraw256_mask ((__v16hi) __A,
-						 (__v8hi) __B,
-						 (__v16hi)
-						 _mm256_setzero_si256 (),
-						 (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sra_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psraw128_mask ((__v8hi) __A,
-						 (__v8hi) __B,
-						 (__v8hi) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sra_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psraw128_mask ((__v8hi) __A,
-						 (__v8hi) __B,
-						 (__v8hi)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_adds_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_paddsw128_mask ((__v8hi) __A,
-						  (__v8hi) __B,
-						  (__v8hi)
-						  _mm_setzero_si128 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_adds_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_paddusb128_mask ((__v16qi) __A,
-						   (__v16qi) __B,
-						   (__v16qi) __W,
-						   (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_adds_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_paddusb128_mask ((__v16qi) __A,
-						   (__v16qi) __B,
-						   (__v16qi)
-						   _mm_setzero_si128 (),
-						   (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_adds_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
-		     __m128i __B)
-{
-  return (__m128i) __builtin_ia32_paddusw128_mask ((__v8hi) __A,
-						   (__v8hi) __B,
-						   (__v8hi) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_adds_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_paddusw128_mask ((__v8hi) __A,
-						   (__v8hi) __B,
-						   (__v8hi)
-						   _mm_setzero_si128 (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sub_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
-		   __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A,
-						 (__v16qi) __B,
-						 (__v16qi) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sub_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A,
-						 (__v16qi) __B,
-						 (__v16qi)
-						 _mm_setzero_si128 (),
-						 (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sub_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A,
-						 (__v8hi) __B,
-						 (__v8hi) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sub_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A,
-						 (__v8hi) __B,
-						 (__v8hi)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_adds_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_paddsb128_mask ((__v16qi) __A,
-						  (__v16qi) __B,
-						  (__v16qi) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_adds_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_paddsb128_mask ((__v16qi) __A,
-						  (__v16qi) __B,
-						  (__v16qi)
-						  _mm_setzero_si128 (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepi16_epi8 (__m128i __A)
-{
-
-  return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
-						  (__v16qi)_mm_undefined_si128(),
-						  (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
-						  (__v16qi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
-						  (__v16qi)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_srav_epi16 (__m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi)
-						  _mm256_setzero_si256 (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_srav_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
-			__m256i __B)
-{
-  return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_srav_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi)
-						  _mm256_setzero_si256 (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srav_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
-						 (__v8hi) __B,
-						 (__v8hi)
-						 _mm_setzero_hi (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_srav_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
-		     __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
-						 (__v8hi) __B,
-						 (__v8hi) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_srav_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
-						 (__v8hi) __B,
-						 (__v8hi)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_srlv_epi16 (__m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi)
-						  _mm256_setzero_si256 (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_srlv_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
-			__m256i __B)
-{
-  return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_srlv_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi)
-						  _mm256_setzero_si256 (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srlv_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
-						 (__v8hi) __B,
-						 (__v8hi)
-						 _mm_setzero_hi (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_srlv_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
-		     __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
-						 (__v8hi) __B,
-						 (__v8hi) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_srlv_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
-						 (__v8hi) __B,
-						 (__v8hi)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_sllv_epi16 (__m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi)
-						  _mm256_setzero_si256 (),
-						  (__mmask16) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sllv_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
-			__m256i __B)
-{
-  return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi) __W,
-						  (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sllv_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
-						  (__v16hi) __B,
-						  (__v16hi)
-						  _mm256_setzero_si256 (),
-						  (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sllv_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
-						 (__v8hi) __B,
-						 (__v8hi)
-						 _mm_setzero_hi (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sllv_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
-		     __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
-						 (__v8hi) __B,
-						 (__v8hi) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sllv_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
-						 (__v8hi) __B,
-						 (__v8hi)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sll_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psllw128_mask ((__v8hi) __A,
-						 (__v8hi) __B,
-						 (__v8hi) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psllw128_mask ((__v8hi) __A,
-						 (__v8hi) __B,
-						 (__v8hi)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sll_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
-		       __m128i __B)
-{
-  return (__m256i) __builtin_ia32_psllw256_mask ((__v16hi) __A,
-						 (__v8hi) __B,
-						 (__v16hi) __W,
-						 (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sll_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
-{
-  return (__m256i) __builtin_ia32_psllw256_mask ((__v16hi) __A,
-						 (__v8hi) __B,
-						 (__v16hi)
-						 _mm256_setzero_si256 (),
-						 (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_packus_epi32 (__mmask16 __M, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_packusdw256_mask ((__v8si) __A,
-						    (__v8si) __B,
-						    (__v16hi)
-						    _mm256_setzero_si256 (),
-						    __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_packus_epi32 (__m256i __W, __mmask16 __M, __m256i __A,
-			  __m256i __B)
-{
-  return (__m256i) __builtin_ia32_packusdw256_mask ((__v8si) __A,
-						    (__v8si) __B,
-						    (__v16hi) __W,
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_packus_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_packusdw128_mask ((__v4si) __A,
-						    (__v4si) __B,
-						    (__v8hi)
-						    _mm_setzero_si128 (),
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_packus_epi32 (__m128i __W, __mmask16 __M, __m128i __A,
-		       __m128i __B)
-{
-  return (__m128i) __builtin_ia32_packusdw128_mask ((__v4si) __A,
-						    (__v4si) __B,
-						    (__v8hi) __W, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_packs_epi32 (__mmask16 __M, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_packssdw256_mask ((__v8si) __A,
-						    (__v8si) __B,
-						    (__v16hi)
-						    _mm256_setzero_si256 (),
-						    __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_packs_epi32 (__m256i __W, __mmask16 __M, __m256i __A,
-			 __m256i __B)
-{
-  return (__m256i) __builtin_ia32_packssdw256_mask ((__v8si) __A,
-						    (__v8si) __B,
-						    (__v16hi) __W,
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_packs_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_packssdw128_mask ((__v4si) __A,
-						    (__v4si) __B,
-						    (__v8hi)
-						    _mm_setzero_si128 (),
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_packs_epi32 (__m128i __W, __mmask16 __M, __m128i __A,
-		      __m128i __B)
-{
-  return (__m128i) __builtin_ia32_packssdw128_mask ((__v4si) __A,
-						    (__v4si) __B,
-						    (__v8hi) __W, __M);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpneq_epu8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
-						   (__v16qi) __Y, 4,
-						   (__mmask16) __M);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmplt_epu8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
-						   (__v16qi) __Y, 1,
-						   (__mmask16) __M);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpge_epu8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
-						   (__v16qi) __Y, 5,
-						   (__mmask16) __M);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmple_epu8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
-						   (__v16qi) __Y, 2,
-						   (__mmask16) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpneq_epu16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
-						  (__v8hi) __Y, 4,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmplt_epu16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
-						  (__v8hi) __Y, 1,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpge_epu16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
-						  (__v8hi) __Y, 5,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmple_epu16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
-						  (__v8hi) __Y, 2,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpneq_epi8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
-						  (__v16qi) __Y, 4,
-						  (__mmask16) __M);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmplt_epi8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
-						  (__v16qi) __Y, 1,
-						  (__mmask16) __M);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpge_epi8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
-						  (__v16qi) __Y, 5,
-						  (__mmask16) __M);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmple_epi8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
-						  (__v16qi) __Y, 2,
-						  (__mmask16) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpneq_epi16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
-						 (__v8hi) __Y, 4,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmplt_epi16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
-						 (__v8hi) __Y, 1,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpge_epi16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
-						 (__v8hi) __Y, 5,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmple_epi16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
-						 (__v8hi) __Y, 2,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, 4,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, 1,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, 5,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, 2,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, 4,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, 1,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, 5,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, 2,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						 (__v32qi) __Y, 4,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						 (__v32qi) __Y, 1,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						 (__v32qi) __Y, 5,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						 (__v32qi) __Y, 2,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						 (__v16hi) __Y, 4,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						 (__v16hi) __Y, 1,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						 (__v16hi) __Y, 5,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						 (__v16hi) __Y, 2,
-						 (__mmask8) __M);
-}
-
-#ifdef __DISABLE_AVX512VLBW__
-#undef __DISABLE_AVX512VLBW__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512VLBW__ */
-
-#endif /* _AVX512VLBWINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512vldqintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512vldqintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,2025 +1,0 @@
-/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX512VLDQINTRIN_H_INCLUDED
-#define _AVX512VLDQINTRIN_H_INCLUDED
-
-#if !defined(__AVX512VL__) || !defined(__AVX512DQ__)
-#pragma GCC push_options
-#pragma GCC target("avx512vl,avx512dq")
-#define __DISABLE_AVX512VLDQ__
-#endif /* __AVX512VLDQ__ */
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvttpd_epi64 (__m256d __A)
-{
-  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
-						     (__v4di)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
-{
-  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
-						     (__v4di) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A)
-{
-  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
-						     (__v4di)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttpd_epi64 (__m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
-						     (__v2di)
-						     _mm_setzero_di (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
-						     (__v2di) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
-						     (__v2di)
-						     _mm_setzero_si128 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvttpd_epu64 (__m256d __A)
-{
-  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
-						      (__v4di)
-						      _mm256_setzero_si256 (),
-						      (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
-{
-  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
-						      (__v4di) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A)
-{
-  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
-						      (__v4di)
-						      _mm256_setzero_si256 (),
-						      (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttpd_epu64 (__m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
-						      (__v2di)
-						      _mm_setzero_di (),
-						      (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
-						      (__v2di) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
-						      (__v2di)
-						      _mm_setzero_si128 (),
-						      (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtpd_epi64 (__m256d __A)
-{
-  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
-						    (__v4di)
-						    _mm256_setzero_si256 (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
-{
-  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
-						    (__v4di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A)
-{
-  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
-						    (__v4di)
-						    _mm256_setzero_si256 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtpd_epi64 (__m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
-						    (__v2di)
-						    _mm_setzero_di (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
-						    (__v2di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
-						    (__v2di)
-						    _mm_setzero_si128 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtpd_epu64 (__m256d __A)
-{
-  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
-						     (__v4di)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
-{
-  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
-						     (__v4di) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A)
-{
-  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
-						     (__v4di)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtpd_epu64 (__m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
-						     (__v2di)
-						     _mm_setzero_di (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
-						     (__v2di) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
-						     (__v2di)
-						     _mm_setzero_si128 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvttps_epi64 (__m128 __A)
-{
-  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
-						     (__v4di)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
-{
-  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
-						     (__v4di) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
-{
-  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
-						     (__v4di)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttps_epi64 (__m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
-						     (__v2di)
-						     _mm_setzero_di (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
-						     (__v2di) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
-						     (__v2di)
-						     _mm_setzero_di (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvttps_epu64 (__m128 __A)
-{
-  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
-						      (__v4di)
-						      _mm256_setzero_si256 (),
-						      (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
-{
-  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
-						      (__v4di) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
-{
-  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
-						      (__v4di)
-						      _mm256_setzero_si256 (),
-						      (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttps_epu64 (__m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
-						      (__v2di)
-						      _mm_setzero_di (),
-						      (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
-						      (__v2di) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
-						      (__v2di)
-						      _mm_setzero_di (),
-						      (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_broadcast_f64x2 (__m128d __A)
-{
-  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
-							   __A,
-						           (__v4df)_mm256_undefined_pd(),
-							   (__mmask8) -
-							   1);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A)
-{
-  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
-							   __A,
-							   (__v4df)
-							   __O, __M);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
-{
-  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
-							   __A,
-							   (__v4df)
-							   _mm256_setzero_ps (),
-							   __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_broadcast_i64x2 (__m128i __A)
-{
-  return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
-							   __A,
-						           (__v4di)_mm256_undefined_si256(),
-							   (__mmask8) -
-							   1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
-							   __A,
-							   (__v4di)
-							   __O, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
-							   __A,
-							   (__v4di)
-							   _mm256_setzero_si256 (),
-							   __M);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_broadcast_f32x2 (__m128 __A)
-{
-  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
-						          (__v8sf)_mm256_undefined_ps(),
-							  (__mmask8) -
-							  1);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
-{
-  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
-							  (__v8sf) __O,
-							  __M);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
-{
-  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
-							  (__v8sf)
-							  _mm256_setzero_ps (),
-							  __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_broadcast_i32x2 (__m128i __A)
-{
-  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
-							   __A,
-						          (__v8si)_mm256_undefined_si256(),
-							   (__mmask8) -
-							   1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
-							   __A,
-							   (__v8si)
-							   __O, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
-							   __A,
-							   (__v8si)
-							   _mm256_setzero_si256 (),
-							   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_broadcast_i32x2 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
-							   __A,
-						          (__v4si)_mm_undefined_si128(),
-							   (__mmask8) -
-							   1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
-							   __A,
-							   (__v4si)
-							   __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
-							   __A,
-							   (__v4si)
-							   _mm_setzero_si128 (),
-							   __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mullo_epi64 (__m256i __A, __m256i __B)
-{
-  return (__m256i) ((__v4du) __A * (__v4du) __B);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-			 __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mullo_epi64 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v2du) __A * (__v2du) __B);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-		      __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di)
-						  _mm_setzero_di (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A,
-		       __m256d __B)
-{
-  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
-						  (__v4df) __B,
-						  (__v4df) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
-						  (__v4df) __B,
-						  (__v4df)
-						  _mm256_setzero_pd (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A,
-		    __m128d __B)
-{
-  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
-						  (__v2df) __B,
-						  (__v2df) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
-						  (__v2df) __B,
-						  (__v2df)
-						  _mm_setzero_pd (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A,
-		       __m256 __B)
-{
-  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
-						 (__v8sf) __B,
-						 (__v8sf) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
-						 (__v8sf) __B,
-						 (__v8sf)
-						 _mm256_setzero_ps (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
-						 (__v4sf) __B,
-						 (__v4sf) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
-						 (__v4sf) __B,
-						 (__v4sf)
-						 _mm_setzero_ps (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtps_epi64 (__m128 __A)
-{
-  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
-						    (__v4di)
-						    _mm256_setzero_si256 (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
-{
-  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
-						    (__v4di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
-{
-  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
-						    (__v4di)
-						    _mm256_setzero_si256 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtps_epi64 (__m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
-						    (__v2di)
-						    _mm_setzero_di (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
-						    (__v2di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
-						    (__v2di)
-						    _mm_setzero_di (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtps_epu64 (__m128 __A)
-{
-  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
-						     (__v4di)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
-{
-  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
-						     (__v4di) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
-{
-  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
-						     (__v4di)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtps_epu64 (__m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
-						     (__v2di)
-						     _mm_setzero_di (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
-						     (__v2di) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
-						     (__v2di)
-						     _mm_setzero_di (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepi64_ps (__m256i __A)
-{
-  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
-						   (__v4sf)
-						   _mm_setzero_ps (),
-						   (__mmask8) -1);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A)
-{
-  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
-						   (__v4sf) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A)
-{
-  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
-						   (__v4sf)
-						   _mm_setzero_ps (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepi64_ps (__m128i __A)
-{
-  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
-						   (__v4sf)
-						   _mm_setzero_ps (),
-						   (__mmask8) -1);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
-						   (__v4sf) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A)
-{
-  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
-						   (__v4sf)
-						   _mm_setzero_ps (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepu64_ps (__m256i __A)
-{
-  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
-						    (__v4sf)
-						    _mm_setzero_ps (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A)
-{
-  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
-						    (__v4sf) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A)
-{
-  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
-						    (__v4sf)
-						    _mm_setzero_ps (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepu64_ps (__m128i __A)
-{
-  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
-						    (__v4sf)
-						    _mm_setzero_ps (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
-						    (__v4sf) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A)
-{
-  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
-						    (__v4sf)
-						    _mm_setzero_ps (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepi64_pd (__m256i __A)
-{
-  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
-						    (__v4df)
-						    _mm256_setzero_pd (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A)
-{
-  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
-						    (__v4df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A)
-{
-  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
-						    (__v4df)
-						    _mm256_setzero_pd (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepi64_pd (__m128i __A)
-{
-  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
-						    (__v2df)
-						    _mm_setzero_pd (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
-						    (__v2df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A)
-{
-  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
-						    (__v2df)
-						    _mm_setzero_pd (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepu64_pd (__m256i __A)
-{
-  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
-						     (__v4df)
-						     _mm256_setzero_pd (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A)
-{
-  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
-						     (__v4df) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A)
-{
-  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
-						     (__v4df)
-						     _mm256_setzero_pd (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A,
-		    __m256d __B)
-{
-  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
-						 (__v4df) __B,
-						 (__v4df) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
-						 (__v4df) __B,
-						 (__v4df)
-						 _mm256_setzero_pd (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
-						 (__v2df) __B,
-						 (__v2df) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
-						 (__v2df) __B,
-						 (__v2df)
-						 _mm_setzero_pd (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
-						(__v8sf) __B,
-						(__v8sf) __W,
-						(__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
-						(__v8sf) __B,
-						(__v8sf)
-						_mm256_setzero_ps (),
-						(__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
-						(__v4sf) __B,
-						(__v4sf) __W,
-						(__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
-						(__v4sf) __B,
-						(__v4sf)
-						_mm_setzero_ps (),
-						(__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepu64_pd (__m128i __A)
-{
-  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
-						     (__v2df)
-						     _mm_setzero_pd (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
-						     (__v2df) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A)
-{
-  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
-						     (__v2df)
-						     _mm_setzero_pd (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
-		    __m256d __B)
-{
-  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
-						 (__v4df) __B,
-						 (__v4df) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
-						 (__v4df) __B,
-						 (__v4df)
-						 _mm256_setzero_pd (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
-						 (__v2df) __B,
-						 (__v2df) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
-						 (__v2df) __B,
-						 (__v2df)
-						 _mm_setzero_pd (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
-						(__v8sf) __B,
-						(__v8sf) __W,
-						(__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
-						(__v8sf) __B,
-						(__v8sf)
-						_mm256_setzero_ps (),
-						(__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
-						(__v4sf) __B,
-						(__v4sf) __W,
-						(__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
-						(__v4sf) __B,
-						(__v4sf)
-						_mm_setzero_ps (),
-						(__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
-						(__v4df) __B,
-						(__v4df) __W,
-						(__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
-						(__v4df) __B,
-						(__v4df)
-						_mm256_setzero_pd (),
-						(__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
-						(__v2df) __B,
-						(__v2df) __W,
-						(__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
-						(__v2df) __B,
-						(__v2df)
-						_mm_setzero_pd (),
-						(__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
-					       (__v8sf) __B,
-					       (__v8sf) __W,
-					       (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
-					       (__v8sf) __B,
-					       (__v8sf)
-					       _mm256_setzero_ps (),
-					       (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
-					       (__v4sf) __B,
-					       (__v4sf) __W,
-					       (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
-					       (__v4sf) __B,
-					       (__v4sf)
-					       _mm_setzero_ps (),
-					       (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movm_epi32 (__mmask8 __A)
-{
-  return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_movm_epi32 (__mmask8 __A)
-{
-  return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movm_epi64 (__mmask8 __A)
-{
-  return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_movm_epi64 (__mmask8 __A)
-{
-  return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movepi32_mask (__m128i __A)
-{
-  return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_movepi32_mask (__m256i __A)
-{
-  return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movepi64_mask (__m128i __A)
-{
-  return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_movepi64_mask (__m256i __A)
-{
-  return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_extractf64x2_pd (__m256d __A, const int __imm)
-{
-  return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
-							 __imm,
-							 (__v2df)
-							 _mm_setzero_pd (),
-							 (__mmask8) -
-							 1);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m256d __A,
-			     const int __imm)
-{
-  return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
-							 __imm,
-							 (__v2df) __W,
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_extractf64x2_pd (__mmask8 __U, __m256d __A,
-			      const int __imm)
-{
-  return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
-							 __imm,
-							 (__v2df)
-							 _mm_setzero_pd (),
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_extracti64x2_epi64 (__m256i __A, const int __imm)
-{
-  return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
-							 __imm,
-							 (__v2di)
-							 _mm_setzero_di (),
-							 (__mmask8) -
-							 1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m256i __A,
-				const int __imm)
-{
-  return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
-							 __imm,
-							 (__v2di) __W,
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_extracti64x2_epi64 (__mmask8 __U, __m256i __A,
-				 const int __imm)
-{
-  return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
-							 __imm,
-							 (__v2di)
-							 _mm_setzero_di (),
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_reduce_pd (__m256d __A, int __B)
-{
-  return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
-						    (__v4df)
-						    _mm256_setzero_pd (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_reduce_pd (__m256d __W, __mmask8 __U, __m256d __A, int __B)
-{
-  return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
-						    (__v4df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_reduce_pd (__mmask8 __U, __m256d __A, int __B)
-{
-  return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
-						    (__v4df)
-						    _mm256_setzero_pd (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_reduce_pd (__m128d __A, int __B)
-{
-  return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
-						    (__v2df)
-						    _mm_setzero_pd (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_reduce_pd (__m128d __W, __mmask8 __U, __m128d __A, int __B)
-{
-  return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
-						    (__v2df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_reduce_pd (__mmask8 __U, __m128d __A, int __B)
-{
-  return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
-						    (__v2df)
-						    _mm_setzero_pd (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_reduce_ps (__m256 __A, int __B)
-{
-  return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
-						   (__v8sf)
-						   _mm256_setzero_ps (),
-						   (__mmask8) -1);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_reduce_ps (__m256 __W, __mmask8 __U, __m256 __A, int __B)
-{
-  return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
-						   (__v8sf) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_reduce_ps (__mmask8 __U, __m256 __A, int __B)
-{
-  return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
-						   (__v8sf)
-						   _mm256_setzero_ps (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_reduce_ps (__m128 __A, int __B)
-{
-  return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
-						   (__v4sf)
-						   _mm_setzero_ps (),
-						   (__mmask8) -1);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_reduce_ps (__m128 __W, __mmask8 __U, __m128 __A, int __B)
-{
-  return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
-						   (__v4sf) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_reduce_ps (__mmask8 __U, __m128 __A, int __B)
-{
-  return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
-						   (__v4sf)
-						   _mm_setzero_ps (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_range_pd (__m256d __A, __m256d __B, int __C)
-{
-  return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
-						   (__v4df) __B, __C,
-						   (__v4df)
-						   _mm256_setzero_pd (),
-						   (__mmask8) -1);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_range_pd (__m256d __W, __mmask8 __U,
-		      __m256d __A, __m256d __B, int __C)
-{
-  return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
-						   (__v4df) __B, __C,
-						   (__v4df) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_range_pd (__mmask8 __U, __m256d __A, __m256d __B, int __C)
-{
-  return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
-						   (__v4df) __B, __C,
-						   (__v4df)
-						   _mm256_setzero_pd (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_range_pd (__m128d __A, __m128d __B, int __C)
-{
-  return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
-						   (__v2df) __B, __C,
-						   (__v2df)
-						   _mm_setzero_pd (),
-						   (__mmask8) -1);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_range_pd (__m128d __W, __mmask8 __U,
-		   __m128d __A, __m128d __B, int __C)
-{
-  return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
-						   (__v2df) __B, __C,
-						   (__v2df) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_range_pd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
-{
-  return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
-						   (__v2df) __B, __C,
-						   (__v2df)
-						   _mm_setzero_pd (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_range_ps (__m256 __A, __m256 __B, int __C)
-{
-  return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
-						  (__v8sf) __B, __C,
-						  (__v8sf)
-						  _mm256_setzero_ps (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_range_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
-		      int __C)
-{
-  return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
-						  (__v8sf) __B, __C,
-						  (__v8sf) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_range_ps (__mmask8 __U, __m256 __A, __m256 __B, int __C)
-{
-  return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
-						  (__v8sf) __B, __C,
-						  (__v8sf)
-						  _mm256_setzero_ps (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_range_ps (__m128 __A, __m128 __B, int __C)
-{
-  return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
-						  (__v4sf) __B, __C,
-						  (__v4sf)
-						  _mm_setzero_ps (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_range_ps (__m128 __W, __mmask8 __U,
-		   __m128 __A, __m128 __B, int __C)
-{
-  return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
-						  (__v4sf) __B, __C,
-						  (__v4sf) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_range_ps (__mmask8 __U, __m128 __A, __m128 __B, int __C)
-{
-  return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
-						  (__v4sf) __B, __C,
-						  (__v4sf)
-						  _mm_setzero_ps (),
-						  (__mmask8) __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_fpclass_pd_mask (__mmask8 __U, __m256d __A,
-			     const int __imm)
-{
-  return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
-						      __imm, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_fpclass_pd_mask (__m256d __A, const int __imm)
-{
-  return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
-						      __imm,
-						      (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_fpclass_ps_mask (__mmask8 __U, __m256 __A, const int __imm)
-{
-  return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
-						      __imm, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_fpclass_ps_mask (__m256 __A, const int __imm)
-{
-  return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
-						      __imm,
-						      (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fpclass_pd_mask (__mmask8 __U, __m128d __A, const int __imm)
-{
-  return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
-						      __imm, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fpclass_pd_mask (__m128d __A, const int __imm)
-{
-  return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
-						      __imm,
-						      (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fpclass_ps_mask (__mmask8 __U, __m128 __A, const int __imm)
-{
-  return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
-						      __imm, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fpclass_ps_mask (__m128 __A, const int __imm)
-{
-  return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
-						      __imm,
-						      (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_inserti64x2 (__m256i __A, __m128i __B, const int __imm)
-{
-  return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
-							(__v2di) __B,
-							__imm,
-							(__v4di)
-							_mm256_setzero_si256 (),
-							(__mmask8) -
-							1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_inserti64x2 (__m256i __W, __mmask8 __U, __m256i __A,
-			 __m128i __B, const int __imm)
-{
-  return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
-							(__v2di) __B,
-							__imm,
-							(__v4di) __W,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_inserti64x2 (__mmask8 __U, __m256i __A, __m128i __B,
-			  const int __imm)
-{
-  return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
-							(__v2di) __B,
-							__imm,
-							(__v4di)
-							_mm256_setzero_si256 (),
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_insertf64x2 (__m256d __A, __m128d __B, const int __imm)
-{
-  return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
-							(__v2df) __B,
-							__imm,
-							(__v4df)
-							_mm256_setzero_pd (),
-							(__mmask8) -
-							1);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_insertf64x2 (__m256d __W, __mmask8 __U, __m256d __A,
-			 __m128d __B, const int __imm)
-{
-  return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
-							(__v2df) __B,
-							__imm,
-							(__v4df) __W,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
-			  const int __imm)
-{
-  return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
-							(__v2df) __B,
-							__imm,
-							(__v4df)
-							_mm256_setzero_pd (),
-							(__mmask8)
-							__U);
-}
-
-#else
-#define _mm256_insertf64x2(X, Y, C)                                     \
-  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
-    (__v2df)(__m128d) (Y), (int) (C),					\
-    (__v4df)(__m256d)_mm256_setzero_pd(),				\
-    (__mmask8)-1))
-
-#define _mm256_mask_insertf64x2(W, U, X, Y, C)                          \
-  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
-    (__v2df)(__m128d) (Y), (int) (C),					\
-    (__v4df)(__m256d)(W),						\
-    (__mmask8)(U)))
-
-#define _mm256_maskz_insertf64x2(U, X, Y, C)				\
-  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
-    (__v2df)(__m128d) (Y), (int) (C),					\
-    (__v4df)(__m256d)_mm256_setzero_pd(),				\
-    (__mmask8)(U)))
-
-#define _mm256_inserti64x2(X, Y, C)                                     \
-  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
-    (__v2di)(__m128i) (Y), (int) (C),					\
-    (__v4di)(__m256i)_mm256_setzero_si256 (),				\
-    (__mmask8)-1))
-
-#define _mm256_mask_inserti64x2(W, U, X, Y, C)                          \
-  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
-    (__v2di)(__m128i) (Y), (int) (C),					\
-    (__v4di)(__m256i)(W),						\
-    (__mmask8)(U)))
-
-#define _mm256_maskz_inserti64x2(U, X, Y, C)                            \
-  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
-    (__v2di)(__m128i) (Y), (int) (C),					\
-    (__v4di)(__m256i)_mm256_setzero_si256 (),				\
-    (__mmask8)(U)))
-
-#define _mm256_extractf64x2_pd(X, C)                                    \
-  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
-    (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8)-1))
-
-#define _mm256_mask_extractf64x2_pd(W, U, X, C)                         \
-  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
-    (int) (C), (__v2df)(__m128d) (W), (__mmask8) (U)))
-
-#define _mm256_maskz_extractf64x2_pd(U, X, C)                           \
-  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
-    (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8) (U)))
-
-#define _mm256_extracti64x2_epi64(X, C)                                 \
-  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
-    (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8)-1))
-
-#define _mm256_mask_extracti64x2_epi64(W, U, X, C)                     \
-  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
-    (int) (C), (__v2di)(__m128i) (W), (__mmask8) (U)))
-
-#define _mm256_maskz_extracti64x2_epi64(U, X, C)                        \
-  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
-    (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8) (U)))
-
-#define _mm256_reduce_pd(A, B)						\
-  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A),	\
-    (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)-1))
-
-#define _mm256_mask_reduce_pd(W, U, A, B)				\
-  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A),	\
-    (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))
-
-#define _mm256_maskz_reduce_pd(U, A, B)					\
-  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A),	\
-    (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))
-
-#define _mm_reduce_pd(A, B)						\
-  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A),	\
-    (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)-1))
-
-#define _mm_mask_reduce_pd(W, U, A, B)					\
-  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A),	\
-    (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U)))
-
-#define _mm_maskz_reduce_pd(U, A, B)					\
-  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A),	\
-    (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
-
-#define _mm256_reduce_ps(A, B)						\
-  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A),	\
-    (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))
-
-#define _mm256_mask_reduce_ps(W, U, A, B)				\
-  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A),	\
-    (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U)))
-
-#define _mm256_maskz_reduce_ps(U, A, B)					\
-  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A),	\
-    (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))
-
-#define _mm_reduce_ps(A, B)						\
-  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A),	\
-    (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)-1))
-
-#define _mm_mask_reduce_ps(W, U, A, B)					\
-  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A),	\
-    (int)(B), (__v4sf)(__m128)(W), (__mmask8)(U)))
-
-#define _mm_maskz_reduce_ps(U, A, B)					\
-  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A),	\
-    (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
-
-#define _mm256_range_pd(A, B, C)					\
-  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A),	\
-    (__v4df)(__m256d)(B), (int)(C),					\
-    (__v4df)_mm256_setzero_pd(), (__mmask8)-1))
-
-#define _mm256_maskz_range_pd(U, A, B, C)				\
-  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A),	\
-    (__v4df)(__m256d)(B), (int)(C),					\
-    (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))
-
-#define _mm_range_pd(A, B, C)						\
-  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A),	\
-    (__v2df)(__m128d)(B), (int)(C),					\
-    (__v2df)_mm_setzero_pd(), (__mmask8)-1))
-
-#define _mm256_range_ps(A, B, C)					\
-  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A),	\
-    (__v8sf)(__m256)(B), (int)(C),					\
-    (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))
-
-#define _mm256_mask_range_ps(W, U, A, B, C)				\
-  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A),	\
-    (__v8sf)(__m256)(B), (int)(C),					\
-    (__v8sf)(__m256)(W), (__mmask8)(U)))
-
-#define _mm256_maskz_range_ps(U, A, B, C)				\
-  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A),	\
-    (__v8sf)(__m256)(B), (int)(C),					\
-    (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))
-
-#define _mm_range_ps(A, B, C)						\
-  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A),	\
-    (__v4sf)(__m128)(B), (int)(C),					\
-    (__v4sf)_mm_setzero_ps(), (__mmask8)-1))
-
-#define _mm_mask_range_ps(W, U, A, B, C)				\
-  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A),	\
-    (__v4sf)(__m128)(B), (int)(C),					\
-    (__v4sf)(__m128)(W), (__mmask8)(U)))
-
-#define _mm_maskz_range_ps(U, A, B, C)					\
-  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A),	\
-    (__v4sf)(__m128)(B), (int)(C),					\
-    (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
-
-#define _mm256_mask_range_pd(W, U, A, B, C)				\
-  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A),	\
-    (__v4df)(__m256d)(B), (int)(C),					\
-    (__v4df)(__m256d)(W), (__mmask8)(U)))
-
-#define _mm_mask_range_pd(W, U, A, B, C)				\
-  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A),	\
-    (__v2df)(__m128d)(B), (int)(C),					\
-    (__v2df)(__m128d)(W), (__mmask8)(U)))
-
-#define _mm_maskz_range_pd(U, A, B, C)					\
-  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A),	\
-    (__v2df)(__m128d)(B), (int)(C),					\
-    (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
-
-#define _mm256_mask_fpclass_pd_mask(u, X, C)                            \
-  ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \
-						(int) (C),(__mmask8)(u)))
-
-#define _mm256_mask_fpclass_ps_mask(u, X, C)				\
-  ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X),  \
-						(int) (C),(__mmask8)(u)))
-
-#define _mm_mask_fpclass_pd_mask(u, X, C)                               \
-  ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \
-						(int) (C),(__mmask8)(u)))
-
-#define _mm_mask_fpclass_ps_mask(u, X, C)                               \
-  ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X),  \
-						(int) (C),(__mmask8)(u)))
-
-#define _mm256_fpclass_pd_mask(X, C)                                    \
-  ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \
-						(int) (C),(__mmask8)-1))
-
-#define _mm256_fpclass_ps_mask(X, C)                                    \
-  ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X),  \
-						(int) (C),(__mmask8)-1))
-
-#define _mm_fpclass_pd_mask(X, C)                                       \
-  ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \
-						(int) (C),(__mmask8)-1))
-
-#define _mm_fpclass_ps_mask(X, C)                                       \
-  ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X),  \
-						(int) (C),(__mmask8)-1))
-
-#endif
-
-#ifdef __DISABLE_AVX512VLDQ__
-#undef __DISABLE_AVX512VLDQ__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512VLDQ__ */
-
-#endif /* _AVX512VLDQINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512vlintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avx512vlintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,13598 +1,0 @@
-/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX512VLINTRIN_H_INCLUDED
-#define _AVX512VLINTRIN_H_INCLUDED
-
-/* Doesn't require avx512vl target and is used in avx512dqintrin.h.  */
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setzero_di (void)
-{
-  return __extension__ (__m128i)(__v2di){ 0LL, 0LL};
-}
-
-#ifndef __AVX512VL__
-#pragma GCC push_options
-#pragma GCC target("avx512vl")
-#define __DISABLE_AVX512VL__
-#endif /* __AVX512VL__ */
-
-/* Internal data types for implementing the intrinsics.  */
-typedef unsigned int __mmask32;
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
-{
-  return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
-						  (__v4df) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
-{
-  return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
-						  (__v4df)
-						  _mm256_setzero_pd (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
-{
-  return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
-						  (__v2df) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
-{
-  return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
-						  (__v2df)
-						  _mm_setzero_pd (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
-{
-  return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
-						   (__v4df) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_load_pd (__mmask8 __U, void const *__P)
-{
-  return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
-						   (__v4df)
-						   _mm256_setzero_pd (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
-{
-  return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
-						   (__v2df) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_load_pd (__mmask8 __U, void const *__P)
-{
-  return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
-						   (__v2df)
-						   _mm_setzero_pd (),
-						   (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
-{
-  __builtin_ia32_storeapd256_mask ((__v4df *) __P,
-				   (__v4df) __A,
-				   (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
-{
-  __builtin_ia32_storeapd128_mask ((__v2df *) __P,
-				   (__v2df) __A,
-				   (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
-{
-  return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
-						 (__v8sf) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
-{
-  return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
-						 (__v8sf)
-						 _mm256_setzero_ps (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
-{
-  return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
-						 (__v4sf) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
-{
-  return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
-						 (__v4sf)
-						 _mm_setzero_ps (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
-{
-  return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
-						  (__v8sf) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_load_ps (__mmask8 __U, void const *__P)
-{
-  return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
-						  (__v8sf)
-						  _mm256_setzero_ps (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
-{
-  return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
-						  (__v4sf) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_load_ps (__mmask8 __U, void const *__P)
-{
-  return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
-						  (__v4sf)
-						  _mm_setzero_ps (),
-						  (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
-{
-  __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
-				   (__v8sf) __A,
-				   (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
-{
-  __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
-				   (__v4sf) __A,
-				   (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
-						     (__v4di) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
-						     (__v4di)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
-						     (__v2di) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
-						     (__v2di)
-						     _mm_setzero_di (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
-{
-  return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
-							(__v4di) __W,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
-{
-  return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
-							(__v4di)
-							_mm256_setzero_si256 (),
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
-{
-  return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
-							(__v2di) __W,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
-{
-  return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
-							(__v2di)
-							_mm_setzero_di (),
-							(__mmask8)
-							__U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
-{
-  __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
-					(__v4di) __A,
-					(__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
-{
-  __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
-					(__v2di) __A,
-					(__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
-						     (__v8si) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
-						     (__v8si)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
-						     (__v4si) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
-						     (__v4si)
-						     _mm_setzero_si128 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
-{
-  return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
-							(__v8si) __W,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
-{
-  return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
-							(__v8si)
-							_mm256_setzero_si256 (),
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
-{
-  return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
-							(__v4si) __W,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
-{
-  return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
-							(__v4si)
-							_mm_setzero_si128 (),
-							(__mmask8)
-							__U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
-{
-  __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
-					(__v8si) __A,
-					(__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
-{
-  __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
-					(__v4si) __A,
-					(__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setzero_hi (void)
-{
-  return __extension__ (__m128i) (__v8hi)
-  {
-  0, 0, 0, 0, 0, 0, 0, 0};
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
-						 (__v2df) __B,
-						 (__v2df) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
-						 (__v2df) __B,
-						 (__v2df)
-						 _mm_setzero_pd (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A,
-		    __m256d __B)
-{
-  return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
-						 (__v4df) __B,
-						 (__v4df) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
-						 (__v4df) __B,
-						 (__v4df)
-						 _mm256_setzero_pd (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_add_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
-						(__v4sf) __B,
-						(__v4sf) __W,
-						(__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_add_ps (__mmask16 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
-						(__v4sf) __B,
-						(__v4sf)
-						_mm_setzero_ps (),
-						(__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_add_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
-						(__v8sf) __B,
-						(__v8sf) __W,
-						(__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_add_ps (__mmask16 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
-						(__v8sf) __B,
-						(__v8sf)
-						_mm256_setzero_ps (),
-						(__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
-						 (__v2df) __B,
-						 (__v2df) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
-						 (__v2df) __B,
-						 (__v2df)
-						 _mm_setzero_pd (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
-		    __m256d __B)
-{
-  return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
-						 (__v4df) __B,
-						 (__v4df) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
-						 (__v4df) __B,
-						 (__v4df)
-						 _mm256_setzero_pd (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sub_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
-						(__v4sf) __B,
-						(__v4sf) __W,
-						(__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sub_ps (__mmask16 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
-						(__v4sf) __B,
-						(__v4sf)
-						_mm_setzero_ps (),
-						(__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sub_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
-						(__v8sf) __B,
-						(__v8sf) __W,
-						(__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sub_ps (__mmask16 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
-						(__v8sf) __B,
-						(__v8sf)
-						_mm256_setzero_ps (),
-						(__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_store_epi64 (void *__P, __m256i __A)
-{
-  *(__m256i *) __P = __A;
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_store_epi64 (void *__P, __m128i __A)
-{
-  *(__m128i *) __P = __A;
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
-{
-  return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
-						   (__v4df) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
-{
-  return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
-						   (__v4df)
-						   _mm256_setzero_pd (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
-{
-  return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
-						   (__v2df) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
-{
-  return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
-						   (__v2df)
-						   _mm_setzero_pd (),
-						   (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
-{
-  __builtin_ia32_storeupd256_mask ((__v4df *) __P,
-				   (__v4df) __A,
-				   (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
-{
-  __builtin_ia32_storeupd128_mask ((__v2df *) __P,
-				   (__v2df) __A,
-				   (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
-{
-  return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
-						  (__v8sf) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
-{
-  return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
-						  (__v8sf)
-						  _mm256_setzero_ps (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
-{
-  return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
-						  (__v4sf) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
-{
-  return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
-						  (__v4sf)
-						  _mm_setzero_ps (),
-						  (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
-{
-  __builtin_ia32_storeups256_mask ((__v8sf *) __P,
-				   (__v8sf) __A,
-				   (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
-{
-  __builtin_ia32_storeups128_mask ((__v4sf *) __P,
-				   (__v4sf) __A,
-				   (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
-{
-  return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
-						     (__v4di) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
-{
-  return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
-						     (__v4di)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
-{
-  return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
-						     (__v2di) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
-{
-  return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
-						     (__v2di)
-						     _mm_setzero_di (),
-						     (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
-{
-  __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
-				     (__v4di) __A,
-				     (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
-{
-  __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
-				     (__v2di) __A,
-				     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
-{
-  return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
-						     (__v8si) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
-{
-  return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
-						     (__v8si)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
-{
-  return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
-						     (__v4si) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
-{
-  return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
-						     (__v4si)
-						     _mm_setzero_si128 (),
-						     (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
-{
-  __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
-				     (__v8si) __A,
-				     (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
-{
-  __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
-				     (__v4si) __A,
-				     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
-						 (__v8si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
-						 (__v8si)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
-						 (__v4si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
-						 (__v4si)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_abs_epi64 (__m256i __A)
-{
-  return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
-						 (__v4di)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
-						 (__v4di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
-						 (__v4di)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_abs_epi64 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
-						 (__v2di)
-						 _mm_setzero_di (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
-						 (__v2di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
-						 (__v2di)
-						 _mm_setzero_di (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtpd_epu32 (__m256d __A)
-{
-  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
-						     (__v4si)
-						     _mm_setzero_si128 (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
-{
-  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
-						     (__v4si) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
-{
-  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
-						     (__v4si)
-						     _mm_setzero_si128 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtpd_epu32 (__m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
-						     (__v4si)
-						     _mm_setzero_si128 (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
-						     (__v4si) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
-						     (__v4si)
-						     _mm_setzero_si128 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
-{
-  return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
-						     (__v8si) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
-{
-  return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
-						     (__v8si)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
-						     (__v4si) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
-						     (__v4si)
-						     _mm_setzero_si128 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvttps_epu32 (__m256 __A)
-{
-  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
-						      (__v8si)
-						      _mm256_setzero_si256 (),
-						      (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
-{
-  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
-						      (__v8si) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
-{
-  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
-						      (__v8si)
-						      _mm256_setzero_si256 (),
-						      (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttps_epu32 (__m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
-						      (__v4si)
-						      _mm_setzero_si128 (),
-						      (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
-						      (__v4si) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
-						      (__v4si)
-						      _mm_setzero_si128 (),
-						      (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
-{
-  return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
-						     (__v4si) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
-{
-  return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
-						     (__v4si)
-						     _mm_setzero_si128 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
-						     (__v4si) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
-						     (__v4si)
-						     _mm_setzero_si128 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvttpd_epu32 (__m256d __A)
-{
-  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
-						      (__v4si)
-						      _mm_setzero_si128 (),
-						      (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
-{
-  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
-						      (__v4si) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
-{
-  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
-						      (__v4si)
-						      _mm_setzero_si128 (),
-						      (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttpd_epu32 (__m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
-						      (__v4si)
-						      _mm_setzero_si128 (),
-						      (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
-						      (__v4si) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
-						      (__v4si)
-						      _mm_setzero_si128 (),
-						      (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
-{
-  return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
-						    (__v4si) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
-{
-  return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
-						    (__v4si)
-						    _mm_setzero_si128 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
-						    (__v4si) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
-{
-  return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
-						    (__v4si)
-						    _mm_setzero_si128 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A)
-{
-  return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
-						    (__v4df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
-{
-  return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
-						    (__v4df)
-						    _mm256_setzero_pd (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
-						    (__v2df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
-{
-  return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
-						    (__v2df)
-						    _mm_setzero_pd (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepu32_pd (__m128i __A)
-{
-  return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
-						     (__v4df)
-						     _mm256_setzero_pd (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A)
-{
-  return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
-						     (__v4df) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
-{
-  return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
-						     (__v4df)
-						     _mm256_setzero_pd (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepu32_pd (__m128i __A)
-{
-  return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
-						     (__v2df)
-						     _mm_setzero_pd (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
-						     (__v2df) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
-{
-  return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
-						     (__v2df)
-						     _mm_setzero_pd (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
-{
-  return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
-						   (__v8sf) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A)
-{
-  return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
-						   (__v8sf)
-						   _mm256_setzero_ps (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
-						   (__v4sf) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A)
-{
-  return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
-						   (__v4sf)
-						   _mm_setzero_ps (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepu32_ps (__m256i __A)
-{
-  return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
-						    (__v8sf)
-						    _mm256_setzero_ps (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A)
-{
-  return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
-						    (__v8sf) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
-{
-  return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
-						    (__v8sf)
-						    _mm256_setzero_ps (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepu32_ps (__m128i __A)
-{
-  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
-						    (__v4sf)
-						    _mm_setzero_ps (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
-						    (__v4sf) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
-{
-  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
-						    (__v4sf)
-						    _mm_setzero_ps (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A)
-{
-  return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
-						    (__v4df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
-{
-  return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
-						    (__v4df)
-						    _mm256_setzero_pd (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A)
-{
-  return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
-						    (__v2df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
-{
-  return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
-						    (__v2df)
-						    _mm_setzero_pd (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepi32_epi8 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
-						  (__v16qi)_mm_undefined_si128(),
-						  (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
-{
-  __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
-						  (__v16qi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
-						  (__v16qi)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepi32_epi8 (__m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
-						  (__v16qi)_mm_undefined_si128(),
-						  (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
-						  (__v16qi) __O, __M);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
-{
-  __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
-						  (__v16qi)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsepi32_epi8 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
-						   (__v16qi)_mm_undefined_si128(),
-						   (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
-{
-  __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
-						   (__v16qi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
-						   (__v16qi)
-						   _mm_setzero_si128 (),
-						   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtsepi32_epi8 (__m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
-						   (__v16qi)_mm_undefined_si128(),
-						   (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
-{
-  __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
-						   (__v16qi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
-						   (__v16qi)
-						   _mm_setzero_si128 (),
-						   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtusepi32_epi8 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
-						    (__v16qi)_mm_undefined_si128(),
-						    (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
-{
-  __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
-						    (__v16qi) __O,
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
-						    (__v16qi)
-						    _mm_setzero_si128 (),
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtusepi32_epi8 (__m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
-						    (__v16qi)_mm_undefined_si128(),
-						    (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
-{
-  __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
-						    (__v16qi) __O,
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
-						    (__v16qi)
-						    _mm_setzero_si128 (),
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepi32_epi16 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
-						  (__v8hi) _mm_setzero_si128 (),
-						  (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
-{
-  __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
-						  (__v8hi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
-						  (__v8hi)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepi32_epi16 (__m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
-						  (__v8hi)_mm_setzero_si128 (),
-						  (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi32_storeu_epi16 (void *  __P, __mmask8 __M, __m256i __A)
-{
-  __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
-						  (__v8hi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
-						  (__v8hi)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsepi32_epi16 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
-						   (__v8hi)_mm_setzero_si128 (),
-						   (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
-{
-  __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
-						   (__v8hi)__O,
-						   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
-						   (__v8hi)
-						   _mm_setzero_si128 (),
-						   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtsepi32_epi16 (__m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
-						   (__v8hi)_mm_undefined_si128(),
-						   (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
-{
-  __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
-						   (__v8hi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
-						   (__v8hi)
-						   _mm_setzero_si128 (),
-						   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtusepi32_epi16 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
-						    (__v8hi)_mm_undefined_si128(),
-						    (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
-{
-  __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
-						    (__v8hi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
-						    (__v8hi)
-						    _mm_setzero_si128 (),
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtusepi32_epi16 (__m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
-						    (__v8hi)_mm_undefined_si128(),
-						    (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
-{
-  __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
-						    (__v8hi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
-						    (__v8hi)
-						    _mm_setzero_si128 (),
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepi64_epi8 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
-						  (__v16qi)_mm_undefined_si128(),
-						  (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
-{
-  __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
-						  (__v16qi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
-						  (__v16qi)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepi64_epi8 (__m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
-						  (__v16qi)_mm_undefined_si128(),
-						  (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
-{
-  __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
-						  (__v16qi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
-						  (__v16qi)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsepi64_epi8 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
-						   (__v16qi)_mm_undefined_si128(),
-						   (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
-{
-  __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
-						   (__v16qi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
-						   (__v16qi)
-						   _mm_setzero_si128 (),
-						   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtsepi64_epi8 (__m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
-						   (__v16qi)_mm_undefined_si128(),
-						   (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
-{
-  __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
-						   (__v16qi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
-						   (__v16qi)
-						   _mm_setzero_si128 (),
-						   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtusepi64_epi8 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
-						    (__v16qi)_mm_undefined_si128(),
-						    (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
-{
-  __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
-						    (__v16qi) __O,
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
-						    (__v16qi)
-						    _mm_setzero_si128 (),
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtusepi64_epi8 (__m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
-						    (__v16qi)_mm_undefined_si128(),
-						    (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
-{
-  __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
-						    (__v16qi) __O,
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
-						    (__v16qi)
-						    _mm_setzero_si128 (),
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepi64_epi16 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
-						  (__v8hi)_mm_undefined_si128(),
-						  (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
-{
-  __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
-						  (__v8hi)__O,
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
-						  (__v8hi)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepi64_epi16 (__m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
-						  (__v8hi)_mm_undefined_si128(),
-						  (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
-{
-  __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
-						  (__v8hi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
-						  (__v8hi)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsepi64_epi16 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
-						   (__v8hi)_mm_undefined_si128(),
-						   (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
-{
-  __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
-						   (__v8hi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
-						   (__v8hi)
-						   _mm_setzero_si128 (),
-						   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtsepi64_epi16 (__m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
-						   (__v8hi)_mm_undefined_si128(),
-						   (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
-{
-  __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
-						   (__v8hi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
-						   (__v8hi)
-						   _mm_setzero_si128 (),
-						   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtusepi64_epi16 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
-						    (__v8hi)_mm_undefined_si128(),
-						    (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
-{
-  __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
-						    (__v8hi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
-						    (__v8hi)
-						    _mm_setzero_si128 (),
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtusepi64_epi16 (__m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
-						    (__v8hi)_mm_undefined_si128(),
-						    (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
-{
-  __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
-						    (__v8hi) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
-						    (__v8hi)
-						    _mm_setzero_si128 (),
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepi64_epi32 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
-						  (__v4si)_mm_undefined_si128(),
-						  (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
-{
-  __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
-						  (__v4si) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
-						  (__v4si)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepi64_epi32 (__m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
-						  (__v4si)_mm_undefined_si128(),
-						  (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
-{
-  __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
-						  (__v4si) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
-						  (__v4si)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsepi64_epi32 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
-						   (__v4si)_mm_undefined_si128(),
-						   (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
-{
-  __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
-						   (__v4si) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
-						   (__v4si)
-						   _mm_setzero_si128 (),
-						   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtsepi64_epi32 (__m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
-						   (__v4si)_mm_undefined_si128(),
-						   (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
-{
-  __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
-						   (__v4si)__O,
-						   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
-						   (__v4si)
-						   _mm_setzero_si128 (),
-						   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtusepi64_epi32 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
-						    (__v4si)_mm_undefined_si128(),
-						    (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
-{
-  __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
-						    (__v4si) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
-						    (__v4si)
-						    _mm_setzero_si128 (),
-						    __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtusepi64_epi32 (__m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
-						    (__v4si)_mm_undefined_si128(),
-						    (__mmask8) -1);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
-{
-  __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
-						    (__v4si) __O, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
-{
-  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
-						    (__v4si)
-						    _mm_setzero_si128 (),
-						    __M);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
-{
-  return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
-						      (__v8sf) __O,
-						      __M);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
-{
-  return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
-						      (__v8sf)
-						      _mm256_setzero_ps (),
-						      __M);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
-{
-  return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
-						      (__v4sf) __O,
-						      __M);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
-{
-  return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
-						      (__v4sf)
-						      _mm_setzero_ps (),
-						      __M);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
-{
-  return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
-						       (__v4df) __O,
-						       __M);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
-{
-  return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
-						       (__v4df)
-						       _mm256_setzero_pd (),
-						       __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
-						       (__v8si) __O,
-						       __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
-						       (__v8si)
-						       _mm256_setzero_si256 (),
-						       __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A)
-{
-  return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O,
-							   __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
-{
-  return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A,
-							   (__v8si)
-							   _mm256_setzero_si256 (),
-							   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
-						       (__v4si) __O,
-						       __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
-						       (__v4si)
-						       _mm_setzero_si128 (),
-						       __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A)
-{
-  return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O,
-							   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_set1_epi32 (__mmask8 __M, int __A)
-{
-  return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A,
-							   (__v4si)
-							   _mm_setzero_si128 (),
-							   __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
-						       (__v4di) __O,
-						       __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
-						       (__v4di)
-						       _mm256_setzero_si256 (),
-						       __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
-{
-  return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
-							   __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
-{
-  return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
-							   (__v4di)
-							   _mm256_setzero_si256 (),
-							   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
-						       (__v2di) __O,
-						       __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
-						       (__v2di)
-						       _mm_setzero_si128 (),
-						       __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
-{
-  return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
-							   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
-{
-  return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A,
-							   (__v2di)
-							   _mm_setzero_si128 (),
-							   __M);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_broadcast_f32x4 (__m128 __A)
-{
-  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
-						          (__v8sf)_mm256_undefined_pd (),
-							  (__mmask8) -
-							  1);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
-{
-  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
-							  (__v8sf) __O,
-							  __M);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
-{
-  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
-							  (__v8sf)
-							  _mm256_setzero_ps (),
-							  __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_broadcast_i32x4 (__m128i __A)
-{
-  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
-							   __A,
-						           (__v8si)_mm256_undefined_si256 (),
-							   (__mmask8) -
-							   1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
-							   __A,
-							   (__v8si)
-							   __O, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
-							   __A,
-							   (__v8si)
-							   _mm256_setzero_si256 (),
-							   __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
-						    (__v8si) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
-						    (__v8si)
-						    _mm256_setzero_si256 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
-						    (__v4si) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
-						    (__v4si)
-						    _mm_setzero_si128 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
-						    (__v4di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
-						    (__v4di)
-						    _mm256_setzero_si256 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
-						    (__v2di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
-						    (__v2di)
-						    _mm_setzero_si128 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
-						    (__v8si) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
-						    (__v8si)
-						    _mm256_setzero_si256 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
-						    (__v4si) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
-						    (__v4si)
-						    _mm_setzero_si128 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
-						    (__v4di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
-						    (__v4di)
-						    _mm256_setzero_si256 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
-						    (__v2di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
-						    (__v2di)
-						    _mm_setzero_si128 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
-{
-  return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
-						    (__v4di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
-{
-  return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
-						    (__v4di)
-						    _mm256_setzero_si256 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
-{
-  return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
-						    (__v2di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
-{
-  return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
-						    (__v2di)
-						    _mm_setzero_si128 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
-						    (__v8si) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
-						    (__v8si)
-						    _mm256_setzero_si256 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
-						    (__v4si) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
-						    (__v4si)
-						    _mm_setzero_si128 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
-						    (__v4di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
-						    (__v4di)
-						    _mm256_setzero_si256 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
-						    (__v2di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
-						    (__v2di)
-						    _mm_setzero_si128 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
-						    (__v8si) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
-						    (__v8si)
-						    _mm256_setzero_si256 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
-						    (__v4si) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
-						    (__v4si)
-						    _mm_setzero_si128 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
-						    (__v4di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
-{
-  return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
-						    (__v4di)
-						    _mm256_setzero_si256 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
-						    (__v2di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
-						    (__v2di)
-						    _mm_setzero_si128 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
-{
-  return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
-						    (__v4di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
-{
-  return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
-						    (__v4di)
-						    _mm256_setzero_si256 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
-{
-  return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
-						    (__v2di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
-{
-  return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
-						    (__v2di)
-						    _mm_setzero_si128 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_rcp14_pd (__m256d __A)
-{
-  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
-					      (__v4df)
-					      _mm256_setzero_pd (),
-					      (__mmask8) -1);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
-{
-  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
-					      (__v4df) __W,
-					      (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
-{
-  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
-					      (__v4df)
-					      _mm256_setzero_pd (),
-					      (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rcp14_pd (__m128d __A)
-{
-  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
-					      (__v2df)
-					      _mm_setzero_pd (),
-					      (__mmask8) -1);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
-{
-  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
-					      (__v2df) __W,
-					      (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
-{
-  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
-					      (__v2df)
-					      _mm_setzero_pd (),
-					      (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_rcp14_ps (__m256 __A)
-{
-  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
-					     (__v8sf)
-					     _mm256_setzero_ps (),
-					     (__mmask8) -1);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
-{
-  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
-					     (__v8sf) __W,
-					     (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
-{
-  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
-					     (__v8sf)
-					     _mm256_setzero_ps (),
-					     (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rcp14_ps (__m128 __A)
-{
-  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
-					     (__v4sf)
-					     _mm_setzero_ps (),
-					     (__mmask8) -1);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
-{
-  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
-					     (__v4sf) __W,
-					     (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
-{
-  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
-					     (__v4sf)
-					     _mm_setzero_ps (),
-					     (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_rsqrt14_pd (__m256d __A)
-{
-  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
-						     (__v4df)
-						     _mm256_setzero_pd (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
-{
-  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
-						     (__v4df) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
-{
-  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
-						     (__v4df)
-						     _mm256_setzero_pd (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rsqrt14_pd (__m128d __A)
-{
-  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
-						     (__v2df)
-						     _mm_setzero_pd (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
-{
-  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
-						     (__v2df) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
-{
-  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
-						     (__v2df)
-						     _mm_setzero_pd (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_rsqrt14_ps (__m256 __A)
-{
-  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
-						    (__v8sf)
-						    _mm256_setzero_ps (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
-{
-  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
-						    (__v8sf) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
-{
-  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
-						    (__v8sf)
-						    _mm256_setzero_ps (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rsqrt14_ps (__m128 __A)
-{
-  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
-						    (__v4sf)
-						    _mm_setzero_ps (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
-{
-  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
-						    (__v4sf) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
-{
-  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
-						    (__v4sf)
-						    _mm_setzero_ps (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A)
-{
-  return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
-						  (__v4df) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
-{
-  return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
-						  (__v4df)
-						  _mm256_setzero_pd (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A)
-{
-  return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
-						  (__v2df) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
-{
-  return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
-						  (__v2df)
-						  _mm_setzero_pd (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A)
-{
-  return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
-						 (__v8sf) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
-{
-  return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
-						 (__v8sf)
-						 _mm256_setzero_ps (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A)
-{
-  return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
-						 (__v4sf) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
-{
-  return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
-						 (__v4sf)
-						 _mm_setzero_ps (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
-						 (__v8si) __B,
-						 (__v8si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
-						 (__v8si) __B,
-						 (__v8si)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
-						 (__v4di) __B,
-						 (__v4di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
-						 (__v4di) __B,
-						 (__v4di)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
-						 (__v8si) __B,
-						 (__v8si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
-						 (__v8si) __B,
-						 (__v8si)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
-						 (__v4di) __B,
-						 (__v4di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
-						 (__v4di) __B,
-						 (__v4di)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
-						 (__v4si) __B,
-						 (__v4si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
-						 (__v4si) __B,
-						 (__v4si)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
-						 (__v2di) __B,
-						 (__v2di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
-						 (__v2di) __B,
-						 (__v2di)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
-						 (__v4si) __B,
-						 (__v4si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
-						 (__v4si) __B,
-						 (__v4si)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
-						 (__v2di) __B,
-						 (__v2di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
-						 (__v2di) __B,
-						 (__v2di)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_getexp_ps (__m256 __A)
-{
-  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
-						   (__v8sf)
-						   _mm256_setzero_ps (),
-						   (__mmask8) -1);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A)
-{
-  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
-						   (__v8sf) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
-{
-  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
-						   (__v8sf)
-						   _mm256_setzero_ps (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_getexp_pd (__m256d __A)
-{
-  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
-						    (__v4df)
-						    _mm256_setzero_pd (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A)
-{
-  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
-						    (__v4df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
-{
-  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
-						    (__v4df)
-						    _mm256_setzero_pd (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_getexp_ps (__m128 __A)
-{
-  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
-						   (__v4sf)
-						   _mm_setzero_ps (),
-						   (__mmask8) -1);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A)
-{
-  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
-						   (__v4sf) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
-{
-  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
-						   (__v4sf)
-						   _mm_setzero_ps (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_getexp_pd (__m128d __A)
-{
-  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
-						    (__v2df)
-						    _mm_setzero_pd (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A)
-{
-  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
-						    (__v2df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
-{
-  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
-						    (__v2df)
-						    _mm_setzero_pd (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-		       __m128i __B)
-{
-  return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
-						 (__v4si) __B,
-						 (__v8si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
-{
-  return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
-						 (__v4si) __B,
-						 (__v8si)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
-						 (__v4si) __B,
-						 (__v4si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
-						 (__v4si) __B,
-						 (__v4si)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-		       __m128i __B)
-{
-  return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
-						 (__v2di) __B,
-						 (__v4di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
-{
-  return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
-						 (__v2di) __B,
-						 (__v4di)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
-						 (__v2di) __B,
-						 (__v2di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
-						 (__v2di) __B,
-						 (__v2di)
-						 _mm_setzero_di (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
-						 (__v8si) __B,
-						 (__v8si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
-						 (__v8si) __B,
-						 (__v8si)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_scalef_pd (__m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
-						    (__v4df) __B,
-						    (__v4df)
-						    _mm256_setzero_pd (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
-		       __m256d __B)
-{
-  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
-						    (__v4df) __B,
-						    (__v4df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
-						    (__v4df) __B,
-						    (__v4df)
-						    _mm256_setzero_pd (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_scalef_ps (__m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
-						   (__v8sf) __B,
-						   (__v8sf)
-						   _mm256_setzero_ps (),
-						   (__mmask8) -1);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
-		       __m256 __B)
-{
-  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
-						   (__v8sf) __B,
-						   (__v8sf) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
-						   (__v8sf) __B,
-						   (__v8sf)
-						   _mm256_setzero_ps (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_scalef_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
-						    (__v2df) __B,
-						    (__v2df)
-						    _mm_setzero_pd (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
-		    __m128d __B)
-{
-  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
-						    (__v2df) __B,
-						    (__v2df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
-						    (__v2df) __B,
-						    (__v2df)
-						    _mm_setzero_pd (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_scalef_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
-						   (__v4sf) __B,
-						   (__v4sf)
-						   _mm_setzero_ps (),
-						   (__mmask8) -1);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
-						   (__v4sf) __B,
-						   (__v4sf) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
-						   (__v4sf) __B,
-						   (__v4sf)
-						   _mm_setzero_ps (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
-		      __m256d __C)
-{
-  return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
-						    (__v4df) __B,
-						    (__v4df) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C,
-		       __mmask8 __U)
-{
-  return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
-						     (__v4df) __B,
-						     (__v4df) __C,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
-		       __m256d __C)
-{
-  return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
-						     (__v4df) __B,
-						     (__v4df) __C,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
-						    (__v2df) __B,
-						    (__v2df) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C,
-		    __mmask8 __U)
-{
-  return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
-						     (__v2df) __B,
-						     (__v2df) __C,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
-		    __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
-						     (__v2df) __B,
-						     (__v2df) __C,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
-{
-  return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
-						   (__v8sf) __B,
-						   (__v8sf) __C,
-						   (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C,
-		       __mmask8 __U)
-{
-  return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
-						    (__v8sf) __B,
-						    (__v8sf) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
-		       __m256 __C)
-{
-  return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
-						    (__v8sf) __B,
-						    (__v8sf) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
-						   (__v4sf) __B,
-						   (__v4sf) __C,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
-{
-  return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
-						    (__v4sf) __B,
-						    (__v4sf) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
-						    (__v4sf) __B,
-						    (__v4sf) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
-		      __m256d __C)
-{
-  return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
-						    (__v4df) __B,
-						    -(__v4df) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C,
-		       __mmask8 __U)
-{
-  return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
-						     (__v4df) __B,
-						     (__v4df) __C,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
-		       __m256d __C)
-{
-  return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
-						     (__v4df) __B,
-						     -(__v4df) __C,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
-						    (__v2df) __B,
-						    -(__v2df) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C,
-		    __mmask8 __U)
-{
-  return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
-						     (__v2df) __B,
-						     (__v2df) __C,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
-		    __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
-						     (__v2df) __B,
-						     -(__v2df) __C,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
-{
-  return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
-						   (__v8sf) __B,
-						   -(__v8sf) __C,
-						   (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C,
-		       __mmask8 __U)
-{
-  return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
-						    (__v8sf) __B,
-						    (__v8sf) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
-		       __m256 __C)
-{
-  return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
-						    (__v8sf) __B,
-						    -(__v8sf) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
-						   (__v4sf) __B,
-						   -(__v4sf) __C,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
-{
-  return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
-						    (__v4sf) __B,
-						    (__v4sf) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
-						    (__v4sf) __B,
-						    -(__v4sf) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
-			 __m256d __C)
-{
-  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
-						       (__v4df) __B,
-						       (__v4df) __C,
-						       (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C,
-			  __mmask8 __U)
-{
-  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
-							(__v4df) __B,
-							(__v4df) __C,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
-			  __m256d __C)
-{
-  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
-							(__v4df) __B,
-							(__v4df) __C,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
-		      __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
-						       (__v2df) __B,
-						       (__v2df) __C,
-						       (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C,
-		       __mmask8 __U)
-{
-  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
-							(__v2df) __B,
-							(__v2df) __C,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
-		       __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
-							(__v2df) __B,
-							(__v2df) __C,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
-			 __m256 __C)
-{
-  return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
-						      (__v8sf) __B,
-						      (__v8sf) __C,
-						      (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C,
-			  __mmask8 __U)
-{
-  return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
-						       (__v8sf) __B,
-						       (__v8sf) __C,
-						       (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
-			  __m256 __C)
-{
-  return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
-						       (__v8sf) __B,
-						       (__v8sf) __C,
-						       (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
-						      (__v4sf) __B,
-						      (__v4sf) __C,
-						      (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C,
-		       __mmask8 __U)
-{
-  return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
-						       (__v4sf) __B,
-						       (__v4sf) __C,
-						       (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B,
-		       __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
-						       (__v4sf) __B,
-						       (__v4sf) __C,
-						       (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
-			 __m256d __C)
-{
-  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
-						       (__v4df) __B,
-						       -(__v4df) __C,
-						       (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C,
-			  __mmask8 __U)
-{
-  return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
-							(__v4df) __B,
-							(__v4df) __C,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
-			  __m256d __C)
-{
-  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
-							(__v4df) __B,
-							-(__v4df) __C,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
-		      __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
-						       (__v2df) __B,
-						       -(__v2df) __C,
-						       (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C,
-		       __mmask8 __U)
-{
-  return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
-							(__v2df) __B,
-							(__v2df) __C,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
-		       __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
-							(__v2df) __B,
-							-(__v2df) __C,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
-			 __m256 __C)
-{
-  return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
-						      (__v8sf) __B,
-						      -(__v8sf) __C,
-						      (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C,
-			  __mmask8 __U)
-{
-  return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
-						       (__v8sf) __B,
-						       (__v8sf) __C,
-						       (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
-			  __m256 __C)
-{
-  return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
-						       (__v8sf) __B,
-						       -(__v8sf) __C,
-						       (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
-						      (__v4sf) __B,
-						      -(__v4sf) __C,
-						      (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C,
-		       __mmask8 __U)
-{
-  return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
-						       (__v4sf) __B,
-						       (__v4sf) __C,
-						       (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B,
-		       __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
-						       (__v4sf) __B,
-						       -(__v4sf) __C,
-						       (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
-		       __m256d __C)
-{
-  return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
-						     (__v4df) __B,
-						     (__v4df) __C,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
-			__mmask8 __U)
-{
-  return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
-						     (__v4df) __B,
-						     (__v4df) __C,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
-			__m256d __C)
-{
-  return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
-						     (__v4df) __B,
-						     (__v4df) __C,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
-		    __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
-						     (__v2df) __B,
-						     (__v2df) __C,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
-		     __mmask8 __U)
-{
-  return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
-						     (__v2df) __B,
-						     (__v2df) __C,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
-		     __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
-						     (__v2df) __B,
-						     (__v2df) __C,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
-		       __m256 __C)
-{
-  return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
-						    (__v8sf) __B,
-						    (__v8sf) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
-			__mmask8 __U)
-{
-  return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
-						    (__v8sf) __B,
-						    (__v8sf) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
-			__m256 __C)
-{
-  return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
-						    (__v8sf) __B,
-						    (__v8sf) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
-						    (__v4sf) __B,
-						    (__v4sf) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
-{
-  return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
-						    (__v4sf) __B,
-						    (__v4sf) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
-						    (__v4sf) __B,
-						    (__v4sf) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
-		       __m256d __C)
-{
-  return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
-						     (__v4df) __B,
-						     (__v4df) __C,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C,
-			__mmask8 __U)
-{
-  return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
-						      (__v4df) __B,
-						      (__v4df) __C,
-						      (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
-			__m256d __C)
-{
-  return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
-						     (__v4df) __B,
-						     -(__v4df) __C,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
-		    __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
-						     (__v2df) __B,
-						     (__v2df) __C,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C,
-		     __mmask8 __U)
-{
-  return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
-						      (__v2df) __B,
-						      (__v2df) __C,
-						      (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
-		     __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
-						     (__v2df) __B,
-						     -(__v2df) __C,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
-		       __m256 __C)
-{
-  return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
-						    (__v8sf) __B,
-						    (__v8sf) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C,
-			__mmask8 __U)
-{
-  return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
-						     (__v8sf) __B,
-						     (__v8sf) __C,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
-			__m256 __C)
-{
-  return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
-						    (__v8sf) __B,
-						    -(__v8sf) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
-						    (__v4sf) __B,
-						    (__v4sf) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
-{
-  return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
-						     (__v4sf) __B,
-						     (__v4sf) __C,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
-						    (__v4sf) __B,
-						    -(__v4sf) __C,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
-						 (__v4si) __B,
-						 (__v4si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
-						 (__v4si) __B,
-						 (__v4si)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-			  __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
-						  (__v8si) __B,
-						  (__v8si) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
-						  (__v8si) __B,
-						  (__v8si)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-		       __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
-						  (__v4si) __B,
-						  (__v4si) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
-						  (__v4si) __B,
-						  (__v4si)
-						  _mm_setzero_si128 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-		      __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
-						(__v8si) __B,
-						(__v8si) __W,
-						(__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
-						(__v8si) __B,
-						(__v8si)
-						_mm256_setzero_si256 (),
-						(__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
-						(__v4si) __B,
-						(__v4si) __W,
-						(__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
-						(__v4si) __B,
-						(__v4si)
-						_mm_setzero_si128 (),
-						(__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
-						 (__v8si) __B,
-						 (__v8si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
-						 (__v8si) __B,
-						 (__v8si)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
-						 (__v4si) __B,
-						 (__v4si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
-						 (__v4si) __B,
-						 (__v4si)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
-{
-  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
-						(__v4sf) __W,
-						(__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
-{
-  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
-						(__v4sf)
-						_mm_setzero_ps (),
-						(__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A)
-{
-  return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
-						   (__v4sf) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
-{
-  return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
-						   (__v4sf)
-						   _mm_setzero_ps (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
-{
-  return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
-						    (__v8si) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
-{
-  return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
-						    (__v8si)
-						    _mm256_setzero_si256 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
-						    (__v4si) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
-						    (__v4si)
-						    _mm_setzero_si128 (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtps_epu32 (__m256 __A)
-{
-  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
-						     (__v8si)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
-{
-  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
-						     (__v8si) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
-{
-  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
-						     (__v8si)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtps_epu32 (__m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
-						     (__v4si)
-						     _mm_setzero_si128 (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
-						     (__v4si) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
-{
-  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
-						     (__v4si)
-						     _mm_setzero_si128 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
-{
-  return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
-						   (__v4df) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
-{
-  return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
-						   (__v4df)
-						   _mm256_setzero_pd (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
-{
-  return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
-						   (__v2df) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
-{
-  return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
-						   (__v2df)
-						   _mm_setzero_pd (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
-{
-  return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
-						   (__v8sf) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
-{
-  return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
-						   (__v8sf)
-						   _mm256_setzero_ps (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
-{
-  return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
-						   (__v4sf) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
-{
-  return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
-						   (__v4sf)
-						   _mm_setzero_ps (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
-{
-  return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
-						   (__v8sf) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
-{
-  return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
-						   (__v8sf)
-						   _mm256_setzero_ps (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
-{
-  return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
-						   (__v4sf) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
-{
-  return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
-						   (__v4sf)
-						   _mm_setzero_ps (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-			 __m128i __B)
-{
-  return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
-						     (__v4si) __B,
-						     (__v4si) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
-						     (__v4si) __B,
-						     (__v4si)
-						     _mm_setzero_si128 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-			    __m256i __B)
-{
-  return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
-						     (__v8si) __B,
-						     (__v8si) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
-						     (__v8si) __B,
-						     (__v8si)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-			 __m128i __B)
-{
-  return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
-						      (__v2di) __B,
-						      (__v2di) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
-						      (__v2di) __B,
-						      (__v2di)
-						      _mm_setzero_di (),
-						      (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-			    __m256i __B)
-{
-  return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
-						      (__v4di) __B,
-						      (__v4di) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
-						      (__v4di) __B,
-						      (__v4di)
-						      _mm256_setzero_si256 (),
-						      (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-			 __m128i __B)
-{
-  return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
-						     (__v4si) __B,
-						     (__v4si) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
-						     (__v4si) __B,
-						     (__v4si)
-						     _mm_setzero_si128 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-			    __m256i __B)
-{
-  return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
-						     (__v8si) __B,
-						     (__v8si) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
-						     (__v8si) __B,
-						     (__v8si)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-			 __m128i __B)
-{
-  return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
-						      (__v2di) __B,
-						      (__v2di) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
-						      (__v2di) __B,
-						      (__v2di)
-						      _mm_setzero_di (),
-						      (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-			    __m256i __B)
-{
-  return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
-						      (__v4di) __B,
-						      (__v4di) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
-						      (__v4di) __B,
-						      (__v4di)
-						      _mm256_setzero_si256 (),
-						      (__mmask8) __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_epu32_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
-						   (__v4si) __B, 0,
-						   (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_epi32_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
-						    (__v4si) __B,
-						    (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpeq_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
-						   (__v4si) __B, 0, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
-						    (__v4si) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpeq_epu32_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
-						   (__v8si) __B, 0,
-						   (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
-						    (__v8si) __B,
-						    (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpeq_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
-						   (__v8si) __B, 0, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
-						    (__v8si) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_epu64_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
-						   (__v2di) __B, 0,
-						   (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_epi64_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
-						    (__v2di) __B,
-						    (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpeq_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
-						   (__v2di) __B, 0, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
-						    (__v2di) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpeq_epu64_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
-						   (__v4di) __B, 0,
-						   (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
-						    (__v4di) __B,
-						    (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpeq_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
-						   (__v4di) __B, 0, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
-						    (__v4di) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpgt_epu32_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
-						   (__v4si) __B, 6,
-						   (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpgt_epi32_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
-						    (__v4si) __B,
-						    (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpgt_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
-						   (__v4si) __B, 6, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpgt_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
-						    (__v4si) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpgt_epu32_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
-						   (__v8si) __B, 6,
-						   (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
-						    (__v8si) __B,
-						    (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpgt_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
-						   (__v8si) __B, 6, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
-						    (__v8si) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpgt_epu64_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
-						   (__v2di) __B, 6,
-						   (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpgt_epi64_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
-						    (__v2di) __B,
-						    (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpgt_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
-						   (__v2di) __B, 6, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
-						    (__v2di) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpgt_epu64_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
-						   (__v4di) __B, 6,
-						   (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
-						    (__v4di) __B,
-						    (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpgt_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
-						   (__v4di) __B, 6, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
-						    (__v4di) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_test_epi32_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
-					       (__v4si) __B,
-					       (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
-					       (__v4si) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_test_epi32_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
-					       (__v8si) __B,
-					       (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
-					       (__v8si) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_test_epi64_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
-					       (__v2di) __B,
-					       (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
-					       (__v2di) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_test_epi64_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
-					       (__v4di) __B,
-					       (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
-					       (__v4di) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_testn_epi32_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
-						(__v4si) __B,
-						(__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
-						(__v4si) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
-						(__v8si) __B,
-						(__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
-						(__v8si) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_testn_epi64_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
-						(__v2di) __B,
-						(__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
-						(__v2di) __B, __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
-						(__v4di) __B,
-						(__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
-						(__v4di) __B, __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A)
-{
-  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
-						      (__v4df) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
-{
-  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
-						      (__v4df)
-						      _mm256_setzero_pd (),
-						      (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A)
-{
-  __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
-					  (__v4df) __A,
-					  (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A)
-{
-  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
-						      (__v2df) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
-{
-  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
-						      (__v2df)
-						      _mm_setzero_pd (),
-						      (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A)
-{
-  __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
-					  (__v2df) __A,
-					  (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A)
-{
-  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
-						     (__v8sf) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
-{
-  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
-						     (__v8sf)
-						     _mm256_setzero_ps (),
-						     (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A)
-{
-  __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
-					  (__v8sf) __A,
-					  (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A)
-{
-  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
-						     (__v4sf) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
-{
-  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
-						     (__v4sf)
-						     _mm_setzero_ps (),
-						     (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A)
-{
-  __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
-					  (__v4sf) __A,
-					  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
-						      (__v4di) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
-						      (__v4di)
-						      _mm256_setzero_si256 (),
-						      (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A)
-{
-  __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
-					  (__v4di) __A,
-					  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
-						      (__v2di) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
-						      (__v2di)
-						      _mm_setzero_di (),
-						      (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A)
-{
-  __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
-					  (__v2di) __A,
-					  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
-						      (__v8si) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
-						      (__v8si)
-						      _mm256_setzero_si256 (),
-						      (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A)
-{
-  __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
-					  (__v8si) __A,
-					  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
-						      (__v4si) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
-						      (__v4si)
-						      _mm_setzero_si128 (),
-						      (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A)
-{
-  __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
-					  (__v4si) __A,
-					  (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A)
-{
-  return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
-						    (__v4df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
-{
-  return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A,
-						     (__v4df)
-						     _mm256_setzero_pd (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P)
-{
-  return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
-							(__v4df) __W,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
-{
-  return (__m256d) __builtin_ia32_expandloaddf256_maskz ((__v4df *) __P,
-							 (__v4df)
-							 _mm256_setzero_pd (),
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A)
-{
-  return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
-						    (__v2df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
-{
-  return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A,
-						     (__v2df)
-						     _mm_setzero_pd (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P)
-{
-  return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
-							(__v2df) __W,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
-{
-  return (__m128d) __builtin_ia32_expandloaddf128_maskz ((__v2df *) __P,
-							 (__v2df)
-							 _mm_setzero_pd (),
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A)
-{
-  return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
-						   (__v8sf) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
-{
-  return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A,
-						    (__v8sf)
-						    _mm256_setzero_ps (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P)
-{
-  return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
-						       (__v8sf) __W,
-						       (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
-{
-  return (__m256) __builtin_ia32_expandloadsf256_maskz ((__v8sf *) __P,
-							(__v8sf)
-							_mm256_setzero_ps (),
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A)
-{
-  return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
-						   (__v4sf) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
-{
-  return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A,
-						    (__v4sf)
-						    _mm_setzero_ps (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P)
-{
-  return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
-						       (__v4sf) __W,
-						       (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
-{
-  return (__m128) __builtin_ia32_expandloadsf128_maskz ((__v4sf *) __P,
-							(__v4sf)
-							_mm_setzero_ps (),
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
-						    (__v4di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A,
-						     (__v4di)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
-			       void const *__P)
-{
-  return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
-							(__v4di) __W,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
-{
-  return (__m256i) __builtin_ia32_expandloaddi256_maskz ((__v4di *) __P,
-							 (__v4di)
-							 _mm256_setzero_si256 (),
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
-						    (__v2di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A,
-						     (__v2di)
-						     _mm_setzero_si128 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
-{
-  return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
-							(__v2di) __W,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
-{
-  return (__m128i) __builtin_ia32_expandloaddi128_maskz ((__v2di *) __P,
-							 (__v2di)
-							 _mm_setzero_si128 (),
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
-						    (__v8si) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A,
-						     (__v8si)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
-			       void const *__P)
-{
-  return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
-							(__v8si) __W,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
-{
-  return (__m256i) __builtin_ia32_expandloadsi256_maskz ((__v8si *) __P,
-							 (__v8si)
-							 _mm256_setzero_si256 (),
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
-						    (__v4si) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A,
-						     (__v4si)
-						     _mm_setzero_si128 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
-{
-  return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
-							(__v4si) __W,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
-{
-  return (__m128i) __builtin_ia32_expandloadsi128_maskz ((__v4si *) __P,
-							 (__v4si)
-							 _mm_setzero_si128 (),
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
-							/* idx */ ,
-							(__v4df) __A,
-							(__v4df) __B,
-							(__mmask8) -
-							1);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
-			     __m256d __B)
-{
-  return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
-							/* idx */ ,
-							(__v4df) __A,
-							(__v4df) __B,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
-			      __m256d __B)
-{
-  return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
-							(__v4di) __I
-							/* idx */ ,
-							(__v4df) __B,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
-			      __m256d __B)
-{
-  return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
-							 /* idx */ ,
-							 (__v4df) __A,
-							 (__v4df) __B,
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B)
-{
-  return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
-						       /* idx */ ,
-						       (__v8sf) __A,
-						       (__v8sf) __B,
-						       (__mmask8) -1);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
-			     __m256 __B)
-{
-  return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
-						       /* idx */ ,
-						       (__v8sf) __A,
-						       (__v8sf) __B,
-						       (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
-			      __m256 __B)
-{
-  return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
-						       (__v8si) __I
-						       /* idx */ ,
-						       (__v8sf) __B,
-						       (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
-			      __m256 __B)
-{
-  return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
-							/* idx */ ,
-							(__v8sf) __A,
-							(__v8sf) __B,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
-						       /* idx */ ,
-						       (__v2di) __A,
-						       (__v2di) __B,
-						       (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
-			     __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
-						       /* idx */ ,
-						       (__v2di) __A,
-						       (__v2di) __B,
-						       (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
-			      __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
-						       (__v2di) __I
-						       /* idx */ ,
-						       (__v2di) __B,
-						       (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
-			      __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
-							/* idx */ ,
-							(__v2di) __A,
-							(__v2di) __B,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
-						       /* idx */ ,
-						       (__v4si) __A,
-						       (__v4si) __B,
-						       (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
-			     __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
-						       /* idx */ ,
-						       (__v4si) __A,
-						       (__v4si) __B,
-						       (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
-			      __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
-						       (__v4si) __I
-						       /* idx */ ,
-						       (__v4si) __B,
-						       (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
-			      __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
-							/* idx */ ,
-							(__v4si) __A,
-							(__v4si) __B,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
-						       /* idx */ ,
-						       (__v4di) __A,
-						       (__v4di) __B,
-						       (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
-				__m256i __B)
-{
-  return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
-						       /* idx */ ,
-						       (__v4di) __A,
-						       (__v4di) __B,
-						       (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
-				 __mmask8 __U, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
-						       (__v4di) __I
-						       /* idx */ ,
-						       (__v4di) __B,
-						       (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
-				 __m256i __I, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
-							/* idx */ ,
-							(__v4di) __A,
-							(__v4di) __B,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
-						       /* idx */ ,
-						       (__v8si) __A,
-						       (__v8si) __B,
-						       (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
-				__m256i __B)
-{
-  return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
-						       /* idx */ ,
-						       (__v8si) __A,
-						       (__v8si) __B,
-						       (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
-				 __mmask8 __U, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
-						       (__v8si) __I
-						       /* idx */ ,
-						       (__v8si) __B,
-						       (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
-				 __m256i __I, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
-							/* idx */ ,
-							(__v8si) __A,
-							(__v8si) __B,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
-							/* idx */ ,
-							(__v2df) __A,
-							(__v2df) __B,
-							(__mmask8) -
-							1);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
-			  __m128d __B)
-{
-  return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
-							/* idx */ ,
-							(__v2df) __A,
-							(__v2df) __B,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
-			   __m128d __B)
-{
-  return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
-							(__v2di) __I
-							/* idx */ ,
-							(__v2df) __B,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
-			   __m128d __B)
-{
-  return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
-							 /* idx */ ,
-							 (__v2df) __A,
-							 (__v2df) __B,
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B)
-{
-  return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
-						       /* idx */ ,
-						       (__v4sf) __A,
-						       (__v4sf) __B,
-						       (__mmask8) -1);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
-			  __m128 __B)
-{
-  return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
-						       /* idx */ ,
-						       (__v4sf) __A,
-						       (__v4sf) __B,
-						       (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
-			   __m128 __B)
-{
-  return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
-						       (__v4si) __I
-						       /* idx */ ,
-						       (__v4sf) __B,
-						       (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
-			   __m128 __B)
-{
-  return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
-							/* idx */ ,
-							(__v4sf) __A,
-							(__v4sf) __B,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srav_epi64 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
-						  (__v2di) __Y,
-						  (__v2di)
-						  _mm_setzero_di (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
-		     __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
-						  (__v2di) __Y,
-						  (__v2di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
-						  (__v2di) __Y,
-						  (__v2di)
-						  _mm_setzero_di (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
-			__m256i __Y)
-{
-  return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
-						 (__v8si) __Y,
-						 (__v8si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
-						 (__v8si) __Y,
-						 (__v8si)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
-		     __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
-						 (__v4si) __Y,
-						 (__v4si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
-						 (__v4si) __Y,
-						 (__v4si)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
-			__m256i __Y)
-{
-  return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
-						 (__v4di) __Y,
-						 (__v4di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
-						 (__v4di) __Y,
-						 (__v4di)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
-		     __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
-						 (__v2di) __Y,
-						 (__v2di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
-						 (__v2di) __Y,
-						 (__v2di)
-						 _mm_setzero_di (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
-			__m256i __Y)
-{
-  return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
-						 (__v8si) __Y,
-						 (__v8si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
-						 (__v8si) __Y,
-						 (__v8si)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
-		     __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
-						 (__v4si) __Y,
-						 (__v4si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
-						 (__v4si) __Y,
-						 (__v4si)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
-			__m256i __Y)
-{
-  return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
-						 (__v8si) __Y,
-						 (__v8si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
-						 (__v8si) __Y,
-						 (__v8si)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
-		     __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
-						 (__v4si) __Y,
-						 (__v4si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
-						 (__v4si) __Y,
-						 (__v4si)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
-			__m256i __Y)
-{
-  return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
-						 (__v4di) __Y,
-						 (__v4di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
-						 (__v4di) __Y,
-						 (__v4di)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
-		     __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
-						 (__v2di) __Y,
-						 (__v2di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
-						 (__v2di) __Y,
-						 (__v2di)
-						 _mm_setzero_di (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_rolv_epi32 (__m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
-						  (__v8si) __B,
-						  (__v8si)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-			__m256i __B)
-{
-  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
-						  (__v8si) __B,
-						  (__v8si) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
-						  (__v8si) __B,
-						  (__v8si)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rolv_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
-						  (__v4si) __B,
-						  (__v4si)
-						  _mm_setzero_si128 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-		     __m128i __B)
-{
-  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
-						  (__v4si) __B,
-						  (__v4si) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
-						  (__v4si) __B,
-						  (__v4si)
-						  _mm_setzero_si128 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_rorv_epi32 (__m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
-						  (__v8si) __B,
-						  (__v8si)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-			__m256i __B)
-{
-  return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
-						  (__v8si) __B,
-						  (__v8si) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
-						  (__v8si) __B,
-						  (__v8si)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rorv_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
-						  (__v4si) __B,
-						  (__v4si)
-						  _mm_setzero_si128 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-		     __m128i __B)
-{
-  return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
-						  (__v4si) __B,
-						  (__v4si) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
-						  (__v4si) __B,
-						  (__v4si)
-						  _mm_setzero_si128 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_rolv_epi64 (__m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-			__m256i __B)
-{
-  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rolv_epi64 (__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di)
-						  _mm_setzero_di (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-		     __m128i __B)
-{
-  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di)
-						  _mm_setzero_di (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_rorv_epi64 (__m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-			__m256i __B)
-{
-  return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rorv_epi64 (__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di)
-						  _mm_setzero_di (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-		     __m128i __B)
-{
-  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di)
-						  _mm_setzero_di (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_srav_epi64 (__m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
-						  (__v4di) __Y,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
-			__m256i __Y)
-{
-  return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
-						  (__v4di) __Y,
-						  (__v4di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
-						  (__v4di) __Y,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
-						 (__v4di) __B,
-						 (__v4di) __W, __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
-						 (__v4di) __B,
-						 (__v4di)
-						 _mm256_setzero_pd (),
-						 __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
-						 (__v2di) __B,
-						 (__v2di) __W, __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
-						 (__v2di) __B,
-						 (__v2di)
-						 _mm_setzero_pd (),
-						 __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-			  __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di) __W, __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di)
-						  _mm256_setzero_pd (),
-						  __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-		       __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di) __W, __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di)
-						  _mm_setzero_pd (),
-						  __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-		      __m256i __B)
-{
-  return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
-						(__v4di) __B,
-						(__v4di) __W,
-						(__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
-						(__v4di) __B,
-						(__v4di)
-						_mm256_setzero_si256 (),
-						(__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
-						(__v2di) __B,
-						(__v2di) __W,
-						(__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
-						(__v2di) __B,
-						(__v2di)
-						_mm_setzero_si128 (),
-						(__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
-						 (__v4di) __B,
-						 (__v4di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
-						 (__v4di) __B,
-						 (__v4di)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
-						 (__v2di) __B,
-						 (__v2di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
-						 (__v2di) __B,
-						 (__v2di)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
-		    __m256d __B)
-{
-  return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
-						 (__v4df) __B,
-						 (__v4df) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
-						 (__v4df) __B,
-						 (__v4df)
-						 _mm256_setzero_pd (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
-						(__v8sf) __B,
-						(__v8sf) __W,
-						(__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
-						(__v8sf) __B,
-						(__v8sf)
-						_mm256_setzero_ps (),
-						(__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
-					     (__v4sf) __B,
-					     (__v4sf) __W,
-					     (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
-					     (__v4sf) __B,
-					     (__v4sf)
-					     _mm_setzero_ps (),
-					     (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
-					      (__v2df) __B,
-					      (__v2df) __W,
-					      (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
-					      (__v2df) __B,
-					      (__v2df)
-					      _mm_setzero_pd (),
-					      (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
-		    __m256d __B)
-{
-  return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
-						 (__v4df) __B,
-						 (__v4df) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
-		    __m256d __B)
-{
-  return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
-						 (__v4df) __B,
-						 (__v4df) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
-						 (__v4df) __B,
-						 (__v4df)
-						 _mm256_setzero_pd (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
-						(__v8sf) __B,
-						(__v8sf) __W,
-						(__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
-						 (__v4df) __B,
-						 (__v4df)
-						 _mm256_setzero_pd (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
-						(__v8sf) __B,
-						(__v8sf) __W,
-						(__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
-						(__v8sf) __B,
-						(__v8sf)
-						_mm256_setzero_ps (),
-						(__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
-						(__v8sf) __B,
-						(__v8sf)
-						_mm256_setzero_ps (),
-						(__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
-					     (__v4sf) __B,
-					     (__v4sf) __W,
-					     (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
-					     (__v4sf) __B,
-					     (__v4sf) __W,
-					     (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
-					     (__v4sf) __B,
-					     (__v4sf)
-					     _mm_setzero_ps (),
-					     (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
-					     (__v4sf) __B,
-					     (__v4sf)
-					     _mm_setzero_ps (),
-					     (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
-					     (__v4sf) __B,
-					     (__v4sf) __W,
-					     (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
-					     (__v4sf) __B,
-					     (__v4sf)
-					     _mm_setzero_ps (),
-					     (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
-					      (__v2df) __B,
-					      (__v2df) __W,
-					      (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
-					      (__v2df) __B,
-					      (__v2df)
-					      _mm_setzero_pd (),
-					      (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
-					      (__v2df) __B,
-					      (__v2df) __W,
-					      (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
-					      (__v2df) __B,
-					      (__v2df)
-					      _mm_setzero_pd (),
-					      (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
-					      (__v2df) __B,
-					      (__v2df) __W,
-					      (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
-					      (__v2df) __B,
-					      (__v2df)
-					      _mm_setzero_pd (),
-					      (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
-						(__v8sf) __B,
-						(__v8sf) __W,
-						(__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
-						(__v8sf) __B,
-						(__v8sf)
-						_mm256_setzero_ps (),
-						(__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
-		    __m256d __B)
-{
-  return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
-						 (__v4df) __B,
-						 (__v4df) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
-						 (__v4df) __B,
-						 (__v4df)
-						 _mm256_setzero_pd (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di) __W, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_min_epi64 (__m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di) __W, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_max_epi64 (__m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_max_epu64 (__m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di) __W, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_min_epu64 (__m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di) __W, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
-						  (__v4di) __B,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
-						  (__v8si) __B,
-						  (__v8si)
-						  _mm256_setzero_si256 (),
-						  __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
-						  (__v8si) __B,
-						  (__v8si) __W, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
-						  (__v8si) __B,
-						  (__v8si)
-						  _mm256_setzero_si256 (),
-						  __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
-						  (__v8si) __B,
-						  (__v8si) __W, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
-						  (__v8si) __B,
-						  (__v8si)
-						  _mm256_setzero_si256 (),
-						  __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
-						  (__v8si) __B,
-						  (__v8si) __W, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
-						  (__v8si) __B,
-						  (__v8si)
-						  _mm256_setzero_si256 (),
-						  __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
-		       __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
-						  (__v8si) __B,
-						  (__v8si) __W, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di) __W, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_epi64 (__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di)
-						  _mm_setzero_di (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di) __W, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_max_epi64 (__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di)
-						  _mm_setzero_di (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_max_epu64 (__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di)
-						  _mm_setzero_di (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di) __W, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_epu64 (__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di)
-						  _mm_setzero_di (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di) __W, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
-						  (__v2di) __B,
-						  (__v2di)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
-						  (__v4si) __B,
-						  (__v4si)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
-						  (__v4si) __B,
-						  (__v4si) __W, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
-						  (__v4si) __B,
-						  (__v4si)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
-						  (__v4si) __B,
-						  (__v4si) __W, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
-						  (__v4si) __B,
-						  (__v4si)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
-						  (__v4si) __B,
-						  (__v4si) __W, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
-						  (__v4si) __B,
-						  (__v4si)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
-						  (__v4si) __B,
-						  (__v4si) __W, __M);
-}
-
-#ifndef __AVX512CD__
-#pragma GCC push_options
-#pragma GCC target("avx512vl,avx512cd")
-#define __DISABLE_AVX512VLCD__
-#endif
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_broadcastmb_epi64 (__mmask8 __A)
-{
-  return (__m128i) __builtin_ia32_broadcastmb128 (__A);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_broadcastmb_epi64 (__mmask8 __A)
-{
-  return (__m256i) __builtin_ia32_broadcastmb256 (__A);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_broadcastmw_epi32 (__mmask16 __A)
-{
-  return (__m128i) __builtin_ia32_broadcastmw128 (__A);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_broadcastmw_epi32 (__mmask16 __A)
-{
-  return (__m256i) __builtin_ia32_broadcastmw256 (__A);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_lzcnt_epi32 (__m256i __A)
-{
-  return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
-						     (__v8si)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
-						     (__v8si) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
-						     (__v8si)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_lzcnt_epi64 (__m256i __A)
-{
-  return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
-						     (__v4di)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
-						     (__v4di) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
-						     (__v4di)
-						     _mm256_setzero_si256 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_conflict_epi64 (__m256i __A)
-{
-  return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
-							 (__v4di)
-							 _mm256_setzero_si256 (),
-							 (__mmask8) -
-							 1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
-							 (__v4di) __W,
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
-							 (__v4di)
-							 _mm256_setzero_si256 (),
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_conflict_epi32 (__m256i __A)
-{
-  return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
-							 (__v8si)
-							 _mm256_setzero_si256 (),
-							 (__mmask8) -
-							 1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
-							 (__v8si) __W,
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
-							 (__v8si)
-							 _mm256_setzero_si256 (),
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_lzcnt_epi32 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
-						     (__v4si)
-						     _mm_setzero_si128 (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
-						     (__v4si) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
-						     (__v4si)
-						     _mm_setzero_si128 (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_lzcnt_epi64 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
-						     (__v2di)
-						     _mm_setzero_di (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
-						     (__v2di) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
-						     (__v2di)
-						     _mm_setzero_di (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_conflict_epi64 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
-							 (__v2di)
-							 _mm_setzero_di (),
-							 (__mmask8) -
-							 1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
-							 (__v2di) __W,
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
-							 (__v2di)
-							 _mm_setzero_di (),
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_conflict_epi32 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
-							 (__v4si)
-							 _mm_setzero_si128 (),
-							 (__mmask8) -
-							 1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
-							 (__v4si) __W,
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
-							 (__v4si)
-							 _mm_setzero_si128 (),
-							 (__mmask8)
-							 __U);
-}
-
-#ifdef __DISABLE_AVX512VLCD__
-#pragma GCC pop_options
-#endif
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
-			 __m256d __B)
-{
-  return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
-						    (__v4df) __B,
-						    (__v4df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
-						    (__v4df) __B,
-						    (__v4df)
-						    _mm256_setzero_pd (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
-		      __m128d __B)
-{
-  return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
-						    (__v2df) __B,
-						    (__v2df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
-						    (__v2df) __B,
-						    (__v2df)
-						    _mm_setzero_pd (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
-			 __m256 __B)
-{
-  return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
-						   (__v8sf) __B,
-						   (__v8sf) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
-			 __m256d __B)
-{
-  return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
-						    (__v4df) __B,
-						    (__v4df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
-						    (__v4df) __B,
-						    (__v4df)
-						    _mm256_setzero_pd (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
-		      __m128d __B)
-{
-  return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
-						    (__v2df) __B,
-						    (__v2df) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
-						    (__v2df) __B,
-						    (__v2df)
-						    _mm_setzero_pd (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
-			 __m256 __B)
-{
-  return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
-						   (__v8sf) __B,
-						   (__v8sf) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
-						   (__v8sf) __B,
-						   (__v8sf)
-						   _mm256_setzero_ps (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
-						   (__v4sf) __B,
-						   (__v4sf) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
-						   (__v4sf) __B,
-						   (__v4sf)
-						   _mm_setzero_ps (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
-						 (__v4sf) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
-{
-  return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
-						 (__v4sf)
-						 _mm_setzero_ps (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
-						   (__v8sf) __B,
-						   (__v8sf)
-						   _mm256_setzero_ps (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
-{
-  return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
-						    (__v8sf) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
-{
-  return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
-						    (__v8sf)
-						    _mm256_setzero_ps (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
-						   (__v4sf) __B,
-						   (__v4sf) __W,
-						   (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
-						   (__v4sf) __B,
-						   (__v4sf)
-						   _mm_setzero_ps (),
-						   (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-		       __m128i __B)
-{
-  return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
-						 (__v4si) __B,
-						 (__v8si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
-{
-  return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
-						 (__v4si) __B,
-						 (__v8si)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
-						 (__v4si) __B,
-						 (__v4si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
-						 (__v4si) __B,
-						 (__v4si)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_sra_epi64 (__m256i __A, __m128i __B)
-{
-  return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
-						 (__v2di) __B,
-						 (__v4di)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-		       __m128i __B)
-{
-  return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
-						 (__v2di) __B,
-						 (__v4di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
-{
-  return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
-						 (__v2di) __B,
-						 (__v4di)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sra_epi64 (__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
-						 (__v2di) __B,
-						 (__v2di)
-						 _mm_setzero_di (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
-						 (__v2di) __B,
-						 (__v2di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
-						 (__v2di) __B,
-						 (__v2di)
-						 _mm_setzero_di (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
-						 (__v4si) __B,
-						 (__v4si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
-						 (__v4si) __B,
-						 (__v4si)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-		    __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
-						 (__v2di) __B,
-						 (__v2di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
-						 (__v2di) __B,
-						 (__v2di)
-						 _mm_setzero_di (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-		       __m128i __B)
-{
-  return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
-						 (__v4si) __B,
-						 (__v8si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
-{
-  return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
-						 (__v4si) __B,
-						 (__v8si)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-		       __m128i __B)
-{
-  return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
-						 (__v2di) __B,
-						 (__v4di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
-{
-  return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
-						 (__v2di) __B,
-						 (__v4di)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
-			    __m256 __Y)
-{
-  return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
-						    (__v8si) __X,
-						    (__v8sf) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
-{
-  return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
-						    (__v8si) __X,
-						    (__v8sf)
-						    _mm256_setzero_ps (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
-{
-  return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
-						     (__v4di) __X,
-						     (__v4df)
-						     _mm256_setzero_pd (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
-			    __m256d __Y)
-{
-  return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
-						     (__v4di) __X,
-						     (__v4df) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
-{
-  return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
-						     (__v4di) __X,
-						     (__v4df)
-						     _mm256_setzero_pd (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
-			   __m256i __C)
-{
-  return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
-							(__v4di) __C,
-							(__v4df) __W,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
-{
-  return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
-							(__v4di) __C,
-							(__v4df)
-							_mm256_setzero_pd (),
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
-			   __m256i __C)
-{
-  return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
-						       (__v8si) __C,
-						       (__v8sf) __W,
-						       (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
-{
-  return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
-						       (__v8si) __C,
-						       (__v8sf)
-						       _mm256_setzero_ps (),
-						       (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
-			__m128i __C)
-{
-  return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
-						     (__v2di) __C,
-						     (__v2df) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
-{
-  return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
-						     (__v2di) __C,
-						     (__v2df)
-						     _mm_setzero_pd (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
-			__m128i __C)
-{
-  return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
-						    (__v4si) __C,
-						    (__v4sf) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
-{
-  return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
-						    (__v4si) __C,
-						    (__v4sf)
-						    _mm_setzero_ps (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
-						  (__v8si) __B,
-						  (__v8si)
-						  _mm256_setzero_si256 (),
-						  __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
-						     (__v4di) __X,
-						     (__v4di)
-						     _mm256_setzero_si256 (),
-						     __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
-			 __m256i __B)
-{
-  return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
-						  (__v8si) __B,
-						  (__v8si) __W, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
-						  (__v4si) __B,
-						  (__v4si)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_mullo_epi32 (__m128i __W, __mmask16 __M, __m128i __A,
-		      __m128i __B)
-{
-  return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
-						  (__v4si) __B,
-						  (__v4si) __W, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
-		       __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
-						  (__v8si) __Y,
-						  (__v4di) __W, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
-						  (__v8si) __Y,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
-		    __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
-						  (__v4si) __Y,
-						  (__v2di) __W, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
-						  (__v4si) __Y,
-						  (__v2di)
-						  _mm_setzero_si128 (),
-						  __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
-			       __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
-						     (__v4di) __X,
-						     (__v4di) __W,
-						     __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
-		       __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
-						   (__v8si) __Y,
-						   (__v4di) __W, __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
-						     (__v8si) __X,
-						     (__v8si)
-						     _mm256_setzero_si256 (),
-						     __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
-						   (__v8si) __Y,
-						   (__v4di)
-						   _mm256_setzero_si256 (),
-						   __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
-		    __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
-						   (__v4si) __Y,
-						   (__v2di) __W, __M);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
-						   (__v4si) __Y,
-						   (__v2di)
-						   _mm_setzero_si128 (),
-						   __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
-			       __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
-						     (__v8si) __X,
-						     (__v8si) __W,
-						     __M);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
-			    __m256i __X, const int __I)
-{
-  return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
-						  __I,
-						  (__v4di) __W,
-						  (__mmask8) __M);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
-{
-  return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
-						  __I,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) __M);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A,
-			__m256d __B, const int __imm)
-{
-  return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
-						  (__v4df) __B, __imm,
-						  (__v4df) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
-			 const int __imm)
-{
-  return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
-						  (__v4df) __B, __imm,
-						  (__v4df)
-						  _mm256_setzero_pd (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A,
-		     __m128d __B, const int __imm)
-{
-  return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
-						  (__v2df) __B, __imm,
-						  (__v2df) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
-		      const int __imm)
-{
-  return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
-						  (__v2df) __B, __imm,
-						  (__v2df)
-						  _mm_setzero_pd (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A,
-			__m256 __B, const int __imm)
-{
-  return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
-						 (__v8sf) __B, __imm,
-						 (__v8sf) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
-			 const int __imm)
-{
-  return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
-						 (__v8sf) __B, __imm,
-						 (__v8sf)
-						 _mm256_setzero_ps (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
-		     const int __imm)
-{
-  return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
-						 (__v4sf) __B, __imm,
-						 (__v4sf) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
-		      const int __imm)
-{
-  return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
-						 (__v4sf) __B, __imm,
-						 (__v4sf)
-						 _mm_setzero_ps (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
-{
-  return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
-							(__v4si) __B,
-							__imm,
-							(__v8si)
-							_mm256_setzero_si256 (),
-							(__mmask8) -
-							1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A,
-			 __m128i __B, const int __imm)
-{
-  return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
-							(__v4si) __B,
-							__imm,
-							(__v8si) __W,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
-			  const int __imm)
-{
-  return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
-							(__v4si) __B,
-							__imm,
-							(__v8si)
-							_mm256_setzero_si256 (),
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
-{
-  return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
-						       (__v4sf) __B,
-						       __imm,
-						       (__v8sf)
-						       _mm256_setzero_ps (),
-						       (__mmask8) -1);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A,
-			 __m128 __B, const int __imm)
-{
-  return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
-						       (__v4sf) __B,
-						       __imm,
-						       (__v8sf) __W,
-						       (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
-			  const int __imm)
-{
-  return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
-						       (__v4sf) __B,
-						       __imm,
-						       (__v8sf)
-						       _mm256_setzero_ps (),
-						       (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
-{
-  return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
-							 __imm,
-							 (__v4si)
-							 _mm_setzero_si128 (),
-							 (__mmask8) -
-							 1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A,
-				const int __imm)
-{
-  return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
-							 __imm,
-							 (__v4si) __W,
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
-				 const int __imm)
-{
-  return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
-							 __imm,
-							 (__v4si)
-							 _mm_setzero_si128 (),
-							 (__mmask8)
-							 __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_extractf32x4_ps (__m256 __A, const int __imm)
-{
-  return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
-							__imm,
-							(__v4sf)
-							_mm_setzero_ps (),
-							(__mmask8) -
-							1);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A,
-			     const int __imm)
-{
-  return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
-							__imm,
-							(__v4sf) __W,
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
-			      const int __imm)
-{
-  return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
-							__imm,
-							(__v4sf)
-							_mm_setzero_ps (),
-							(__mmask8)
-							__U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
-{
-  return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
-						       (__v4di) __B,
-						       __imm,
-						       (__v4di)
-						       _mm256_setzero_si256 (),
-						       (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A,
-			   __m256i __B, const int __imm)
-{
-  return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
-						       (__v4di) __B,
-						       __imm,
-						       (__v4di) __W,
-						       (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
-			    const int __imm)
-{
-  return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
-						       (__v4di) __B,
-						       __imm,
-						       (__v4di)
-						       _mm256_setzero_si256 (),
-						       (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
-{
-  return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
-						       (__v8si) __B,
-						       __imm,
-						       (__v8si)
-						       _mm256_setzero_si256 (),
-						       (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A,
-			   __m256i __B, const int __imm)
-{
-  return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
-						       (__v8si) __B,
-						       __imm,
-						       (__v8si) __W,
-						       (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
-			    const int __imm)
-{
-  return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
-						       (__v8si) __B,
-						       __imm,
-						       (__v8si)
-						       _mm256_setzero_si256 (),
-						       (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
-{
-  return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
-						       (__v4df) __B,
-						       __imm,
-						       (__v4df)
-						       _mm256_setzero_pd (),
-						       (__mmask8) -1);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A,
-			   __m256d __B, const int __imm)
-{
-  return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
-						       (__v4df) __B,
-						       __imm,
-						       (__v4df) __W,
-						       (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
-			    const int __imm)
-{
-  return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
-						       (__v4df) __B,
-						       __imm,
-						       (__v4df)
-						       _mm256_setzero_pd (),
-						       (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
-{
-  return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
-						      (__v8sf) __B,
-						      __imm,
-						      (__v8sf)
-						      _mm256_setzero_ps (),
-						      (__mmask8) -1);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A,
-			   __m256 __B, const int __imm)
-{
-  return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
-						      (__v8sf) __B,
-						      __imm,
-						      (__v8sf) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
-			    const int __imm)
-{
-  return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
-						      (__v8sf) __B,
-						      __imm,
-						      (__v8sf)
-						      _mm256_setzero_ps (),
-						      (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C,
-		    const int __imm)
-{
-  return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
-						      (__v4df) __B,
-						      (__v4di) __C,
-						      __imm,
-						      (__mmask8) -1);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
-			 __m256i __C, const int __imm)
-{
-  return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
-						      (__v4df) __B,
-						      (__v4di) __C,
-						      __imm,
-						      (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B,
-			  __m256i __C, const int __imm)
-{
-  return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A,
-						       (__v4df) __B,
-						       (__v4di) __C,
-						       __imm,
-						       (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C,
-		    const int __imm)
-{
-  return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
-						     (__v8sf) __B,
-						     (__v8si) __C,
-						     __imm,
-						     (__mmask8) -1);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
-			 __m256i __C, const int __imm)
-{
-  return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
-						     (__v8sf) __B,
-						     (__v8si) __C,
-						     __imm,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B,
-			  __m256i __C, const int __imm)
-{
-  return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A,
-						      (__v8sf) __B,
-						      (__v8si) __C,
-						      __imm,
-						      (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C,
-		 const int __imm)
-{
-  return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
-						      (__v2df) __B,
-						      (__v2di) __C,
-						      __imm,
-						      (__mmask8) -1);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
-		      __m128i __C, const int __imm)
-{
-  return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
-						      (__v2df) __B,
-						      (__v2di) __C,
-						      __imm,
-						      (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B,
-		       __m128i __C, const int __imm)
-{
-  return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A,
-						       (__v2df) __B,
-						       (__v2di) __C,
-						       __imm,
-						       (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm)
-{
-  return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
-						     (__v4sf) __B,
-						     (__v4si) __C,
-						     __imm,
-						     (__mmask8) -1);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
-		      __m128i __C, const int __imm)
-{
-  return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
-						     (__v4sf) __B,
-						     (__v4si) __C,
-						     __imm,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B,
-		       __m128i __C, const int __imm)
-{
-  return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A,
-						      (__v4sf) __B,
-						      (__v4si) __C,
-						      __imm,
-						      (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-			const int __imm)
-{
-  return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
-						  (__v8si) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
-{
-  return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
-						  (__v8si)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-		     const int __imm)
-{
-  return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
-						  (__v4si) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
-{
-  return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
-						  (__v4si)
-						  _mm_setzero_si128 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-			const int __imm)
-{
-  return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
-						  (__v4di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
-{
-  return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-		     const int __imm)
-{
-  return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
-						  (__v2di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
-{
-  return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
-						  (__v2di)
-						  _mm_setzero_si128 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
-			   const int imm)
-{
-  return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
-						     (__v4di) __B,
-						     (__v4di) __C, imm,
-						     (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
-				__m256i __B, __m256i __C,
-				const int imm)
-{
-  return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
-						     (__v4di) __B,
-						     (__v4di) __C, imm,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
-				 __m256i __B, __m256i __C,
-				 const int imm)
-{
-  return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
-						      (__v4di) __B,
-						      (__v4di) __C,
-						      imm,
-						      (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
-			   const int imm)
-{
-  return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
-						     (__v8si) __B,
-						     (__v8si) __C, imm,
-						     (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
-				__m256i __B, __m256i __C,
-				const int imm)
-{
-  return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
-						     (__v8si) __B,
-						     (__v8si) __C, imm,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
-				 __m256i __B, __m256i __C,
-				 const int imm)
-{
-  return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
-						      (__v8si) __B,
-						      (__v8si) __C,
-						      imm,
-						      (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
-			const int imm)
-{
-  return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
-						     (__v2di) __B,
-						     (__v2di) __C, imm,
-						     (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
-			     __m128i __B, __m128i __C, const int imm)
-{
-  return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
-						     (__v2di) __B,
-						     (__v2di) __C, imm,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
-			      __m128i __B, __m128i __C, const int imm)
-{
-  return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
-						      (__v2di) __B,
-						      (__v2di) __C,
-						      imm,
-						      (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
-			const int imm)
-{
-  return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
-						     (__v4si) __B,
-						     (__v4si) __C, imm,
-						     (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
-			     __m128i __B, __m128i __C, const int imm)
-{
-  return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
-						     (__v4si) __B,
-						     (__v4si) __C, imm,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
-			      __m128i __B, __m128i __C, const int imm)
-{
-  return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
-						      (__v4si) __B,
-						      (__v4si) __C,
-						      imm,
-						      (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_roundscale_ps (__m256 __A, const int __imm)
-{
-  return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
-						      __imm,
-						      (__v8sf)
-						      _mm256_setzero_ps (),
-						      (__mmask8) -1);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A,
-			   const int __imm)
-{
-  return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
-						      __imm,
-						      (__v8sf) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
-{
-  return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
-						      __imm,
-						      (__v8sf)
-						      _mm256_setzero_ps (),
-						      (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_roundscale_pd (__m256d __A, const int __imm)
-{
-  return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
-						       __imm,
-						       (__v4df)
-						       _mm256_setzero_pd (),
-						       (__mmask8) -1);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A,
-			   const int __imm)
-{
-  return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
-						       __imm,
-						       (__v4df) __W,
-						       (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
-{
-  return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
-						       __imm,
-						       (__v4df)
-						       _mm256_setzero_pd (),
-						       (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_roundscale_ps (__m128 __A, const int __imm)
-{
-  return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
-						      __imm,
-						      (__v4sf)
-						      _mm_setzero_ps (),
-						      (__mmask8) -1);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A,
-			const int __imm)
-{
-  return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
-						      __imm,
-						      (__v4sf) __W,
-						      (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
-{
-  return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
-						      __imm,
-						      (__v4sf)
-						      _mm_setzero_ps (),
-						      (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_roundscale_pd (__m128d __A, const int __imm)
-{
-  return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
-						       __imm,
-						       (__v2df)
-						       _mm_setzero_pd (),
-						       (__mmask8) -1);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A,
-			const int __imm)
-{
-  return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
-						       __imm,
-						       (__v2df) __W,
-						       (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
-{
-  return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
-						       __imm,
-						       (__v2df)
-						       _mm_setzero_pd (),
-						       (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
-		   _MM_MANTISSA_SIGN_ENUM __C)
-{
-  return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
-						    (__C << 2) | __B,
-						    (__v8sf)
-						    _mm256_setzero_ps (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A,
-			_MM_MANTISSA_NORM_ENUM __B,
-			_MM_MANTISSA_SIGN_ENUM __C)
-{
-  return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
-						    (__C << 2) | __B,
-						    (__v8sf) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
-			 _MM_MANTISSA_NORM_ENUM __B,
-			 _MM_MANTISSA_SIGN_ENUM __C)
-{
-  return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
-						    (__C << 2) | __B,
-						    (__v8sf)
-						    _mm256_setzero_ps (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
-		_MM_MANTISSA_SIGN_ENUM __C)
-{
-  return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
-						    (__C << 2) | __B,
-						    (__v4sf)
-						    _mm_setzero_ps (),
-						    (__mmask8) -1);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A,
-		     _MM_MANTISSA_NORM_ENUM __B,
-		     _MM_MANTISSA_SIGN_ENUM __C)
-{
-  return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
-						    (__C << 2) | __B,
-						    (__v4sf) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
-		      _MM_MANTISSA_NORM_ENUM __B,
-		      _MM_MANTISSA_SIGN_ENUM __C)
-{
-  return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
-						    (__C << 2) | __B,
-						    (__v4sf)
-						    _mm_setzero_ps (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
-		   _MM_MANTISSA_SIGN_ENUM __C)
-{
-  return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
-						     (__C << 2) | __B,
-						     (__v4df)
-						     _mm256_setzero_pd (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A,
-			_MM_MANTISSA_NORM_ENUM __B,
-			_MM_MANTISSA_SIGN_ENUM __C)
-{
-  return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
-						     (__C << 2) | __B,
-						     (__v4df) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
-			 _MM_MANTISSA_NORM_ENUM __B,
-			 _MM_MANTISSA_SIGN_ENUM __C)
-{
-  return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
-						     (__C << 2) | __B,
-						     (__v4df)
-						     _mm256_setzero_pd (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
-		_MM_MANTISSA_SIGN_ENUM __C)
-{
-  return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
-						     (__C << 2) | __B,
-						     (__v2df)
-						     _mm_setzero_pd (),
-						     (__mmask8) -1);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A,
-		     _MM_MANTISSA_NORM_ENUM __B,
-		     _MM_MANTISSA_SIGN_ENUM __C)
-{
-  return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
-						     (__C << 2) | __B,
-						     (__v2df) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
-		      _MM_MANTISSA_NORM_ENUM __B,
-		      _MM_MANTISSA_SIGN_ENUM __C)
-{
-  return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
-						     (__C << 2) | __B,
-						     (__v2df)
-						     _mm_setzero_pd (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask,
-			   __m256i __index, float const *__addr,
-			   int __scale)
-{
-  return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old,
-						__addr,
-						(__v8si) __index,
-						__mask, __scale);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask,
-			__m128i __index, float const *__addr,
-			int __scale)
-{
-  return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old,
-						__addr,
-						(__v4si) __index,
-						__mask, __scale);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask,
-			   __m128i __index, double const *__addr,
-			   int __scale)
-{
-  return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old,
-						 __addr,
-						 (__v4si) __index,
-						 __mask, __scale);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask,
-			__m128i __index, double const *__addr,
-			int __scale)
-{
-  return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old,
-						 __addr,
-						 (__v4si) __index,
-						 __mask, __scale);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
-			   __m256i __index, float const *__addr,
-			   int __scale)
-{
-  return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old,
-						__addr,
-						(__v4di) __index,
-						__mask, __scale);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
-			__m128i __index, float const *__addr,
-			int __scale)
-{
-  return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old,
-						__addr,
-						(__v2di) __index,
-						__mask, __scale);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask,
-			   __m256i __index, double const *__addr,
-			   int __scale)
-{
-  return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old,
-						 __addr,
-						 (__v4di) __index,
-						 __mask, __scale);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask,
-			__m128i __index, double const *__addr,
-			int __scale)
-{
-  return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old,
-						 __addr,
-						 (__v2di) __index,
-						 __mask, __scale);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask,
-			      __m256i __index, int const *__addr,
-			      int __scale)
-{
-  return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old,
-						 __addr,
-						 (__v8si) __index,
-						 __mask, __scale);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask,
-			   __m128i __index, int const *__addr,
-			   int __scale)
-{
-  return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old,
-						 __addr,
-						 (__v4si) __index,
-						 __mask, __scale);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask,
-			      __m128i __index, long long const *__addr,
-			      int __scale)
-{
-  return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old,
-						 __addr,
-						 (__v4si) __index,
-						 __mask, __scale);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask,
-			   __m128i __index, long long const *__addr,
-			   int __scale)
-{
-  return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old,
-						 __addr,
-						 (__v4si) __index,
-						 __mask, __scale);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
-			      __m256i __index, int const *__addr,
-			      int __scale)
-{
-  return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old,
-						 __addr,
-						 (__v4di) __index,
-						 __mask, __scale);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
-			   __m128i __index, int const *__addr,
-			   int __scale)
-{
-  return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old,
-						 __addr,
-						 (__v2di) __index,
-						 __mask, __scale);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask,
-			      __m256i __index, long long const *__addr,
-			      int __scale)
-{
-  return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old,
-						 __addr,
-						 (__v4di) __index,
-						 __mask, __scale);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask,
-			   __m128i __index, long long const *__addr,
-			   int __scale)
-{
-  return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old,
-						 __addr,
-						 (__v2di) __index,
-						 __mask, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i32scatter_ps (float *__addr, __m256i __index,
-		      __m256 __v1, const int __scale)
-{
-  __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF,
-				(__v8si) __index, (__v8sf) __v1,
-				__scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i32scatter_ps (float *__addr, __mmask8 __mask,
-			   __m256i __index, __m256 __v1,
-			   const int __scale)
-{
-  __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,
-				(__v8sf) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i32scatter_ps (float *__addr, __m128i __index, __m128 __v1,
-		   const int __scale)
-{
-  __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF,
-				(__v4si) __index, (__v4sf) __v1,
-				__scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i32scatter_ps (float *__addr, __mmask8 __mask,
-			__m128i __index, __m128 __v1,
-			const int __scale)
-{
-  __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index,
-				(__v4sf) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i32scatter_pd (double *__addr, __m128i __index,
-		      __m256d __v1, const int __scale)
-{
-  __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF,
-				(__v4si) __index, (__v4df) __v1,
-				__scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
-			   __m128i __index, __m256d __v1,
-			   const int __scale)
-{
-  __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index,
-				(__v4df) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i32scatter_pd (double *__addr, __m128i __index,
-		   __m128d __v1, const int __scale)
-{
-  __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF,
-				(__v4si) __index, (__v2df) __v1,
-				__scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
-			__m128i __index, __m128d __v1,
-			const int __scale)
-{
-  __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,
-				(__v2df) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i64scatter_ps (float *__addr, __m256i __index,
-		      __m128 __v1, const int __scale)
-{
-  __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF,
-				(__v4di) __index, (__v4sf) __v1,
-				__scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
-			   __m256i __index, __m128 __v1,
-			   const int __scale)
-{
-  __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index,
-				(__v4sf) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i64scatter_ps (float *__addr, __m128i __index, __m128 __v1,
-		   const int __scale)
-{
-  __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,
-				(__v2di) __index, (__v4sf) __v1,
-				__scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
-			__m128i __index, __m128 __v1,
-			const int __scale)
-{
-  __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,
-				(__v4sf) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i64scatter_pd (double *__addr, __m256i __index,
-		      __m256d __v1, const int __scale)
-{
-  __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,
-				(__v4di) __index, (__v4df) __v1,
-				__scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
-			   __m256i __index, __m256d __v1,
-			   const int __scale)
-{
-  __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,
-				(__v4df) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i64scatter_pd (double *__addr, __m128i __index,
-		   __m128d __v1, const int __scale)
-{
-  __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF,
-				(__v2di) __index, (__v2df) __v1,
-				__scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
-			__m128i __index, __m128d __v1,
-			const int __scale)
-{
-  __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index,
-				(__v2df) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i32scatter_epi32 (int *__addr, __m256i __index,
-			 __m256i __v1, const int __scale)
-{
-  __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF,
-				(__v8si) __index, (__v8si) __v1,
-				__scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i32scatter_epi32 (int *__addr, __mmask8 __mask,
-			      __m256i __index, __m256i __v1,
-			      const int __scale)
-{
-  __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index,
-				(__v8si) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i32scatter_epi32 (int *__addr, __m128i __index,
-		      __m128i __v1, const int __scale)
-{
-  __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF,
-				(__v4si) __index, (__v4si) __v1,
-				__scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i32scatter_epi32 (int *__addr, __mmask8 __mask,
-			   __m128i __index, __m128i __v1,
-			   const int __scale)
-{
-  __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,
-				(__v4si) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i32scatter_epi64 (long long *__addr, __m128i __index,
-			 __m256i __v1, const int __scale)
-{
-  __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF,
-				(__v4si) __index, (__v4di) __v1,
-				__scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
-			      __m128i __index, __m256i __v1,
-			      const int __scale)
-{
-  __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index,
-				(__v4di) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i32scatter_epi64 (long long *__addr, __m128i __index,
-		      __m128i __v1, const int __scale)
-{
-  __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF,
-				(__v4si) __index, (__v2di) __v1,
-				__scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
-			   __m128i __index, __m128i __v1,
-			   const int __scale)
-{
-  __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index,
-				(__v2di) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i64scatter_epi32 (int *__addr, __m256i __index,
-			 __m128i __v1, const int __scale)
-{
-  __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF,
-				(__v4di) __index, (__v4si) __v1,
-				__scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
-			      __m256i __index, __m128i __v1,
-			      const int __scale)
-{
-  __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index,
-				(__v4si) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i64scatter_epi32 (int *__addr, __m128i __index,
-		      __m128i __v1, const int __scale)
-{
-  __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,
-				(__v2di) __index, (__v4si) __v1,
-				__scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
-			   __m128i __index, __m128i __v1,
-			   const int __scale)
-{
-  __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,
-				(__v4si) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i64scatter_epi64 (long long *__addr, __m256i __index,
-			 __m256i __v1, const int __scale)
-{
-  __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF,
-				(__v4di) __index, (__v4di) __v1,
-				__scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
-			      __m256i __index, __m256i __v1,
-			      const int __scale)
-{
-  __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,
-				(__v4di) __v1, __scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i64scatter_epi64 (long long *__addr, __m128i __index,
-		      __m128i __v1, const int __scale)
-{
-  __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF,
-				(__v2di) __index, (__v2di) __v1,
-				__scale);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
-			   __m128i __index, __m128i __v1,
-			   const int __scale)
-{
-  __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,
-				(__v2di) __v1, __scale);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-			   _MM_PERM_ENUM __mask)
-{
-  return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
-						  (__v8si) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
-			    _MM_PERM_ENUM __mask)
-{
-  return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
-						  (__v8si)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-			_MM_PERM_ENUM __mask)
-{
-  return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
-						  (__v4si) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
-			 _MM_PERM_ENUM __mask)
-{
-  return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
-						  (__v4si)
-						  _mm_setzero_si128 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_rol_epi32 (__m256i __A, const int __B)
-{
-  return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
-						 (__v8si)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-		       const int __B)
-{
-  return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
-						 (__v8si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
-{
-  return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
-						 (__v8si)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rol_epi32 (__m128i __A, const int __B)
-{
-  return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
-						 (__v4si)
-						 _mm_setzero_si128 (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-		    const int __B)
-{
-  return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
-						 (__v4si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
-{
-  return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
-						 (__v4si)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_ror_epi32 (__m256i __A, const int __B)
-{
-  return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
-						 (__v8si)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-		       const int __B)
-{
-  return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
-						 (__v8si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
-{
-  return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
-						 (__v8si)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ror_epi32 (__m128i __A, const int __B)
-{
-  return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
-						 (__v4si)
-						 _mm_setzero_si128 (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-		    const int __B)
-{
-  return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
-						 (__v4si) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
-{
-  return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
-						 (__v4si)
-						 _mm_setzero_si128 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_rol_epi64 (__m256i __A, const int __B)
-{
-  return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
-						 (__v4di)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-		       const int __B)
-{
-  return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
-						 (__v4di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
-{
-  return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
-						 (__v4di)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rol_epi64 (__m128i __A, const int __B)
-{
-  return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
-						 (__v2di)
-						 _mm_setzero_di (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-		    const int __B)
-{
-  return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
-						 (__v2di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
-{
-  return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
-						 (__v2di)
-						 _mm_setzero_di (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_ror_epi64 (__m256i __A, const int __B)
-{
-  return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
-						 (__v4di)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-		       const int __B)
-{
-  return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
-						 (__v4di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
-{
-  return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
-						 (__v4di)
-						 _mm256_setzero_si256 (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ror_epi64 (__m128i __A, const int __B)
-{
-  return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
-						 (__v2di)
-						 _mm_setzero_di (),
-						 (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-		    const int __B)
-{
-  return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
-						 (__v2di) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
-{
-  return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
-						 (__v2di)
-						 _mm_setzero_di (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
-{
-  return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
-						  (__v4si) __B, __imm,
-						  (__v4si)
-						  _mm_setzero_si128 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-		       __m128i __B, const int __imm)
-{
-  return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
-						  (__v4si) __B, __imm,
-						  (__v4si) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
-			const int __imm)
-{
-  return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
-						  (__v4si) __B, __imm,
-						  (__v4si)
-						  _mm_setzero_si128 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
-{
-  return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
-						  (__v2di) __B, __imm,
-						  (__v2di)
-						  _mm_setzero_di (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-		       __m128i __B, const int __imm)
-{
-  return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
-						  (__v2di) __B, __imm,
-						  (__v2di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
-			const int __imm)
-{
-  return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
-						  (__v2di) __B, __imm,
-						  (__v2di)
-						  _mm_setzero_di (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
-{
-  return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
-						  (__v8si) __B, __imm,
-						  (__v8si)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-			  __m256i __B, const int __imm)
-{
-  return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
-						  (__v8si) __B, __imm,
-						  (__v8si) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
-			   const int __imm)
-{
-  return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
-						  (__v8si) __B, __imm,
-						  (__v8si)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
-{
-  return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
-						  (__v4di) __B, __imm,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-			  __m256i __B, const int __imm)
-{
-  return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
-						  (__v4di) __B, __imm,
-						  (__v4di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
-			   const int __imm)
-{
-  return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
-						  (__v4di) __B, __imm,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A,
-		   const int __I)
-{
-  return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
-						  (__v8hi) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
-{
-  return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
-						  (__v8hi)
-						  _mm_setzero_hi (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A,
-		      const int __I)
-{
-  return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
-						     (__v8hi) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
-{
-  return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
-						     (__v8hi)
-						     _mm_setzero_hi (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-			const int __imm)
-{
-  return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
-						  (__v8si) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
-{
-  return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
-						  (__v8si)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
-		     const int __imm)
-{
-  return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
-						  (__v4si) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
-{
-  return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
-						  (__v4si)
-						  _mm_setzero_si128 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_srai_epi64 (__m256i __A, const int __imm)
-{
-  return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-			const int __imm)
-{
-  return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
-						  (__v4di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
-{
-  return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srai_epi64 (__m128i __A, const int __imm)
-{
-  return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
-						  (__v2di)
-						  _mm_setzero_di (),
-						  (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
-		     const int __imm)
-{
-  return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
-						  (__v2di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
-{
-  return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
-						  (__v2di)
-						  _mm_setzero_si128 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
-{
-  return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
-						  (__v4si) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
-{
-  return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
-						  (__v4si)
-						  _mm_setzero_si128 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
-{
-  return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
-						  (__v2di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
-{
-  return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
-						  (__v2di)
-						  _mm_setzero_di (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
-			int __B)
-{
-  return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
-						  (__v8si) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
-{
-  return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
-						  (__v8si)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
-			int __B)
-{
-  return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
-						  (__v4di) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
-{
-  return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
-						  (__v4di)
-						  _mm256_setzero_si256 (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X,
-			 const int __imm)
-{
-  return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
-						  (__v4df) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
-{
-  return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
-						  (__v4df)
-						  _mm256_setzero_pd (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X,
-			const int __C)
-{
-  return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
-						     (__v4df) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
-{
-  return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
-						     (__v4df)
-						     _mm256_setzero_pd (),
-						     (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X,
-		     const int __C)
-{
-  return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
-						  (__v2df) __W,
-						  (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
-{
-  return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
-						  (__v2df)
-						  _mm_setzero_pd (),
-						  (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X,
-			const int __C)
-{
-  return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
-						    (__v8sf) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
-{
-  return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
-						    (__v8sf)
-						    _mm256_setzero_ps (),
-						    (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X,
-		     const int __C)
-{
-  return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
-						 (__v4sf) __W,
-						 (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
-{
-  return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
-						 (__v4sf)
-						 _mm_setzero_ps (),
-						 (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
-{
-  return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
-						     (__v4df) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
-{
-  return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
-						    (__v8sf) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
-{
-  return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
-						    (__v4di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
-{
-  return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
-						    (__v8si) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
-{
-  return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
-						     (__v2df) __W,
-						     (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
-{
-  return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
-						    (__v4sf) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
-{
-  return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
-						    (__v2di) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
-{
-  return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
-						    (__v4si) __W,
-						    (__mmask8) __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
-						 (__v4di) __Y, __P,
-						 (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
-						 (__v8si) __Y, __P,
-						 (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
-						  (__v4di) __Y, __P,
-						  (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
-						  (__v8si) __Y, __P,
-						  (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
-						  (__v4df) __Y, __P,
-						  (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
-						  (__v8sf) __Y, __P,
-						  (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
-			    const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
-						 (__v4di) __Y, __P,
-						 (__mmask8) __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
-			    const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
-						 (__v8si) __Y, __P,
-						 (__mmask8) __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
-			    const int __P)
-{
-  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
-						  (__v4di) __Y, __P,
-						  (__mmask8) __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
-			    const int __P)
-{
-  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
-						  (__v8si) __Y, __P,
-						  (__mmask8) __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y,
-			 const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
-						  (__v4df) __Y, __P,
-						  (__mmask8) __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y,
-			 const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
-						  (__v8sf) __Y, __P,
-						  (__mmask8) __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
-						 (__v2di) __Y, __P,
-						 (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
-						 (__v4si) __Y, __P,
-						 (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
-						  (__v2di) __Y, __P,
-						  (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
-						  (__v4si) __Y, __P,
-						  (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
-						  (__v2df) __Y, __P,
-						  (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
-						  (__v4sf) __Y, __P,
-						  (__mmask8) -1);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
-			 const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
-						 (__v2di) __Y, __P,
-						 (__mmask8) __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
-			 const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
-						 (__v4si) __Y, __P,
-						 (__mmask8) __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
-			 const int __P)
-{
-  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
-						  (__v2di) __Y, __P,
-						  (__mmask8) __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
-			 const int __P)
-{
-  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
-						  (__v4si) __Y, __P,
-						  (__mmask8) __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y,
-		      const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
-						  (__v2df) __Y, __P,
-						  (__mmask8) __U);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y,
-		      const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
-						  (__v4sf) __Y, __P,
-						  (__mmask8) __U);
-}
-
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permutex_pd (__m256d __X, const int __M)
-{
-  return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M,
-						  (__v4df)
-						  _mm256_undefined_pd (),
-						  (__mmask8) -1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
-						  (__v8si) __Y, 4,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
-						  (__v8si) __Y, 4,
-						  (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
-						  (__v8si) __Y, 1,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
-						  (__v8si) __Y, 1,
-						  (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
-						  (__v8si) __Y, 5,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
-						  (__v8si) __Y, 5,
-						  (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
-						  (__v8si) __Y, 2,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
-						  (__v8si) __Y, 2,
-						  (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
-						  (__v4di) __Y, 4,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
-						  (__v4di) __Y, 4,
-						  (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
-						  (__v4di) __Y, 1,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
-						  (__v4di) __Y, 1,
-						  (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
-						  (__v4di) __Y, 5,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
-						  (__v4di) __Y, 5,
-						  (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
-						  (__v4di) __Y, 2,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
-						  (__v4di) __Y, 2,
-						  (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
-						 (__v8si) __Y, 4,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
-						 (__v8si) __Y, 4,
-						 (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
-						 (__v8si) __Y, 1,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
-						 (__v8si) __Y, 1,
-						 (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
-						 (__v8si) __Y, 5,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
-						 (__v8si) __Y, 5,
-						 (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
-						 (__v8si) __Y, 2,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
-						 (__v8si) __Y, 2,
-						 (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
-						 (__v4di) __Y, 4,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
-						 (__v4di) __Y, 4,
-						 (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
-						 (__v4di) __Y, 1,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
-						 (__v4di) __Y, 1,
-						 (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
-						 (__v4di) __Y, 5,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
-						 (__v4di) __Y, 5,
-						 (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
-						 (__v4di) __Y, 2,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
-						 (__v4di) __Y, 2,
-						 (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
-						  (__v4si) __Y, 4,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
-						  (__v4si) __Y, 4,
-						  (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
-						  (__v4si) __Y, 1,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
-						  (__v4si) __Y, 1,
-						  (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
-						  (__v4si) __Y, 5,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
-						  (__v4si) __Y, 5,
-						  (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
-						  (__v4si) __Y, 2,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
-						  (__v4si) __Y, 2,
-						  (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
-						  (__v2di) __Y, 4,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
-						  (__v2di) __Y, 4,
-						  (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
-						  (__v2di) __Y, 1,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
-						  (__v2di) __Y, 1,
-						  (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
-						  (__v2di) __Y, 5,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
-						  (__v2di) __Y, 5,
-						  (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
-						  (__v2di) __Y, 2,
-						  (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
-						  (__v2di) __Y, 2,
-						  (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
-						 (__v4si) __Y, 4,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
-						 (__v4si) __Y, 4,
-						 (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
-						 (__v4si) __Y, 1,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
-						 (__v4si) __Y, 1,
-						 (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
-						 (__v4si) __Y, 5,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
-						 (__v4si) __Y, 5,
-						 (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
-						 (__v4si) __Y, 2,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
-						 (__v4si) __Y, 2,
-						 (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
-						 (__v2di) __Y, 4,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
-						 (__v2di) __Y, 4,
-						 (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
-						 (__v2di) __Y, 1,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
-						 (__v2di) __Y, 1,
-						 (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
-						 (__v2di) __Y, 5,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
-						 (__v2di) __Y, 5,
-						 (__mmask8) - 1);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
-						 (__v2di) __Y, 2,
-						 (__mmask8) __M);
-}
-
-extern __inline __mmask8
-  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
-{
-  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
-						 (__v2di) __Y, 2,
-						 (__mmask8) - 1);
-}
-
-#else
-#define _mm256_permutex_pd(X, M)						\
-  ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(X), (int)(M),	\
-					    (__v4df)(__m256d)_mm256_undefined_pd(),\
-					    (__mmask8)-1))
-
-#define _mm256_maskz_permutex_epi64(M, X, I)                    \
-  ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X),    \
-					    (int)(I),                \
-					    (__v4di)(__m256i)        \
-					    (_mm256_setzero_si256()),\
-					    (__mmask8)(M)))
-
-#define _mm256_mask_permutex_epi64(W, M, X, I)               \
-  ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
-					    (int)(I),             \
-					    (__v4di)(__m256i)(W), \
-					    (__mmask8)(M)))
-
-#define _mm256_insertf32x4(X, Y, C)                                     \
-  ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X),  \
-    (__v4sf)(__m128) (Y), (int) (C),					\
-    (__v8sf)(__m256)_mm256_setzero_ps(),				\
-    (__mmask8)-1))
-
-#define _mm256_mask_insertf32x4(W, U, X, Y, C)                          \
-  ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X),  \
-    (__v4sf)(__m128) (Y), (int) (C),					\
-    (__v8sf)(__m256)(W),						\
-    (__mmask8)(U)))
-
-#define _mm256_maskz_insertf32x4(U, X, Y, C)                            \
-  ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X),	\
-    (__v4sf)(__m128) (Y), (int) (C),					\
-    (__v8sf)(__m256)_mm256_setzero_ps(),				\
-    (__mmask8)(U)))
-
-#define _mm256_inserti32x4(X, Y, C)                                     \
-  ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
-    (__v4si)(__m128i) (Y), (int) (C),					\
-    (__v8si)(__m256i)_mm256_setzero_si256(),				\
-    (__mmask8)-1))
-
-#define _mm256_mask_inserti32x4(W, U, X, Y, C)                          \
-  ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
-    (__v4si)(__m128i) (Y), (int) (C),					\
-    (__v8si)(__m256i)(W),						\
-    (__mmask8)(U)))
-
-#define _mm256_maskz_inserti32x4(U, X, Y, C)                            \
-  ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
-    (__v4si)(__m128i) (Y), (int) (C),					\
-    (__v8si)(__m256i)_mm256_setzero_si256(),				\
-    (__mmask8)(U)))
-
-#define _mm256_extractf32x4_ps(X, C)                                    \
-  ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
-    (int) (C),								\
-    (__v4sf)(__m128)_mm_setzero_ps(),					\
-    (__mmask8)-1))
-
-#define _mm256_mask_extractf32x4_ps(W, U, X, C)                         \
-  ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
-    (int) (C),								\
-    (__v4sf)(__m128)(W),						\
-    (__mmask8)(U)))
-
-#define _mm256_maskz_extractf32x4_ps(U, X, C)                           \
-  ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
-    (int) (C),								\
-    (__v4sf)(__m128)_mm_setzero_ps(),					\
-    (__mmask8)(U)))
-
-#define _mm256_extracti32x4_epi32(X, C)                                 \
-  ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
-    (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))
-
-#define _mm256_mask_extracti32x4_epi32(W, U, X, C)                      \
-  ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
-    (int) (C), (__v4si)(__m128i)(W), (__mmask8)(U)))
-
-#define _mm256_maskz_extracti32x4_epi32(U, X, C)                        \
-  ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
-    (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
-
-#define _mm256_shuffle_i64x2(X, Y, C)                                                   \
-  ((__m256i)  __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X),                 \
-                                                  (__v4di)(__m256i)(Y), (int)(C),       \
-                                                  (__v4di)(__m256i)_mm256_setzero_si256 (), \
-                                                  (__mmask8)-1))
-
-#define _mm256_mask_shuffle_i64x2(W, U, X, Y, C)                                        \
-  ((__m256i)  __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X),                 \
-                                                  (__v4di)(__m256i)(Y), (int)(C),       \
-                                                  (__v4di)(__m256i)(W),\
-                                                  (__mmask8)(U)))
-
-#define _mm256_maskz_shuffle_i64x2(U, X, Y, C)                                          \
-  ((__m256i)  __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X),                 \
-                                                  (__v4di)(__m256i)(Y), (int)(C),       \
-                                                  (__v4di)(__m256i)_mm256_setzero_si256 (), \
-                                                  (__mmask8)(U)))
-
-#define _mm256_shuffle_i32x4(X, Y, C)                                                   \
-  ((__m256i)  __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X),                 \
-                                                  (__v8si)(__m256i)(Y), (int)(C),       \
-                                                  (__v8si)(__m256i)_mm256_setzero_si256(), \
-                                                  (__mmask8)-1))
-
-#define _mm256_mask_shuffle_i32x4(W, U, X, Y, C)                                        \
-  ((__m256i)  __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X),                 \
-                                                  (__v8si)(__m256i)(Y), (int)(C),       \
-                                                  (__v8si)(__m256i)(W),                 \
-                                                  (__mmask8)(U)))
-
-#define _mm256_maskz_shuffle_i32x4(U, X, Y, C)                                          \
-  ((__m256i)  __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X),                 \
-                                                  (__v8si)(__m256i)(Y), (int)(C),       \
-                                                  (__v8si)(__m256i)_mm256_setzero_si256(), \
-                                                  (__mmask8)(U)))
-
-#define _mm256_shuffle_f64x2(X, Y, C)                                                   \
-  ((__m256d)  __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X),                 \
-                                                  (__v4df)(__m256d)(Y), (int)(C),       \
-                                                  (__v4df)(__m256d)_mm256_setzero_pd(), \
-                                                  (__mmask8)-1))
-
-#define _mm256_mask_shuffle_f64x2(W, U, X, Y, C)                                        \
-  ((__m256d)  __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X),                 \
-                                                  (__v4df)(__m256d)(Y), (int)(C),       \
-                                                  (__v4df)(__m256d)(W),                 \
-                                                  (__mmask8)(U)))
-
-#define _mm256_maskz_shuffle_f64x2(U, X, Y, C)                                          \
-  ((__m256d)  __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X),                 \
-                                                  (__v4df)(__m256d)(Y), (int)(C),       \
-                                                  (__v4df)(__m256d)_mm256_setzero_pd(), \
-                                                  (__mmask8)(U)))
-
-#define _mm256_shuffle_f32x4(X, Y, C)                                                   \
-  ((__m256)  __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X),                   \
-                                                 (__v8sf)(__m256)(Y), (int)(C),         \
-                                                 (__v8sf)(__m256)_mm256_setzero_ps(),   \
-                                                 (__mmask8)-1))
-
-#define _mm256_mask_shuffle_f32x4(W, U, X, Y, C)                                        \
-  ((__m256)  __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X),                   \
-                                                 (__v8sf)(__m256)(Y), (int)(C),         \
-                                                 (__v8sf)(__m256)(W),                   \
-                                                 (__mmask8)(U)))
-
-#define _mm256_maskz_shuffle_f32x4(U, X, Y, C)                                          \
-  ((__m256)  __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X),                   \
-                                                 (__v8sf)(__m256)(Y), (int)(C),         \
-                                                 (__v8sf)(__m256)_mm256_setzero_ps(),   \
-                                                 (__mmask8)(U)))
-
-#define _mm256_mask_shuffle_pd(W, U, A, B, C)                                   \
-  ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A),                \
-                                           (__v4df)(__m256d)(B), (int)(C),      \
-                                           (__v4df)(__m256d)(W),                \
-                                           (__mmask8)(U)))
-
-#define _mm256_maskz_shuffle_pd(U, A, B, C)                                     \
-  ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A),                \
-                                           (__v4df)(__m256d)(B), (int)(C),      \
-                                           (__v4df)(__m256d)_mm256_setzero_pd(),\
-                                           (__mmask8)(U)))
-
-#define _mm_mask_shuffle_pd(W, U, A, B, C)                                      \
-  ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A),                \
-                                           (__v2df)(__m128d)(B), (int)(C),      \
-                                           (__v2df)(__m128d)(W),                \
-                                           (__mmask8)(U)))
-
-#define _mm_maskz_shuffle_pd(U, A, B, C)                                        \
-  ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A),                \
-                                           (__v2df)(__m128d)(B), (int)(C),      \
-                                           (__v2df)(__m128d)_mm_setzero_pd(),   \
-                                           (__mmask8)(U)))
-
-#define _mm256_mask_shuffle_ps(W, U, A, B, C)                                   \
-  ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A),                 \
-                                           (__v8sf)(__m256)(B), (int)(C),       \
-                                           (__v8sf)(__m256)(W),                 \
-                                           (__mmask8)(U)))
-
-#define _mm256_maskz_shuffle_ps(U, A, B, C)                                     \
-  ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A),                 \
-                                           (__v8sf)(__m256)(B), (int)(C),       \
-                                           (__v8sf)(__m256)_mm256_setzero_ps(), \
-                                           (__mmask8)(U)))
-
-#define _mm_mask_shuffle_ps(W, U, A, B, C)                                      \
-  ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A),                 \
-                                           (__v4sf)(__m128)(B), (int)(C),       \
-                                           (__v4sf)(__m128)(W),                 \
-                                           (__mmask8)(U)))
-
-#define _mm_maskz_shuffle_ps(U, A, B, C)                                        \
-  ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A),                 \
-                                           (__v4sf)(__m128)(B), (int)(C),       \
-                                           (__v4sf)(__m128)_mm_setzero_ps(),    \
-                                           (__mmask8)(U)))
-
-#define _mm256_fixupimm_pd(X, Y, Z, C)                                          \
-  ((__m256d)__builtin_ia32_fixupimmpd256_mask ((__v4df)(__m256d)(X),		\
-					       (__v4df)(__m256d)(Y),		\
-					       (__v4di)(__m256i)(Z), (int)(C),	\
-					       (__mmask8)(-1)))
-
-#define _mm256_mask_fixupimm_pd(X, U, Y, Z, C)                                  \
-   ((__m256d)__builtin_ia32_fixupimmpd256_mask ((__v4df)(__m256d)(X),           \
-						(__v4df)(__m256d)(Y),           \
-						(__v4di)(__m256i)(Z), (int)(C), \
-						(__mmask8)(U)))
-
-#define _mm256_maskz_fixupimm_pd(U, X, Y, Z, C)                                 \
-   ((__m256d)__builtin_ia32_fixupimmpd256_maskz ((__v4df)(__m256d)(X),          \
-						 (__v4df)(__m256d)(Y),          \
-						 (__v4di)(__m256i)(Z), (int)(C),\
-						 (__mmask8)(U)))
-
-#define _mm256_fixupimm_ps(X, Y, Z, C)						\
-  ((__m256)__builtin_ia32_fixupimmps256_mask ((__v8sf)(__m256)(X),		\
-					      (__v8sf)(__m256)(Y),		\
-					      (__v8si)(__m256i)(Z), (int)(C),	\
-					      (__mmask8)(-1)))
-
-
-#define _mm256_mask_fixupimm_ps(X, U, Y, Z, C)                                  \
-    ((__m256)__builtin_ia32_fixupimmps256_mask ((__v8sf)(__m256)(X),            \
-						(__v8sf)(__m256)(Y),            \
-						(__v8si)(__m256i)(Z), (int)(C), \
-						(__mmask8)(U)))
-
-#define _mm256_maskz_fixupimm_ps(U, X, Y, Z, C)                                 \
-    ((__m256)__builtin_ia32_fixupimmps256_maskz ((__v8sf)(__m256)(X),           \
-						 (__v8sf)(__m256)(Y),           \
-						 (__v8si)(__m256i)(Z), (int)(C),\
-						 (__mmask8)(U)))
-
-#define _mm_fixupimm_pd(X, Y, Z, C)						\
-  ((__m128d)__builtin_ia32_fixupimmpd128_mask ((__v2df)(__m128d)(X),		\
-					       (__v2df)(__m128d)(Y),		\
-					       (__v2di)(__m128i)(Z), (int)(C), 	\
-					       (__mmask8)(-1)))
-
-
-#define _mm_mask_fixupimm_pd(X, U, Y, Z, C)                                       \
-     ((__m128d)__builtin_ia32_fixupimmpd128_mask ((__v2df)(__m128d)(X),           \
-						  (__v2df)(__m128d)(Y),           \
-						  (__v2di)(__m128i)(Z), (int)(C), \
-						  (__mmask8)(U)))
-
-#define _mm_maskz_fixupimm_pd(U, X, Y, Z, C)                                      \
-     ((__m128d)__builtin_ia32_fixupimmpd128_maskz ((__v2df)(__m128d)(X),          \
-						   (__v2df)(__m128d)(Y),          \
-						   (__v2di)(__m128i)(Z), (int)(C),\
-						   (__mmask8)(U)))
-
-#define _mm_fixupimm_ps(X, Y, Z, C)						\
-   ((__m128)__builtin_ia32_fixupimmps128_mask ((__v4sf)(__m128)(X),		\
-					       (__v4sf)(__m128)(Y),		\
-					       (__v4si)(__m128i)(Z), (int)(C), 	\
-					       (__mmask8)(-1)))
-
-#define _mm_mask_fixupimm_ps(X, U, Y, Z, C)                                      \
-      ((__m128)__builtin_ia32_fixupimmps128_mask ((__v4sf)(__m128)(X),           \
-						  (__v4sf)(__m128)(Y),           \
-						  (__v4si)(__m128i)(Z), (int)(C),\
-						  (__mmask8)(U)))
-
-#define _mm_maskz_fixupimm_ps(U, X, Y, Z, C)                                      \
-      ((__m128)__builtin_ia32_fixupimmps128_maskz ((__v4sf)(__m128)(X),           \
-						   (__v4sf)(__m128)(Y),           \
-						   (__v4si)(__m128i)(Z), (int)(C),\
-						   (__mmask8)(U)))
-
-#define _mm256_mask_srli_epi32(W, U, A, B)				\
-  ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A),	\
-    (int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))
-
-#define _mm256_maskz_srli_epi32(U, A, B)				\
-  ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A),	\
-    (int)(B), (__v8si)_mm256_setzero_si256(), (__mmask8)(U)))
-
-#define _mm_mask_srli_epi32(W, U, A, B)                                 \
-  ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A),       \
-    (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))
-
-#define _mm_maskz_srli_epi32(U, A, B)                                   \
-  ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A),       \
-    (int)(B), (__v4si)_mm_setzero_si128(), (__mmask8)(U)))
-
-#define _mm256_mask_srli_epi64(W, U, A, B)				\
-  ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A),	\
-    (int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))
-
-#define _mm256_maskz_srli_epi64(U, A, B)				\
-  ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A),	\
-    (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
-
-#define _mm_mask_srli_epi64(W, U, A, B)                                 \
-  ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A),       \
-    (int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))
-
-#define _mm_maskz_srli_epi64(U, A, B)                                   \
-  ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A),       \
-    (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
-
-#define _mm256_mask_slli_epi32(W, U, X, C)                                \
-  ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
-    (__v8si)(__m256i)(W),\
-    (__mmask8)(U)))
-
-#define _mm256_maskz_slli_epi32(U, X, C)                                  \
-  ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
-    (__v8si)(__m256i)_mm256_setzero_si256(),\
-    (__mmask8)(U)))
-
-#define _mm256_mask_slli_epi64(W, U, X, C)                                \
-  ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
-    (__v4di)(__m256i)(W),\
-    (__mmask8)(U)))
-
-#define _mm256_maskz_slli_epi64(U, X, C)                                  \
-  ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
-    (__v4di)(__m256i)_mm256_setzero_si256 (),\
-    (__mmask8)(U)))
-
-#define _mm_mask_slli_epi32(W, U, X, C)					  \
-  ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\
-    (__v4si)(__m128i)(W),\
-    (__mmask8)(U)))
-
-#define _mm_maskz_slli_epi32(U, X, C)					  \
-  ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\
-    (__v4si)(__m128i)_mm_setzero_si128 (),\
-    (__mmask8)(U)))
-
-#define _mm_mask_slli_epi64(W, U, X, C)					  \
-  ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
-    (__v2di)(__m128i)(W),\
-    (__mmask8)(U)))
-
-#define _mm_maskz_slli_epi64(U, X, C)					  \
-  ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
-    (__v2di)(__m128i)_mm_setzero_di(),\
-    (__mmask8)(U)))
-
-#define _mm256_ternarylogic_epi64(A, B, C, I)                           \
-  ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A),	\
-    (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)-1))
-
-#define _mm256_mask_ternarylogic_epi64(A, U, B, C, I)			\
-  ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A),	\
-    (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))
-
-#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I)			\
-  ((__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di)(__m256i)(A),	\
-    (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))
-
-#define _mm256_ternarylogic_epi32(A, B, C, I)                           \
-  ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A),	\
-    (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)-1))
-
-#define _mm256_mask_ternarylogic_epi32(A, U, B, C, I)                   \
-  ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A),	\
-    (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))
-
-#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I)			\
-  ((__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si)(__m256i)(A),	\
-    (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))
-
-#define _mm_ternarylogic_epi64(A, B, C, I)                              \
-  ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A),	\
-    (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)-1))
-
-#define _mm_mask_ternarylogic_epi64(A, U, B, C, I)			\
-  ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A),	\
-    (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))
-
-#define _mm_maskz_ternarylogic_epi64(U, A, B, C, I)			\
-  ((__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di)(__m128i)(A),	\
-    (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))
-
-#define _mm_ternarylogic_epi32(A, B, C, I)                              \
-  ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A),	\
-    (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)-1))
-
-#define _mm_mask_ternarylogic_epi32(A, U, B, C, I)			\
-  ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A),	\
-    (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))
-
-#define _mm_maskz_ternarylogic_epi32(U, A, B, C, I)			\
-  ((__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si)(__m128i)(A),	\
-    (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))
-
-#define _mm256_roundscale_ps(A, B)				        \
-  ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A),    \
-    (int)(B), (__v8sf)(__m256)_mm256_setzero_ps(), (__mmask8)-1))
-
-#define _mm256_mask_roundscale_ps(W, U, A, B)			        \
-  ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A),    \
-    (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U)))
-
-#define _mm256_maskz_roundscale_ps(U, A, B)			        \
-  ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A),    \
-    (int)(B), (__v8sf)(__m256)_mm256_setzero_ps(), (__mmask8)(U)))
-
-#define _mm256_roundscale_pd(A, B)				        \
-  ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A),  \
-    (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)-1))
-
-#define _mm256_mask_roundscale_pd(W, U, A, B)			        \
-  ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A),  \
-    (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))
-
-#define _mm256_maskz_roundscale_pd(U, A, B)			        \
-  ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A),  \
-    (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U)))
-
-#define _mm_roundscale_ps(A, B)					        \
-  ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A),    \
-    (int)(B), (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)-1))
-
-#define _mm_mask_roundscale_ps(W, U, A, B)			        \
-  ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A),    \
-    (int)(B), (__v4sf)(__m128)(W), (__mmask8)(U)))
-
-#define _mm_maskz_roundscale_ps(U, A, B)			        \
-  ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A),    \
-    (int)(B), (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)(U)))
-
-#define _mm_roundscale_pd(A, B)					        \
-  ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A),  \
-    (int)(B), (__v2df)(__m128d)_mm_setzero_pd(), (__mmask8)-1))
-
-#define _mm_mask_roundscale_pd(W, U, A, B)			        \
-  ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A),  \
-    (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U)))
-
-#define _mm_maskz_roundscale_pd(U, A, B)			        \
-  ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A),  \
-    (int)(B), (__v2df)(__m128d)_mm_setzero_pd(), (__mmask8)(U)))
-
-#define _mm256_getmant_ps(X, B, C)                                              \
-  ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X),             \
-                                         (int)(((C)<<2) | (B)),                 \
-                                         (__v8sf)(__m256)_mm256_setzero_ps(),   \
-                                         (__mmask8)-1))
-
-#define _mm256_mask_getmant_ps(W, U, X, B, C)                                   \
-  ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X),             \
-                                         (int)(((C)<<2) | (B)),                 \
-                                         (__v8sf)(__m256)(W),                   \
-                                         (__mmask8)(U)))
-
-#define _mm256_maskz_getmant_ps(U, X, B, C)                                     \
-  ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X),             \
-                                         (int)(((C)<<2) | (B)),                 \
-                                         (__v8sf)(__m256)_mm256_setzero_ps(),   \
-                                         (__mmask8)(U)))
-
-#define _mm_getmant_ps(X, B, C)                                                 \
-  ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X),             \
-                                         (int)(((C)<<2) | (B)),                 \
-                                         (__v4sf)(__m128)_mm_setzero_ps(),      \
-                                         (__mmask8)-1))
-
-#define _mm_mask_getmant_ps(W, U, X, B, C)                                      \
-  ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X),             \
-                                         (int)(((C)<<2) | (B)),                 \
-                                         (__v4sf)(__m128)(W),                   \
-                                         (__mmask8)(U)))
-
-#define _mm_maskz_getmant_ps(U, X, B, C)                                        \
-  ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X),             \
-                                         (int)(((C)<<2) | (B)),                 \
-                                         (__v4sf)(__m128)_mm_setzero_ps(),      \
-                                         (__mmask8)(U)))
-
-#define _mm256_getmant_pd(X, B, C)                                              \
-  ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X),           \
-                                         (int)(((C)<<2) | (B)),                 \
-                                          (__v4df)(__m256d)_mm256_setzero_pd(), \
-                                          (__mmask8)-1))
-
-#define _mm256_mask_getmant_pd(W, U, X, B, C)                                   \
-  ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X),           \
-                                         (int)(((C)<<2) | (B)),                 \
-                                          (__v4df)(__m256d)(W),                 \
-                                          (__mmask8)(U)))
-
-#define _mm256_maskz_getmant_pd(U, X, B, C)                                     \
-  ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X),           \
-                                         (int)(((C)<<2) | (B)),                 \
-                                          (__v4df)(__m256d)_mm256_setzero_pd(), \
-                                          (__mmask8)(U)))
-
-#define _mm_getmant_pd(X, B, C)                                                 \
-  ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X),           \
-                                         (int)(((C)<<2) | (B)),                 \
-                                          (__v2df)(__m128d)_mm_setzero_pd(),    \
-                                          (__mmask8)-1))
-
-#define _mm_mask_getmant_pd(W, U, X, B, C)                                      \
-  ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X),           \
-                                         (int)(((C)<<2) | (B)),                 \
-                                          (__v2df)(__m128d)(W),                 \
-                                          (__mmask8)(U)))
-
-#define _mm_maskz_getmant_pd(U, X, B, C)                                        \
-  ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X),           \
-                                         (int)(((C)<<2) | (B)),                 \
-                                          (__v2df)(__m128d)_mm_setzero_pd(),    \
-                                          (__mmask8)(U)))
-
-#define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m256) __builtin_ia32_gather3siv8sf ((__v8sf)(__m256)V1OLD,		\
-					 (float const *)ADDR,		\
-					 (__v8si)(__m256i)INDEX,	\
-					 (__mmask8)MASK, (int)SCALE)
-
-#define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)		\
-  (__m128) __builtin_ia32_gather3siv4sf ((__v4sf)(__m128)V1OLD,		\
-					 (float const *)ADDR,		\
-					 (__v4si)(__m128i)INDEX,	\
-					 (__mmask8)MASK, (int)SCALE)
-
-#define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m256d) __builtin_ia32_gather3siv4df ((__v4df)(__m256d)V1OLD,	\
-					  (double const *)ADDR,		\
-					  (__v4si)(__m128i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
-
-#define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)		\
-  (__m128d) __builtin_ia32_gather3siv2df ((__v2df)(__m128d)V1OLD,	\
-					  (double const *)ADDR,		\
-					  (__v4si)(__m128i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
-
-#define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128)V1OLD,		\
-					 (float const *)ADDR,		\
-					 (__v4di)(__m256i)INDEX,	\
-					 (__mmask8)MASK, (int)SCALE)
-
-#define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)		\
-  (__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128)V1OLD,		\
-					 (float const *)ADDR,		\
-					 (__v2di)(__m128i)INDEX,	\
-					 (__mmask8)MASK, (int)SCALE)
-
-#define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d)V1OLD,	\
-					  (double const *)ADDR,		\
-					  (__v4di)(__m256i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
-
-#define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)		\
-  (__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d)V1OLD,	\
-					  (double const *)ADDR,		\
-					  (__v2di)(__m128i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
-
-#define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i)V1OLD,	\
-					  (int const *)ADDR,		\
-					  (__v8si)(__m256i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
-
-#define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i)V1OLD,	\
-					  (int const *)ADDR,		\
-					  (__v4si)(__m128i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
-
-#define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i)V1OLD,	\
-					  (long long const *)ADDR,	\
-					  (__v4si)(__m128i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
-
-#define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i)V1OLD,	\
-					  (long long const *)ADDR,	\
-					  (__v4si)(__m128i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
-
-#define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i)V1OLD,	\
-					  (int const *)ADDR,		\
-					  (__v4di)(__m256i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
-
-#define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i)V1OLD,	\
-					  (int const *)ADDR,		\
-					  (__v2di)(__m128i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
-
-#define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i)V1OLD,	\
-					  (long long const *)ADDR,	\
-					  (__v4di)(__m256i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
-
-#define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i)V1OLD,	\
-					  (long long const *)ADDR,	\
-					  (__v2di)(__m128i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
-
-#define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv8sf ((float *)ADDR, (__mmask8)0xFF,		\
-				(__v8si)(__m256i)INDEX,			\
-				(__v8sf)(__m256)V1, (int)SCALE)
-
-#define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scattersiv8sf ((float *)ADDR, (__mmask8)MASK,		\
-				(__v8si)(__m256i)INDEX,			\
-				(__v8sf)(__m256)V1, (int)SCALE)
-
-#define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv4sf ((float *)ADDR, (__mmask8)0xFF,		\
-				(__v4si)(__m128i)INDEX,			\
-				(__v4sf)(__m128)V1, (int)SCALE)
-
-#define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scattersiv4sf ((float *)ADDR, (__mmask8)MASK,		\
-				(__v4si)(__m128i)INDEX,			\
-				(__v4sf)(__m128)V1, (int)SCALE)
-
-#define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv4df ((double *)ADDR, (__mmask8)0xFF,		\
-				(__v4si)(__m128i)INDEX,			\
-				(__v4df)(__m256d)V1, (int)SCALE)
-
-#define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scattersiv4df ((double *)ADDR, (__mmask8)MASK,		\
-				(__v4si)(__m128i)INDEX,			\
-				(__v4df)(__m256d)V1, (int)SCALE)
-
-#define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv2df ((double *)ADDR, (__mmask8)0xFF,		\
-				(__v4si)(__m128i)INDEX,			\
-				(__v2df)(__m128d)V1, (int)SCALE)
-
-#define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scattersiv2df ((double *)ADDR, (__mmask8)MASK,		\
-				(__v4si)(__m128i)INDEX,			\
-				(__v2df)(__m128d)V1, (int)SCALE)
-
-#define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv8sf ((float *)ADDR, (__mmask8)0xFF,		\
-				(__v4di)(__m256i)INDEX,			\
-				(__v4sf)(__m128)V1, (int)SCALE)
-
-#define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scatterdiv8sf ((float *)ADDR, (__mmask8)MASK,		\
-				(__v4di)(__m256i)INDEX,			\
-				(__v4sf)(__m128)V1, (int)SCALE)
-
-#define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv4sf ((float *)ADDR, (__mmask8)0xFF,		\
-				(__v2di)(__m128i)INDEX,			\
-				(__v4sf)(__m128)V1, (int)SCALE)
-
-#define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scatterdiv4sf ((float *)ADDR, (__mmask8)MASK,		\
-				(__v2di)(__m128i)INDEX,			\
-				(__v4sf)(__m128)V1, (int)SCALE)
-
-#define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv4df ((double *)ADDR, (__mmask8)0xFF,		\
-				(__v4di)(__m256i)INDEX,			\
-				(__v4df)(__m256d)V1, (int)SCALE)
-
-#define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scatterdiv4df ((double *)ADDR, (__mmask8)MASK,		\
-				(__v4di)(__m256i)INDEX,			\
-				(__v4df)(__m256d)V1, (int)SCALE)
-
-#define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv2df ((double *)ADDR, (__mmask8)0xFF,		\
-				(__v2di)(__m128i)INDEX,			\
-				(__v2df)(__m128d)V1, (int)SCALE)
-
-#define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scatterdiv2df ((double *)ADDR, (__mmask8)MASK,		\
-				(__v2di)(__m128i)INDEX,			\
-				(__v2df)(__m128d)V1, (int)SCALE)
-
-#define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv8si ((int *)ADDR, (__mmask8)0xFF,		\
-				(__v8si)(__m256i)INDEX,			\
-				(__v8si)(__m256i)V1, (int)SCALE)
-
-#define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)	\
-  __builtin_ia32_scattersiv8si ((int *)ADDR, (__mmask8)MASK,		\
-				(__v8si)(__m256i)INDEX,			\
-				(__v8si)(__m256i)V1, (int)SCALE)
-
-#define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv4si ((int *)ADDR, (__mmask8)0xFF,		\
-				(__v4si)(__m128i)INDEX,			\
-				(__v4si)(__m128i)V1, (int)SCALE)
-
-#define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scattersiv4si ((int *)ADDR, (__mmask8)MASK,		\
-				(__v4si)(__m128i)INDEX,			\
-				(__v4si)(__m128i)V1, (int)SCALE)
-
-#define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv4di ((long long *)ADDR, (__mmask8)0xFF,	\
-				(__v4si)(__m128i)INDEX,			\
-				(__v4di)(__m256i)V1, (int)SCALE)
-
-#define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)	\
-  __builtin_ia32_scattersiv4di ((long long *)ADDR, (__mmask8)MASK,	\
-				(__v4si)(__m128i)INDEX,			\
-				(__v4di)(__m256i)V1, (int)SCALE)
-
-#define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv2di ((long long *)ADDR, (__mmask8)0xFF,	\
-				(__v4si)(__m128i)INDEX,			\
-				(__v2di)(__m128i)V1, (int)SCALE)
-
-#define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scattersiv2di ((long long *)ADDR, (__mmask8)MASK,	\
-				(__v4si)(__m128i)INDEX,			\
-				(__v2di)(__m128i)V1, (int)SCALE)
-
-#define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv8si ((int *)ADDR, (__mmask8)0xFF,		\
-				(__v4di)(__m256i)INDEX,			\
-				(__v4si)(__m128i)V1, (int)SCALE)
-
-#define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)	\
-  __builtin_ia32_scatterdiv8si ((int *)ADDR, (__mmask8)MASK,		\
-				(__v4di)(__m256i)INDEX,			\
-				(__v4si)(__m128i)V1, (int)SCALE)
-
-#define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv4si ((int *)ADDR, (__mmask8)0xFF,		\
-				(__v2di)(__m128i)INDEX,			\
-				(__v4si)(__m128i)V1, (int)SCALE)
-
-#define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scatterdiv4si ((int *)ADDR, (__mmask8)MASK,		\
-				(__v2di)(__m128i)INDEX,			\
-				(__v4si)(__m128i)V1, (int)SCALE)
-
-#define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv4di ((long long *)ADDR, (__mmask8)0xFF,	\
-				(__v4di)(__m256i)INDEX,			\
-				(__v4di)(__m256i)V1, (int)SCALE)
-
-#define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)	\
-  __builtin_ia32_scatterdiv4di ((long long *)ADDR, (__mmask8)MASK,	\
-				(__v4di)(__m256i)INDEX,			\
-				(__v4di)(__m256i)V1, (int)SCALE)
-
-#define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv2di ((long long *)ADDR, (__mmask8)0xFF,	\
-				(__v2di)(__m128i)INDEX,			\
-				(__v2di)(__m128i)V1, (int)SCALE)
-
-#define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scatterdiv2di ((long long *)ADDR, (__mmask8)MASK,	\
-				(__v2di)(__m128i)INDEX,			\
-				(__v2di)(__m128i)V1, (int)SCALE)
-
-#define _mm256_mask_shuffle_epi32(W, U, X, C)                                       \
-  ((__m256i)  __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C),        \
-                                             (__v8si)(__m256i)(W),                  \
-                                             (__mmask8)(U)))
-
-#define _mm256_maskz_shuffle_epi32(U, X, C)                                         \
-  ((__m256i)  __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C),        \
-                                             (__v8si)(__m256i)_mm256_setzero_si256(),  \
-                                             (__mmask8)(U)))
-
-#define _mm_mask_shuffle_epi32(W, U, X, C)                                          \
-  ((__m128i)  __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C),        \
-                                             (__v4si)(__m128i)(W),                  \
-                                             (__mmask8)(U)))
-
-#define _mm_maskz_shuffle_epi32(U, X, C)                                            \
-  ((__m128i)  __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C),        \
-                                             (__v4si)(__m128i)_mm_setzero_si128 (),     \
-                                             (__mmask8)(U)))
-
-#define _mm256_rol_epi64(A, B)                                                 \
-  ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B),      \
-                                          (__v4di)(__m256i)_mm256_setzero_si256 (),\
-                                          (__mmask8)-1))
-
-#define _mm256_mask_rol_epi64(W, U, A, B)                                      \
-  ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B),      \
-                                          (__v4di)(__m256i)(W),                \
-                                          (__mmask8)(U)))
-
-#define _mm256_maskz_rol_epi64(U, A, B)                                        \
-  ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B),      \
-                                          (__v4di)(__m256i)_mm256_setzero_si256 (),\
-                                          (__mmask8)(U)))
-
-#define _mm_rol_epi64(A, B)                                                    \
-  ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B),      \
-                                          (__v2di)(__m128i)_mm_setzero_di(),   \
-                                          (__mmask8)-1))
-
-#define _mm_mask_rol_epi64(W, U, A, B)                                         \
-  ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B),      \
-                                          (__v2di)(__m128i)(W),                \
-                                          (__mmask8)(U)))
-
-#define _mm_maskz_rol_epi64(U, A, B)                                           \
-  ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B),      \
-                                          (__v2di)(__m128i)_mm_setzero_di(),   \
-                                          (__mmask8)(U)))
-
-#define _mm256_ror_epi64(A, B)                                                 \
-  ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B),      \
-                                          (__v4di)(__m256i)_mm256_setzero_si256 (),\
-                                          (__mmask8)-1))
-
-#define _mm256_mask_ror_epi64(W, U, A, B)                                      \
-  ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B),      \
-                                          (__v4di)(__m256i)(W),                \
-                                          (__mmask8)(U)))
-
-#define _mm256_maskz_ror_epi64(U, A, B)                                        \
-  ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B),      \
-                                          (__v4di)(__m256i)_mm256_setzero_si256 (),\
-                                          (__mmask8)(U)))
-
-#define _mm_ror_epi64(A, B)                                                    \
-  ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B),      \
-                                          (__v2di)(__m128i)_mm_setzero_di(),   \
-                                          (__mmask8)-1))
-
-#define _mm_mask_ror_epi64(W, U, A, B)                                         \
-  ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B),      \
-                                          (__v2di)(__m128i)(W),                \
-                                          (__mmask8)(U)))
-
-#define _mm_maskz_ror_epi64(U, A, B)                                           \
-  ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B),      \
-                                          (__v2di)(__m128i)_mm_setzero_di(),   \
-                                          (__mmask8)(U)))
-
-#define _mm256_rol_epi32(A, B)                                                 \
-  ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B),      \
-                                          (__v8si)(__m256i)_mm256_setzero_si256(),\
-                                          (__mmask8)-1))
-
-#define _mm256_mask_rol_epi32(W, U, A, B)                                      \
-  ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B),      \
-                                          (__v8si)(__m256i)(W),                \
-                                          (__mmask8)(U)))
-
-#define _mm256_maskz_rol_epi32(U, A, B)                                        \
-  ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B),      \
-                                          (__v8si)(__m256i)_mm256_setzero_si256(),\
-                                          (__mmask8)(U)))
-
-#define _mm_rol_epi32(A, B)                                                    \
-  ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B),      \
-                                          (__v4si)(__m128i)_mm_setzero_si128 (),   \
-                                          (__mmask8)-1))
-
-#define _mm_mask_rol_epi32(W, U, A, B)                                         \
-  ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B),      \
-                                          (__v4si)(__m128i)(W),                \
-                                          (__mmask8)(U)))
-
-#define _mm_maskz_rol_epi32(U, A, B)                                           \
-  ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B),      \
-                                          (__v4si)(__m128i)_mm_setzero_si128 (),   \
-                                          (__mmask8)(U)))
-
-#define _mm256_ror_epi32(A, B)                                                 \
-  ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B),      \
-                                          (__v8si)(__m256i)_mm256_setzero_si256(),\
-                                          (__mmask8)-1))
-
-#define _mm256_mask_ror_epi32(W, U, A, B)                                      \
-  ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B),      \
-                                          (__v8si)(__m256i)(W),                \
-                                          (__mmask8)(U)))
-
-#define _mm256_maskz_ror_epi32(U, A, B)                                        \
-  ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B),      \
-                                          (__v8si)(__m256i)_mm256_setzero_si256(),\
-                                          (__mmask8)(U)))
-
-#define _mm_ror_epi32(A, B)                                                    \
-  ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B),      \
-                                          (__v4si)(__m128i)_mm_setzero_si128 (),   \
-                                          (__mmask8)-1))
-
-#define _mm_mask_ror_epi32(W, U, A, B)                                         \
-  ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B),      \
-                                          (__v4si)(__m128i)(W),                \
-                                          (__mmask8)(U)))
-
-#define _mm_maskz_ror_epi32(U, A, B)                                           \
-  ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B),      \
-                                          (__v4si)(__m128i)_mm_setzero_si128 (),   \
-                                          (__mmask8)(U)))
-
-#define _mm256_alignr_epi32(X, Y, C)                                        \
-    ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X),          \
-        (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)(X), (__mmask8)-1))
-
-#define _mm256_mask_alignr_epi32(W, U, X, Y, C)                             \
-    ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X),          \
-        (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)(W), (__mmask8)(U)))
-
-#define _mm256_maskz_alignr_epi32(U, X, Y, C)                               \
-    ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X),          \
-        (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)_mm256_setzero_si256 (),\
-        (__mmask8)(U)))
-
-#define _mm256_alignr_epi64(X, Y, C)                                        \
-    ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X),          \
-        (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)(X), (__mmask8)-1))
-
-#define _mm256_mask_alignr_epi64(W, U, X, Y, C)                             \
-    ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X),          \
-        (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)(W), (__mmask8)(U)))
-
-#define _mm256_maskz_alignr_epi64(U, X, Y, C)                               \
-    ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X),          \
-        (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)_mm256_setzero_si256 (),\
-        (__mmask8)(U)))
-
-#define _mm_alignr_epi32(X, Y, C)                                           \
-    ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X),          \
-        (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)(X), (__mmask8)-1))
-
-#define _mm_mask_alignr_epi32(W, U, X, Y, C)                                \
-    ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X),          \
-        (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)(W), (__mmask8)(U)))
-
-#define _mm_maskz_alignr_epi32(U, X, Y, C)                                  \
-    ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X),          \
-        (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)_mm_setzero_si128(),\
-        (__mmask8)(U)))
-
-#define _mm_alignr_epi64(X, Y, C)                                           \
-    ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X),          \
-        (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(X), (__mmask8)-1))
-
-#define _mm_mask_alignr_epi64(W, U, X, Y, C)                                \
-    ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X),          \
-        (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(X), (__mmask8)-1))
-
-#define _mm_maskz_alignr_epi64(U, X, Y, C)                                  \
-    ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X),          \
-        (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)_mm_setzero_si128(),\
-        (__mmask8)(U)))
-
-#define _mm_mask_cvtps_ph(W, U, A, I)						\
-  ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) A, (int) (I),      \
-      (__v8hi)(__m128i) (W), (__mmask8) (U)))
-
-#define _mm_maskz_cvtps_ph(U, A, I)						\
-  ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) A, (int) (I),      \
-      (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))
-
-#define _mm256_mask_cvtps_ph(W, U, A, I)					\
-  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) A, (int) (I),	\
-      (__v8hi)(__m128i) (W), (__mmask8) (U)))
-
-#define _mm256_maskz_cvtps_ph(U, A, I)						\
-  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) A, (int) (I),   \
-      (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))
-
-#define _mm256_mask_srai_epi32(W, U, A, B)				\
-  ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A),	\
-    (int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))
-
-#define _mm256_maskz_srai_epi32(U, A, B)				\
-  ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A),	\
-    (int)(B), (__v8si)_mm256_setzero_si256(), (__mmask8)(U)))
-
-#define _mm_mask_srai_epi32(W, U, A, B)                                 \
-  ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A),       \
-    (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))
-
-#define _mm_maskz_srai_epi32(U, A, B)                                   \
-  ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A),       \
-    (int)(B), (__v4si)_mm_setzero_si128(), (__mmask8)(U)))
-
-#define _mm256_srai_epi64(A, B)						\
-  ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A),	\
-    (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)-1))
-
-#define _mm256_mask_srai_epi64(W, U, A, B)				\
-  ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A),	\
-    (int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))
-
-#define _mm256_maskz_srai_epi64(U, A, B)				\
-  ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A),	\
-    (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
-
-#define _mm_srai_epi64(A, B)						\
-  ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A),       \
-    (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)-1))
-
-#define _mm_mask_srai_epi64(W, U, A, B)                                 \
-  ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A),       \
-    (int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))
-
-#define _mm_maskz_srai_epi64(U, A, B)                                   \
-  ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A),       \
-    (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
-
-#define _mm256_mask_permutex_pd(W, U, A, B)                             \
-  ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A),       \
-    (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))
-
-#define _mm256_maskz_permutex_pd(U, A, B)				\
-  ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A),       \
-    (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U)))
-
-#define _mm256_mask_permute_pd(W, U, X, C)					    \
-  ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C),	    \
-					      (__v4df)(__m256d)(W),		    \
-					      (__mmask8)(U)))
-
-#define _mm256_maskz_permute_pd(U, X, C)					    \
-  ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C),	    \
-					      (__v4df)(__m256d)_mm256_setzero_pd(), \
-					      (__mmask8)(U)))
-
-#define _mm256_mask_permute_ps(W, U, X, C)					    \
-  ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C),	    \
-					      (__v8sf)(__m256)(W), (__mmask8)(U)))
-
-#define _mm256_maskz_permute_ps(U, X, C)					    \
-  ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C),	    \
-					      (__v8sf)(__m256)_mm256_setzero_ps(),  \
-					      (__mmask8)(U)))
-
-#define _mm_mask_permute_pd(W, U, X, C)						    \
-  ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C),	    \
-					    (__v2df)(__m128d)(W), (__mmask8)(U)))
-
-#define _mm_maskz_permute_pd(U, X, C)						    \
-  ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C),	    \
-					    (__v2df)(__m128d)_mm_setzero_pd(),	    \
-					    (__mmask8)(U)))
-
-#define _mm_mask_permute_ps(W, U, X, C)						    \
-  ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C),	    \
-					  (__v4sf)(__m128)(W), (__mmask8)(U)))
-
-#define _mm_maskz_permute_ps(U, X, C)						    \
-  ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C),	    \
-					  (__v4sf)(__m128)_mm_setzero_ps(),	    \
-					  (__mmask8)(U)))
-
-#define _mm256_mask_blend_pd(__U, __A, __W)			      \
-  ((__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) (__A),	      \
-						     (__v4df) (__W),  \
-						     (__mmask8) (__U)))
-
-#define _mm256_mask_blend_ps(__U, __A, __W)			      \
-  ((__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) (__A),	      \
-						    (__v8sf) (__W),   \
-						    (__mmask8) (__U)))
-
-#define _mm256_mask_blend_epi64(__U, __A, __W)			      \
-  ((__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) (__A),	      \
-						    (__v4di) (__W),   \
-						    (__mmask8) (__U)))
-
-#define _mm256_mask_blend_epi32(__U, __A, __W)			      \
-  ((__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) (__A),	      \
-						    (__v8si) (__W),   \
-						    (__mmask8) (__U)))
-
-#define _mm_mask_blend_pd(__U, __A, __W)			      \
-  ((__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) (__A),	      \
-						     (__v2df) (__W),  \
-						     (__mmask8) (__U)))
-
-#define _mm_mask_blend_ps(__U, __A, __W)			      \
-  ((__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) (__A),	      \
-						    (__v4sf) (__W),   \
-						    (__mmask8) (__U)))
-
-#define _mm_mask_blend_epi64(__U, __A, __W)			      \
-  ((__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) (__A),	      \
-						    (__v2di) (__W),   \
-						    (__mmask8) (__U)))
-
-#define _mm_mask_blend_epi32(__U, __A, __W)			      \
-  ((__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) (__A),	      \
-						    (__v4si) (__W),   \
-						    (__mmask8) (__U)))
-
-#define _mm256_cmp_epu32_mask(X, Y, P)					\
-  ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X),	\
-					    (__v8si)(__m256i)(Y), (int)(P),\
-					    (__mmask8)-1))
-
-#define _mm256_cmp_epi64_mask(X, Y, P)					\
-  ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i)(X),	\
-					   (__v4di)(__m256i)(Y), (int)(P),\
-					   (__mmask8)-1))
-
-#define _mm256_cmp_epi32_mask(X, Y, P)					\
-  ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i)(X),	\
-					   (__v8si)(__m256i)(Y), (int)(P),\
-					   (__mmask8)-1))
-
-#define _mm256_cmp_epu64_mask(X, Y, P)					\
-  ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i)(X),	\
-					    (__v4di)(__m256i)(Y), (int)(P),\
-					    (__mmask8)-1))
-
-#define _mm256_cmp_pd_mask(X, Y, P)					\
-  ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df)(__m256d)(X),	\
-					    (__v4df)(__m256d)(Y), (int)(P),\
-					    (__mmask8)-1))
-
-#define _mm256_cmp_ps_mask(X, Y, P)					\
-  ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf)(__m256)(X),	\
-					     (__v8sf)(__m256)(Y), (int)(P),\
-					     (__mmask8)-1))
-
-#define _mm256_mask_cmp_epi64_mask(M, X, Y, P)				\
-  ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i)(X),	\
-					   (__v4di)(__m256i)(Y), (int)(P),\
-					   (__mmask8)(M)))
-
-#define _mm256_mask_cmp_epi32_mask(M, X, Y, P)				\
-  ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i)(X),	\
-					   (__v8si)(__m256i)(Y), (int)(P),\
-					   (__mmask8)(M)))
-
-#define _mm256_mask_cmp_epu64_mask(M, X, Y, P)				\
-  ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i)(X),	\
-					    (__v4di)(__m256i)(Y), (int)(P),\
-					    (__mmask8)(M)))
-
-#define _mm256_mask_cmp_epu32_mask(M, X, Y, P)				\
-  ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X),	\
-					    (__v8si)(__m256i)(Y), (int)(P),\
-					    (__mmask8)(M)))
-
-#define _mm256_mask_cmp_pd_mask(M, X, Y, P)				\
-  ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df)(__m256d)(X),	\
-					    (__v4df)(__m256d)(Y), (int)(P),\
-					    (__mmask8)(M)))
-
-#define _mm256_mask_cmp_ps_mask(M, X, Y, P)				\
-  ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf)(__m256)(X),	\
-					     (__v8sf)(__m256)(Y), (int)(P),\
-					     (__mmask8)(M)))
-
-#define _mm_cmp_epi64_mask(X, Y, P)					\
-  ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di)(__m128i)(X),	\
-					   (__v2di)(__m128i)(Y), (int)(P),\
-					   (__mmask8)-1))
-
-#define _mm_cmp_epi32_mask(X, Y, P)					\
-  ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si)(__m128i)(X),	\
-					   (__v4si)(__m128i)(Y), (int)(P),\
-					   (__mmask8)-1))
-
-#define _mm_cmp_epu64_mask(X, Y, P)					\
-  ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di)(__m128i)(X),	\
-					    (__v2di)(__m128i)(Y), (int)(P),\
-					    (__mmask8)-1))
-
-#define _mm_cmp_epu32_mask(X, Y, P)					\
-  ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si)(__m128i)(X),	\
-					    (__v4si)(__m128i)(Y), (int)(P),\
-					    (__mmask8)-1))
-
-#define _mm_cmp_pd_mask(X, Y, P)					\
-  ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df)(__m128d)(X),	\
-					    (__v2df)(__m128d)(Y), (int)(P),\
-					    (__mmask8)-1))
-
-#define _mm_cmp_ps_mask(X, Y, P)					\
-  ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf)(__m128)(X),	\
-					     (__v4sf)(__m128)(Y), (int)(P),\
-					     (__mmask8)-1))
-
-#define _mm_mask_cmp_epi64_mask(M, X, Y, P)				\
-  ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di)(__m128i)(X),	\
-					   (__v2di)(__m128i)(Y), (int)(P),\
-					   (__mmask8)(M)))
-
-#define _mm_mask_cmp_epi32_mask(M, X, Y, P)				\
-  ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si)(__m128i)(X),	\
-					   (__v4si)(__m128i)(Y), (int)(P),\
-					   (__mmask8)(M)))
-
-#define _mm_mask_cmp_epu64_mask(M, X, Y, P)				\
-  ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di)(__m128i)(X),	\
-					    (__v2di)(__m128i)(Y), (int)(P),\
-					    (__mmask8)(M)))
-
-#define _mm_mask_cmp_epu32_mask(M, X, Y, P)				\
-  ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si)(__m128i)(X),	\
-					    (__v4si)(__m128i)(Y), (int)(P),\
-					    (__mmask8)(M)))
-
-#define _mm_mask_cmp_pd_mask(M, X, Y, P)				\
-  ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df)(__m128d)(X),	\
-					    (__v2df)(__m128d)(Y), (int)(P),\
-					    (__mmask8)(M)))
-
-#define _mm_mask_cmp_ps_mask(M, X, Y, P)				\
-  ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf)(__m128)(X),	\
-					     (__v4sf)(__m128)(Y), (int)(P),\
-					     (__mmask8)(M)))
-
-#endif
-
-#define _mm256_permutexvar_ps(A, B)	_mm256_permutevar8x32_ps((B), (A))
-
-#ifdef __DISABLE_AVX512VL__
-#undef __DISABLE_AVX512VL__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512VL__ */
-
-#endif /* _AVX512VLINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avxintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/avxintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,1467 +1,0 @@
-/* Copyright (C) 2008-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* Implemented from the specification included in the Intel C++ Compiler
-   User Guide and Reference, version 11.0.  */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-# error "Never use <avxintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVXINTRIN_H_INCLUDED
-#define _AVXINTRIN_H_INCLUDED
-
-#ifndef __AVX__
-#pragma GCC push_options
-#pragma GCC target("avx")
-#define __DISABLE_AVX__
-#endif /* __AVX__ */
-
-/* Internal data types for implementing the intrinsics.  */
-typedef double __v4df __attribute__ ((__vector_size__ (32)));
-typedef float __v8sf __attribute__ ((__vector_size__ (32)));
-typedef long long __v4di __attribute__ ((__vector_size__ (32)));
-typedef unsigned long long __v4du __attribute__ ((__vector_size__ (32)));
-typedef int __v8si __attribute__ ((__vector_size__ (32)));
-typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));
-typedef short __v16hi __attribute__ ((__vector_size__ (32)));
-typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));
-typedef char __v32qi __attribute__ ((__vector_size__ (32)));
-typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32)));
-
-/* The Intel API is flexible enough that we must allow aliasing with other
-   vector types, and their scalar components.  */
-typedef float __m256 __attribute__ ((__vector_size__ (32),
-				     __may_alias__));
-typedef long long __m256i __attribute__ ((__vector_size__ (32),
-					  __may_alias__));
-typedef double __m256d __attribute__ ((__vector_size__ (32),
-				       __may_alias__));
-
-/* Compare predicates for scalar and packed compare intrinsics.  */
-
-/* Equal (ordered, non-signaling)  */
-#define _CMP_EQ_OQ	0x00
-/* Less-than (ordered, signaling)  */
-#define _CMP_LT_OS	0x01
-/* Less-than-or-equal (ordered, signaling)  */
-#define _CMP_LE_OS	0x02
-/* Unordered (non-signaling)  */
-#define _CMP_UNORD_Q	0x03
-/* Not-equal (unordered, non-signaling)  */
-#define _CMP_NEQ_UQ	0x04
-/* Not-less-than (unordered, signaling)  */
-#define _CMP_NLT_US	0x05
-/* Not-less-than-or-equal (unordered, signaling)  */
-#define _CMP_NLE_US	0x06
-/* Ordered (nonsignaling)   */
-#define _CMP_ORD_Q	0x07
-/* Equal (unordered, non-signaling)  */
-#define _CMP_EQ_UQ	0x08
-/* Not-greater-than-or-equal (unordered, signaling)  */
-#define _CMP_NGE_US	0x09
-/* Not-greater-than (unordered, signaling)  */
-#define _CMP_NGT_US	0x0a
-/* False (ordered, non-signaling)  */
-#define _CMP_FALSE_OQ	0x0b
-/* Not-equal (ordered, non-signaling)  */
-#define _CMP_NEQ_OQ	0x0c
-/* Greater-than-or-equal (ordered, signaling)  */
-#define _CMP_GE_OS	0x0d
-/* Greater-than (ordered, signaling)  */
-#define _CMP_GT_OS	0x0e
-/* True (unordered, non-signaling)  */
-#define _CMP_TRUE_UQ	0x0f
-/* Equal (ordered, signaling)  */
-#define _CMP_EQ_OS	0x10
-/* Less-than (ordered, non-signaling)  */
-#define _CMP_LT_OQ	0x11
-/* Less-than-or-equal (ordered, non-signaling)  */
-#define _CMP_LE_OQ	0x12
-/* Unordered (signaling)  */
-#define _CMP_UNORD_S	0x13
-/* Not-equal (unordered, signaling)  */
-#define _CMP_NEQ_US	0x14
-/* Not-less-than (unordered, non-signaling)  */
-#define _CMP_NLT_UQ	0x15
-/* Not-less-than-or-equal (unordered, non-signaling)  */
-#define _CMP_NLE_UQ	0x16
-/* Ordered (signaling)  */
-#define _CMP_ORD_S	0x17
-/* Equal (unordered, signaling)  */
-#define _CMP_EQ_US	0x18
-/* Not-greater-than-or-equal (unordered, non-signaling)  */
-#define _CMP_NGE_UQ	0x19
-/* Not-greater-than (unordered, non-signaling)  */
-#define _CMP_NGT_UQ	0x1a
-/* False (ordered, signaling)  */
-#define _CMP_FALSE_OS	0x1b
-/* Not-equal (ordered, signaling)  */
-#define _CMP_NEQ_OS	0x1c
-/* Greater-than-or-equal (ordered, non-signaling)  */
-#define _CMP_GE_OQ	0x1d
-/* Greater-than (ordered, non-signaling)  */
-#define _CMP_GT_OQ	0x1e
-/* True (unordered, signaling)  */
-#define _CMP_TRUE_US	0x1f
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_add_pd (__m256d __A, __m256d __B)
-{
-  return (__m256d) ((__v4df)__A + (__v4df)__B);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_add_ps (__m256 __A, __m256 __B)
-{
-  return (__m256) ((__v8sf)__A + (__v8sf)__B);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_addsub_pd (__m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_addsubpd256 ((__v4df)__A, (__v4df)__B);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_addsub_ps (__m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_addsubps256 ((__v8sf)__A, (__v8sf)__B);
-}
-
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_and_pd (__m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_andpd256 ((__v4df)__A, (__v4df)__B);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_and_ps (__m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_andps256 ((__v8sf)__A, (__v8sf)__B);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_andnot_pd (__m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_andnpd256 ((__v4df)__A, (__v4df)__B);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_andnot_ps (__m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_andnps256 ((__v8sf)__A, (__v8sf)__B);
-}
-
-/* Double/single precision floating point blend instructions - select
-   data from 2 sources using constant/variable mask.  */
-
-#ifdef __OPTIMIZE__
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_blend_pd (__m256d __X, __m256d __Y, const int __M)
-{
-  return (__m256d) __builtin_ia32_blendpd256 ((__v4df)__X,
-					      (__v4df)__Y,
-					      __M);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_blend_ps (__m256 __X, __m256 __Y, const int __M)
-{
-  return (__m256) __builtin_ia32_blendps256 ((__v8sf)__X,
-					     (__v8sf)__Y,
-					     __M);
-}
-#else
-#define _mm256_blend_pd(X, Y, M)					\
-  ((__m256d) __builtin_ia32_blendpd256 ((__v4df)(__m256d)(X),		\
-					(__v4df)(__m256d)(Y), (int)(M)))
-
-#define _mm256_blend_ps(X, Y, M)					\
-  ((__m256) __builtin_ia32_blendps256 ((__v8sf)(__m256)(X),		\
-				       (__v8sf)(__m256)(Y), (int)(M)))
-#endif
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_blendv_pd (__m256d __X, __m256d __Y, __m256d __M)
-{
-  return (__m256d) __builtin_ia32_blendvpd256 ((__v4df)__X,
-					       (__v4df)__Y,
-					       (__v4df)__M);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_blendv_ps (__m256 __X, __m256 __Y, __m256 __M)
-{
-  return (__m256) __builtin_ia32_blendvps256 ((__v8sf)__X,
-					      (__v8sf)__Y,
-					      (__v8sf)__M);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_div_pd (__m256d __A, __m256d __B)
-{
-  return (__m256d) ((__v4df)__A / (__v4df)__B);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_div_ps (__m256 __A, __m256 __B)
-{
-  return (__m256) ((__v8sf)__A / (__v8sf)__B);
-}
-
-/* Dot product instructions with mask-defined summing and zeroing parts
-   of result.  */
-
-#ifdef __OPTIMIZE__
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_dp_ps (__m256 __X, __m256 __Y, const int __M)
-{
-  return (__m256) __builtin_ia32_dpps256 ((__v8sf)__X,
-					  (__v8sf)__Y,
-					  __M);
-}
-#else
-#define _mm256_dp_ps(X, Y, M)						\
-  ((__m256) __builtin_ia32_dpps256 ((__v8sf)(__m256)(X),		\
-				    (__v8sf)(__m256)(Y), (int)(M)))
-#endif
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_hadd_pd (__m256d __X, __m256d __Y)
-{
-  return (__m256d) __builtin_ia32_haddpd256 ((__v4df)__X, (__v4df)__Y);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_hadd_ps (__m256 __X, __m256 __Y)
-{
-  return (__m256) __builtin_ia32_haddps256 ((__v8sf)__X, (__v8sf)__Y);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_hsub_pd (__m256d __X, __m256d __Y)
-{
-  return (__m256d) __builtin_ia32_hsubpd256 ((__v4df)__X, (__v4df)__Y);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_hsub_ps (__m256 __X, __m256 __Y)
-{
-  return (__m256) __builtin_ia32_hsubps256 ((__v8sf)__X, (__v8sf)__Y);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_max_pd (__m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_maxpd256 ((__v4df)__A, (__v4df)__B);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_max_ps (__m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_maxps256 ((__v8sf)__A, (__v8sf)__B);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_min_pd (__m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_minpd256 ((__v4df)__A, (__v4df)__B);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_min_ps (__m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_minps256 ((__v8sf)__A, (__v8sf)__B);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mul_pd (__m256d __A, __m256d __B)
-{
-  return (__m256d) ((__v4df)__A * (__v4df)__B);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mul_ps (__m256 __A, __m256 __B)
-{
-  return (__m256) ((__v8sf)__A * (__v8sf)__B);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_or_pd (__m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_orpd256 ((__v4df)__A, (__v4df)__B);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_or_ps (__m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_orps256 ((__v8sf)__A, (__v8sf)__B);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_shuffle_pd (__m256d __A, __m256d __B, const int __mask)
-{
-  return (__m256d) __builtin_ia32_shufpd256 ((__v4df)__A, (__v4df)__B,
-					     __mask);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_shuffle_ps (__m256 __A, __m256 __B, const int __mask)
-{
-  return (__m256) __builtin_ia32_shufps256 ((__v8sf)__A, (__v8sf)__B,
-					    __mask);
-}
-#else
-#define _mm256_shuffle_pd(A, B, N)					\
-  ((__m256d)__builtin_ia32_shufpd256 ((__v4df)(__m256d)(A),		\
-				      (__v4df)(__m256d)(B), (int)(N)))
-
-#define _mm256_shuffle_ps(A, B, N)					\
-  ((__m256) __builtin_ia32_shufps256 ((__v8sf)(__m256)(A),		\
-				      (__v8sf)(__m256)(B), (int)(N)))
-#endif
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_sub_pd (__m256d __A, __m256d __B)
-{
-  return (__m256d) ((__v4df)__A - (__v4df)__B);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_sub_ps (__m256 __A, __m256 __B)
-{
-  return (__m256) ((__v8sf)__A - (__v8sf)__B);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_xor_pd (__m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_xorpd256 ((__v4df)__A, (__v4df)__B);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_xor_ps (__m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_xorps256 ((__v8sf)__A, (__v8sf)__B);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_pd (__m128d __X, __m128d __Y, const int __P)
-{
-  return (__m128d) __builtin_ia32_cmppd ((__v2df)__X, (__v2df)__Y, __P);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_ps (__m128 __X, __m128 __Y, const int __P)
-{
-  return (__m128) __builtin_ia32_cmpps ((__v4sf)__X, (__v4sf)__Y, __P);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmp_pd (__m256d __X, __m256d __Y, const int __P)
-{
-  return (__m256d) __builtin_ia32_cmppd256 ((__v4df)__X, (__v4df)__Y,
-					    __P);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cmp_ps (__m256 __X, __m256 __Y, const int __P)
-{
-  return (__m256) __builtin_ia32_cmpps256 ((__v8sf)__X, (__v8sf)__Y,
-					   __P);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_sd (__m128d __X, __m128d __Y, const int __P)
-{
-  return (__m128d) __builtin_ia32_cmpsd ((__v2df)__X, (__v2df)__Y, __P);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_ss (__m128 __X, __m128 __Y, const int __P)
-{
-  return (__m128) __builtin_ia32_cmpss ((__v4sf)__X, (__v4sf)__Y, __P);
-}
-#else
-#define _mm_cmp_pd(X, Y, P)						\
-  ((__m128d) __builtin_ia32_cmppd ((__v2df)(__m128d)(X),		\
-				   (__v2df)(__m128d)(Y), (int)(P)))
-
-#define _mm_cmp_ps(X, Y, P)						\
-  ((__m128) __builtin_ia32_cmpps ((__v4sf)(__m128)(X),			\
-				  (__v4sf)(__m128)(Y), (int)(P)))
-
-#define _mm256_cmp_pd(X, Y, P)						\
-  ((__m256d) __builtin_ia32_cmppd256 ((__v4df)(__m256d)(X),		\
-				      (__v4df)(__m256d)(Y), (int)(P)))
-
-#define _mm256_cmp_ps(X, Y, P)						\
-  ((__m256) __builtin_ia32_cmpps256 ((__v8sf)(__m256)(X),		\
-				     (__v8sf)(__m256)(Y), (int)(P)))
-
-#define _mm_cmp_sd(X, Y, P)						\
-  ((__m128d) __builtin_ia32_cmpsd ((__v2df)(__m128d)(X),		\
-				   (__v2df)(__m128d)(Y), (int)(P)))
-
-#define _mm_cmp_ss(X, Y, P)						\
-  ((__m128) __builtin_ia32_cmpss ((__v4sf)(__m128)(X),			\
-				  (__v4sf)(__m128)(Y), (int)(P)))
-#endif
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepi32_pd (__m128i __A)
-{
-  return (__m256d)__builtin_ia32_cvtdq2pd256 ((__v4si) __A);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtepi32_ps (__m256i __A)
-{
-  return (__m256)__builtin_ia32_cvtdq2ps256 ((__v8si) __A);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtpd_ps (__m256d __A)
-{
-  return (__m128)__builtin_ia32_cvtpd2ps256 ((__v4df) __A);
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtps_epi32 (__m256 __A)
-{
-  return (__m256i)__builtin_ia32_cvtps2dq256 ((__v8sf) __A);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtps_pd (__m128 __A)
-{
-  return (__m256d)__builtin_ia32_cvtps2pd256 ((__v4sf) __A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvttpd_epi32 (__m256d __A)
-{
-  return (__m128i)__builtin_ia32_cvttpd2dq256 ((__v4df) __A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtpd_epi32 (__m256d __A)
-{
-  return (__m128i)__builtin_ia32_cvtpd2dq256 ((__v4df) __A);
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvttps_epi32 (__m256 __A)
-{
-  return (__m256i)__builtin_ia32_cvttps2dq256 ((__v8sf) __A);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_extractf128_pd (__m256d __X, const int __N)
-{
-  return (__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df)__X, __N);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_extractf128_ps (__m256 __X, const int __N)
-{
-  return (__m128) __builtin_ia32_vextractf128_ps256 ((__v8sf)__X, __N);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_extractf128_si256 (__m256i __X, const int __N)
-{
-  return (__m128i) __builtin_ia32_vextractf128_si256 ((__v8si)__X, __N);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_extract_epi32 (__m256i __X, int const __N)
-{
-  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 2);
-  return _mm_extract_epi32 (__Y, __N % 4);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_extract_epi16 (__m256i __X, int const __N)
-{
-  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 3);
-  return _mm_extract_epi16 (__Y, __N % 8);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_extract_epi8 (__m256i __X, int const __N)
-{
-  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 4);
-  return _mm_extract_epi8 (__Y, __N % 16);
-}
-
-#ifdef __x86_64__
-extern __inline long long  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_extract_epi64 (__m256i __X, const int __N)
-{
-  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 1);
-  return _mm_extract_epi64 (__Y, __N % 2);
-}
-#endif
-#else
-#define _mm256_extractf128_pd(X, N)					\
-  ((__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df)(__m256d)(X),	\
-						(int)(N)))
-
-#define _mm256_extractf128_ps(X, N)					\
-  ((__m128) __builtin_ia32_vextractf128_ps256 ((__v8sf)(__m256)(X),	\
-					       (int)(N)))
-
-#define _mm256_extractf128_si256(X, N)					\
-  ((__m128i) __builtin_ia32_vextractf128_si256 ((__v8si)(__m256i)(X),	\
-						(int)(N)))
-
-#define _mm256_extract_epi32(X, N)					\
-  (__extension__							\
-   ({									\
-      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 2);		\
-      _mm_extract_epi32 (__Y, (N) % 4);					\
-    }))
-
-#define _mm256_extract_epi16(X, N)					\
-  (__extension__							\
-   ({									\
-      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 3);		\
-      _mm_extract_epi16 (__Y, (N) % 8);					\
-    }))
-
-#define _mm256_extract_epi8(X, N)					\
-  (__extension__							\
-   ({									\
-      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 4);		\
-      _mm_extract_epi8 (__Y, (N) % 16);					\
-    }))
-
-#ifdef __x86_64__
-#define _mm256_extract_epi64(X, N)					\
-  (__extension__							\
-   ({									\
-      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 1);		\
-      _mm_extract_epi64 (__Y, (N) % 2);					\
-    }))
-#endif
-#endif
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_zeroall (void)
-{
-  __builtin_ia32_vzeroall ();
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_zeroupper (void)
-{
-  __builtin_ia32_vzeroupper ();
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_permutevar_pd (__m128d __A, __m128i __C)
-{
-  return (__m128d) __builtin_ia32_vpermilvarpd ((__v2df)__A,
-						(__v2di)__C);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permutevar_pd (__m256d __A, __m256i __C)
-{
-  return (__m256d) __builtin_ia32_vpermilvarpd256 ((__v4df)__A,
-						   (__v4di)__C);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_permutevar_ps (__m128 __A, __m128i __C)
-{
-  return (__m128) __builtin_ia32_vpermilvarps ((__v4sf)__A,
-					       (__v4si)__C);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permutevar_ps (__m256 __A, __m256i __C)
-{
-  return (__m256) __builtin_ia32_vpermilvarps256 ((__v8sf)__A,
-						  (__v8si)__C);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_permute_pd (__m128d __X, const int __C)
-{
-  return (__m128d) __builtin_ia32_vpermilpd ((__v2df)__X, __C);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permute_pd (__m256d __X, const int __C)
-{
-  return (__m256d) __builtin_ia32_vpermilpd256 ((__v4df)__X, __C);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_permute_ps (__m128 __X, const int __C)
-{
-  return (__m128) __builtin_ia32_vpermilps ((__v4sf)__X, __C);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permute_ps (__m256 __X, const int __C)
-{
-  return (__m256) __builtin_ia32_vpermilps256 ((__v8sf)__X, __C);
-}
-#else
-#define _mm_permute_pd(X, C)						\
-  ((__m128d) __builtin_ia32_vpermilpd ((__v2df)(__m128d)(X), (int)(C)))
-
-#define _mm256_permute_pd(X, C)						\
-  ((__m256d) __builtin_ia32_vpermilpd256 ((__v4df)(__m256d)(X),	(int)(C)))
-
-#define _mm_permute_ps(X, C)						\
-  ((__m128) __builtin_ia32_vpermilps ((__v4sf)(__m128)(X), (int)(C)))
-
-#define _mm256_permute_ps(X, C)						\
-  ((__m256) __builtin_ia32_vpermilps256 ((__v8sf)(__m256)(X), (int)(C)))
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permute2f128_pd (__m256d __X, __m256d __Y, const int __C)
-{
-  return (__m256d) __builtin_ia32_vperm2f128_pd256 ((__v4df)__X,
-						    (__v4df)__Y,
-						    __C);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permute2f128_ps (__m256 __X, __m256 __Y, const int __C)
-{
-  return (__m256) __builtin_ia32_vperm2f128_ps256 ((__v8sf)__X,
-						   (__v8sf)__Y,
-						   __C);
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permute2f128_si256 (__m256i __X, __m256i __Y, const int __C)
-{
-  return (__m256i) __builtin_ia32_vperm2f128_si256 ((__v8si)__X,
-						    (__v8si)__Y,
-						    __C);
-}
-#else
-#define _mm256_permute2f128_pd(X, Y, C)					\
-  ((__m256d) __builtin_ia32_vperm2f128_pd256 ((__v4df)(__m256d)(X),	\
-					      (__v4df)(__m256d)(Y),	\
-					      (int)(C)))
-
-#define _mm256_permute2f128_ps(X, Y, C)					\
-  ((__m256) __builtin_ia32_vperm2f128_ps256 ((__v8sf)(__m256)(X),	\
-					     (__v8sf)(__m256)(Y),	\
-					     (int)(C)))
-
-#define _mm256_permute2f128_si256(X, Y, C)				\
-  ((__m256i) __builtin_ia32_vperm2f128_si256 ((__v8si)(__m256i)(X),	\
-					      (__v8si)(__m256i)(Y),	\
-					      (int)(C)))
-#endif
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_broadcast_ss (float const *__X)
-{
-  return (__m128) __builtin_ia32_vbroadcastss (__X);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_broadcast_sd (double const *__X)
-{
-  return (__m256d) __builtin_ia32_vbroadcastsd256 (__X);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_broadcast_ss (float const *__X)
-{
-  return (__m256) __builtin_ia32_vbroadcastss256 (__X);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_broadcast_pd (__m128d const *__X)
-{
-  return (__m256d) __builtin_ia32_vbroadcastf128_pd256 (__X);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_broadcast_ps (__m128 const *__X)
-{
-  return (__m256) __builtin_ia32_vbroadcastf128_ps256 (__X);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_insertf128_pd (__m256d __X, __m128d __Y, const int __O)
-{
-  return (__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)__X,
-						     (__v2df)__Y,
-						     __O);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_insertf128_ps (__m256 __X, __m128 __Y, const int __O)
-{
-  return (__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)__X,
-						    (__v4sf)__Y,
-						    __O);
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_insertf128_si256 (__m256i __X, __m128i __Y, const int __O)
-{
-  return (__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)__X,
-						     (__v4si)__Y,
-						     __O);
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_insert_epi32 (__m256i __X, int __D, int const __N)
-{
-  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 2);
-  __Y = _mm_insert_epi32 (__Y, __D, __N % 4);
-  return _mm256_insertf128_si256 (__X, __Y, __N >> 2);
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_insert_epi16 (__m256i __X, int __D, int const __N)
-{
-  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 3);
-  __Y = _mm_insert_epi16 (__Y, __D, __N % 8);
-  return _mm256_insertf128_si256 (__X, __Y, __N >> 3);
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_insert_epi8 (__m256i __X, int __D, int const __N)
-{
-  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 4);
-  __Y = _mm_insert_epi8 (__Y, __D, __N % 16);
-  return _mm256_insertf128_si256 (__X, __Y, __N >> 4);
-}
-
-#ifdef __x86_64__
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_insert_epi64 (__m256i __X, long long __D, int const __N)
-{
-  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 1);
-  __Y = _mm_insert_epi64 (__Y, __D, __N % 2);
-  return _mm256_insertf128_si256 (__X, __Y, __N >> 1);
-}
-#endif
-#else
-#define _mm256_insertf128_pd(X, Y, O)					\
-  ((__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)(__m256d)(X),	\
-					       (__v2df)(__m128d)(Y),	\
-					       (int)(O)))
-
-#define _mm256_insertf128_ps(X, Y, O)					\
-  ((__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)(__m256)(X),	\
-					      (__v4sf)(__m128)(Y),  	\
-					      (int)(O)))
-
-#define _mm256_insertf128_si256(X, Y, O)				\
-  ((__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)(__m256i)(X),	\
-					       (__v4si)(__m128i)(Y),	\
-					       (int)(O)))
-
-#define _mm256_insert_epi32(X, D, N)					\
-  (__extension__							\
-   ({									\
-      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 2);		\
-      __Y = _mm_insert_epi32 (__Y, (D), (N) % 4);			\
-      _mm256_insertf128_si256 ((X), __Y, (N) >> 2);			\
-    }))
-
-#define _mm256_insert_epi16(X, D, N)					\
-  (__extension__							\
-   ({									\
-      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 3);		\
-      __Y = _mm_insert_epi16 (__Y, (D), (N) % 8);			\
-      _mm256_insertf128_si256 ((X), __Y, (N) >> 3);			\
-    }))
-
-#define _mm256_insert_epi8(X, D, N)					\
-  (__extension__							\
-   ({									\
-      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 4);		\
-      __Y = _mm_insert_epi8 (__Y, (D), (N) % 16);			\
-      _mm256_insertf128_si256 ((X), __Y, (N) >> 4);			\
-    }))
-
-#ifdef __x86_64__
-#define _mm256_insert_epi64(X, D, N)					\
-  (__extension__							\
-   ({									\
-      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 1);		\
-      __Y = _mm_insert_epi64 (__Y, (D), (N) % 2);			\
-      _mm256_insertf128_si256 ((X), __Y, (N) >> 1);			\
-    }))
-#endif
-#endif
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_load_pd (double const *__P)
-{
-  return *(__m256d *)__P;
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_store_pd (double *__P, __m256d __A)
-{
-  *(__m256d *)__P = __A;
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_load_ps (float const *__P)
-{
-  return *(__m256 *)__P;
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_store_ps (float *__P, __m256 __A)
-{
-  *(__m256 *)__P = __A;
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_loadu_pd (double const *__P)
-{
-  return (__m256d) __builtin_ia32_loadupd256 (__P);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_storeu_pd (double *__P, __m256d __A)
-{
-  __builtin_ia32_storeupd256 (__P, (__v4df)__A);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_loadu_ps (float const *__P)
-{
-  return (__m256) __builtin_ia32_loadups256 (__P);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_storeu_ps (float *__P, __m256 __A)
-{
-  __builtin_ia32_storeups256 (__P, (__v8sf)__A);
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_load_si256 (__m256i const *__P)
-{
-  return *__P;
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_store_si256 (__m256i *__P, __m256i __A)
-{
-  *__P = __A;
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_loadu_si256 (__m256i const *__P)
-{
-  return (__m256i) __builtin_ia32_loaddqu256 ((char const *)__P);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_storeu_si256 (__m256i *__P, __m256i __A)
-{
-  __builtin_ia32_storedqu256 ((char *)__P, (__v32qi)__A);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskload_pd (double const *__P, __m128i __M)
-{
-  return (__m128d) __builtin_ia32_maskloadpd ((const __v2df *)__P,
-					      (__v2di)__M);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskstore_pd (double *__P, __m128i __M, __m128d __A)
-{
-  __builtin_ia32_maskstorepd ((__v2df *)__P, (__v2di)__M, (__v2df)__A);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskload_pd (double const *__P, __m256i __M)
-{
-  return (__m256d) __builtin_ia32_maskloadpd256 ((const __v4df *)__P,
-						 (__v4di)__M);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskstore_pd (double *__P, __m256i __M, __m256d __A)
-{
-  __builtin_ia32_maskstorepd256 ((__v4df *)__P, (__v4di)__M, (__v4df)__A);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskload_ps (float const *__P, __m128i __M)
-{
-  return (__m128) __builtin_ia32_maskloadps ((const __v4sf *)__P,
-					     (__v4si)__M);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskstore_ps (float *__P, __m128i __M, __m128 __A)
-{
-  __builtin_ia32_maskstoreps ((__v4sf *)__P, (__v4si)__M, (__v4sf)__A);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskload_ps (float const *__P, __m256i __M)
-{
-  return (__m256) __builtin_ia32_maskloadps256 ((const __v8sf *)__P,
-						(__v8si)__M);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskstore_ps (float *__P, __m256i __M, __m256 __A)
-{
-  __builtin_ia32_maskstoreps256 ((__v8sf *)__P, (__v8si)__M, (__v8sf)__A);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_movehdup_ps (__m256 __X)
-{
-  return (__m256) __builtin_ia32_movshdup256 ((__v8sf)__X);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_moveldup_ps (__m256 __X)
-{
-  return (__m256) __builtin_ia32_movsldup256 ((__v8sf)__X);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_movedup_pd (__m256d __X)
-{
-  return (__m256d) __builtin_ia32_movddup256 ((__v4df)__X);
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_lddqu_si256 (__m256i const *__P)
-{
-  return (__m256i) __builtin_ia32_lddqu256 ((char const *)__P);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_stream_si256 (__m256i *__A, __m256i __B)
-{
-  __builtin_ia32_movntdq256 ((__v4di *)__A, (__v4di)__B);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_stream_pd (double *__A, __m256d __B)
-{
-  __builtin_ia32_movntpd256 (__A, (__v4df)__B);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_stream_ps (float *__P, __m256 __A)
-{
-  __builtin_ia32_movntps256 (__P, (__v8sf)__A);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_rcp_ps (__m256 __A)
-{
-  return (__m256) __builtin_ia32_rcpps256 ((__v8sf)__A);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_rsqrt_ps (__m256 __A)
-{
-  return (__m256) __builtin_ia32_rsqrtps256 ((__v8sf)__A);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_sqrt_pd (__m256d __A)
-{
-  return (__m256d) __builtin_ia32_sqrtpd256 ((__v4df)__A);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_sqrt_ps (__m256 __A)
-{
-  return (__m256) __builtin_ia32_sqrtps256 ((__v8sf)__A);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_round_pd (__m256d __V, const int __M)
-{
-  return (__m256d) __builtin_ia32_roundpd256 ((__v4df)__V, __M);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_round_ps (__m256 __V, const int __M)
-{
-  return (__m256) __builtin_ia32_roundps256 ((__v8sf)__V, __M);
-}
-#else
-#define _mm256_round_pd(V, M) \
-  ((__m256d) __builtin_ia32_roundpd256 ((__v4df)(__m256d)(V), (int)(M)))
-
-#define _mm256_round_ps(V, M) \
-  ((__m256) __builtin_ia32_roundps256 ((__v8sf)(__m256)(V), (int)(M)))
-#endif
-
-#define _mm256_ceil_pd(V)	_mm256_round_pd ((V), _MM_FROUND_CEIL)
-#define _mm256_floor_pd(V)	_mm256_round_pd ((V), _MM_FROUND_FLOOR)
-#define _mm256_ceil_ps(V)	_mm256_round_ps ((V), _MM_FROUND_CEIL)
-#define _mm256_floor_ps(V)	_mm256_round_ps ((V), _MM_FROUND_FLOOR)
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_unpackhi_pd (__m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_unpckhpd256 ((__v4df)__A, (__v4df)__B);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_unpacklo_pd (__m256d __A, __m256d __B)
-{
-  return (__m256d) __builtin_ia32_unpcklpd256 ((__v4df)__A, (__v4df)__B);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_unpackhi_ps (__m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_unpckhps256 ((__v8sf)__A, (__v8sf)__B);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_unpacklo_ps (__m256 __A, __m256 __B)
-{
-  return (__m256) __builtin_ia32_unpcklps256 ((__v8sf)__A, (__v8sf)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_testz_pd (__m128d __M, __m128d __V)
-{
-  return __builtin_ia32_vtestzpd ((__v2df)__M, (__v2df)__V);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_testc_pd (__m128d __M, __m128d __V)
-{
-  return __builtin_ia32_vtestcpd ((__v2df)__M, (__v2df)__V);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_testnzc_pd (__m128d __M, __m128d __V)
-{
-  return __builtin_ia32_vtestnzcpd ((__v2df)__M, (__v2df)__V);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_testz_ps (__m128 __M, __m128 __V)
-{
-  return __builtin_ia32_vtestzps ((__v4sf)__M, (__v4sf)__V);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_testc_ps (__m128 __M, __m128 __V)
-{
-  return __builtin_ia32_vtestcps ((__v4sf)__M, (__v4sf)__V);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_testnzc_ps (__m128 __M, __m128 __V)
-{
-  return __builtin_ia32_vtestnzcps ((__v4sf)__M, (__v4sf)__V);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_testz_pd (__m256d __M, __m256d __V)
-{
-  return __builtin_ia32_vtestzpd256 ((__v4df)__M, (__v4df)__V);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_testc_pd (__m256d __M, __m256d __V)
-{
-  return __builtin_ia32_vtestcpd256 ((__v4df)__M, (__v4df)__V);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_testnzc_pd (__m256d __M, __m256d __V)
-{
-  return __builtin_ia32_vtestnzcpd256 ((__v4df)__M, (__v4df)__V);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_testz_ps (__m256 __M, __m256 __V)
-{
-  return __builtin_ia32_vtestzps256 ((__v8sf)__M, (__v8sf)__V);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_testc_ps (__m256 __M, __m256 __V)
-{
-  return __builtin_ia32_vtestcps256 ((__v8sf)__M, (__v8sf)__V);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_testnzc_ps (__m256 __M, __m256 __V)
-{
-  return __builtin_ia32_vtestnzcps256 ((__v8sf)__M, (__v8sf)__V);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_testz_si256 (__m256i __M, __m256i __V)
-{
-  return __builtin_ia32_ptestz256 ((__v4di)__M, (__v4di)__V);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_testc_si256 (__m256i __M, __m256i __V)
-{
-  return __builtin_ia32_ptestc256 ((__v4di)__M, (__v4di)__V);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_testnzc_si256 (__m256i __M, __m256i __V)
-{
-  return __builtin_ia32_ptestnzc256 ((__v4di)__M, (__v4di)__V);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_movemask_pd (__m256d __A)
-{
-  return __builtin_ia32_movmskpd256 ((__v4df)__A);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_movemask_ps (__m256 __A)
-{
-  return __builtin_ia32_movmskps256 ((__v8sf)__A);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_undefined_pd (void)
-{
-  __m256d __Y = __Y;
-  return __Y;
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_undefined_ps (void)
-{
-  __m256 __Y = __Y;
-  return __Y;
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_undefined_si256 (void)
-{
-  __m256i __Y = __Y;
-  return __Y;
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_setzero_pd (void)
-{
-  return __extension__ (__m256d){ 0.0, 0.0, 0.0, 0.0 };
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_setzero_ps (void)
-{
-  return __extension__ (__m256){ 0.0, 0.0, 0.0, 0.0,
-				 0.0, 0.0, 0.0, 0.0 };
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_setzero_si256 (void)
-{
-  return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };
-}
-
-/* Create the vector [A B C D].  */
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_set_pd (double __A, double __B, double __C, double __D)
-{
-  return __extension__ (__m256d){ __D, __C, __B, __A };
-}
-
-/* Create the vector [A B C D E F G H].  */
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_set_ps (float __A, float __B, float __C, float __D,
-	       float __E, float __F, float __G, float __H)
-{
-  return __extension__ (__m256){ __H, __G, __F, __E,
-				 __D, __C, __B, __A };
-}
-
-/* Create the vector [A B C D E F G H].  */
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_set_epi32 (int __A, int __B, int __C, int __D,
-		  int __E, int __F, int __G, int __H)
-{
-  return __extension__ (__m256i)(__v8si){ __H, __G, __F, __E,
-					  __D, __C, __B, __A };
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_set_epi16 (short __q15, short __q14, short __q13, short __q12,
-		  short __q11, short __q10, short __q09, short __q08,
-		  short __q07, short __q06, short __q05, short __q04,
-		  short __q03, short __q02, short __q01, short __q00)
-{
-  return __extension__ (__m256i)(__v16hi){
-    __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
-    __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
-  };
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_set_epi8  (char __q31, char __q30, char __q29, char __q28,
-		  char __q27, char __q26, char __q25, char __q24,
-		  char __q23, char __q22, char __q21, char __q20,
-		  char __q19, char __q18, char __q17, char __q16,
-		  char __q15, char __q14, char __q13, char __q12,
-		  char __q11, char __q10, char __q09, char __q08,
-		  char __q07, char __q06, char __q05, char __q04,
-		  char __q03, char __q02, char __q01, char __q00)
-{
-  return __extension__ (__m256i)(__v32qi){
-    __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
-    __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
-    __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
-    __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
-  };
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_set_epi64x (long long __A, long long __B, long long __C,
-		   long long __D)
-{
-  return __extension__ (__m256i)(__v4di){ __D, __C, __B, __A };
-}
-
-/* Create a vector with all elements equal to A.  */
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_set1_pd (double __A)
-{
-  return __extension__ (__m256d){ __A, __A, __A, __A };
-}
-
-/* Create a vector with all elements equal to A.  */
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_set1_ps (float __A)
-{
-  return __extension__ (__m256){ __A, __A, __A, __A,
-				 __A, __A, __A, __A };
-}
-
-/* Create a vector with all elements equal to A.  */
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_set1_epi32 (int __A)
-{
-  return __extension__ (__m256i)(__v8si){ __A, __A, __A, __A,
-					  __A, __A, __A, __A };
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_set1_epi16 (short __A)
-{
-  return _mm256_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A,
-			   __A, __A, __A, __A, __A, __A, __A, __A);
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_set1_epi8 (char __A)
-{
-  return _mm256_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
-			  __A, __A, __A, __A, __A, __A, __A, __A,
-			  __A, __A, __A, __A, __A, __A, __A, __A,
-			  __A, __A, __A, __A, __A, __A, __A, __A);
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_set1_epi64x (long long __A)
-{
-  return __extension__ (__m256i)(__v4di){ __A, __A, __A, __A };
-}
-
-/* Create vectors of elements in the reversed order from the
-   _mm256_set_XXX functions.  */
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_setr_pd (double __A, double __B, double __C, double __D)
-{
-  return _mm256_set_pd (__D, __C, __B, __A);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_setr_ps (float __A, float __B, float __C, float __D,
-		float __E, float __F, float __G, float __H)
-{
-  return _mm256_set_ps (__H, __G, __F, __E, __D, __C, __B, __A);
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_setr_epi32 (int __A, int __B, int __C, int __D,
-		   int __E, int __F, int __G, int __H)
-{
-  return _mm256_set_epi32 (__H, __G, __F, __E, __D, __C, __B, __A);
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_setr_epi16 (short __q15, short __q14, short __q13, short __q12,
-		   short __q11, short __q10, short __q09, short __q08,
-		   short __q07, short __q06, short __q05, short __q04,
-		   short __q03, short __q02, short __q01, short __q00)
-{
-  return _mm256_set_epi16 (__q00, __q01, __q02, __q03,
-			   __q04, __q05, __q06, __q07,
-			   __q08, __q09, __q10, __q11,
-			   __q12, __q13, __q14, __q15);
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_setr_epi8  (char __q31, char __q30, char __q29, char __q28,
-		   char __q27, char __q26, char __q25, char __q24,
-		   char __q23, char __q22, char __q21, char __q20,
-		   char __q19, char __q18, char __q17, char __q16,
-		   char __q15, char __q14, char __q13, char __q12,
-		   char __q11, char __q10, char __q09, char __q08,
-		   char __q07, char __q06, char __q05, char __q04,
-		   char __q03, char __q02, char __q01, char __q00)
-{
-  return _mm256_set_epi8 (__q00, __q01, __q02, __q03,
-			  __q04, __q05, __q06, __q07,
-			  __q08, __q09, __q10, __q11,
-			  __q12, __q13, __q14, __q15,
-			  __q16, __q17, __q18, __q19,
-			  __q20, __q21, __q22, __q23,
-			  __q24, __q25, __q26, __q27,
-			  __q28, __q29, __q30, __q31);
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_setr_epi64x (long long __A, long long __B, long long __C,
-		    long long __D)
-{
-  return _mm256_set_epi64x (__D, __C, __B, __A);
-}
-
-/* Casts between various SP, DP, INT vector types.  Note that these do no
-   conversion of values, they just change the type.  */
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_castpd_ps (__m256d __A)
-{
-  return (__m256) __A;
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_castpd_si256 (__m256d __A)
-{
-  return (__m256i) __A;
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_castps_pd (__m256 __A)
-{
-  return (__m256d) __A;
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_castps_si256(__m256 __A)
-{
-  return (__m256i) __A;
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_castsi256_ps (__m256i __A)
-{
-  return (__m256) __A;
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_castsi256_pd (__m256i __A)
-{
-  return (__m256d) __A;
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_castpd256_pd128 (__m256d __A)
-{
-  return (__m128d) __builtin_ia32_pd_pd256 ((__v4df)__A);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_castps256_ps128 (__m256 __A)
-{
-  return (__m128) __builtin_ia32_ps_ps256 ((__v8sf)__A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_castsi256_si128 (__m256i __A)
-{
-  return (__m128i) __builtin_ia32_si_si256 ((__v8si)__A);
-}
-
-/* When cast is done from a 128 to 256-bit type, the low 128 bits of
-   the 256-bit result contain source parameter value and the upper 128
-   bits of the result are undefined.  Those intrinsics shouldn't
-   generate any extra moves.  */
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_castpd128_pd256 (__m128d __A)
-{
-  return (__m256d) __builtin_ia32_pd256_pd ((__v2df)__A);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_castps128_ps256 (__m128 __A)
-{
-  return (__m256) __builtin_ia32_ps256_ps ((__v4sf)__A);
-}
-
-extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_castsi128_si256 (__m128i __A)
-{
-  return (__m256i) __builtin_ia32_si256_si ((__v4si)__A);
-}
-
-#ifdef __DISABLE_AVX__
-#undef __DISABLE_AVX__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX__ */
-
-#endif /* _AVXINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/bmi2intrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/bmi2intrin.h	(revision 1046)
+++ 	(revision )
@@ -1,109 +1,0 @@
-/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead."
-#endif
-
-#ifndef _BMI2INTRIN_H_INCLUDED
-#define _BMI2INTRIN_H_INCLUDED
-
-#ifndef __BMI2__
-#pragma GCC push_options
-#pragma GCC target("bmi2")
-#define __DISABLE_BMI2__
-#endif /* __BMI2__ */
-
-extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_bzhi_u32 (unsigned int __X, unsigned int __Y)
-{
-  return __builtin_ia32_bzhi_si (__X, __Y);
-}
-
-extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_pdep_u32 (unsigned int __X, unsigned int __Y)
-{
-  return __builtin_ia32_pdep_si (__X, __Y);
-}
-
-extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_pext_u32 (unsigned int __X, unsigned int __Y)
-{
-  return __builtin_ia32_pext_si (__X, __Y);
-}
-
-#ifdef  __x86_64__
-
-extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_bzhi_u64 (unsigned long long __X, unsigned long long __Y)
-{
-  return __builtin_ia32_bzhi_di (__X, __Y);
-}
-
-extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_pdep_u64 (unsigned long long __X, unsigned long long __Y)
-{
-  return __builtin_ia32_pdep_di (__X, __Y);
-}
-
-extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_pext_u64 (unsigned long long __X, unsigned long long __Y)
-{
-  return __builtin_ia32_pext_di (__X, __Y);
-}
-
-extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mulx_u64 (unsigned long long __X, unsigned long long __Y,
-	   unsigned long long *__P)
-{
-  unsigned __int128 __res = (unsigned __int128) __X * __Y;
-  *__P = (unsigned long long) (__res >> 64);
-  return (unsigned long long) __res;
-}
-
-#else /* !__x86_64__ */
-
-extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)
-{
-  unsigned long long __res = (unsigned long long) __X * __Y;
-  *__P = (unsigned int) (__res >> 32);
-  return (unsigned int) __res;
-}
-
-#endif /* !__x86_64__  */
-
-#ifdef __DISABLE_BMI2__
-#undef __DISABLE_BMI2__
-#pragma GCC pop_options
-#endif /* __DISABLE_BMI2__ */
-
-#endif /* _BMI2INTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/bmiintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/bmiintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,184 +1,0 @@
-/* Copyright (C) 2010-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
-#endif
-
-#ifndef _BMIINTRIN_H_INCLUDED
-#define _BMIINTRIN_H_INCLUDED
-
-#ifndef __BMI__
-#pragma GCC push_options
-#pragma GCC target("bmi")
-#define __DISABLE_BMI__
-#endif /* __BMI__ */
-
-extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__tzcnt_u16 (unsigned short __X)
-{
-  return __builtin_ctzs (__X);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__andn_u32 (unsigned int __X, unsigned int __Y)
-{
-  return ~__X & __Y;
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__bextr_u32 (unsigned int __X, unsigned int __Y)
-{
-  return __builtin_ia32_bextr_u32 (__X, __Y);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_bextr_u32 (unsigned int __X, unsigned int __Y, unsigned __Z)
-{
-  return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blsi_u32 (unsigned int __X)
-{
-  return __X & -__X;
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_blsi_u32 (unsigned int __X)
-{
-  return __blsi_u32 (__X);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blsmsk_u32 (unsigned int __X)
-{
-  return __X ^ (__X - 1);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_blsmsk_u32 (unsigned int __X)
-{
-  return __blsmsk_u32 (__X);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blsr_u32 (unsigned int __X)
-{
-  return __X & (__X - 1);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_blsr_u32 (unsigned int __X)
-{
-  return __blsr_u32 (__X);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__tzcnt_u32 (unsigned int __X)
-{
-  return __builtin_ctz (__X);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_tzcnt_u32 (unsigned int __X)
-{
-  return __builtin_ctz (__X);
-}
-
-
-#ifdef  __x86_64__
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__andn_u64 (unsigned long long __X, unsigned long long __Y)
-{
-  return ~__X & __Y;
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__bextr_u64 (unsigned long long __X, unsigned long long __Y)
-{
-  return __builtin_ia32_bextr_u64 (__X, __Y);
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_bextr_u64 (unsigned long long __X, unsigned int __Y, unsigned int __Z)
-{
-  return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blsi_u64 (unsigned long long __X)
-{
-  return __X & -__X;
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_blsi_u64 (unsigned long long __X)
-{
-  return __blsi_u64 (__X);
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blsmsk_u64 (unsigned long long __X)
-{
-  return __X ^ (__X - 1);
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_blsmsk_u64 (unsigned long long __X)
-{
-  return __blsmsk_u64 (__X);
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blsr_u64 (unsigned long long __X)
-{
-  return __X & (__X - 1);
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_blsr_u64 (unsigned long long __X)
-{
-  return __blsr_u64 (__X);
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__tzcnt_u64 (unsigned long long __X)
-{
-  return __builtin_ctzll (__X);
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_tzcnt_u64 (unsigned long long __X)
-{
-  return __builtin_ctzll (__X);
-}
-
-#endif /* __x86_64__  */
-
-#ifdef __DISABLE_BMI__
-#undef __DISABLE_BMI__
-#pragma GCC pop_options
-#endif /* __DISABLE_BMI__ */
-
-#endif /* _BMIINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/bmmintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/bmmintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,29 +1,0 @@
-/* Copyright (C) 2007-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _BMMINTRIN_H_INCLUDED
-#define _BMMINTRIN_H_INCLUDED
-
-# error "SSE5 instruction set removed from compiler"
-
-#endif /* _BMMINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/clflushoptintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/clflushoptintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,49 +1,0 @@
-/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if !defined _X86INTRIN_H_INCLUDED
-# error "Never use <clflushoptintrin.h> directly; include <x86intrin.h> instead."
-#endif
-
-#ifndef _CLFLUSHOPTINTRIN_H_INCLUDED
-#define _CLFLUSHOPTINTRIN_H_INCLUDED
-
-#ifndef __CLFLUSHOPT__
-#pragma GCC push_options
-#pragma GCC target("clflushopt")
-#define __DISABLE_CLFLUSHOPT__
-#endif /* __CLFLUSHOPT__ */
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_clflushopt (void *__A)
-{
-  __builtin_ia32_clflushopt (__A);
-}
-
-#ifdef __DISABLE_CLFLUSHOPT__
-#undef __DISABLE_CLFLUSHOPT__
-#pragma GCC pop_options
-#endif /* __DISABLE_CLFLUSHOPT__ */
-
-#endif /* _CLFLUSHOPTINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/clwbintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/clwbintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,49 +1,0 @@
-/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if !defined _X86INTRIN_H_INCLUDED
-# error "Never use <clwbintrin.h> directly; include <x86intrin.h> instead."
-#endif
-
-#ifndef _CLWBINTRIN_H_INCLUDED
-#define _CLWBINTRIN_H_INCLUDED
-
-#ifndef __CLWB__
-#pragma GCC push_options
-#pragma GCC target("clwb")
-#define __DISABLE_CLWB__
-#endif /* __CLWB__ */
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_clwb (void *__A)
-{
-  __builtin_ia32_clwb (__A);
-}
-
-#ifdef __DISABLE_CLWB__
-#undef __DISABLE_CLWB__
-#pragma GCC pop_options
-#endif /* __DISABLE_CLWB__ */
-
-#endif /* _CLWBINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/cpuid.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/cpuid.h	(revision 1046)
+++ 	(revision )
@@ -1,244 +1,0 @@
-/*
- * Copyright (C) 2007-2015 Free Software Foundation, Inc.
- *
- * This file is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option) any
- * later version.
- * 
- * This file is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- * 
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- * 
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
- * <http://www.gnu.org/licenses/>.
- */
-
-/* %ecx */
-#define bit_SSE3	(1 << 0)
-#define bit_PCLMUL	(1 << 1)
-#define bit_LZCNT	(1 << 5)
-#define bit_SSSE3	(1 << 9)
-#define bit_FMA		(1 << 12)
-#define bit_CMPXCHG16B	(1 << 13)
-#define bit_SSE4_1	(1 << 19)
-#define bit_SSE4_2	(1 << 20)
-#define bit_MOVBE	(1 << 22)
-#define bit_POPCNT	(1 << 23)
-#define bit_AES		(1 << 25)
-#define bit_XSAVE	(1 << 26)
-#define bit_OSXSAVE	(1 << 27)
-#define bit_AVX		(1 << 28)
-#define bit_F16C	(1 << 29)
-#define bit_RDRND	(1 << 30)
-
-/* %edx */
-#define bit_CMPXCHG8B	(1 << 8)
-#define bit_CMOV	(1 << 15)
-#define bit_MMX		(1 << 23)
-#define bit_FXSAVE	(1 << 24)
-#define bit_SSE		(1 << 25)
-#define bit_SSE2	(1 << 26)
-
-/* Extended Features */
-/* %ecx */
-#define bit_LAHF_LM	(1 << 0)
-#define bit_ABM		(1 << 5)
-#define bit_SSE4a	(1 << 6)
-#define bit_PRFCHW	(1 << 8)
-#define bit_XOP         (1 << 11)
-#define bit_LWP 	(1 << 15)
-#define bit_FMA4        (1 << 16)
-#define bit_TBM         (1 << 21)
-#define bit_MWAITX      (1 << 29)
-
-/* %edx */
-#define bit_MMXEXT	(1 << 22)
-#define bit_LM		(1 << 29)
-#define bit_3DNOWP	(1 << 30)
-#define bit_3DNOW	(1 << 31)
-
-/* Extended Features (%eax == 7) */
-/* %ebx */
-#define bit_FSGSBASE	(1 << 0)
-#define bit_BMI	(1 << 3)
-#define bit_HLE	(1 << 4)
-#define bit_AVX2	(1 << 5)
-#define bit_BMI2	(1 << 8)
-#define bit_RTM	(1 << 11)
-#define bit_MPX	(1 << 14)
-#define bit_AVX512F	(1 << 16)
-#define bit_AVX512DQ	(1 << 17)
-#define bit_RDSEED	(1 << 18)
-#define bit_ADX	(1 << 19)
-#define bit_AVX512IFMA	(1 << 21)
-#define bit_PCOMMIT	(1 << 22)
-#define bit_CLFLUSHOPT	(1 << 23)
-#define bit_CLWB	(1 << 24)
-#define bit_AVX512PF	(1 << 26)
-#define bit_AVX512ER	(1 << 27)
-#define bit_AVX512CD	(1 << 28)
-#define bit_SHA		(1 << 29)
-#define bit_AVX512BW	(1 << 30)
-#define bit_AVX512VL	(1 << 31)
-
-/* %ecx */
-#define bit_PREFETCHWT1	  (1 << 0)
-#define bit_AVX512VBMI	(1 << 1)
-
-/* XFEATURE_ENABLED_MASK register bits (%eax == 13, %ecx == 0) */
-#define bit_BNDREGS     (1 << 3)
-#define bit_BNDCSR      (1 << 4)
-
-/* Extended State Enumeration Sub-leaf (%eax == 13, %ecx == 1) */
-#define bit_XSAVEOPT	(1 << 0)
-#define bit_XSAVEC	(1 << 1)
-#define bit_XSAVES	(1 << 3)
-
-/* Signatures for different CPU implementations as returned in uses
-   of cpuid with level 0.  */
-#define signature_AMD_ebx	0x68747541
-#define signature_AMD_ecx	0x444d4163
-#define signature_AMD_edx	0x69746e65
-
-#define signature_CENTAUR_ebx	0x746e6543
-#define signature_CENTAUR_ecx	0x736c7561
-#define signature_CENTAUR_edx	0x48727561
-
-#define signature_CYRIX_ebx	0x69727943
-#define signature_CYRIX_ecx	0x64616574
-#define signature_CYRIX_edx	0x736e4978
-
-#define signature_INTEL_ebx	0x756e6547
-#define signature_INTEL_ecx	0x6c65746e
-#define signature_INTEL_edx	0x49656e69
-
-#define signature_TM1_ebx	0x6e617254
-#define signature_TM1_ecx	0x55504361
-#define signature_TM1_edx	0x74656d73
-
-#define signature_TM2_ebx	0x756e6547
-#define signature_TM2_ecx	0x3638784d
-#define signature_TM2_edx	0x54656e69
-
-#define signature_NSC_ebx	0x646f6547
-#define signature_NSC_ecx	0x43534e20
-#define signature_NSC_edx	0x79622065
-
-#define signature_NEXGEN_ebx	0x4778654e
-#define signature_NEXGEN_ecx	0x6e657669
-#define signature_NEXGEN_edx	0x72446e65
-
-#define signature_RISE_ebx	0x65736952
-#define signature_RISE_ecx	0x65736952
-#define signature_RISE_edx	0x65736952
-
-#define signature_SIS_ebx	0x20536953
-#define signature_SIS_ecx	0x20536953
-#define signature_SIS_edx	0x20536953
-
-#define signature_UMC_ebx	0x20434d55
-#define signature_UMC_ecx	0x20434d55
-#define signature_UMC_edx	0x20434d55
-
-#define signature_VIA_ebx	0x20414956
-#define signature_VIA_ecx	0x20414956
-#define signature_VIA_edx	0x20414956
-
-#define signature_VORTEX_ebx	0x74726f56
-#define signature_VORTEX_ecx	0x436f5320
-#define signature_VORTEX_edx	0x36387865
-
-#define __cpuid(level, a, b, c, d)			\
-  __asm__ ("cpuid\n\t"					\
-	   : "=a" (a), "=b" (b), "=c" (c), "=d" (d)	\
-	   : "0" (level))
-
-#define __cpuid_count(level, count, a, b, c, d)		\
-  __asm__ ("cpuid\n\t"					\
-	   : "=a" (a), "=b" (b), "=c" (c), "=d" (d)	\
-	   : "0" (level), "2" (count))
-
-
-/* Return highest supported input value for cpuid instruction.  ext can
-   be either 0x0 or 0x8000000 to return highest supported value for
-   basic or extended cpuid information.  Function returns 0 if cpuid
-   is not supported or whatever cpuid returns in eax register.  If sig
-   pointer is non-null, then first four bytes of the signature
-   (as found in ebx register) are returned in location pointed by sig.  */
-
-static __inline unsigned int
-__get_cpuid_max (unsigned int __ext, unsigned int *__sig)
-{
-  unsigned int __eax, __ebx, __ecx, __edx;
-
-#ifndef __x86_64__
-  /* See if we can use cpuid.  On AMD64 we always can.  */
-#if __GNUC__ >= 3
-  __asm__ ("pushf{l|d}\n\t"
-	   "pushf{l|d}\n\t"
-	   "pop{l}\t%0\n\t"
-	   "mov{l}\t{%0, %1|%1, %0}\n\t"
-	   "xor{l}\t{%2, %0|%0, %2}\n\t"
-	   "push{l}\t%0\n\t"
-	   "popf{l|d}\n\t"
-	   "pushf{l|d}\n\t"
-	   "pop{l}\t%0\n\t"
-	   "popf{l|d}\n\t"
-	   : "=&r" (__eax), "=&r" (__ebx)
-	   : "i" (0x00200000));
-#else
-/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
-   nor alternatives in i386 code.  */
-  __asm__ ("pushfl\n\t"
-	   "pushfl\n\t"
-	   "popl\t%0\n\t"
-	   "movl\t%0, %1\n\t"
-	   "xorl\t%2, %0\n\t"
-	   "pushl\t%0\n\t"
-	   "popfl\n\t"
-	   "pushfl\n\t"
-	   "popl\t%0\n\t"
-	   "popfl\n\t"
-	   : "=&r" (__eax), "=&r" (__ebx)
-	   : "i" (0x00200000));
-#endif
-
-  if (!((__eax ^ __ebx) & 0x00200000))
-    return 0;
-#endif
-
-  /* Host supports cpuid.  Return highest supported cpuid input value.  */
-  __cpuid (__ext, __eax, __ebx, __ecx, __edx);
-
-  if (__sig)
-    *__sig = __ebx;
-
-  return __eax;
-}
-
-/* Return cpuid data for requested cpuid level, as found in returned
-   eax, ebx, ecx and edx registers.  The function checks if cpuid is
-   supported and returns 1 for valid cpuid information or 0 for
-   unsupported cpuid level.  All pointers are required to be non-null.  */
-
-static __inline int
-__get_cpuid (unsigned int __level,
-	     unsigned int *__eax, unsigned int *__ebx,
-	     unsigned int *__ecx, unsigned int *__edx)
-{
-  unsigned int __ext = __level & 0x80000000;
-
-  if (__get_cpuid_max (__ext, 0) < __level)
-    return 0;
-
-  __cpuid (__level, *__eax, *__ebx, *__ecx, *__edx);
-  return 1;
-}
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/cross-stdarg.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/cross-stdarg.h	(revision 1046)
+++ 	(revision )
@@ -1,72 +1,0 @@
-/* Copyright (C) 2002-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef __CROSS_STDARG_H_INCLUDED
-#define __CROSS_STDARG_H_INCLUDED
-
-/* Make sure that for non x64 targets cross builtins are defined.  */
-#ifndef __x86_64__
-/* Call abi ms_abi.  */
-#define __builtin_ms_va_list __builtin_va_list
-#define __builtin_ms_va_copy __builtin_va_copy
-#define __builtin_ms_va_start __builtin_va_start
-#define __builtin_ms_va_end __builtin_va_end
-
-/* Call abi sysv_abi.  */
-#define __builtin_sysv_va_list __builtin_va_list
-#define __builtin_sysv_va_copy __builtin_va_copy
-#define __builtin_sysv_va_start __builtin_va_start
-#define __builtin_sysv_va_end __builtin_va_end
-#endif
-
-#define __ms_va_copy(__d,__s) __builtin_ms_va_copy(__d,__s)
-#define __ms_va_start(__v,__l) __builtin_ms_va_start(__v,__l)
-#define __ms_va_arg(__v,__l)	__builtin_va_arg(__v,__l)
-#define __ms_va_end(__v) __builtin_ms_va_end(__v)
-
-#define __sysv_va_copy(__d,__s) __builtin_sysv_va_copy(__d,__s)
-#define __sysv_va_start(__v,__l) __builtin_sysv_va_start(__v,__l)
-#define __sysv_va_arg(__v,__l)	__builtin_va_arg(__v,__l)
-#define __sysv_va_end(__v) __builtin_sysv_va_end(__v)
-
-#ifndef __GNUC_SYSV_VA_LIST
-#define __GNUC_SYSV_VA_LIST
-  typedef __builtin_sysv_va_list __gnuc_sysv_va_list;
-#endif
-
-#ifndef _SYSV_VA_LIST_DEFINED
-#define _SYSV_VA_LIST_DEFINED
-  typedef __gnuc_sysv_va_list sysv_va_list;
-#endif
-
-#ifndef __GNUC_MS_VA_LIST
-#define __GNUC_MS_VA_LIST
-  typedef __builtin_ms_va_list __gnuc_ms_va_list;
-#endif
-
-#ifndef _MS_VA_LIST_DEFINED
-#define _MS_VA_LIST_DEFINED
-  typedef __gnuc_ms_va_list ms_va_list;
-#endif
-
-#endif /* __CROSS_STDARG_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/emmintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/emmintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,1561 +1,0 @@
-/* Copyright (C) 2003-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* Implemented from the specification included in the Intel C++ Compiler
-   User Guide and Reference, version 9.0.  */
-
-#ifndef _EMMINTRIN_H_INCLUDED
-#define _EMMINTRIN_H_INCLUDED
-
-/* We need definitions from the SSE header files*/
-#include <xmmintrin.h>
-
-#ifndef __SSE2__
-#pragma GCC push_options
-#pragma GCC target("sse2")
-#define __DISABLE_SSE2__
-#endif /* __SSE2__ */
-
-/* SSE2 */
-typedef double __v2df __attribute__ ((__vector_size__ (16)));
-typedef long long __v2di __attribute__ ((__vector_size__ (16)));
-typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));
-typedef int __v4si __attribute__ ((__vector_size__ (16)));
-typedef unsigned int __v4su __attribute__ ((__vector_size__ (16)));
-typedef short __v8hi __attribute__ ((__vector_size__ (16)));
-typedef unsigned short __v8hu __attribute__ ((__vector_size__ (16)));
-typedef char __v16qi __attribute__ ((__vector_size__ (16)));
-typedef unsigned char __v16qu __attribute__ ((__vector_size__ (16)));
-
-/* The Intel API is flexible enough that we must allow aliasing with other
-   vector types, and their scalar components.  */
-typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
-typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
-
-/* Create a selector for use with the SHUFPD instruction.  */
-#define _MM_SHUFFLE2(fp1,fp0) \
- (((fp1) << 1) | (fp0))
-
-/* Create a vector with element 0 as F and the rest zero.  */
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set_sd (double __F)
-{
-  return __extension__ (__m128d){ __F, 0.0 };
-}
-
-/* Create a vector with both elements equal to F.  */
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set1_pd (double __F)
-{
-  return __extension__ (__m128d){ __F, __F };
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set_pd1 (double __F)
-{
-  return _mm_set1_pd (__F);
-}
-
-/* Create a vector with the lower value X and upper value W.  */
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set_pd (double __W, double __X)
-{
-  return __extension__ (__m128d){ __X, __W };
-}
-
-/* Create a vector with the lower value W and upper value X.  */
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setr_pd (double __W, double __X)
-{
-  return __extension__ (__m128d){ __W, __X };
-}
-
-/* Create an undefined vector.  */
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_undefined_pd (void)
-{
-  __m128d __Y = __Y;
-  return __Y;
-}
-
-/* Create a vector of zeros.  */
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setzero_pd (void)
-{
-  return __extension__ (__m128d){ 0.0, 0.0 };
-}
-
-/* Sets the low DPFP value of A from the low value of B.  */
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_move_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
-}
-
-/* Load two DPFP values from P.  The address must be 16-byte aligned.  */
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_load_pd (double const *__P)
-{
-  return *(__m128d *)__P;
-}
-
-/* Load two DPFP values from P.  The address need not be 16-byte aligned.  */
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_loadu_pd (double const *__P)
-{
-  return __builtin_ia32_loadupd (__P);
-}
-
-/* Create a vector with all two elements equal to *P.  */
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_load1_pd (double const *__P)
-{
-  return _mm_set1_pd (*__P);
-}
-
-/* Create a vector with element 0 as *P and the rest zero.  */
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_load_sd (double const *__P)
-{
-  return _mm_set_sd (*__P);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_load_pd1 (double const *__P)
-{
-  return _mm_load1_pd (__P);
-}
-
-/* Load two DPFP values in reverse order.  The address must be aligned.  */
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_loadr_pd (double const *__P)
-{
-  __m128d __tmp = _mm_load_pd (__P);
-  return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
-}
-
-/* Store two DPFP values.  The address must be 16-byte aligned.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_store_pd (double *__P, __m128d __A)
-{
-  *(__m128d *)__P = __A;
-}
-
-/* Store two DPFP values.  The address need not be 16-byte aligned.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_storeu_pd (double *__P, __m128d __A)
-{
-  __builtin_ia32_storeupd (__P, __A);
-}
-
-/* Stores the lower DPFP value.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_store_sd (double *__P, __m128d __A)
-{
-  *__P = ((__v2df)__A)[0];
-}
-
-extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsd_f64 (__m128d __A)
-{
-  return ((__v2df)__A)[0];
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_storel_pd (double *__P, __m128d __A)
-{
-  _mm_store_sd (__P, __A);
-}
-
-/* Stores the upper DPFP value.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_storeh_pd (double *__P, __m128d __A)
-{
-  *__P = ((__v2df)__A)[1];
-}
-
-/* Store the lower DPFP value across two words.
-   The address must be 16-byte aligned.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_store1_pd (double *__P, __m128d __A)
-{
-  _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_store_pd1 (double *__P, __m128d __A)
-{
-  _mm_store1_pd (__P, __A);
-}
-
-/* Store two DPFP values in reverse order.  The address must be aligned.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_storer_pd (double *__P, __m128d __A)
-{
-  _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsi128_si32 (__m128i __A)
-{
-  return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
-}
-
-#ifdef __x86_64__
-/* Intel intrinsic.  */
-extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsi128_si64 (__m128i __A)
-{
-  return ((__v2di)__A)[0];
-}
-
-/* Microsoft intrinsic.  */
-extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsi128_si64x (__m128i __A)
-{
-  return ((__v2di)__A)[0];
-}
-#endif
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_add_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d) ((__v2df)__A + (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_add_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sub_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d) ((__v2df)__A - (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sub_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mul_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d) ((__v2df)__A * (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mul_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_div_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d) ((__v2df)__A / (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_div_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sqrt_pd (__m128d __A)
-{
-  return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
-}
-
-/* Return pair {sqrt (B[0]), A[1]}.  */
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sqrt_sd (__m128d __A, __m128d __B)
-{
-  __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
-  return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_max_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_max_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_and_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_andnot_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_or_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_xor_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmplt_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmple_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpgt_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpge_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpneq_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpnlt_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpnle_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpngt_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpnge_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpord_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpunord_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmplt_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmple_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpgt_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
-					 (__v2df)
-					 __builtin_ia32_cmpltsd ((__v2df) __B,
-								 (__v2df)
-								 __A));
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpge_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
-					 (__v2df)
-					 __builtin_ia32_cmplesd ((__v2df) __B,
-								 (__v2df)
-								 __A));
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpneq_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpnlt_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpnle_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpngt_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
-					 (__v2df)
-					 __builtin_ia32_cmpnltsd ((__v2df) __B,
-								  (__v2df)
-								  __A));
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpnge_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
-					 (__v2df)
-					 __builtin_ia32_cmpnlesd ((__v2df) __B,
-								  (__v2df)
-								  __A));
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpord_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpunord_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comieq_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comilt_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comile_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comigt_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comige_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comineq_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ucomieq_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ucomilt_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ucomile_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ucomigt_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ucomige_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ucomineq_sd (__m128d __A, __m128d __B)
-{
-  return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
-}
-
-/* Create a vector of Qi, where i is the element number.  */
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set_epi64x (long long __q1, long long __q0)
-{
-  return __extension__ (__m128i)(__v2di){ __q0, __q1 };
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set_epi64 (__m64 __q1,  __m64 __q0)
-{
-  return _mm_set_epi64x ((long long)__q1, (long long)__q0);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
-{
-  return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
-	       short __q3, short __q2, short __q1, short __q0)
-{
-  return __extension__ (__m128i)(__v8hi){
-    __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
-	      char __q11, char __q10, char __q09, char __q08,
-	      char __q07, char __q06, char __q05, char __q04,
-	      char __q03, char __q02, char __q01, char __q00)
-{
-  return __extension__ (__m128i)(__v16qi){
-    __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
-    __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
-  };
-}
-
-/* Set all of the elements of the vector to A.  */
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set1_epi64x (long long __A)
-{
-  return _mm_set_epi64x (__A, __A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set1_epi64 (__m64 __A)
-{
-  return _mm_set_epi64 (__A, __A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set1_epi32 (int __A)
-{
-  return _mm_set_epi32 (__A, __A, __A, __A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set1_epi16 (short __A)
-{
-  return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set1_epi8 (char __A)
-{
-  return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
-		       __A, __A, __A, __A, __A, __A, __A, __A);
-}
-
-/* Create a vector of Qi, where i is the element number.
-   The parameter order is reversed from the _mm_set_epi* functions.  */
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setr_epi64 (__m64 __q0, __m64 __q1)
-{
-  return _mm_set_epi64 (__q1, __q0);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
-{
-  return _mm_set_epi32 (__q3, __q2, __q1, __q0);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
-	        short __q4, short __q5, short __q6, short __q7)
-{
-  return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
-	       char __q04, char __q05, char __q06, char __q07,
-	       char __q08, char __q09, char __q10, char __q11,
-	       char __q12, char __q13, char __q14, char __q15)
-{
-  return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
-		       __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
-}
-
-/* Create a vector with element 0 as *P and the rest zero.  */
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_load_si128 (__m128i const *__P)
-{
-  return *__P;
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_loadu_si128 (__m128i const *__P)
-{
-  return (__m128i) __builtin_ia32_loaddqu ((char const *)__P);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_loadl_epi64 (__m128i const *__P)
-{
-  return _mm_set_epi64 ((__m64)0LL, *(__m64 *)__P);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_store_si128 (__m128i *__P, __m128i __B)
-{
-  *__P = __B;
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_storeu_si128 (__m128i *__P, __m128i __B)
-{
-  __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_storel_epi64 (__m128i *__P, __m128i __B)
-{
-  *(long long *)__P = ((__v2di)__B)[0];
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movepi64_pi64 (__m128i __B)
-{
-  return (__m64) ((__v2di)__B)[0];
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movpi64_epi64 (__m64 __A)
-{
-  return _mm_set_epi64 ((__m64)0LL, __A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_move_epi64 (__m128i __A)
-{
-  return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
-}
-
-/* Create an undefined vector.  */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_undefined_si128 (void)
-{
-  __m128i __Y = __Y;
-  return __Y;
-}
-
-/* Create a vector of zeros.  */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setzero_si128 (void)
-{
-  return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepi32_pd (__m128i __A)
-{
-  return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepi32_ps (__m128i __A)
-{
-  return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtpd_epi32 (__m128d __A)
-{
-  return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtpd_pi32 (__m128d __A)
-{
-  return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtpd_ps (__m128d __A)
-{
-  return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttpd_epi32 (__m128d __A)
-{
-  return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttpd_pi32 (__m128d __A)
-{
-  return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtpi32_pd (__m64 __A)
-{
-  return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtps_epi32 (__m128 __A)
-{
-  return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttps_epi32 (__m128 __A)
-{
-  return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtps_pd (__m128 __A)
-{
-  return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsd_si32 (__m128d __A)
-{
-  return __builtin_ia32_cvtsd2si ((__v2df) __A);
-}
-
-#ifdef __x86_64__
-/* Intel intrinsic.  */
-extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsd_si64 (__m128d __A)
-{
-  return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
-}
-
-/* Microsoft intrinsic.  */
-extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsd_si64x (__m128d __A)
-{
-  return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
-}
-#endif
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttsd_si32 (__m128d __A)
-{
-  return __builtin_ia32_cvttsd2si ((__v2df) __A);
-}
-
-#ifdef __x86_64__
-/* Intel intrinsic.  */
-extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttsd_si64 (__m128d __A)
-{
-  return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
-}
-
-/* Microsoft intrinsic.  */
-extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttsd_si64x (__m128d __A)
-{
-  return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
-}
-#endif
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsd_ss (__m128 __A, __m128d __B)
-{
-  return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsi32_sd (__m128d __A, int __B)
-{
-  return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
-}
-
-#ifdef __x86_64__
-/* Intel intrinsic.  */
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsi64_sd (__m128d __A, long long __B)
-{
-  return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
-}
-
-/* Microsoft intrinsic.  */
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsi64x_sd (__m128d __A, long long __B)
-{
-  return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
-}
-#endif
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtss_sd (__m128d __A, __m128 __B)
-{
-  return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
-{
-  return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
-}
-#else
-#define _mm_shuffle_pd(A, B, N)						\
-  ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A),		\
-				   (__v2df)(__m128d)(B), (int)(N)))
-#endif
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpackhi_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpacklo_pd (__m128d __A, __m128d __B)
-{
-  return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_loadh_pd (__m128d __A, double const *__B)
-{
-  return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_loadl_pd (__m128d __A, double const *__B)
-{
-  return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movemask_pd (__m128d __A)
-{
-  return __builtin_ia32_movmskpd ((__v2df)__A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_packs_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_packs_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_packus_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpackhi_epi8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpackhi_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpackhi_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpackhi_epi64 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpacklo_epi8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpacklo_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpacklo_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpacklo_epi64 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_add_epi8 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v16qu)__A + (__v16qu)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_add_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v8hu)__A + (__v8hu)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_add_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v4su)__A + (__v4su)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_add_epi64 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v2du)__A + (__v2du)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_adds_epi8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_adds_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_adds_epu8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_adds_epu16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sub_epi8 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v16qu)__A - (__v16qu)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sub_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v8hu)__A - (__v8hu)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sub_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v4su)__A - (__v4su)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sub_epi64 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v2du)__A - (__v2du)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_subs_epi8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_subs_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_subs_epu8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_subs_epu16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_madd_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mulhi_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mullo_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v8hu)__A * (__v8hu)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mul_su32 (__m64 __A, __m64 __B)
-{
-  return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mul_epu32 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_slli_epi16 (__m128i __A, int __B)
-{
-  return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_slli_epi32 (__m128i __A, int __B)
-{
-  return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_slli_epi64 (__m128i __A, int __B)
-{
-  return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srai_epi16 (__m128i __A, int __B)
-{
-  return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srai_epi32 (__m128i __A, int __B)
-{
-  return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_bsrli_si128 (__m128i __A, const int __N)
-{
-  return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_bslli_si128 (__m128i __A, const int __N)
-{
-  return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srli_si128 (__m128i __A, const int __N)
-{
-  return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_slli_si128 (__m128i __A, const int __N)
-{
-  return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
-}
-#else
-#define _mm_bsrli_si128(A, N) \
-  ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
-#define _mm_bslli_si128(A, N) \
-  ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
-#define _mm_srli_si128(A, N) \
-  ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
-#define _mm_slli_si128(A, N) \
-  ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
-#endif
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srli_epi16 (__m128i __A, int __B)
-{
-  return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srli_epi32 (__m128i __A, int __B)
-{
-  return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srli_epi64 (__m128i __A, int __B)
-{
-  return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sll_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sll_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sll_epi64 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sra_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sra_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srl_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srl_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srl_epi64 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_and_si128 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v2du)__A & (__v2du)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_andnot_si128 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_or_si128 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v2du)__A | (__v2du)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_xor_si128 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v2du)__A ^ (__v2du)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v16qi)__A == (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v8hi)__A == (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v4si)__A == (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmplt_epi8 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v16qi)__A < (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmplt_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v8hi)__A < (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmplt_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v4si)__A < (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpgt_epi8 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v16qi)__A > (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpgt_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v8hi)__A > (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpgt_epi32 (__m128i __A, __m128i __B)
-{
-  return (__m128i) ((__v4si)__A > (__v4si)__B);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_extract_epi16 (__m128i const __A, int const __N)
-{
-  return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
-{
-  return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
-}
-#else
-#define _mm_extract_epi16(A, N) \
-  ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
-#define _mm_insert_epi16(A, D, N)				\
-  ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A),	\
-					  (int)(D), (int)(N)))
-#endif
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_max_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_max_epu8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_epi16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_epu8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movemask_epi8 (__m128i __A)
-{
-  return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mulhi_epu16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_shufflehi_epi16 (__m128i __A, const int __mask)
-{
-  return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_shufflelo_epi16 (__m128i __A, const int __mask)
-{
-  return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_shuffle_epi32 (__m128i __A, const int __mask)
-{
-  return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask);
-}
-#else
-#define _mm_shufflehi_epi16(A, N) \
-  ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N)))
-#define _mm_shufflelo_epi16(A, N) \
-  ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N)))
-#define _mm_shuffle_epi32(A, N) \
-  ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N)))
-#endif
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
-{
-  __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_avg_epu8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_avg_epu16 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sad_epu8 (__m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_stream_si32 (int *__A, int __B)
-{
-  __builtin_ia32_movnti (__A, __B);
-}
-
-#ifdef __x86_64__
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_stream_si64 (long long int *__A, long long int __B)
-{
-  __builtin_ia32_movnti64 (__A, __B);
-}
-#endif
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_stream_si128 (__m128i *__A, __m128i __B)
-{
-  __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_stream_pd (double *__A, __m128d __B)
-{
-  __builtin_ia32_movntpd (__A, (__v2df)__B);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_clflush (void const *__A)
-{
-  __builtin_ia32_clflush (__A);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_lfence (void)
-{
-  __builtin_ia32_lfence ();
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mfence (void)
-{
-  __builtin_ia32_mfence ();
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsi32_si128 (int __A)
-{
-  return _mm_set_epi32 (0, 0, 0, __A);
-}
-
-#ifdef __x86_64__
-/* Intel intrinsic.  */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsi64_si128 (long long __A)
-{
-  return _mm_set_epi64x (0, __A);
-}
-
-/* Microsoft intrinsic.  */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsi64x_si128 (long long __A)
-{
-  return _mm_set_epi64x (0, __A);
-}
-#endif
-
-/* Casts between various SP, DP, INT vector types.  Note that these do no
-   conversion of values, they just change the type.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_castpd_ps(__m128d __A)
-{
-  return (__m128) __A;
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_castpd_si128(__m128d __A)
-{
-  return (__m128i) __A;
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_castps_pd(__m128 __A)
-{
-  return (__m128d) __A;
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_castps_si128(__m128 __A)
-{
-  return (__m128i) __A;
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_castsi128_ps(__m128i __A)
-{
-  return (__m128) __A;
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_castsi128_pd(__m128i __A)
-{
-  return (__m128d) __A;
-}
-
-#ifdef __DISABLE_SSE2__
-#undef __DISABLE_SSE2__
-#pragma GCC pop_options
-#endif /* __DISABLE_SSE2__ */
-
-#endif /* _EMMINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/f16cintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/f16cintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,98 +1,0 @@
-/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <f16intrin.h> directly; include <x86intrin.h> or <immintrin.h> instead."
-#endif
-
-#ifndef _F16CINTRIN_H_INCLUDED
-#define _F16CINTRIN_H_INCLUDED
-
-#ifndef __F16C__
-#pragma GCC push_options
-#pragma GCC target("f16c")
-#define __DISABLE_F16C__
-#endif /* __F16C__ */
-
-extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_cvtsh_ss (unsigned short __S)
-{
-  __v8hi __H = __extension__ (__v8hi){ (short) __S, 0, 0, 0, 0, 0, 0, 0 };
-  __v4sf __A = __builtin_ia32_vcvtph2ps (__H);
-  return __builtin_ia32_vec_ext_v4sf (__A, 0);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtph_ps (__m128i __A)
-{
-  return (__m128) __builtin_ia32_vcvtph2ps ((__v8hi) __A);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtph_ps (__m128i __A)
-{
-  return (__m256) __builtin_ia32_vcvtph2ps256 ((__v8hi) __A);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_cvtss_sh (float __F, const int __I)
-{
-  __v4sf __A =  __extension__ (__v4sf){ __F, 0, 0, 0 };
-  __v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I);
-  return (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtps_ph (__m128 __A, const int __I)
-{
-  return (__m128i) __builtin_ia32_vcvtps2ph ((__v4sf) __A, __I);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_cvtps_ph (__m256 __A, const int __I)
-{
-  return (__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf) __A, __I);
-}
-#else
-#define _cvtss_sh(__F, __I)						\
-  (__extension__ 							\
-   ({									\
-      __v4sf __A =  __extension__ (__v4sf){ __F, 0, 0, 0 };		\
-      __v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I);			\
-      (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0);		\
-    }))
-
-#define _mm_cvtps_ph(A, I) \
-  ((__m128i) __builtin_ia32_vcvtps2ph ((__v4sf)(__m128) A, (int) (I)))
-
-#define _mm256_cvtps_ph(A, I) \
-  ((__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf)(__m256) A, (int) (I)))
-#endif /* __OPTIMIZE */
-
-#ifdef __DISABLE_F16C__
-#undef __DISABLE_F16C__
-#pragma GCC pop_options
-#endif /* __DISABLE_F16C__ */
-
-#endif /* _F16CINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/float.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/float.h	(revision 1046)
+++ 	(revision )
@@ -1,275 +1,0 @@
-/* Copyright (C) 2002-2015 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-/*
- * ISO C Standard:  5.2.4.2.2  Characteristics of floating types <float.h>
- */
-
-#ifndef _FLOAT_H___
-#define _FLOAT_H___
-
-/* Radix of exponent representation, b. */
-#undef FLT_RADIX
-#define FLT_RADIX	__FLT_RADIX__
-
-/* Number of base-FLT_RADIX digits in the significand, p.  */
-#undef FLT_MANT_DIG
-#undef DBL_MANT_DIG
-#undef LDBL_MANT_DIG
-#define FLT_MANT_DIG	__FLT_MANT_DIG__
-#define DBL_MANT_DIG	__DBL_MANT_DIG__
-#define LDBL_MANT_DIG	__LDBL_MANT_DIG__
-
-/* Number of decimal digits, q, such that any floating-point number with q
-   decimal digits can be rounded into a floating-point number with p radix b
-   digits and back again without change to the q decimal digits,
-
-	p * log10(b)			if b is a power of 10
-	floor((p - 1) * log10(b))	otherwise
-*/
-#undef FLT_DIG
-#undef DBL_DIG
-#undef LDBL_DIG
-#define FLT_DIG		__FLT_DIG__
-#define DBL_DIG		__DBL_DIG__
-#define LDBL_DIG	__LDBL_DIG__
-
-/* Minimum int x such that FLT_RADIX**(x-1) is a normalized float, emin */
-#undef FLT_MIN_EXP
-#undef DBL_MIN_EXP
-#undef LDBL_MIN_EXP
-#define FLT_MIN_EXP	__FLT_MIN_EXP__
-#define DBL_MIN_EXP	__DBL_MIN_EXP__
-#define LDBL_MIN_EXP	__LDBL_MIN_EXP__
-
-/* Minimum negative integer such that 10 raised to that power is in the
-   range of normalized floating-point numbers,
-
-	ceil(log10(b) * (emin - 1))
-*/
-#undef FLT_MIN_10_EXP
-#undef DBL_MIN_10_EXP
-#undef LDBL_MIN_10_EXP
-#define FLT_MIN_10_EXP	__FLT_MIN_10_EXP__
-#define DBL_MIN_10_EXP	__DBL_MIN_10_EXP__
-#define LDBL_MIN_10_EXP	__LDBL_MIN_10_EXP__
-
-/* Maximum int x such that FLT_RADIX**(x-1) is a representable float, emax.  */
-#undef FLT_MAX_EXP
-#undef DBL_MAX_EXP
-#undef LDBL_MAX_EXP
-#define FLT_MAX_EXP	__FLT_MAX_EXP__
-#define DBL_MAX_EXP	__DBL_MAX_EXP__
-#define LDBL_MAX_EXP	__LDBL_MAX_EXP__
-
-/* Maximum integer such that 10 raised to that power is in the range of
-   representable finite floating-point numbers,
-
-	floor(log10((1 - b**-p) * b**emax))
-*/
-#undef FLT_MAX_10_EXP
-#undef DBL_MAX_10_EXP
-#undef LDBL_MAX_10_EXP
-#define FLT_MAX_10_EXP	__FLT_MAX_10_EXP__
-#define DBL_MAX_10_EXP	__DBL_MAX_10_EXP__
-#define LDBL_MAX_10_EXP	__LDBL_MAX_10_EXP__
-
-/* Maximum representable finite floating-point number,
-
-	(1 - b**-p) * b**emax
-*/
-#undef FLT_MAX
-#undef DBL_MAX
-#undef LDBL_MAX
-#define FLT_MAX		__FLT_MAX__
-#define DBL_MAX		__DBL_MAX__
-#define LDBL_MAX	__LDBL_MAX__
-
-/* The difference between 1 and the least value greater than 1 that is
-   representable in the given floating point type, b**1-p.  */
-#undef FLT_EPSILON
-#undef DBL_EPSILON
-#undef LDBL_EPSILON
-#define FLT_EPSILON	__FLT_EPSILON__
-#define DBL_EPSILON	__DBL_EPSILON__
-#define LDBL_EPSILON	__LDBL_EPSILON__
-
-/* Minimum normalized positive floating-point number, b**(emin - 1).  */
-#undef FLT_MIN
-#undef DBL_MIN
-#undef LDBL_MIN
-#define FLT_MIN		__FLT_MIN__
-#define DBL_MIN		__DBL_MIN__
-#define LDBL_MIN	__LDBL_MIN__
-
-/* Addition rounds to 0: zero, 1: nearest, 2: +inf, 3: -inf, -1: unknown.  */
-/* ??? This is supposed to change with calls to fesetround in <fenv.h>.  */
-#undef FLT_ROUNDS
-#define FLT_ROUNDS 1
-
-#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
-/* The floating-point expression evaluation method.
-        -1  indeterminate
-         0  evaluate all operations and constants just to the range and
-            precision of the type
-         1  evaluate operations and constants of type float and double
-            to the range and precision of the double type, evaluate
-            long double operations and constants to the range and
-            precision of the long double type
-         2  evaluate all operations and constants to the range and
-            precision of the long double type
-
-   ??? This ought to change with the setting of the fp control word;
-   the value provided by the compiler assumes the widest setting.  */
-#undef FLT_EVAL_METHOD
-#define FLT_EVAL_METHOD	__FLT_EVAL_METHOD__
-
-/* Number of decimal digits, n, such that any floating-point number in the
-   widest supported floating type with pmax radix b digits can be rounded
-   to a floating-point number with n decimal digits and back again without
-   change to the value,
-
-	pmax * log10(b)			if b is a power of 10
-	ceil(1 + pmax * log10(b))	otherwise
-*/
-#undef DECIMAL_DIG
-#define DECIMAL_DIG	__DECIMAL_DIG__
-
-#endif /* C99 */
-
-#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
-/* Versions of DECIMAL_DIG for each floating-point type.  */
-#undef FLT_DECIMAL_DIG
-#undef DBL_DECIMAL_DIG
-#undef LDBL_DECIMAL_DIG
-#define FLT_DECIMAL_DIG		__FLT_DECIMAL_DIG__
-#define DBL_DECIMAL_DIG		__DBL_DECIMAL_DIG__
-#define LDBL_DECIMAL_DIG	__DECIMAL_DIG__
-
-/* Whether types support subnormal numbers.  */
-#undef FLT_HAS_SUBNORM
-#undef DBL_HAS_SUBNORM
-#undef LDBL_HAS_SUBNORM
-#define FLT_HAS_SUBNORM		__FLT_HAS_DENORM__
-#define DBL_HAS_SUBNORM		__DBL_HAS_DENORM__
-#define LDBL_HAS_SUBNORM	__LDBL_HAS_DENORM__
-
-/* Minimum positive values, including subnormals.  */
-#undef FLT_TRUE_MIN
-#undef DBL_TRUE_MIN
-#undef LDBL_TRUE_MIN
-#define FLT_TRUE_MIN	__FLT_DENORM_MIN__
-#define DBL_TRUE_MIN	__DBL_DENORM_MIN__
-#define LDBL_TRUE_MIN	__LDBL_DENORM_MIN__
-
-#endif /* C11 */
-
-#ifdef __STDC_WANT_DEC_FP__
-/* Draft Technical Report 24732, extension for decimal floating-point
-   arithmetic: Characteristic of decimal floating types <float.h>.  */
-
-/* Number of base-FLT_RADIX digits in the significand, p.  */
-#undef DEC32_MANT_DIG
-#undef DEC64_MANT_DIG
-#undef DEC128_MANT_DIG
-#define DEC32_MANT_DIG	__DEC32_MANT_DIG__
-#define DEC64_MANT_DIG	__DEC64_MANT_DIG__
-#define DEC128_MANT_DIG	__DEC128_MANT_DIG__
-
-/* Minimum exponent. */
-#undef DEC32_MIN_EXP
-#undef DEC64_MIN_EXP
-#undef DEC128_MIN_EXP
-#define DEC32_MIN_EXP	__DEC32_MIN_EXP__
-#define DEC64_MIN_EXP	__DEC64_MIN_EXP__
-#define DEC128_MIN_EXP	__DEC128_MIN_EXP__
-
-/* Maximum exponent. */
-#undef DEC32_MAX_EXP
-#undef DEC64_MAX_EXP
-#undef DEC128_MAX_EXP
-#define DEC32_MAX_EXP	__DEC32_MAX_EXP__
-#define DEC64_MAX_EXP	__DEC64_MAX_EXP__
-#define DEC128_MAX_EXP	__DEC128_MAX_EXP__
-
-/* Maximum representable finite decimal floating-point number
-   (there are 6, 15, and 33 9s after the decimal points respectively). */
-#undef DEC32_MAX
-#undef DEC64_MAX
-#undef DEC128_MAX
-#define DEC32_MAX   __DEC32_MAX__
-#define DEC64_MAX   __DEC64_MAX__
-#define DEC128_MAX  __DEC128_MAX__
-
-/* The difference between 1 and the least value greater than 1 that is
-   representable in the given floating point type. */
-#undef DEC32_EPSILON
-#undef DEC64_EPSILON
-#undef DEC128_EPSILON
-#define DEC32_EPSILON	__DEC32_EPSILON__
-#define DEC64_EPSILON	__DEC64_EPSILON__
-#define DEC128_EPSILON	__DEC128_EPSILON__
-
-/* Minimum normalized positive floating-point number. */
-#undef DEC32_MIN
-#undef DEC64_MIN
-#undef DEC128_MIN
-#define DEC32_MIN	__DEC32_MIN__
-#define DEC64_MIN	__DEC64_MIN__
-#define DEC128_MIN	__DEC128_MIN__
-
-/* Minimum subnormal positive floating-point number. */
-#undef DEC32_SUBNORMAL_MIN
-#undef DEC64_SUBNORMAL_MIN
-#undef DEC128_SUBNORMAL_MIN
-#define DEC32_SUBNORMAL_MIN       __DEC32_SUBNORMAL_MIN__
-#define DEC64_SUBNORMAL_MIN       __DEC64_SUBNORMAL_MIN__
-#define DEC128_SUBNORMAL_MIN      __DEC128_SUBNORMAL_MIN__
-
-/* The floating-point expression evaluation method.
-         -1  indeterminate
-         0  evaluate all operations and constants just to the range and
-            precision of the type
-         1  evaluate operations and constants of type _Decimal32 
-	    and _Decimal64 to the range and precision of the _Decimal64 
-            type, evaluate _Decimal128 operations and constants to the 
-	    range and precision of the _Decimal128 type;
-	 2  evaluate all operations and constants to the range and
-	    precision of the _Decimal128 type.  */
-
-#undef DEC_EVAL_METHOD
-#define DEC_EVAL_METHOD	__DEC_EVAL_METHOD__
-
-#endif /* __STDC_WANT_DEC_FP__ */
-
-#if defined (__MINGW32__) && ! defined (_MINGW_FLOAT_H_)
-/* MinGW.org's runtime libraries provide a supplementary float.h, which
- * must also be included to complement this one.  Ideally that MinGW.org
- * header should be included first, and it will include this one, but in
- * a default configuration it doesn't normally happen this way; when we
- * didn't see it first, include the MinGW.org header now!
- */
-# include_next <float.h>
-#endif
-
-#endif /* _FLOAT_H___ */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/fma4intrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/fma4intrin.h	(revision 1046)
+++ 	(revision )
@@ -1,241 +1,0 @@
-/* Copyright (C) 2007-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _X86INTRIN_H_INCLUDED
-# error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
-#endif
-
-#ifndef _FMA4INTRIN_H_INCLUDED
-#define _FMA4INTRIN_H_INCLUDED
-
-/* We need definitions from the SSE4A, SSE3, SSE2 and SSE header files.  */
-#include <ammintrin.h>
-
-#ifndef __FMA4__
-#pragma GCC push_options
-#pragma GCC target("fma4")
-#define __DISABLE_FMA4__
-#endif /* __FMA4__ */
-
-/* 128b Floating point multiply/add type instructions.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_macc_ps (__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_macc_pd (__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_macc_ss (__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_macc_sd (__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_msub_ps (__m128 __A, __m128 __B, __m128 __C)
-
-{
-  return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_msub_pd (__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_msub_ss (__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_msub_sd (__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_nmacc_ps (__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_nmacc_pd (__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_nmacc_ss (__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_nmacc_sd (__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_nmsub_ps (__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_nmsub_pd (__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_nmsub_ss (__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_nmsub_sd (__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maddsub_ps (__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maddsub_pd (__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_msubadd_ps (__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_msubadd_pd (__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
-}
-
-/* 256b Floating point multiply/add type instructions.  */
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_macc_ps (__m256 __A, __m256 __B, __m256 __C)
-{
-  return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_macc_pd (__m256d __A, __m256d __B, __m256d __C)
-{
-  return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_msub_ps (__m256 __A, __m256 __B, __m256 __C)
-
-{
-  return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_msub_pd (__m256d __A, __m256d __B, __m256d __C)
-{
-  return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_nmacc_ps (__m256 __A, __m256 __B, __m256 __C)
-{
-  return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_nmacc_pd (__m256d __A, __m256d __B, __m256d __C)
-{
-  return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, (__v4df)__C);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_nmsub_ps (__m256 __A, __m256 __B, __m256 __C)
-{
-  return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_nmsub_pd (__m256d __A, __m256d __B, __m256d __C)
-{
-  return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maddsub_ps (__m256 __A, __m256 __B, __m256 __C)
-{
-  return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maddsub_pd (__m256d __A, __m256d __B, __m256d __C)
-{
-  return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_msubadd_ps (__m256 __A, __m256 __B, __m256 __C)
-{
-  return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_msubadd_pd (__m256d __A, __m256d __B, __m256d __C)
-{
-  return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
-}
-
-#ifdef __DISABLE_FMA4__
-#undef __DISABLE_FMA4__
-#pragma GCC pop_options
-#endif /* __DISABLE_FMA4__ */
-
-#endif
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/fmaintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/fmaintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,302 +1,0 @@
-/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-# error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _FMAINTRIN_H_INCLUDED
-#define _FMAINTRIN_H_INCLUDED
-
-#ifndef __FMA__
-#pragma GCC push_options
-#pragma GCC target("fma")
-#define __DISABLE_FMA__
-#endif /* __FMA__ */
-
-extern __inline __m128d
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmadd_pd (__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B,
-                                           (__v2df)__C);
-}
-
-extern __inline __m256d
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_fmadd_pd (__m256d __A, __m256d __B, __m256d __C)
-{
-  return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B,
-                                              (__v4df)__C);
-}
-
-extern __inline __m128
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmadd_ps (__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B,
-                                          (__v4sf)__C);
-}
-
-extern __inline __m256
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
-{
-  return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B,
-                                             (__v8sf)__C);
-}
-
-extern __inline __m128d
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmadd_sd (__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d) __builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B,
-                                             (__v2df)__C);
-}
-
-extern __inline __m128
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmadd_ss (__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B,
-                                            (__v4sf)__C);
-}
-
-extern __inline __m128d
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B,
-                                           -(__v2df)__C);
-}
-
-extern __inline __m256d
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C)
-{
-  return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B,
-                                              -(__v4df)__C);
-}
-
-extern __inline __m128
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B,
-                                          -(__v4sf)__C);
-}
-
-extern __inline __m256
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C)
-{
-  return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B,
-                                             -(__v8sf)__C);
-}
-
-extern __inline __m128d
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B,
-                                            -(__v2df)__C);
-}
-
-extern __inline __m128
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B,
-                                           -(__v4sf)__C);
-}
-
-extern __inline __m128d
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d)__builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B,
-                                           (__v2df)__C);
-}
-
-extern __inline __m256d
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C)
-{
-  return (__m256d)__builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B,
-                                              (__v4df)__C);
-}
-
-extern __inline __m128
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128)__builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B,
-                                          (__v4sf)__C);
-}
-
-extern __inline __m256
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C)
-{
-  return (__m256)__builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B,
-                                             (__v8sf)__C);
-}
-
-extern __inline __m128d
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, -(__v2df)__B,
-                                            (__v2df)__C);
-}
-
-extern __inline __m128
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, -(__v4sf)__B,
-                                           (__v4sf)__C);
-}
-
-extern __inline __m128d
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d)__builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B,
-                                           -(__v2df)__C);
-}
-
-extern __inline __m256d
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C)
-{
-  return (__m256d)__builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B,
-                                              -(__v4df)__C);
-}
-
-extern __inline __m128
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128)__builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B,
-                                          -(__v4sf)__C);
-}
-
-extern __inline __m256
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C)
-{
-  return (__m256)__builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B,
-                                             -(__v8sf)__C);
-}
-
-extern __inline __m128d
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, -(__v2df)__B,
-                                            -(__v2df)__C);
-}
-
-extern __inline __m128
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, -(__v4sf)__B,
-                                           -(__v4sf)__C);
-}
-
-extern __inline __m128d
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B,
-                                              (__v2df)__C);
-}
-
-extern __inline __m256d
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C)
-{
-  return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A,
-                                                 (__v4df)__B,
-                                                 (__v4df)__C);
-}
-
-extern __inline __m128
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B,
-                                             (__v4sf)__C);
-}
-
-extern __inline __m256
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C)
-{
-  return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A,
-                                                (__v8sf)__B,
-                                                (__v8sf)__C);
-}
-
-extern __inline __m128d
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B,
-                                              -(__v2df)__C);
-}
-
-extern __inline __m256d
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C)
-{
-  return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A,
-                                                 (__v4df)__B,
-                                                 -(__v4df)__C);
-}
-
-extern __inline __m128
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B,
-                                             -(__v4sf)__C);
-}
-
-extern __inline __m256
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C)
-{
-  return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A,
-                                                (__v8sf)__B,
-                                                -(__v8sf)__C);
-}
-
-#ifdef __DISABLE_FMA__
-#undef __DISABLE_FMA__
-#pragma GCC pop_options
-#endif /* __DISABLE_FMA__ */
-
-#endif
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/fxsrintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/fxsrintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,73 +1,0 @@
-/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* #if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED */
-/* # error "Never use <fxsrintrin.h> directly; include <x86intrin.h> instead." */
-/* #endif */
-
-#ifndef _FXSRINTRIN_H_INCLUDED
-#define _FXSRINTRIN_H_INCLUDED
-
-#ifndef __FXSR__
-#pragma GCC push_options
-#pragma GCC target("fxsr")
-#define __DISABLE_FXSR__
-#endif /* __FXSR__ */
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_fxsave (void *__P)
-{
-  return __builtin_ia32_fxsave (__P);
-}
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_fxrstor (void *__P)
-{
-  return __builtin_ia32_fxrstor (__P);
-}
-
-#ifdef __x86_64__
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_fxsave64 (void *__P)
-{
-    return __builtin_ia32_fxsave64 (__P);
-}
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_fxrstor64 (void *__P)
-{
-    return __builtin_ia32_fxrstor64 (__P);
-}
-#endif
-
-#ifdef __DISABLE_FXSR__
-#undef __DISABLE_FXSR__
-#pragma GCC pop_options
-#endif /* __DISABLE_FXSR__ */
-
-
-#endif /* _FXSRINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/ia32intrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/ia32intrin.h	(revision 1046)
+++ 	(revision )
@@ -1,299 +1,0 @@
-/* Copyright (C) 2009-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _X86INTRIN_H_INCLUDED
-# error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead."
-#endif
-
-/* 32bit bsf */
-extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__bsfd (int __X)
-{
-  return __builtin_ctz (__X);
-}
-
-/* 32bit bsr */
-extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__bsrd (int __X)
-{
-  return __builtin_ia32_bsrsi (__X);
-}
-
-/* 32bit bswap */
-extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__bswapd (int __X)
-{
-  return __builtin_bswap32 (__X);
-}
-
-#ifndef __SSE4_2__
-#pragma GCC push_options
-#pragma GCC target("sse4.2")
-#define __DISABLE_SSE4_2__
-#endif /* __SSE4_2__ */
-
-/* 32bit accumulate CRC32 (polynomial 0x11EDC6F41) value.  */
-extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__crc32b (unsigned int __C, unsigned char __V)
-{
-  return __builtin_ia32_crc32qi (__C, __V);
-}
-
-extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__crc32w (unsigned int __C, unsigned short __V)
-{
-  return __builtin_ia32_crc32hi (__C, __V);
-}
-
-extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__crc32d (unsigned int __C, unsigned int __V)
-{
-  return __builtin_ia32_crc32si (__C, __V);
-}
-
-#ifdef __DISABLE_SSE4_2__
-#undef __DISABLE_SSE4_2__
-#pragma GCC pop_options
-#endif /* __DISABLE_SSE4_2__ */
-
-/* 32bit popcnt */
-extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__popcntd (unsigned int __X)
-{
-  return __builtin_popcount (__X);
-}
-
-/* rdpmc */
-extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__rdpmc (int __S)
-{
-  return __builtin_ia32_rdpmc (__S);
-}
-
-/* rdtsc */
-extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__rdtsc (void)
-{
-  return __builtin_ia32_rdtsc ();
-}
-
-/* rdtscp */
-extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__rdtscp (unsigned int *__A)
-{
-  return __builtin_ia32_rdtscp (__A);
-}
-
-/* 8bit rol */
-extern __inline unsigned char
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__rolb (unsigned char __X, int __C)
-{
-  return __builtin_ia32_rolqi (__X, __C);
-}
-
-/* 16bit rol */
-extern __inline unsigned short
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__rolw (unsigned short __X, int __C)
-{
-  return __builtin_ia32_rolhi (__X, __C);
-}
-
-/* 32bit rol */
-extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__rold (unsigned int __X, int __C)
-{
-  return (__X << __C) | (__X >> (32 - __C));
-}
-
-/* 8bit ror */
-extern __inline unsigned char
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__rorb (unsigned char __X, int __C)
-{
-  return __builtin_ia32_rorqi (__X, __C);
-}
-
-/* 16bit ror */
-extern __inline unsigned short
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__rorw (unsigned short __X, int __C)
-{
-  return __builtin_ia32_rorhi (__X, __C);
-}
-
-/* 32bit ror */
-extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__rord (unsigned int __X, int __C)
-{
-  return (__X >> __C) | (__X << (32 - __C));
-}
-
-/* Pause */
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__pause (void)
-{
-  __builtin_ia32_pause ();
-}
-
-#ifdef __x86_64__
-/* 64bit bsf */
-extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__bsfq (long long __X)
-{
-  return __builtin_ctzll (__X);
-}
-
-/* 64bit bsr */
-extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__bsrq (long long __X)
-{
-  return __builtin_ia32_bsrdi (__X);
-}
-
-/* 64bit bswap */
-extern __inline long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__bswapq (long long __X)
-{
-  return __builtin_bswap64 (__X);
-}
-
-#ifndef __SSE4_2__
-#pragma GCC push_options
-#pragma GCC target("sse4.2")
-#define __DISABLE_SSE4_2__
-#endif /* __SSE4_2__ */
-
-/* 64bit accumulate CRC32 (polynomial 0x11EDC6F41) value.  */
-extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__crc32q (unsigned long long __C, unsigned long long __V)
-{
-  return __builtin_ia32_crc32di (__C, __V);
-}
-
-#ifdef __DISABLE_SSE4_2__
-#undef __DISABLE_SSE4_2__
-#pragma GCC pop_options
-#endif /* __DISABLE_SSE4_2__ */
-
-/* 64bit popcnt */
-extern __inline long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__popcntq (unsigned long long __X)
-{
-  return __builtin_popcountll (__X);
-}
-
-/* 64bit rol */
-extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__rolq (unsigned long long __X, int __C)
-{
-  return (__X << __C) | (__X >> (64 - __C));
-}
-
-/* 64bit ror */
-extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__rorq (unsigned long long __X, int __C)
-{
-  return (__X >> __C) | (__X << (64 - __C));
-}
-
-/* Read flags register */
-extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__readeflags (void)
-{
-  return __builtin_ia32_readeflags_u64 ();
-}
-
-/* Write flags register */
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__writeeflags (unsigned long long X)
-{
-  __builtin_ia32_writeeflags_u64 (X);
-}
-
-#define _bswap64(a)		__bswapq(a)
-#define _popcnt64(a)		__popcntq(a)
-#else
-
-/* Read flags register */
-extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__readeflags (void)
-{
-  return __builtin_ia32_readeflags_u32 ();
-}
-
-/* Write flags register */
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__writeeflags (unsigned int X)
-{
-  __builtin_ia32_writeeflags_u32 (X);
-}
-
-#endif
-
-/* On LP64 systems, longs are 64-bit.  Use the appropriate rotate
- * function.  */
-#ifdef __LP64__
-#define _lrotl(a,b)		__rolq((a), (b))
-#define _lrotr(a,b)		__rorq((a), (b))
-#else
-#define _lrotl(a,b)		__rold((a), (b))
-#define _lrotr(a,b)		__rord((a), (b))
-#endif
-
-#define _bit_scan_forward(a)	__bsfd(a)
-#define _bit_scan_reverse(a)	__bsrd(a)
-#define _bswap(a)		__bswapd(a)
-#define _popcnt32(a)		__popcntd(a)
-#define _rdpmc(a)		__rdpmc(a)
-#define _rdtsc()		__rdtsc()
-#define _rdtscp(a)		__rdtscp(a)
-#define _rotwl(a,b)		__rolw((a), (b))
-#define _rotwr(a,b)		__rorw((a), (b))
-#define _rotl(a,b)		__rold((a), (b))
-#define _rotr(a,b)		__rord((a), (b))
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/immintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/immintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,195 +1,0 @@
-/* Copyright (C) 2008-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-#define _IMMINTRIN_H_INCLUDED
-
-#include <mmintrin.h>
-
-#include <xmmintrin.h>
-
-#include <emmintrin.h>
-
-#include <pmmintrin.h>
-
-#include <tmmintrin.h>
-
-#include <smmintrin.h>
-
-#include <wmmintrin.h>
-
-#include <avxintrin.h>
-
-#include <avx2intrin.h>
-
-#include <avx512fintrin.h>
-
-#include <avx512erintrin.h>
-
-#include <avx512pfintrin.h>
-
-#include <avx512cdintrin.h>
-
-#include <avx512vlintrin.h>
-
-#include <avx512bwintrin.h>
-
-#include <avx512dqintrin.h>
-
-#include <avx512vlbwintrin.h>
-
-#include <avx512vldqintrin.h>
-
-#include <avx512ifmaintrin.h>
-
-#include <avx512ifmavlintrin.h>
-
-#include <avx512vbmiintrin.h>
-
-#include <avx512vbmivlintrin.h>
-
-#include <shaintrin.h>
-
-#include <lzcntintrin.h>
-
-#include <bmiintrin.h>
-
-#include <bmi2intrin.h>
-
-#include <fmaintrin.h>
-
-#include <f16cintrin.h>
-
-#include <rtmintrin.h>
-
-#include <xtestintrin.h>
-
-#ifndef __RDRND__
-#pragma GCC push_options
-#pragma GCC target("rdrnd")
-#define __DISABLE_RDRND__
-#endif /* __RDRND__ */
-extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_rdrand16_step (unsigned short *__P)
-{
-  return __builtin_ia32_rdrand16_step (__P);
-}
-
-extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_rdrand32_step (unsigned int *__P)
-{
-  return __builtin_ia32_rdrand32_step (__P);
-}
-#ifdef __DISABLE_RDRND__
-#undef __DISABLE_RDRND__
-#pragma GCC pop_options
-#endif /* __DISABLE_RDRND__ */
-
-#ifdef  __x86_64__
-
-#ifndef __FSGSBASE__
-#pragma GCC push_options
-#pragma GCC target("fsgsbase")
-#define __DISABLE_FSGSBASE__
-#endif /* __FSGSBASE__ */
-extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_readfsbase_u32 (void)
-{
-  return __builtin_ia32_rdfsbase32 ();
-}
-
-extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_readfsbase_u64 (void)
-{
-  return __builtin_ia32_rdfsbase64 ();
-}
-
-extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_readgsbase_u32 (void)
-{
-  return __builtin_ia32_rdgsbase32 ();
-}
-
-extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_readgsbase_u64 (void)
-{
-  return __builtin_ia32_rdgsbase64 ();
-}
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_writefsbase_u32 (unsigned int __B)
-{
-  __builtin_ia32_wrfsbase32 (__B);
-}
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_writefsbase_u64 (unsigned long long __B)
-{
-  __builtin_ia32_wrfsbase64 (__B);
-}
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_writegsbase_u32 (unsigned int __B)
-{
-  __builtin_ia32_wrgsbase32 (__B);
-}
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_writegsbase_u64 (unsigned long long __B)
-{
-  __builtin_ia32_wrgsbase64 (__B);
-}
-#ifdef __DISABLE_FSGSBASE__
-#undef __DISABLE_FSGSBASE__
-#pragma GCC pop_options
-#endif /* __DISABLE_FSGSBASE__ */
-
-#ifndef __RDRND__
-#pragma GCC push_options
-#pragma GCC target("rdrnd")
-#define __DISABLE_RDRND__
-#endif /* __RDRND__ */
-extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_rdrand64_step (unsigned long long *__P)
-{
-  return __builtin_ia32_rdrand64_step (__P);
-}
-#ifdef __DISABLE_RDRND__
-#undef __DISABLE_RDRND__
-#pragma GCC pop_options
-#endif /* __DISABLE_RDRND__ */
-
-#endif /* __x86_64__  */
-
-#endif /* _IMMINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/iso646.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/iso646.h	(revision 1046)
+++ 	(revision )
@@ -1,45 +1,0 @@
-/* Copyright (C) 1997-2015 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-/*
- * ISO C Standard:  7.9  Alternative spellings  <iso646.h>
- */
-
-#ifndef _ISO646_H
-#define _ISO646_H
-
-#ifndef __cplusplus
-#define and	&&
-#define and_eq	&=
-#define bitand	&
-#define bitor	|
-#define compl	~
-#define not	!
-#define not_eq	!=
-#define or	||
-#define or_eq	|=
-#define xor	^
-#define xor_eq	^=
-#endif
-
-#endif
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/lwpintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/lwpintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,105 +1,0 @@
-/* Copyright (C) 2007-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _X86INTRIN_H_INCLUDED
-# error "Never use <lwpintrin.h> directly; include <x86intrin.h> instead."
-#endif
-
-#ifndef _LWPINTRIN_H_INCLUDED
-#define _LWPINTRIN_H_INCLUDED
-
-#ifndef __LWP__
-#pragma GCC push_options
-#pragma GCC target("lwp")
-#define __DISABLE_LWP__
-#endif /* __LWP__ */
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__llwpcb (void *pcbAddress)
-{
-  __builtin_ia32_llwpcb (pcbAddress);
-}
-
-extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__slwpcb (void)
-{
-  return __builtin_ia32_slwpcb ();
-}
-
-#ifdef __OPTIMIZE__
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__lwpval32 (unsigned int data2, unsigned int data1, unsigned int flags)
-{
-  __builtin_ia32_lwpval32 (data2, data1, flags);
-}
-
-#ifdef __x86_64__
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__lwpval64 (unsigned long long data2, unsigned int data1, unsigned int flags)
-{
-  __builtin_ia32_lwpval64 (data2, data1, flags);
-}
-#endif
-#else
-#define __lwpval32(D2, D1, F) \
-  (__builtin_ia32_lwpval32 ((unsigned int) (D2), (unsigned int) (D1), \
-			    (unsigned int) (F)))
-#ifdef __x86_64__
-#define __lwpval64(D2, D1, F) \
-  (__builtin_ia32_lwpval64 ((unsigned long long) (D2), (unsigned int) (D1), \
-			    (unsigned int) (F)))
-#endif
-#endif
-
-
-#ifdef __OPTIMIZE__
-extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__lwpins32 (unsigned int data2, unsigned int data1, unsigned int flags)
-{
-  return __builtin_ia32_lwpins32 (data2, data1, flags);
-}
-
-#ifdef __x86_64__
-extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__lwpins64 (unsigned long long data2, unsigned int data1, unsigned int flags)
-{
-  return __builtin_ia32_lwpins64 (data2, data1, flags);
-}
-#endif
-#else
-#define __lwpins32(D2, D1, F) \
-  (__builtin_ia32_lwpins32 ((unsigned int) (D2), (unsigned int) (D1), \
-			    (unsigned int) (F)))
-#ifdef __x86_64__
-#define __lwpins64(D2, D1, F) \
-  (__builtin_ia32_lwpins64 ((unsigned long long) (D2), (unsigned int) (D1), \
-			    (unsigned int) (F)))
-#endif
-#endif
-
-#ifdef __DISABLE_LWP__
-#undef __DISABLE_LWP__
-#pragma GCC pop_options
-#endif /* __DISABLE_LWP__ */
-
-#endif /* _LWPINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/lzcntintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/lzcntintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,75 +1,0 @@
-/* Copyright (C) 2009-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <lzcntintrin.h> directly; include <x86intrin.h> instead."
-#endif
-
-
-#ifndef _LZCNTINTRIN_H_INCLUDED
-#define _LZCNTINTRIN_H_INCLUDED
-
-#ifndef __LZCNT__
-#pragma GCC push_options
-#pragma GCC target("lzcnt")
-#define __DISABLE_LZCNT__
-#endif /* __LZCNT__ */
-
-extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__lzcnt16 (unsigned short __X)
-{
-  return __builtin_clzs (__X);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__lzcnt32 (unsigned int __X)
-{
-  return __builtin_clz (__X);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_lzcnt_u32 (unsigned int __X)
-{
-  return __builtin_clz (__X);
-}
-
-#ifdef __x86_64__
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__lzcnt64 (unsigned long long __X)
-{
-  return __builtin_clzll (__X);
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_lzcnt_u64 (unsigned long long __X)
-{
-  return __builtin_clzll (__X);
-}
-#endif
-
-#ifdef __DISABLE_LZCNT__
-#undef __DISABLE_LZCNT__
-#pragma GCC pop_options
-#endif /* __DISABLE_LZCNT__ */
-
-#endif /* _LZCNTINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/mm3dnow.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/mm3dnow.h	(revision 1046)
+++ 	(revision )
@@ -1,218 +1,0 @@
-/* Copyright (C) 2004-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* Implemented from the mm3dnow.h (of supposedly AMD origin) included with
-   MSVC 7.1.  */
-
-#ifndef _MM3DNOW_H_INCLUDED
-#define _MM3DNOW_H_INCLUDED
-
-#include <mmintrin.h>
-#include <prfchwintrin.h>
-
-#ifndef __3dNOW__
-#pragma GCC push_options
-#pragma GCC target("3dnow")
-#define __DISABLE_3dNOW__
-#endif /* __3dNOW__ */
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_femms (void)
-{
-  __builtin_ia32_femms();
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pavgusb (__m64 __A, __m64 __B)
-{
-  return (__m64)__builtin_ia32_pavgusb ((__v8qi)__A, (__v8qi)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pf2id (__m64 __A)
-{
-  return (__m64)__builtin_ia32_pf2id ((__v2sf)__A);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pfacc (__m64 __A, __m64 __B)
-{
-  return (__m64)__builtin_ia32_pfacc ((__v2sf)__A, (__v2sf)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pfadd (__m64 __A, __m64 __B)
-{
-  return (__m64)__builtin_ia32_pfadd ((__v2sf)__A, (__v2sf)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pfcmpeq (__m64 __A, __m64 __B)
-{
-  return (__m64)__builtin_ia32_pfcmpeq ((__v2sf)__A, (__v2sf)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pfcmpge (__m64 __A, __m64 __B)
-{
-  return (__m64)__builtin_ia32_pfcmpge ((__v2sf)__A, (__v2sf)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pfcmpgt (__m64 __A, __m64 __B)
-{
-  return (__m64)__builtin_ia32_pfcmpgt ((__v2sf)__A, (__v2sf)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pfmax (__m64 __A, __m64 __B)
-{
-  return (__m64)__builtin_ia32_pfmax ((__v2sf)__A, (__v2sf)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pfmin (__m64 __A, __m64 __B)
-{
-  return (__m64)__builtin_ia32_pfmin ((__v2sf)__A, (__v2sf)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pfmul (__m64 __A, __m64 __B)
-{
-  return (__m64)__builtin_ia32_pfmul ((__v2sf)__A, (__v2sf)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pfrcp (__m64 __A)
-{
-  return (__m64)__builtin_ia32_pfrcp ((__v2sf)__A);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pfrcpit1 (__m64 __A, __m64 __B)
-{
-  return (__m64)__builtin_ia32_pfrcpit1 ((__v2sf)__A, (__v2sf)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pfrcpit2 (__m64 __A, __m64 __B)
-{
-  return (__m64)__builtin_ia32_pfrcpit2 ((__v2sf)__A, (__v2sf)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pfrsqrt (__m64 __A)
-{
-  return (__m64)__builtin_ia32_pfrsqrt ((__v2sf)__A);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pfrsqit1 (__m64 __A, __m64 __B)
-{
-  return (__m64)__builtin_ia32_pfrsqit1 ((__v2sf)__A, (__v2sf)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pfsub (__m64 __A, __m64 __B)
-{
-  return (__m64)__builtin_ia32_pfsub ((__v2sf)__A, (__v2sf)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pfsubr (__m64 __A, __m64 __B)
-{
-  return (__m64)__builtin_ia32_pfsubr ((__v2sf)__A, (__v2sf)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pi2fd (__m64 __A)
-{
-  return (__m64)__builtin_ia32_pi2fd ((__v2si)__A);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pmulhrw (__m64 __A, __m64 __B)
-{
-  return (__m64)__builtin_ia32_pmulhrw ((__v4hi)__A, (__v4hi)__B);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_prefetch (void *__P)
-{
-  __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_from_float (float __A)
-{
-  return __extension__ (__m64)(__v2sf){ __A, 0.0f };
-}
-
-extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_to_float (__m64 __A)
-{
-  union { __v2sf v; float a[2]; } __tmp;
-  __tmp.v = (__v2sf)__A;
-  return __tmp.a[0];
-}
-
-#ifdef __3dNOW_A__
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pf2iw (__m64 __A)
-{
-  return (__m64)__builtin_ia32_pf2iw ((__v2sf)__A);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pfnacc (__m64 __A, __m64 __B)
-{
-  return (__m64)__builtin_ia32_pfnacc ((__v2sf)__A, (__v2sf)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pfpnacc (__m64 __A, __m64 __B)
-{
-  return (__m64)__builtin_ia32_pfpnacc ((__v2sf)__A, (__v2sf)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pi2fw (__m64 __A)
-{
-  return (__m64)__builtin_ia32_pi2fw ((__v2si)__A);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pswapd (__m64 __A)
-{
-  return (__m64)__builtin_ia32_pswapdsf ((__v2sf)__A);
-}
-
-#endif /* __3dNOW_A__ */
-
-#ifdef __DISABLE_3dNOW__
-#undef __DISABLE_3dNOW__
-#pragma GCC pop_options
-#endif /* __DISABLE_3dNOW__ */
-
-#endif /* _MM3DNOW_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/mm_malloc.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/mm_malloc.h	(revision 1046)
+++ 	(revision )
@@ -1,74 +1,0 @@
-/* Copyright (C) 2004-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _MM_MALLOC_H_INCLUDED
-#define _MM_MALLOC_H_INCLUDED
-
-#include <stdlib.h>
-#include <errno.h>
-
-static __inline__ void* 
-_mm_malloc (size_t size, size_t align)
-{
-  void * malloc_ptr;
-  void * aligned_ptr;
-
-  /* Error if align is not a power of two.  */
-  if (align & (align - 1))
-    {
-      errno = EINVAL;
-      return ((void*) 0);
-    }
-
-  if (size == 0)
-    return ((void *) 0);
-
- /* Assume malloc'd pointer is aligned at least to sizeof (void*).
-    If necessary, add another sizeof (void*) to store the value
-    returned by malloc. Effectively this enforces a minimum alignment
-    of sizeof double. */     
-    if (align < 2 * sizeof (void *))
-      align = 2 * sizeof (void *);
-
-  malloc_ptr = malloc (size + align);
-  if (!malloc_ptr)
-    return ((void *) 0);
-
-  /* Align  We have at least sizeof (void *) space below malloc'd ptr. */
-  aligned_ptr = (void *) (((size_t) malloc_ptr + align)
-			  & ~((size_t) (align) - 1));
-
-  /* Store the original pointer just before p.  */	
-  ((void **) aligned_ptr) [-1] = malloc_ptr;
-
-  return aligned_ptr;
-}
-
-static __inline__ void
-_mm_free (void * aligned_ptr)
-{
-  if (aligned_ptr)
-    free (((void **) aligned_ptr) [-1]);
-}
-
-#endif /* _MM_MALLOC_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/mmintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/mmintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,942 +1,0 @@
-/* Copyright (C) 2002-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* Implemented from the specification included in the Intel C++ Compiler
-   User Guide and Reference, version 9.0.  */
-
-#ifndef _MMINTRIN_H_INCLUDED
-#define _MMINTRIN_H_INCLUDED
-
-#ifndef __MMX__
-#pragma GCC push_options
-#pragma GCC target("mmx")
-#define __DISABLE_MMX__
-#endif /* __MMX__ */
-
-/* The Intel API is flexible enough that we must allow aliasing with other
-   vector types, and their scalar components.  */
-typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
-
-/* Internal data types for implementing the intrinsics.  */
-typedef int __v2si __attribute__ ((__vector_size__ (8)));
-typedef short __v4hi __attribute__ ((__vector_size__ (8)));
-typedef char __v8qi __attribute__ ((__vector_size__ (8)));
-typedef long long __v1di __attribute__ ((__vector_size__ (8)));
-typedef float __v2sf __attribute__ ((__vector_size__ (8)));
-
-/* Empty the multimedia state.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_empty (void)
-{
-  __builtin_ia32_emms ();
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_empty (void)
-{
-  _mm_empty ();
-}
-
-/* Convert I to a __m64 object.  The integer is zero-extended to 64-bits.  */
-extern __inline __m64  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsi32_si64 (int __i)
-{
-  return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
-}
-
-extern __inline __m64  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_from_int (int __i)
-{
-  return _mm_cvtsi32_si64 (__i);
-}
-
-#ifdef __x86_64__
-/* Convert I to a __m64 object.  */
-
-/* Intel intrinsic.  */
-extern __inline __m64  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_from_int64 (long long __i)
-{
-  return (__m64) __i;
-}
-
-extern __inline __m64  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsi64_m64 (long long __i)
-{
-  return (__m64) __i;
-}
-
-/* Microsoft intrinsic.  */
-extern __inline __m64  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsi64x_si64 (long long __i)
-{
-  return (__m64) __i;
-}
-
-extern __inline __m64  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set_pi64x (long long __i)
-{
-  return (__m64) __i;
-}
-#endif
-
-/* Convert the lower 32 bits of the __m64 object into an integer.  */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsi64_si32 (__m64 __i)
-{
-  return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_to_int (__m64 __i)
-{
-  return _mm_cvtsi64_si32 (__i);
-}
-
-#ifdef __x86_64__
-/* Convert the __m64 object to a 64bit integer.  */
-
-/* Intel intrinsic.  */
-extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_to_int64 (__m64 __i)
-{
-  return (long long)__i;
-}
-
-extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtm64_si64 (__m64 __i)
-{
-  return (long long)__i;
-}
-
-/* Microsoft intrinsic.  */
-extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsi64_si64x (__m64 __i)
-{
-  return (long long)__i;
-}
-#endif
-
-/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
-   the result, and the four 16-bit values from M2 into the upper four 8-bit
-   values of the result, all with signed saturation.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_packs_pi16 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_packsswb (__m64 __m1, __m64 __m2)
-{
-  return _mm_packs_pi16 (__m1, __m2);
-}
-
-/* Pack the two 32-bit values from M1 in to the lower two 16-bit values of
-   the result, and the two 32-bit values from M2 into the upper two 16-bit
-   values of the result, all with signed saturation.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_packs_pi32 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_packssdw (__m64 __m1, __m64 __m2)
-{
-  return _mm_packs_pi32 (__m1, __m2);
-}
-
-/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
-   the result, and the four 16-bit values from M2 into the upper four 8-bit
-   values of the result, all with unsigned saturation.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_packs_pu16 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_packuswb (__m64 __m1, __m64 __m2)
-{
-  return _mm_packs_pu16 (__m1, __m2);
-}
-
-/* Interleave the four 8-bit values from the high half of M1 with the four
-   8-bit values from the high half of M2.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_punpckhbw (__m64 __m1, __m64 __m2)
-{
-  return _mm_unpackhi_pi8 (__m1, __m2);
-}
-
-/* Interleave the two 16-bit values from the high half of M1 with the two
-   16-bit values from the high half of M2.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_punpckhwd (__m64 __m1, __m64 __m2)
-{
-  return _mm_unpackhi_pi16 (__m1, __m2);
-}
-
-/* Interleave the 32-bit value from the high half of M1 with the 32-bit
-   value from the high half of M2.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_punpckhdq (__m64 __m1, __m64 __m2)
-{
-  return _mm_unpackhi_pi32 (__m1, __m2);
-}
-
-/* Interleave the four 8-bit values from the low half of M1 with the four
-   8-bit values from the low half of M2.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_punpcklbw (__m64 __m1, __m64 __m2)
-{
-  return _mm_unpacklo_pi8 (__m1, __m2);
-}
-
-/* Interleave the two 16-bit values from the low half of M1 with the two
-   16-bit values from the low half of M2.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_punpcklwd (__m64 __m1, __m64 __m2)
-{
-  return _mm_unpacklo_pi16 (__m1, __m2);
-}
-
-/* Interleave the 32-bit value from the low half of M1 with the 32-bit
-   value from the low half of M2.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_punpckldq (__m64 __m1, __m64 __m2)
-{
-  return _mm_unpacklo_pi32 (__m1, __m2);
-}
-
-/* Add the 8-bit values in M1 to the 8-bit values in M2.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_add_pi8 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_paddb (__m64 __m1, __m64 __m2)
-{
-  return _mm_add_pi8 (__m1, __m2);
-}
-
-/* Add the 16-bit values in M1 to the 16-bit values in M2.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_add_pi16 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_paddw (__m64 __m1, __m64 __m2)
-{
-  return _mm_add_pi16 (__m1, __m2);
-}
-
-/* Add the 32-bit values in M1 to the 32-bit values in M2.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_add_pi32 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_paddd (__m64 __m1, __m64 __m2)
-{
-  return _mm_add_pi32 (__m1, __m2);
-}
-
-/* Add the 64-bit values in M1 to the 64-bit values in M2.  */
-#ifndef __SSE2__
-#pragma GCC push_options
-#pragma GCC target("sse2")
-#define __DISABLE_SSE2__
-#endif /* __SSE2__ */
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_add_si64 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
-}
-#ifdef __DISABLE_SSE2__
-#undef __DISABLE_SSE2__
-#pragma GCC pop_options
-#endif /* __DISABLE_SSE2__ */
-
-/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
-   saturated arithmetic.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_adds_pi8 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_paddsb (__m64 __m1, __m64 __m2)
-{
-  return _mm_adds_pi8 (__m1, __m2);
-}
-
-/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
-   saturated arithmetic.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_adds_pi16 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_paddsw (__m64 __m1, __m64 __m2)
-{
-  return _mm_adds_pi16 (__m1, __m2);
-}
-
-/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
-   saturated arithmetic.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_adds_pu8 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_paddusb (__m64 __m1, __m64 __m2)
-{
-  return _mm_adds_pu8 (__m1, __m2);
-}
-
-/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
-   saturated arithmetic.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_adds_pu16 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_paddusw (__m64 __m1, __m64 __m2)
-{
-  return _mm_adds_pu16 (__m1, __m2);
-}
-
-/* Subtract the 8-bit values in M2 from the 8-bit values in M1.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sub_pi8 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psubb (__m64 __m1, __m64 __m2)
-{
-  return _mm_sub_pi8 (__m1, __m2);
-}
-
-/* Subtract the 16-bit values in M2 from the 16-bit values in M1.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sub_pi16 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psubw (__m64 __m1, __m64 __m2)
-{
-  return _mm_sub_pi16 (__m1, __m2);
-}
-
-/* Subtract the 32-bit values in M2 from the 32-bit values in M1.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sub_pi32 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psubd (__m64 __m1, __m64 __m2)
-{
-  return _mm_sub_pi32 (__m1, __m2);
-}
-
-/* Add the 64-bit values in M1 to the 64-bit values in M2.  */
-#ifndef __SSE2__
-#pragma GCC push_options
-#pragma GCC target("sse2")
-#define __DISABLE_SSE2__
-#endif /* __SSE2__ */
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sub_si64 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
-}
-#ifdef __DISABLE_SSE2__
-#undef __DISABLE_SSE2__
-#pragma GCC pop_options
-#endif /* __DISABLE_SSE2__ */
-
-/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
-   saturating arithmetic.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_subs_pi8 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psubsb (__m64 __m1, __m64 __m2)
-{
-  return _mm_subs_pi8 (__m1, __m2);
-}
-
-/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
-   signed saturating arithmetic.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_subs_pi16 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psubsw (__m64 __m1, __m64 __m2)
-{
-  return _mm_subs_pi16 (__m1, __m2);
-}
-
-/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
-   unsigned saturating arithmetic.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_subs_pu8 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psubusb (__m64 __m1, __m64 __m2)
-{
-  return _mm_subs_pu8 (__m1, __m2);
-}
-
-/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
-   unsigned saturating arithmetic.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_subs_pu16 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psubusw (__m64 __m1, __m64 __m2)
-{
-  return _mm_subs_pu16 (__m1, __m2);
-}
-
-/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
-   four 32-bit intermediate results, which are then summed by pairs to
-   produce two 32-bit results.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_madd_pi16 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pmaddwd (__m64 __m1, __m64 __m2)
-{
-  return _mm_madd_pi16 (__m1, __m2);
-}
-
-/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
-   M2 and produce the high 16 bits of the 32-bit results.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pmulhw (__m64 __m1, __m64 __m2)
-{
-  return _mm_mulhi_pi16 (__m1, __m2);
-}
-
-/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
-   the low 16 bits of the results.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pmullw (__m64 __m1, __m64 __m2)
-{
-  return _mm_mullo_pi16 (__m1, __m2);
-}
-
-/* Shift four 16-bit values in M left by COUNT.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sll_pi16 (__m64 __m, __m64 __count)
-{
-  return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psllw (__m64 __m, __m64 __count)
-{
-  return _mm_sll_pi16 (__m, __count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_slli_pi16 (__m64 __m, int __count)
-{
-  return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psllwi (__m64 __m, int __count)
-{
-  return _mm_slli_pi16 (__m, __count);
-}
-
-/* Shift two 32-bit values in M left by COUNT.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sll_pi32 (__m64 __m, __m64 __count)
-{
-  return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pslld (__m64 __m, __m64 __count)
-{
-  return _mm_sll_pi32 (__m, __count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_slli_pi32 (__m64 __m, int __count)
-{
-  return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pslldi (__m64 __m, int __count)
-{
-  return _mm_slli_pi32 (__m, __count);
-}
-
-/* Shift the 64-bit value in M left by COUNT.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sll_si64 (__m64 __m, __m64 __count)
-{
-  return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psllq (__m64 __m, __m64 __count)
-{
-  return _mm_sll_si64 (__m, __count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_slli_si64 (__m64 __m, int __count)
-{
-  return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psllqi (__m64 __m, int __count)
-{
-  return _mm_slli_si64 (__m, __count);
-}
-
-/* Shift four 16-bit values in M right by COUNT; shift in the sign bit.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sra_pi16 (__m64 __m, __m64 __count)
-{
-  return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psraw (__m64 __m, __m64 __count)
-{
-  return _mm_sra_pi16 (__m, __count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srai_pi16 (__m64 __m, int __count)
-{
-  return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psrawi (__m64 __m, int __count)
-{
-  return _mm_srai_pi16 (__m, __count);
-}
-
-/* Shift two 32-bit values in M right by COUNT; shift in the sign bit.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sra_pi32 (__m64 __m, __m64 __count)
-{
-  return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psrad (__m64 __m, __m64 __count)
-{
-  return _mm_sra_pi32 (__m, __count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srai_pi32 (__m64 __m, int __count)
-{
-  return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psradi (__m64 __m, int __count)
-{
-  return _mm_srai_pi32 (__m, __count);
-}
-
-/* Shift four 16-bit values in M right by COUNT; shift in zeros.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srl_pi16 (__m64 __m, __m64 __count)
-{
-  return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psrlw (__m64 __m, __m64 __count)
-{
-  return _mm_srl_pi16 (__m, __count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srli_pi16 (__m64 __m, int __count)
-{
-  return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psrlwi (__m64 __m, int __count)
-{
-  return _mm_srli_pi16 (__m, __count);
-}
-
-/* Shift two 32-bit values in M right by COUNT; shift in zeros.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srl_pi32 (__m64 __m, __m64 __count)
-{
-  return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psrld (__m64 __m, __m64 __count)
-{
-  return _mm_srl_pi32 (__m, __count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srli_pi32 (__m64 __m, int __count)
-{
-  return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psrldi (__m64 __m, int __count)
-{
-  return _mm_srli_pi32 (__m, __count);
-}
-
-/* Shift the 64-bit value in M left by COUNT; shift in zeros.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srl_si64 (__m64 __m, __m64 __count)
-{
-  return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psrlq (__m64 __m, __m64 __count)
-{
-  return _mm_srl_si64 (__m, __count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srli_si64 (__m64 __m, int __count)
-{
-  return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psrlqi (__m64 __m, int __count)
-{
-  return _mm_srli_si64 (__m, __count);
-}
-
-/* Bit-wise AND the 64-bit values in M1 and M2.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_and_si64 (__m64 __m1, __m64 __m2)
-{
-  return __builtin_ia32_pand (__m1, __m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pand (__m64 __m1, __m64 __m2)
-{
-  return _mm_and_si64 (__m1, __m2);
-}
-
-/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
-   64-bit value in M2.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_andnot_si64 (__m64 __m1, __m64 __m2)
-{
-  return __builtin_ia32_pandn (__m1, __m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pandn (__m64 __m1, __m64 __m2)
-{
-  return _mm_andnot_si64 (__m1, __m2);
-}
-
-/* Bit-wise inclusive OR the 64-bit values in M1 and M2.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_or_si64 (__m64 __m1, __m64 __m2)
-{
-  return __builtin_ia32_por (__m1, __m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_por (__m64 __m1, __m64 __m2)
-{
-  return _mm_or_si64 (__m1, __m2);
-}
-
-/* Bit-wise exclusive OR the 64-bit values in M1 and M2.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_xor_si64 (__m64 __m1, __m64 __m2)
-{
-  return __builtin_ia32_pxor (__m1, __m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pxor (__m64 __m1, __m64 __m2)
-{
-  return _mm_xor_si64 (__m1, __m2);
-}
-
-/* Compare eight 8-bit values.  The result of the comparison is 0xFF if the
-   test is true and zero if false.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pcmpeqb (__m64 __m1, __m64 __m2)
-{
-  return _mm_cmpeq_pi8 (__m1, __m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pcmpgtb (__m64 __m1, __m64 __m2)
-{
-  return _mm_cmpgt_pi8 (__m1, __m2);
-}
-
-/* Compare four 16-bit values.  The result of the comparison is 0xFFFF if
-   the test is true and zero if false.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pcmpeqw (__m64 __m1, __m64 __m2)
-{
-  return _mm_cmpeq_pi16 (__m1, __m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pcmpgtw (__m64 __m1, __m64 __m2)
-{
-  return _mm_cmpgt_pi16 (__m1, __m2);
-}
-
-/* Compare two 32-bit values.  The result of the comparison is 0xFFFFFFFF if
-   the test is true and zero if false.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pcmpeqd (__m64 __m1, __m64 __m2)
-{
-  return _mm_cmpeq_pi32 (__m1, __m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
-{
-  return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pcmpgtd (__m64 __m1, __m64 __m2)
-{
-  return _mm_cmpgt_pi32 (__m1, __m2);
-}
-
-/* Creates a 64-bit zero.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setzero_si64 (void)
-{
-  return (__m64)0LL;
-}
-
-/* Creates a vector of two 32-bit values; I0 is least significant.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set_pi32 (int __i1, int __i0)
-{
-  return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
-}
-
-/* Creates a vector of four 16-bit values; W0 is least significant.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
-{
-  return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
-}
-
-/* Creates a vector of eight 8-bit values; B0 is least significant.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
-	     char __b3, char __b2, char __b1, char __b0)
-{
-  return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
-					       __b4, __b5, __b6, __b7);
-}
-
-/* Similar, but with the arguments in reverse order.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setr_pi32 (int __i0, int __i1)
-{
-  return _mm_set_pi32 (__i1, __i0);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
-{
-  return _mm_set_pi16 (__w3, __w2, __w1, __w0);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
-	      char __b4, char __b5, char __b6, char __b7)
-{
-  return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
-}
-
-/* Creates a vector of two 32-bit values, both elements containing I.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set1_pi32 (int __i)
-{
-  return _mm_set_pi32 (__i, __i);
-}
-
-/* Creates a vector of four 16-bit values, all elements containing W.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set1_pi16 (short __w)
-{
-  return _mm_set_pi16 (__w, __w, __w, __w);
-}
-
-/* Creates a vector of eight 8-bit values, all elements containing B.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set1_pi8 (char __b)
-{
-  return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
-}
-#ifdef __DISABLE_MMX__
-#undef __DISABLE_MMX__
-#pragma GCC pop_options
-#endif /* __DISABLE_MMX__ */
-
-#endif /* _MMINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/mwaitxintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/mwaitxintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,50 +1,0 @@
-/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _MWAITXINTRIN_H_INCLUDED
-#define _MWAITXINTRIN_H_INCLUDED
-
-#ifndef __MWAITX__
-#pragma GCC push_options
-#pragma GCC target("mwaitx")
-#define __DISABLE_MWAITX__
-#endif /* __MWAITX__ */
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_monitorx (void const * __P, unsigned int __E, unsigned int __H)
-{
-  __builtin_ia32_monitorx (__P, __E, __H);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mwaitx (unsigned int __E, unsigned int __H, unsigned int __C)
-{
-  __builtin_ia32_mwaitx (__E, __H, __C);
-}
-
-#ifdef __DISABLE_MWAITX__
-#undef __DISABLE_MWAITX__
-#pragma GCC pop_options
-#endif /* __DISABLE_MWAITX__ */
-
-#endif /* _MWAITXINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/nmmintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/nmmintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,33 +1,0 @@
-/* Copyright (C) 2007-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* Implemented from the specification included in the Intel C++ Compiler
-   User Guide and Reference, version 10.0.  */
-
-#ifndef _NMMINTRIN_H_INCLUDED
-#define _NMMINTRIN_H_INCLUDED
-
-/* We just include SSE4.1 header file.  */
-#include <smmintrin.h>
-
-#endif /* _NMMINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/omp.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/omp.h	(revision 1046)
+++ 	(revision )
@@ -1,128 +1,0 @@
-/* Copyright (C) 2005-2015 Free Software Foundation, Inc.
-   Contributed by Richard Henderson <rth@redhat.com>.
-
-   This file is part of the GNU Offloading and Multi Processing Library
-   (libgomp).
-
-   Libgomp is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
-   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-   more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _OMP_H
-#define _OMP_H 1
-
-#ifndef _LIBGOMP_OMP_LOCK_DEFINED
-#define _LIBGOMP_OMP_LOCK_DEFINED 1
-/* These two structures get edited by the libgomp build process to 
-   reflect the shape of the two types.  Their internals are private
-   to the library.  */
-
-typedef struct
-{
-  unsigned char _x[4] 
-    __attribute__((__aligned__(4)));
-} omp_lock_t;
-
-typedef struct
-{
-  unsigned char _x[12] 
-    __attribute__((__aligned__(4)));
-} omp_nest_lock_t;
-#endif
-
-typedef enum omp_sched_t
-{
-  omp_sched_static = 1,
-  omp_sched_dynamic = 2,
-  omp_sched_guided = 3,
-  omp_sched_auto = 4
-} omp_sched_t;
-
-typedef enum omp_proc_bind_t
-{
-  omp_proc_bind_false = 0,
-  omp_proc_bind_true = 1,
-  omp_proc_bind_master = 2,
-  omp_proc_bind_close = 3,
-  omp_proc_bind_spread = 4
-} omp_proc_bind_t;
-
-#ifdef __cplusplus
-extern "C" {
-# define __GOMP_NOTHROW throw ()
-#else
-# define __GOMP_NOTHROW __attribute__((__nothrow__))
-#endif
-
-extern void omp_set_num_threads (int) __GOMP_NOTHROW;
-extern int omp_get_num_threads (void) __GOMP_NOTHROW;
-extern int omp_get_max_threads (void) __GOMP_NOTHROW;
-extern int omp_get_thread_num (void) __GOMP_NOTHROW;
-extern int omp_get_num_procs (void) __GOMP_NOTHROW;
-
-extern int omp_in_parallel (void) __GOMP_NOTHROW;
-
-extern void omp_set_dynamic (int) __GOMP_NOTHROW;
-extern int omp_get_dynamic (void) __GOMP_NOTHROW;
-
-extern void omp_set_nested (int) __GOMP_NOTHROW;
-extern int omp_get_nested (void) __GOMP_NOTHROW;
-
-extern void omp_init_lock (omp_lock_t *) __GOMP_NOTHROW;
-extern void omp_destroy_lock (omp_lock_t *) __GOMP_NOTHROW;
-extern void omp_set_lock (omp_lock_t *) __GOMP_NOTHROW;
-extern void omp_unset_lock (omp_lock_t *) __GOMP_NOTHROW;
-extern int omp_test_lock (omp_lock_t *) __GOMP_NOTHROW;
-
-extern void omp_init_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW;
-extern void omp_destroy_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW;
-extern void omp_set_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW;
-extern void omp_unset_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW;
-extern int omp_test_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW;
-
-extern double omp_get_wtime (void) __GOMP_NOTHROW;
-extern double omp_get_wtick (void) __GOMP_NOTHROW;
-
-extern void omp_set_schedule (omp_sched_t, int) __GOMP_NOTHROW;
-extern void omp_get_schedule (omp_sched_t *, int *) __GOMP_NOTHROW;
-extern int omp_get_thread_limit (void) __GOMP_NOTHROW;
-extern void omp_set_max_active_levels (int) __GOMP_NOTHROW;
-extern int omp_get_max_active_levels (void) __GOMP_NOTHROW;
-extern int omp_get_level (void) __GOMP_NOTHROW;
-extern int omp_get_ancestor_thread_num (int) __GOMP_NOTHROW;
-extern int omp_get_team_size (int) __GOMP_NOTHROW;
-extern int omp_get_active_level (void) __GOMP_NOTHROW;
-
-extern int omp_in_final (void) __GOMP_NOTHROW;
-
-extern int omp_get_cancellation (void) __GOMP_NOTHROW;
-extern omp_proc_bind_t omp_get_proc_bind (void) __GOMP_NOTHROW;
-
-extern void omp_set_default_device (int) __GOMP_NOTHROW;
-extern int omp_get_default_device (void) __GOMP_NOTHROW;
-extern int omp_get_num_devices (void) __GOMP_NOTHROW;
-extern int omp_get_num_teams (void) __GOMP_NOTHROW;
-extern int omp_get_team_num (void) __GOMP_NOTHROW;
-
-extern int omp_is_initial_device (void) __GOMP_NOTHROW;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _OMP_H */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/openacc.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/openacc.h	(revision 1046)
+++ 	(revision )
@@ -1,118 +1,0 @@
-/* OpenACC Runtime Library User-facing Declarations
-
-   Copyright (C) 2013-2015 Free Software Foundation, Inc.
-
-   Contributed by Mentor Embedded.
-
-   This file is part of the GNU Offloading and Multi Processing Library
-   (libgomp).
-
-   Libgomp is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
-   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-   more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _OPENACC_H
-#define _OPENACC_H 1
-
-/* The OpenACC standard is silent on whether or not including <openacc.h>
-   might or must not include other header files.  We chose to include
-   some.  */
-#include <stddef.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if __cplusplus >= 201103
-# define __GOACC_NOTHROW noexcept ()
-#elif __cplusplus
-# define __GOACC_NOTHROW throw ()
-#else /* Not C++ */
-# define __GOACC_NOTHROW __attribute__ ((__nothrow__))
-#endif
-
-/* Types */
-typedef enum acc_device_t
-  {
-    /* Keep in sync with include/gomp-constants.h.  */
-    acc_device_none = 0,
-    acc_device_default = 1,
-    acc_device_host = 2,
-    acc_device_host_nonshm = 3,
-    acc_device_not_host = 4,
-    acc_device_nvidia = 5,
-    _ACC_device_hwm
-  } acc_device_t;
-
-typedef enum acc_async_t
-  {
-    /* Keep in sync with include/gomp-constants.h.  */
-    acc_async_noval = -1,
-    acc_async_sync  = -2
-  } acc_async_t;
-
-int acc_get_num_devices (acc_device_t) __GOACC_NOTHROW;
-void acc_set_device_type (acc_device_t) __GOACC_NOTHROW;
-acc_device_t acc_get_device_type (void) __GOACC_NOTHROW;
-void acc_set_device_num (int, acc_device_t) __GOACC_NOTHROW;
-int acc_get_device_num (acc_device_t) __GOACC_NOTHROW;
-int acc_async_test (int) __GOACC_NOTHROW;
-int acc_async_test_all (void) __GOACC_NOTHROW;
-void acc_wait (int) __GOACC_NOTHROW;
-void acc_wait_async (int, int) __GOACC_NOTHROW;
-void acc_wait_all (void) __GOACC_NOTHROW;
-void acc_wait_all_async (int) __GOACC_NOTHROW;
-void acc_init (acc_device_t) __GOACC_NOTHROW;
-void acc_shutdown (acc_device_t) __GOACC_NOTHROW;
-int acc_on_device (acc_device_t) __GOACC_NOTHROW;
-void *acc_malloc (size_t) __GOACC_NOTHROW;
-void acc_free (void *) __GOACC_NOTHROW;
-/* Some of these would be more correct with const qualifiers, but
-   the standard specifies otherwise.  */
-void *acc_copyin (void *, size_t) __GOACC_NOTHROW;
-void *acc_present_or_copyin (void *, size_t) __GOACC_NOTHROW;
-void *acc_create (void *, size_t) __GOACC_NOTHROW;
-void *acc_present_or_create (void *, size_t) __GOACC_NOTHROW;
-void acc_copyout (void *, size_t) __GOACC_NOTHROW;
-void acc_delete (void *, size_t) __GOACC_NOTHROW;
-void acc_update_device (void *, size_t) __GOACC_NOTHROW;
-void acc_update_self (void *, size_t) __GOACC_NOTHROW;
-void acc_map_data (void *, void *, size_t) __GOACC_NOTHROW;
-void acc_unmap_data (void *) __GOACC_NOTHROW;
-void *acc_deviceptr (void *) __GOACC_NOTHROW;
-void *acc_hostptr (void *) __GOACC_NOTHROW;
-int acc_is_present (void *, size_t) __GOACC_NOTHROW;
-void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW;
-void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW;
-
-/* Old names.  OpenACC does not specify whether these can or must
-   not be macros, inlines or aliases for the new names.  */
-#define acc_pcreate acc_present_or_create
-#define acc_pcopyin acc_present_or_copyin
-
-/* CUDA-specific routines.  */
-void *acc_get_current_cuda_device (void) __GOACC_NOTHROW;
-void *acc_get_current_cuda_context (void) __GOACC_NOTHROW;
-void *acc_get_cuda_stream (int) __GOACC_NOTHROW;
-int acc_set_cuda_stream (int, void *) __GOACC_NOTHROW;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _OPENACC_H */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/pcommitintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/pcommitintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,49 +1,0 @@
-/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if !defined _X86INTRIN_H_INCLUDED
-# error "Never use <pcommitintrin.h> directly; include <x86intrin.h> instead."
-#endif
-
-#ifndef _PCOMMITINTRIN_H_INCLUDED
-#define _PCOMMITINTRIN_H_INCLUDED
-
-#ifndef __PCOMMIT__
-#pragma GCC push_options
-#pragma GCC target("pcommit")
-#define __DISABLE_PCOMMIT__
-#endif /* __PCOMMIT__ */
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_pcommit (void)
-{
-  __builtin_ia32_pcommit ();
-}
-
-#ifdef __DISABLE_PCOMMIT__
-#undef __DISABLE_PCOMMIT__
-#pragma GCC pop_options
-#endif /* __DISABLE_PCOMMIT__ */
-
-#endif /* _PCOMMITINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/pmmintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/pmmintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,132 +1,0 @@
-/* Copyright (C) 2003-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* Implemented from the specification included in the Intel C++ Compiler
-   User Guide and Reference, version 9.0.  */
-
-#ifndef _PMMINTRIN_H_INCLUDED
-#define _PMMINTRIN_H_INCLUDED
-
-/* We need definitions from the SSE2 and SSE header files*/
-#include <emmintrin.h>
-
-#ifndef __SSE3__
-#pragma GCC push_options
-#pragma GCC target("sse3")
-#define __DISABLE_SSE3__
-#endif /* __SSE3__ */
-
-/* Additional bits in the MXCSR.  */
-#define _MM_DENORMALS_ZERO_MASK		0x0040
-#define _MM_DENORMALS_ZERO_ON		0x0040
-#define _MM_DENORMALS_ZERO_OFF		0x0000
-
-#define _MM_SET_DENORMALS_ZERO_MODE(mode) \
-  _mm_setcsr ((_mm_getcsr () & ~_MM_DENORMALS_ZERO_MASK) | (mode))
-#define _MM_GET_DENORMALS_ZERO_MODE() \
-  (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_addsub_ps (__m128 __X, __m128 __Y)
-{
-  return (__m128) __builtin_ia32_addsubps ((__v4sf)__X, (__v4sf)__Y);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_hadd_ps (__m128 __X, __m128 __Y)
-{
-  return (__m128) __builtin_ia32_haddps ((__v4sf)__X, (__v4sf)__Y);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_hsub_ps (__m128 __X, __m128 __Y)
-{
-  return (__m128) __builtin_ia32_hsubps ((__v4sf)__X, (__v4sf)__Y);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movehdup_ps (__m128 __X)
-{
-  return (__m128) __builtin_ia32_movshdup ((__v4sf)__X);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_moveldup_ps (__m128 __X)
-{
-  return (__m128) __builtin_ia32_movsldup ((__v4sf)__X);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_addsub_pd (__m128d __X, __m128d __Y)
-{
-  return (__m128d) __builtin_ia32_addsubpd ((__v2df)__X, (__v2df)__Y);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_hadd_pd (__m128d __X, __m128d __Y)
-{
-  return (__m128d) __builtin_ia32_haddpd ((__v2df)__X, (__v2df)__Y);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_hsub_pd (__m128d __X, __m128d __Y)
-{
-  return (__m128d) __builtin_ia32_hsubpd ((__v2df)__X, (__v2df)__Y);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_loaddup_pd (double const *__P)
-{
-  return _mm_load1_pd (__P);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movedup_pd (__m128d __X)
-{
-  return _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0));
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_lddqu_si128 (__m128i const *__P)
-{
-  return (__m128i) __builtin_ia32_lddqu ((char const *)__P);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_monitor (void const * __P, unsigned int __E, unsigned int __H)
-{
-  __builtin_ia32_monitor (__P, __E, __H);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mwait (unsigned int __E, unsigned int __H)
-{
-  __builtin_ia32_mwait (__E, __H);
-}
-
-#ifdef __DISABLE_SSE3__
-#undef __DISABLE_SSE3__
-#pragma GCC pop_options
-#endif /* __DISABLE_SSE3__ */
-
-#endif /* _PMMINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/popcntintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/popcntintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,53 +1,0 @@
-/* Copyright (C) 2009-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _POPCNTINTRIN_H_INCLUDED
-#define _POPCNTINTRIN_H_INCLUDED
-
-#ifndef __POPCNT__
-#pragma GCC push_options
-#pragma GCC target("popcnt")
-#define __DISABLE_POPCNT__
-#endif /* __POPCNT__ */
-
-/* Calculate a number of bits set to 1.  */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_popcnt_u32 (unsigned int __X)
-{
-  return __builtin_popcount (__X);
-}
-
-#ifdef __x86_64__
-extern __inline long long  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_popcnt_u64 (unsigned long long __X)
-{
-  return __builtin_popcountll (__X);
-}
-#endif
-
-#ifdef __DISABLE_POPCNT__
-#undef __DISABLE_POPCNT__
-#pragma GCC pop_options
-#endif  /* __DISABLE_POPCNT__ */
-
-#endif /* _POPCNTINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/prfchwintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/prfchwintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,37 +1,0 @@
-/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if !defined _X86INTRIN_H_INCLUDED && !defined _MM3DNOW_H_INCLUDED
-# error "Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead."
-#endif
-
-#ifndef _PRFCHWINTRIN_H_INCLUDED
-#define _PRFCHWINTRIN_H_INCLUDED
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_prefetchw (void *__P)
-{
-  __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);
-}
-
-#endif /* _PRFCHWINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/quadmath.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/quadmath.h	(revision 1046)
+++ 	(revision )
@@ -1,200 +1,0 @@
-/* GCC Quad-Precision Math Library
-   Copyright (C) 2010, 2011 Free Software Foundation, Inc.
-   Written by Francois-Xavier Coudert  <fxcoudert@gcc.gnu.org>
-
-This file is part of the libquadmath library.
-Libquadmath is free software; you can redistribute it and/or
-modify it under the terms of the GNU Library General Public
-License as published by the Free Software Foundation; either
-version 2 of the License, or (at your option) any later version.
-
-Libquadmath is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Library General Public License for more details.
-
-You should have received a copy of the GNU Library General Public
-License along with libquadmath; see the file COPYING.LIB.  If
-not, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
-Boston, MA 02110-1301, USA.  */
-
-#ifndef QUADMATH_H
-#define QUADMATH_H
-
-#include <stdlib.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Define the complex type corresponding to __float128
-   ("_Complex __float128" is not allowed) */
-typedef _Complex float __attribute__((mode(TC))) __complex128;
-
-#ifdef __cplusplus
-# define __quadmath_throw throw ()
-# define __quadmath_nth(fct) fct throw ()
-#else
-# define __quadmath_throw __attribute__((__nothrow__))
-# define __quadmath_nth(fct) __attribute__((__nothrow__)) fct
-#endif
-
-/* Prototypes for real functions */
-extern __float128 acosq (__float128) __quadmath_throw;
-extern __float128 acoshq (__float128) __quadmath_throw;
-extern __float128 asinq (__float128) __quadmath_throw;
-extern __float128 asinhq (__float128) __quadmath_throw;
-extern __float128 atanq (__float128) __quadmath_throw;
-extern __float128 atanhq (__float128) __quadmath_throw;
-extern __float128 atan2q (__float128, __float128) __quadmath_throw;
-extern __float128 cbrtq (__float128) __quadmath_throw;
-extern __float128 ceilq (__float128) __quadmath_throw;
-extern __float128 copysignq (__float128, __float128) __quadmath_throw;
-extern __float128 coshq (__float128) __quadmath_throw;
-extern __float128 cosq (__float128) __quadmath_throw;
-extern __float128 erfq (__float128) __quadmath_throw;
-extern __float128 erfcq (__float128) __quadmath_throw;
-extern __float128 expq (__float128) __quadmath_throw;
-extern __float128 expm1q (__float128) __quadmath_throw;
-extern __float128 fabsq (__float128) __quadmath_throw;
-extern __float128 fdimq (__float128, __float128) __quadmath_throw;
-extern int finiteq (__float128) __quadmath_throw;
-extern __float128 floorq (__float128) __quadmath_throw;
-extern __float128 fmaq (__float128, __float128, __float128) __quadmath_throw;
-extern __float128 fmaxq (__float128, __float128) __quadmath_throw;
-extern __float128 fminq (__float128, __float128) __quadmath_throw;
-extern __float128 fmodq (__float128, __float128) __quadmath_throw;
-extern __float128 frexpq (__float128, int *) __quadmath_throw;
-extern __float128 hypotq (__float128, __float128) __quadmath_throw;
-extern int isinfq (__float128) __quadmath_throw;
-extern int ilogbq (__float128) __quadmath_throw;
-extern int isnanq (__float128) __quadmath_throw;
-extern __float128 j0q (__float128) __quadmath_throw;
-extern __float128 j1q (__float128) __quadmath_throw;
-extern __float128 jnq (int, __float128) __quadmath_throw;
-extern __float128 ldexpq (__float128, int) __quadmath_throw;
-extern __float128 lgammaq (__float128) __quadmath_throw;
-extern long long int llrintq (__float128) __quadmath_throw;
-extern long long int llroundq (__float128) __quadmath_throw;
-extern __float128 logq (__float128) __quadmath_throw;
-extern __float128 log10q (__float128) __quadmath_throw;
-extern __float128 log2q (__float128) __quadmath_throw;
-extern __float128 log1pq (__float128) __quadmath_throw;
-extern long int lrintq (__float128) __quadmath_throw;
-extern long int lroundq (__float128) __quadmath_throw;
-extern __float128 modfq (__float128, __float128 *) __quadmath_throw;
-extern __float128 nanq (const char *) __quadmath_throw;
-extern __float128 nearbyintq (__float128) __quadmath_throw;
-extern __float128 nextafterq (__float128, __float128) __quadmath_throw;
-extern __float128 powq (__float128, __float128) __quadmath_throw;
-extern __float128 remainderq (__float128, __float128) __quadmath_throw;
-extern __float128 remquoq (__float128, __float128, int *) __quadmath_throw;
-extern __float128 rintq (__float128) __quadmath_throw;
-extern __float128 roundq (__float128) __quadmath_throw;
-extern __float128 scalblnq (__float128, long int) __quadmath_throw;
-extern __float128 scalbnq (__float128, int) __quadmath_throw;
-extern int signbitq (__float128) __quadmath_throw;
-extern void sincosq (__float128, __float128 *, __float128 *) __quadmath_throw;
-extern __float128 sinhq (__float128) __quadmath_throw;
-extern __float128 sinq (__float128) __quadmath_throw;
-extern __float128 sqrtq (__float128) __quadmath_throw;
-extern __float128 tanq (__float128) __quadmath_throw;
-extern __float128 tanhq (__float128) __quadmath_throw;
-extern __float128 tgammaq (__float128) __quadmath_throw;
-extern __float128 truncq (__float128) __quadmath_throw;
-extern __float128 y0q (__float128) __quadmath_throw;
-extern __float128 y1q (__float128) __quadmath_throw;
-extern __float128 ynq (int, __float128) __quadmath_throw;
-
-
-/* Prototypes for complex functions */
-extern __float128 cabsq (__complex128) __quadmath_throw;
-extern __float128 cargq (__complex128) __quadmath_throw;
-extern __float128 cimagq (__complex128) __quadmath_throw;
-extern __float128 crealq (__complex128) __quadmath_throw;
-extern __complex128 cacosq (__complex128) __quadmath_throw;
-extern __complex128 cacoshq (__complex128) __quadmath_throw;
-extern __complex128 casinq (__complex128) __quadmath_throw;
-extern __complex128 casinhq (__complex128) __quadmath_throw;
-extern __complex128 catanq (__complex128) __quadmath_throw;
-extern __complex128 catanhq (__complex128) __quadmath_throw;
-extern __complex128 ccosq (__complex128) __quadmath_throw;
-extern __complex128 ccoshq (__complex128) __quadmath_throw;
-extern __complex128 cexpq (__complex128) __quadmath_throw;
-extern __complex128 cexpiq (__float128) __quadmath_throw;
-extern __complex128 clogq (__complex128) __quadmath_throw;
-extern __complex128 clog10q (__complex128) __quadmath_throw;
-extern __complex128 conjq (__complex128) __quadmath_throw;
-extern __complex128 cpowq (__complex128, __complex128) __quadmath_throw;
-extern __complex128 cprojq (__complex128) __quadmath_throw;
-extern __complex128 csinq (__complex128) __quadmath_throw;
-extern __complex128 csinhq (__complex128) __quadmath_throw;
-extern __complex128 csqrtq (__complex128) __quadmath_throw;
-extern __complex128 ctanq (__complex128) __quadmath_throw;
-extern __complex128 ctanhq (__complex128) __quadmath_throw;
-
-
-/* Prototypes for string <-> __float128 conversion functions */
-extern __float128 strtoflt128 (const char *, char **) __quadmath_throw;
-extern int quadmath_snprintf (char *str, size_t size,
-			      const char *format, ...) __quadmath_throw;
-
-
-/* Macros */
-#define FLT128_MAX 1.18973149535723176508575932662800702e4932Q
-#define FLT128_MIN 3.36210314311209350626267781732175260e-4932Q
-#define FLT128_EPSILON 1.92592994438723585305597794258492732e-34Q
-#define FLT128_DENORM_MIN 6.475175119438025110924438958227646552e-4966Q
-#define FLT128_MANT_DIG 113
-#define FLT128_MIN_EXP (-16381)
-#define FLT128_MAX_EXP 16384
-#define FLT128_DIG 33
-#define FLT128_MIN_10_EXP (-4931)
-#define FLT128_MAX_10_EXP 4932
-
-
-#define HUGE_VALQ __builtin_huge_valq()
-/* The following alternative is valid, but brings the warning:
-   (floating constant exceeds range of ‘__float128’)  */
-/* #define HUGE_VALQ (__extension__ 0x1.0p32767Q) */
-
-#define M_Eq		2.7182818284590452353602874713526625Q  /* e */
-#define M_LOG2Eq	1.4426950408889634073599246810018921Q  /* log_2 e */
-#define M_LOG10Eq	0.4342944819032518276511289189166051Q  /* log_10 e */
-#define M_LN2q		0.6931471805599453094172321214581766Q  /* log_e 2 */
-#define M_LN10q		2.3025850929940456840179914546843642Q  /* log_e 10 */
-#define M_PIq		3.1415926535897932384626433832795029Q  /* pi */
-#define M_PI_2q		1.5707963267948966192313216916397514Q  /* pi/2 */
-#define M_PI_4q		0.7853981633974483096156608458198757Q  /* pi/4 */
-#define M_1_PIq		0.3183098861837906715377675267450287Q  /* 1/pi */
-#define M_2_PIq		0.6366197723675813430755350534900574Q  /* 2/pi */
-#define M_2_SQRTPIq	1.1283791670955125738961589031215452Q  /* 2/sqrt(pi) */
-#define M_SQRT2q	1.4142135623730950488016887242096981Q  /* sqrt(2) */
-#define M_SQRT1_2q	0.7071067811865475244008443621048490Q  /* 1/sqrt(2) */
-
-#define __quadmath_extern_inline \
-  extern inline __attribute__ ((__gnu_inline__))
-
-__quadmath_extern_inline __float128
-__quadmath_nth (cimagq (__complex128 __z))
-{
-  return __imag__ __z;
-}
-
-__quadmath_extern_inline __float128
-__quadmath_nth (crealq (__complex128 __z))
-{
-  return __real__ __z;
-}
-
-__quadmath_extern_inline __complex128
-__quadmath_nth (conjq (__complex128 __z))
-{
-  return __extension__ ~__z;
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/quadmath_weak.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/quadmath_weak.h	(revision 1046)
+++ 	(revision )
@@ -1,137 +1,0 @@
-/* GCC Quad-Precision Math Library
-   Copyright (C) 2010, 2011 Free Software Foundation, Inc.
-   Written by Tobias Burnus  <burnus@net-b.de>
-
-This file is part of the libquadmath library.
-Libquadmath is free software; you can redistribute it and/or
-modify it under the terms of the GNU Library General Public
-License as published by the Free Software Foundation; either
-version 2 of the License, or (at your option) any later version.
-
-Libquadmath is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Library General Public License for more details.
-
-You should have received a copy of the GNU Library General Public
-License along with libquadmath; see the file COPYING.LIB.  If
-not, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
-Boston, MA 02110-1301, USA.  */
-
-#ifndef QUADMATH_WEAK_H
-#define QUADMATH_WEAK_H
-
-#include "quadmath.h"
-
-#if SUPPORTS_WEAK
-# define __qmath2(name,name2,type) \
-  static __typeof(type) name __attribute__ ((__weakref__(#name2)));
-# define __qmath_(name) __qmath_ ## name
-#else
-# define __qmath2(name,name2,type)
-# define __qmath_(name) name
-#endif
-
-/* __qmath_foo is a weak reference to symbol foo.  */
-#define __qmath3(name) __qmath2(__qmath_ ## name,name,name)
-
-/* Prototypes for real functions.  */
-__qmath3 (acosq)
-__qmath3 (acoshq)
-__qmath3 (asinq)
-__qmath3 (asinhq)
-__qmath3 (atanq)
-__qmath3 (atanhq)
-__qmath3 (atan2q)
-__qmath3 (cbrtq)
-__qmath3 (ceilq)
-__qmath3 (copysignq)
-__qmath3 (coshq)
-__qmath3 (cosq)
-__qmath3 (erfq)
-__qmath3 (erfcq)
-__qmath3 (expq)
-__qmath3 (expm1q)
-__qmath3 (fabsq)
-__qmath3 (fdimq)
-__qmath3 (finiteq)
-__qmath3 (floorq)
-__qmath3 (fmaq)
-__qmath3 (fmaxq)
-__qmath3 (fminq)
-__qmath3 (fmodq)
-__qmath3 (frexpq)
-__qmath3 (hypotq)
-__qmath3 (ilogbq)
-__qmath3 (isinfq)
-__qmath3 (isnanq)
-__qmath3 (j0q)
-__qmath3 (j1q)
-__qmath3 (jnq)
-__qmath3 (ldexpq)
-__qmath3 (lgammaq)
-__qmath3 (llrintq)
-__qmath3 (llroundq)
-__qmath3 (logq)
-__qmath3 (log10q)
-__qmath3 (log1pq)
-__qmath3 (log2q)
-__qmath3 (lrintq)
-__qmath3 (lroundq)
-__qmath3 (modfq)
-__qmath3 (nanq)
-__qmath3 (nearbyintq)
-__qmath3 (nextafterq)
-__qmath3 (powq)
-__qmath3 (remainderq)
-__qmath3 (remquoq)
-__qmath3 (rintq)
-__qmath3 (roundq)
-__qmath3 (scalblnq)
-__qmath3 (scalbnq)
-__qmath3 (signbitq)
-__qmath3 (sincosq)
-__qmath3 (sinhq)
-__qmath3 (sinq)
-__qmath3 (sqrtq)
-__qmath3 (tanq)
-__qmath3 (tanhq)
-__qmath3 (tgammaq)
-__qmath3 (truncq)
-__qmath3 (y0q)
-__qmath3 (y1q)
-__qmath3 (ynq)
-
-
-/* Prototypes for complex functions.  */
-__qmath3 (cabsq)
-__qmath3 (cargq)
-__qmath3 (cimagq)
-__qmath3 (crealq)
-__qmath3 (cacosq)
-__qmath3 (cacoshq)
-__qmath3 (casinq)
-__qmath3 (casinhq)
-__qmath3 (catanq)
-__qmath3 (catanhq)
-__qmath3 (ccosq)
-__qmath3 (ccoshq)
-__qmath3 (cexpq)
-__qmath3 (cexpiq)
-__qmath3 (clogq)
-__qmath3 (clog10q)
-__qmath3 (conjq)
-__qmath3 (cpowq)
-__qmath3 (cprojq)
-__qmath3 (csinq)
-__qmath3 (csinhq)
-__qmath3 (csqrtq)
-__qmath3 (ctanq)
-__qmath3 (ctanhq)
-
-
-/* Prototypes for string <-> flt128 conversion functions.  */
-__qmath3 (strtoflt128)
-__qmath3 (quadmath_snprintf)
-
-#endif
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/rdseedintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/rdseedintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,66 +1,0 @@
-/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if !defined _X86INTRIN_H_INCLUDED
-# error "Never use <rdseedintrin.h> directly; include <x86intrin.h> instead."
-#endif
-
-#ifndef _RDSEEDINTRIN_H_INCLUDED
-#define _RDSEEDINTRIN_H_INCLUDED
-
-#ifndef __RDSEED__
-#pragma GCC push_options
-#pragma GCC target("rdseed")
-#define __DISABLE_RDSEED__
-#endif /* __RDSEED__ */
-
-
-extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_rdseed16_step (unsigned short *p)
-{
-    return __builtin_ia32_rdseed_hi_step (p);
-}
-
-extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_rdseed32_step (unsigned int *p)
-{
-    return __builtin_ia32_rdseed_si_step (p);
-}
-
-#ifdef __x86_64__
-extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_rdseed64_step (unsigned long long *p)
-{
-    return __builtin_ia32_rdseed_di_step (p);
-}
-#endif
-
-#ifdef __DISABLE_RDSEED__
-#undef __DISABLE_RDSEED__
-#pragma GCC pop_options
-#endif /* __DISABLE_RDSEED__ */
-
-#endif /* _RDSEEDINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/rtmintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/rtmintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,84 +1,0 @@
-/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-# error "Never use <rtmintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _RTMINTRIN_H_INCLUDED
-#define _RTMINTRIN_H_INCLUDED
-
-#ifndef __RTM__
-#pragma GCC push_options
-#pragma GCC target("rtm")
-#define __DISABLE_RTM__
-#endif /* __RTM__ */
-
-#define _XBEGIN_STARTED		(~0u)
-#define _XABORT_EXPLICIT	(1 << 0)
-#define _XABORT_RETRY		(1 << 1)
-#define _XABORT_CONFLICT	(1 << 2)
-#define _XABORT_CAPACITY	(1 << 3)
-#define _XABORT_DEBUG		(1 << 4)
-#define _XABORT_NESTED		(1 << 5)
-#define _XABORT_CODE(x)		(((x) >> 24) & 0xFF)
-
-/* Start an RTM code region.  Return _XBEGIN_STARTED on success and the
-   abort condition otherwise.  */
-extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_xbegin (void)
-{
-  return __builtin_ia32_xbegin ();
-}
-
-/* Specify the end of an RTM code region.  If it corresponds to the
-   outermost transaction, then attempts the transaction commit.  If the
-   commit fails, then control is transferred to the outermost transaction
-   fallback handler.  */
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_xend (void)
-{
-  __builtin_ia32_xend ();
-}
-
-/* Force an RTM abort condition. The control is transferred to the
-   outermost transaction fallback handler with the abort condition IMM.  */
-#ifdef __OPTIMIZE__
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_xabort (const unsigned int imm)
-{
-  __builtin_ia32_xabort (imm);
-}
-#else
-#define _xabort(N)  __builtin_ia32_xabort (N)
-#endif /* __OPTIMIZE__ */
-
-#ifdef __DISABLE_RTM__
-#undef __DISABLE_RTM__
-#pragma GCC pop_options
-#endif /* __DISABLE_RTM__ */
-
-#endif /* _RTMINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/shaintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/shaintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,98 +1,0 @@
-/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <shaintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _SHAINTRIN_H_INCLUDED
-#define _SHAINTRIN_H_INCLUDED
-
-#ifndef __SHA__
-#pragma GCC push_options
-#pragma GCC target("sha")
-#define __DISABLE_SHA__
-#endif /* __SHA__ */
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sha1msg1_epu32 (__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_sha1msg1 ((__v4si) __A, (__v4si) __B);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sha1msg2_epu32 (__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_sha1msg2 ((__v4si) __A, (__v4si) __B);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sha1nexte_epu32 (__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_sha1nexte ((__v4si) __A, (__v4si) __B);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sha1rnds4_epu32 (__m128i __A, __m128i __B, const int __I)
-{
-  return (__m128i) __builtin_ia32_sha1rnds4 ((__v4si) __A, (__v4si) __B, __I);
-}
-#else
-#define _mm_sha1rnds4_epu32(A, B, I)				    \
-  ((__m128i) __builtin_ia32_sha1rnds4 ((__v4si)(__m128i)A,	    \
-				       (__v4si)(__m128i)B, (int)I))
-#endif
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sha256msg1_epu32 (__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_sha256msg1 ((__v4si) __A, (__v4si) __B);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sha256msg2_epu32 (__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_sha256msg2 ((__v4si) __A, (__v4si) __B);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sha256rnds2_epu32 (__m128i __A, __m128i __B, __m128i __C)
-{
-  return (__m128i) __builtin_ia32_sha256rnds2 ((__v4si) __A, (__v4si) __B,
-					       (__v4si) __C);
-}
-
-#ifdef __DISABLE_SHA__
-#undef __DISABLE_SHA__
-#pragma GCC pop_options
-#endif /* __DISABLE_SHA__ */
-
-#endif /* _SHAINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/smmintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/smmintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,862 +1,0 @@
-/* Copyright (C) 2007-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* Implemented from the specification included in the Intel C++ Compiler
-   User Guide and Reference, version 10.0.  */
-
-#ifndef _SMMINTRIN_H_INCLUDED
-#define _SMMINTRIN_H_INCLUDED
-
-/* We need definitions from the SSSE3, SSE3, SSE2 and SSE header
-   files.  */
-#include <tmmintrin.h>
-
-#ifndef __SSE4_1__
-#pragma GCC push_options
-#pragma GCC target("sse4.1")
-#define __DISABLE_SSE4_1__
-#endif /* __SSE4_1__ */
-
-/* Rounding mode macros. */
-#define _MM_FROUND_TO_NEAREST_INT	0x00
-#define _MM_FROUND_TO_NEG_INF		0x01
-#define _MM_FROUND_TO_POS_INF		0x02
-#define _MM_FROUND_TO_ZERO		0x03
-#define _MM_FROUND_CUR_DIRECTION	0x04
-
-#define _MM_FROUND_RAISE_EXC		0x00
-#define _MM_FROUND_NO_EXC		0x08
-
-#define _MM_FROUND_NINT		\
-  (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
-#define _MM_FROUND_FLOOR	\
-  (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
-#define _MM_FROUND_CEIL		\
-  (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
-#define _MM_FROUND_TRUNC	\
-  (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
-#define _MM_FROUND_RINT		\
-  (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
-#define _MM_FROUND_NEARBYINT	\
-  (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
-
-/* Test Instruction */
-/* Packed integer 128-bit bitwise comparison. Return 1 if
-   (__V & __M) == 0.  */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_testz_si128 (__m128i __M, __m128i __V)
-{
-  return __builtin_ia32_ptestz128 ((__v2di)__M, (__v2di)__V);
-}
-
-/* Packed integer 128-bit bitwise comparison. Return 1 if
-   (__V & ~__M) == 0.  */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_testc_si128 (__m128i __M, __m128i __V)
-{
-  return __builtin_ia32_ptestc128 ((__v2di)__M, (__v2di)__V);
-}
-
-/* Packed integer 128-bit bitwise comparison. Return 1 if
-   (__V & __M) != 0 && (__V & ~__M) != 0.  */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_testnzc_si128 (__m128i __M, __m128i __V)
-{
-  return __builtin_ia32_ptestnzc128 ((__v2di)__M, (__v2di)__V);
-}
-
-/* Macros for packed integer 128-bit comparison intrinsics.  */
-#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))
-
-#define _mm_test_all_ones(V) \
-  _mm_testc_si128 ((V), _mm_cmpeq_epi32 ((V), (V)))
-
-#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V))
-
-/* Packed/scalar double precision floating point rounding.  */
-
-#ifdef __OPTIMIZE__
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_round_pd (__m128d __V, const int __M)
-{
-  return (__m128d) __builtin_ia32_roundpd ((__v2df)__V, __M);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_round_sd(__m128d __D, __m128d __V, const int __M)
-{
-  return (__m128d) __builtin_ia32_roundsd ((__v2df)__D,
-					   (__v2df)__V,
-					   __M);
-}
-#else
-#define _mm_round_pd(V, M) \
-  ((__m128d) __builtin_ia32_roundpd ((__v2df)(__m128d)(V), (int)(M)))
-
-#define _mm_round_sd(D, V, M)						\
-  ((__m128d) __builtin_ia32_roundsd ((__v2df)(__m128d)(D),		\
-				     (__v2df)(__m128d)(V), (int)(M)))
-#endif
-
-/* Packed/scalar single precision floating point rounding.  */
-
-#ifdef __OPTIMIZE__
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_round_ps (__m128 __V, const int __M)
-{
-  return (__m128) __builtin_ia32_roundps ((__v4sf)__V, __M);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_round_ss (__m128 __D, __m128 __V, const int __M)
-{
-  return (__m128) __builtin_ia32_roundss ((__v4sf)__D,
-					  (__v4sf)__V,
-					  __M);
-}
-#else
-#define _mm_round_ps(V, M) \
-  ((__m128) __builtin_ia32_roundps ((__v4sf)(__m128)(V), (int)(M)))
-
-#define _mm_round_ss(D, V, M)						\
-  ((__m128) __builtin_ia32_roundss ((__v4sf)(__m128)(D),		\
-				    (__v4sf)(__m128)(V), (int)(M)))
-#endif
-
-/* Macros for ceil/floor intrinsics.  */
-#define _mm_ceil_pd(V)	   _mm_round_pd ((V), _MM_FROUND_CEIL)
-#define _mm_ceil_sd(D, V)  _mm_round_sd ((D), (V), _MM_FROUND_CEIL)
-
-#define _mm_floor_pd(V)	   _mm_round_pd((V), _MM_FROUND_FLOOR)
-#define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR)
-
-#define _mm_ceil_ps(V)	   _mm_round_ps ((V), _MM_FROUND_CEIL)
-#define _mm_ceil_ss(D, V)  _mm_round_ss ((D), (V), _MM_FROUND_CEIL)
-
-#define _mm_floor_ps(V)	   _mm_round_ps ((V), _MM_FROUND_FLOOR)
-#define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR)
-
-/* SSE4.1 */
-
-/* Integer blend instructions - select data from 2 sources using
-   constant/variable mask.  */
-
-#ifdef __OPTIMIZE__
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_blend_epi16 (__m128i __X, __m128i __Y, const int __M)
-{
-  return (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__X,
-					      (__v8hi)__Y,
-					      __M);
-}
-#else
-#define _mm_blend_epi16(X, Y, M)					\
-  ((__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(X),		\
-					(__v8hi)(__m128i)(Y), (int)(M)))
-#endif
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_blendv_epi8 (__m128i __X, __m128i __Y, __m128i __M)
-{
-  return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__X,
-					       (__v16qi)__Y,
-					       (__v16qi)__M);
-}
-
-/* Single precision floating point blend instructions - select data
-   from 2 sources using constant/variable mask.  */
-
-#ifdef __OPTIMIZE__
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_blend_ps (__m128 __X, __m128 __Y, const int __M)
-{
-  return (__m128) __builtin_ia32_blendps ((__v4sf)__X,
-					  (__v4sf)__Y,
-					  __M);
-}
-#else
-#define _mm_blend_ps(X, Y, M)						\
-  ((__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(X),		\
-				    (__v4sf)(__m128)(Y), (int)(M)))
-#endif
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_blendv_ps (__m128 __X, __m128 __Y, __m128 __M)
-{
-  return (__m128) __builtin_ia32_blendvps ((__v4sf)__X,
-					   (__v4sf)__Y,
-					   (__v4sf)__M);
-}
-
-/* Double precision floating point blend instructions - select data
-   from 2 sources using constant/variable mask.  */
-
-#ifdef __OPTIMIZE__
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_blend_pd (__m128d __X, __m128d __Y, const int __M)
-{
-  return (__m128d) __builtin_ia32_blendpd ((__v2df)__X,
-					   (__v2df)__Y,
-					   __M);
-}
-#else
-#define _mm_blend_pd(X, Y, M)						\
-  ((__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(X),		\
-				     (__v2df)(__m128d)(Y), (int)(M)))
-#endif
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_blendv_pd (__m128d __X, __m128d __Y, __m128d __M)
-{
-  return (__m128d) __builtin_ia32_blendvpd ((__v2df)__X,
-					    (__v2df)__Y,
-					    (__v2df)__M);
-}
-
-/* Dot product instructions with mask-defined summing and zeroing parts
-   of result.  */
-
-#ifdef __OPTIMIZE__
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_dp_ps (__m128 __X, __m128 __Y, const int __M)
-{
-  return (__m128) __builtin_ia32_dpps ((__v4sf)__X,
-				       (__v4sf)__Y,
-				       __M);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_dp_pd (__m128d __X, __m128d __Y, const int __M)
-{
-  return (__m128d) __builtin_ia32_dppd ((__v2df)__X,
-					(__v2df)__Y,
-					__M);
-}
-#else
-#define _mm_dp_ps(X, Y, M)						\
-  ((__m128) __builtin_ia32_dpps ((__v4sf)(__m128)(X),			\
-				 (__v4sf)(__m128)(Y), (int)(M)))
-
-#define _mm_dp_pd(X, Y, M)						\
-  ((__m128d) __builtin_ia32_dppd ((__v2df)(__m128d)(X),			\
-				  (__v2df)(__m128d)(Y), (int)(M)))
-#endif
-
-/* Packed integer 64-bit comparison, zeroing or filling with ones
-   corresponding parts of result.  */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_epi64 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) ((__v2di)__X == (__v2di)__Y);
-}
-
-/*  Min/max packed integer instructions.  */
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_epi8 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_pminsb128 ((__v16qi)__X, (__v16qi)__Y);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_max_epi8 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi)__X, (__v16qi)__Y);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_epu16 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_pminuw128 ((__v8hi)__X, (__v8hi)__Y);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_max_epu16 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi)__X, (__v8hi)__Y);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_epi32 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_pminsd128 ((__v4si)__X, (__v4si)__Y);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_max_epi32 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si)__X, (__v4si)__Y);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_epu32 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_pminud128 ((__v4si)__X, (__v4si)__Y);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_max_epu32 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_pmaxud128 ((__v4si)__X, (__v4si)__Y);
-}
-
-/* Packed integer 32-bit multiplication with truncation of upper
-   halves of results.  */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mullo_epi32 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) ((__v4su)__X * (__v4su)__Y);
-}
-
-/* Packed integer 32-bit multiplication of 2 pairs of operands
-   with two 64-bit results.  */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mul_epi32 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__X, (__v4si)__Y);
-}
-
-/* Insert single precision float into packed single precision array
-   element selected by index N.  The bits [7-6] of N define S
-   index, the bits [5-4] define D index, and bits [3-0] define
-   zeroing mask for D.  */
-
-#ifdef __OPTIMIZE__
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_insert_ps (__m128 __D, __m128 __S, const int __N)
-{
-  return (__m128) __builtin_ia32_insertps128 ((__v4sf)__D,
-					      (__v4sf)__S,
-					      __N);
-}
-#else
-#define _mm_insert_ps(D, S, N)						\
-  ((__m128) __builtin_ia32_insertps128 ((__v4sf)(__m128)(D),		\
-					(__v4sf)(__m128)(S), (int)(N)))
-#endif
-
-/* Helper macro to create the N value for _mm_insert_ps.  */
-#define _MM_MK_INSERTPS_NDX(S, D, M) (((S) << 6) | ((D) << 4) | (M))
-
-/* Extract binary representation of single precision float from packed
-   single precision array element of X selected by index N.  */
-
-#ifdef __OPTIMIZE__
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_extract_ps (__m128 __X, const int __N)
-{
-  union { int i; float f; } __tmp;
-  __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)__X, __N);
-  return __tmp.i;
-}
-#else
-#define _mm_extract_ps(X, N)						\
-  (__extension__							\
-   ({									\
-     union { int i; float f; } __tmp;					\
-     __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)(__m128)(X), (int)(N)); \
-     __tmp.i;								\
-   }))
-#endif
-
-/* Extract binary representation of single precision float into
-   D from packed single precision array element of S selected
-   by index N.  */
-#define _MM_EXTRACT_FLOAT(D, S, N) \
-  { (D) = __builtin_ia32_vec_ext_v4sf ((__v4sf)(S), (N)); }
-  
-/* Extract specified single precision float element into the lower
-   part of __m128.  */
-#define _MM_PICK_OUT_PS(X, N)				\
-  _mm_insert_ps (_mm_setzero_ps (), (X), 		\
-		 _MM_MK_INSERTPS_NDX ((N), 0, 0x0e))
-
-/* Insert integer, S, into packed integer array element of D
-   selected by index N.  */
-
-#ifdef __OPTIMIZE__
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_insert_epi8 (__m128i __D, int __S, const int __N)
-{
-  return (__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)__D,
-						 __S, __N);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_insert_epi32 (__m128i __D, int __S, const int __N)
-{
-  return (__m128i) __builtin_ia32_vec_set_v4si ((__v4si)__D,
-						 __S, __N);
-}
-
-#ifdef __x86_64__
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_insert_epi64 (__m128i __D, long long __S, const int __N)
-{
-  return (__m128i) __builtin_ia32_vec_set_v2di ((__v2di)__D,
-						 __S, __N);
-}
-#endif
-#else
-#define _mm_insert_epi8(D, S, N)					\
-  ((__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)(__m128i)(D),	\
-					   (int)(S), (int)(N)))
-
-#define _mm_insert_epi32(D, S, N)				\
-  ((__m128i) __builtin_ia32_vec_set_v4si ((__v4si)(__m128i)(D),	\
-					  (int)(S), (int)(N)))
-
-#ifdef __x86_64__
-#define _mm_insert_epi64(D, S, N)					\
-  ((__m128i) __builtin_ia32_vec_set_v2di ((__v2di)(__m128i)(D),		\
-					  (long long)(S), (int)(N)))
-#endif
-#endif
-
-/* Extract integer from packed integer array element of X selected by
-   index N.  */
-
-#ifdef __OPTIMIZE__
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_extract_epi8 (__m128i __X, const int __N)
-{
-   return (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_extract_epi32 (__m128i __X, const int __N)
-{
-   return __builtin_ia32_vec_ext_v4si ((__v4si)__X, __N);
-}
-
-#ifdef __x86_64__
-extern __inline long long  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_extract_epi64 (__m128i __X, const int __N)
-{
-  return __builtin_ia32_vec_ext_v2di ((__v2di)__X, __N);
-}
-#endif
-#else
-#define _mm_extract_epi8(X, N) \
-  ((int) (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N)))
-#define _mm_extract_epi32(X, N) \
-  ((int) __builtin_ia32_vec_ext_v4si ((__v4si)(__m128i)(X), (int)(N)))
-
-#ifdef __x86_64__
-#define _mm_extract_epi64(X, N) \
-  ((long long) __builtin_ia32_vec_ext_v2di ((__v2di)(__m128i)(X), (int)(N)))
-#endif
-#endif
-
-/* Return horizontal packed word minimum and its index in bits [15:0]
-   and bits [18:16] respectively.  */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_minpos_epu16 (__m128i __X)
-{
-  return (__m128i) __builtin_ia32_phminposuw128 ((__v8hi)__X);
-}
-
-/* Packed integer sign-extension.  */
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepi8_epi32 (__m128i __X)
-{
-  return (__m128i) __builtin_ia32_pmovsxbd128 ((__v16qi)__X);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepi16_epi32 (__m128i __X)
-{
-  return (__m128i) __builtin_ia32_pmovsxwd128 ((__v8hi)__X);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepi8_epi64 (__m128i __X)
-{
-  return (__m128i) __builtin_ia32_pmovsxbq128 ((__v16qi)__X);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepi32_epi64 (__m128i __X)
-{
-  return (__m128i) __builtin_ia32_pmovsxdq128 ((__v4si)__X);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepi16_epi64 (__m128i __X)
-{
-  return (__m128i) __builtin_ia32_pmovsxwq128 ((__v8hi)__X);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepi8_epi16 (__m128i __X)
-{
-  return (__m128i) __builtin_ia32_pmovsxbw128 ((__v16qi)__X);
-}
-
-/* Packed integer zero-extension. */
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepu8_epi32 (__m128i __X)
-{
-  return (__m128i) __builtin_ia32_pmovzxbd128 ((__v16qi)__X);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepu16_epi32 (__m128i __X)
-{
-  return (__m128i) __builtin_ia32_pmovzxwd128 ((__v8hi)__X);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepu8_epi64 (__m128i __X)
-{
-  return (__m128i) __builtin_ia32_pmovzxbq128 ((__v16qi)__X);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepu32_epi64 (__m128i __X)
-{
-  return (__m128i) __builtin_ia32_pmovzxdq128 ((__v4si)__X);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepu16_epi64 (__m128i __X)
-{
-  return (__m128i) __builtin_ia32_pmovzxwq128 ((__v8hi)__X);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtepu8_epi16 (__m128i __X)
-{
-  return (__m128i) __builtin_ia32_pmovzxbw128 ((__v16qi)__X);
-}
-
-/* Pack 8 double words from 2 operands into 8 words of result with
-   unsigned saturation. */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_packus_epi32 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_packusdw128 ((__v4si)__X, (__v4si)__Y);
-}
-
-/* Sum absolute 8-bit integer difference of adjacent groups of 4
-   byte integers in the first 2 operands.  Starting offsets within
-   operands are determined by the 3rd mask operand.  */
-
-#ifdef __OPTIMIZE__
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mpsadbw_epu8 (__m128i __X, __m128i __Y, const int __M)
-{
-  return (__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)__X,
-					      (__v16qi)__Y, __M);
-}
-#else
-#define _mm_mpsadbw_epu8(X, Y, M)					\
-  ((__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)(__m128i)(X),		\
-					(__v16qi)(__m128i)(Y), (int)(M)))
-#endif
-
-/* Load double quadword using non-temporal aligned hint.  */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_stream_load_si128 (__m128i *__X)
-{
-  return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __X);
-}
-
-#ifndef __SSE4_2__
-#pragma GCC push_options
-#pragma GCC target("sse4.2")
-#define __DISABLE_SSE4_2__
-#endif /* __SSE4_2__ */
-
-/* These macros specify the source data format.  */
-#define _SIDD_UBYTE_OPS			0x00
-#define _SIDD_UWORD_OPS			0x01
-#define _SIDD_SBYTE_OPS			0x02
-#define _SIDD_SWORD_OPS			0x03
-
-/* These macros specify the comparison operation.  */
-#define _SIDD_CMP_EQUAL_ANY		0x00
-#define _SIDD_CMP_RANGES		0x04
-#define _SIDD_CMP_EQUAL_EACH		0x08
-#define _SIDD_CMP_EQUAL_ORDERED		0x0c
-
-/* These macros specify the polarity.  */
-#define _SIDD_POSITIVE_POLARITY		0x00
-#define _SIDD_NEGATIVE_POLARITY		0x10
-#define _SIDD_MASKED_POSITIVE_POLARITY	0x20
-#define _SIDD_MASKED_NEGATIVE_POLARITY	0x30
-
-/* These macros specify the output selection in _mm_cmpXstri ().  */
-#define _SIDD_LEAST_SIGNIFICANT		0x00
-#define _SIDD_MOST_SIGNIFICANT		0x40
-
-/* These macros specify the output selection in _mm_cmpXstrm ().  */
-#define _SIDD_BIT_MASK			0x00
-#define _SIDD_UNIT_MASK			0x40
-
-/* Intrinsics for text/string processing.  */
-
-#ifdef __OPTIMIZE__
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpistrm (__m128i __X, __m128i __Y, const int __M)
-{
-  return (__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)__X,
-						(__v16qi)__Y,
-						__M);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpistri (__m128i __X, __m128i __Y, const int __M)
-{
-  return __builtin_ia32_pcmpistri128 ((__v16qi)__X,
-				      (__v16qi)__Y,
-				      __M);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpestrm (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
-{
-  return (__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)__X, __LX,
-						(__v16qi)__Y, __LY,
-						__M);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
-{
-  return __builtin_ia32_pcmpestri128 ((__v16qi)__X, __LX,
-				      (__v16qi)__Y, __LY,
-				      __M);
-}
-#else
-#define _mm_cmpistrm(X, Y, M)						\
-  ((__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)(__m128i)(X),	\
-					  (__v16qi)(__m128i)(Y), (int)(M)))
-#define _mm_cmpistri(X, Y, M)						\
-  ((int) __builtin_ia32_pcmpistri128 ((__v16qi)(__m128i)(X),		\
-				      (__v16qi)(__m128i)(Y), (int)(M)))
-
-#define _mm_cmpestrm(X, LX, Y, LY, M)					\
-  ((__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)(__m128i)(X),	\
-					  (int)(LX), (__v16qi)(__m128i)(Y), \
-					  (int)(LY), (int)(M)))
-#define _mm_cmpestri(X, LX, Y, LY, M)					\
-  ((int) __builtin_ia32_pcmpestri128 ((__v16qi)(__m128i)(X), (int)(LX),	\
-				      (__v16qi)(__m128i)(Y), (int)(LY),	\
-				      (int)(M)))
-#endif
-
-/* Intrinsics for text/string processing and reading values of
-   EFlags.  */
-
-#ifdef __OPTIMIZE__
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpistra (__m128i __X, __m128i __Y, const int __M)
-{
-  return __builtin_ia32_pcmpistria128 ((__v16qi)__X,
-				       (__v16qi)__Y,
-				       __M);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpistrc (__m128i __X, __m128i __Y, const int __M)
-{
-  return __builtin_ia32_pcmpistric128 ((__v16qi)__X,
-				       (__v16qi)__Y,
-				       __M);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpistro (__m128i __X, __m128i __Y, const int __M)
-{
-  return __builtin_ia32_pcmpistrio128 ((__v16qi)__X,
-				       (__v16qi)__Y,
-				       __M);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpistrs (__m128i __X, __m128i __Y, const int __M)
-{
-  return __builtin_ia32_pcmpistris128 ((__v16qi)__X,
-				       (__v16qi)__Y,
-				       __M);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpistrz (__m128i __X, __m128i __Y, const int __M)
-{
-  return __builtin_ia32_pcmpistriz128 ((__v16qi)__X,
-				       (__v16qi)__Y,
-				       __M);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpestra (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
-{
-  return __builtin_ia32_pcmpestria128 ((__v16qi)__X, __LX,
-				       (__v16qi)__Y, __LY,
-				       __M);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpestrc (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
-{
-  return __builtin_ia32_pcmpestric128 ((__v16qi)__X, __LX,
-				       (__v16qi)__Y, __LY,
-				       __M);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpestro (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
-{
-  return __builtin_ia32_pcmpestrio128 ((__v16qi)__X, __LX,
-				       (__v16qi)__Y, __LY,
-				       __M);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpestrs (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
-{
-  return __builtin_ia32_pcmpestris128 ((__v16qi)__X, __LX,
-				       (__v16qi)__Y, __LY,
-				       __M);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpestrz (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
-{
-  return __builtin_ia32_pcmpestriz128 ((__v16qi)__X, __LX,
-				       (__v16qi)__Y, __LY,
-				       __M);
-}
-#else
-#define _mm_cmpistra(X, Y, M)						\
-  ((int) __builtin_ia32_pcmpistria128 ((__v16qi)(__m128i)(X),		\
-				       (__v16qi)(__m128i)(Y), (int)(M)))
-#define _mm_cmpistrc(X, Y, M)						\
-  ((int) __builtin_ia32_pcmpistric128 ((__v16qi)(__m128i)(X),		\
-				       (__v16qi)(__m128i)(Y), (int)(M)))
-#define _mm_cmpistro(X, Y, M)						\
-  ((int) __builtin_ia32_pcmpistrio128 ((__v16qi)(__m128i)(X),		\
-				       (__v16qi)(__m128i)(Y), (int)(M)))
-#define _mm_cmpistrs(X, Y, M)						\
-  ((int) __builtin_ia32_pcmpistris128 ((__v16qi)(__m128i)(X),		\
-				       (__v16qi)(__m128i)(Y), (int)(M)))
-#define _mm_cmpistrz(X, Y, M)						\
-  ((int) __builtin_ia32_pcmpistriz128 ((__v16qi)(__m128i)(X),		\
-				       (__v16qi)(__m128i)(Y), (int)(M)))
-
-#define _mm_cmpestra(X, LX, Y, LY, M)					\
-  ((int) __builtin_ia32_pcmpestria128 ((__v16qi)(__m128i)(X), (int)(LX), \
-				       (__v16qi)(__m128i)(Y), (int)(LY), \
-				       (int)(M)))
-#define _mm_cmpestrc(X, LX, Y, LY, M)					\
-  ((int) __builtin_ia32_pcmpestric128 ((__v16qi)(__m128i)(X), (int)(LX), \
-				       (__v16qi)(__m128i)(Y), (int)(LY), \
-				       (int)(M)))
-#define _mm_cmpestro(X, LX, Y, LY, M)					\
-  ((int) __builtin_ia32_pcmpestrio128 ((__v16qi)(__m128i)(X), (int)(LX), \
-				       (__v16qi)(__m128i)(Y), (int)(LY), \
-				       (int)(M)))
-#define _mm_cmpestrs(X, LX, Y, LY, M)					\
-  ((int) __builtin_ia32_pcmpestris128 ((__v16qi)(__m128i)(X), (int)(LX), \
-				       (__v16qi)(__m128i)(Y), (int)(LY), \
-				       (int)(M)))
-#define _mm_cmpestrz(X, LX, Y, LY, M)					\
-  ((int) __builtin_ia32_pcmpestriz128 ((__v16qi)(__m128i)(X), (int)(LX), \
-				       (__v16qi)(__m128i)(Y), (int)(LY), \
-				       (int)(M)))
-#endif
-
-/* Packed integer 64-bit comparison, zeroing or filling with ones
-   corresponding parts of result.  */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) ((__v2di)__X > (__v2di)__Y);
-}
-
-#ifdef __DISABLE_SSE4_2__
-#undef __DISABLE_SSE4_2__
-#pragma GCC pop_options
-#endif /* __DISABLE_SSE4_2__ */
-
-#ifdef __DISABLE_SSE4_1__
-#undef __DISABLE_SSE4_1__
-#pragma GCC pop_options
-#endif /* __DISABLE_SSE4_1__ */
-
-#include <popcntintrin.h>
-
-#ifndef __SSE4_1__
-#pragma GCC push_options
-#pragma GCC target("sse4.1")
-#define __DISABLE_SSE4_1__
-#endif /* __SSE4_1__ */
-
-#ifndef __SSE4_2__
-#pragma GCC push_options
-#pragma GCC target("sse4.2")
-#define __DISABLE_SSE4_2__
-#endif /* __SSE4_1__ */
-
-/* Accumulate CRC32 (polynomial 0x11EDC6F41) value.  */
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_crc32_u8 (unsigned int __C, unsigned char __V)
-{
-  return __builtin_ia32_crc32qi (__C, __V);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_crc32_u16 (unsigned int __C, unsigned short __V)
-{
-  return __builtin_ia32_crc32hi (__C, __V);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_crc32_u32 (unsigned int __C, unsigned int __V)
-{
-  return __builtin_ia32_crc32si (__C, __V);
-}
-
-#ifdef __x86_64__
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_crc32_u64 (unsigned long long __C, unsigned long long __V)
-{
-  return __builtin_ia32_crc32di (__C, __V);
-}
-#endif
-
-#ifdef __DISABLE_SSE4_2__
-#undef __DISABLE_SSE4_2__
-#pragma GCC pop_options
-#endif /* __DISABLE_SSE4_2__ */
-
-#ifdef __DISABLE_SSE4_1__
-#undef __DISABLE_SSE4_1__
-#pragma GCC pop_options
-#endif /* __DISABLE_SSE4_1__ */
-
-#endif /* _SMMINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/ssp/ssp.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/ssp/ssp.h	(revision 1046)
+++ 	(revision )
@@ -1,65 +1,0 @@
-/* Object size checking support macros.
-   Copyright (C) 2004, 2005, 2009 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 3, or (at your option) any later
-version.
-
-In addition to the permissions in the GNU General Public License, the
-Free Software Foundation gives you unlimited permission to link the
-compiled version of this file into combinations with other programs,
-and to distribute those combinations without any restriction coming
-from the use of this file.  (The General Public License restrictions
-do apply in other respects; for example, they cover modification of
-the file, and distribution when not linked into a combine
-executable.)
-
-GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-
-#ifndef _SSP_H
-#define _SSP_H 1
-
-#if _FORTIFY_SOURCE > 0 && __OPTIMIZE__ > 0 \
-    && defined __GNUC__ \
-    && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) \
-    && !defined __cplusplus
-# if _FORTIFY_SOURCE == 1
-#  define __SSP_FORTIFY_LEVEL 1
-# elif _FORTIFY_SOURCE > 1
-#  define __SSP_FORTIFY_LEVEL 2
-# endif
-#endif
-
-#if __SSP_FORTIFY_LEVEL > 0
-# include <stddef.h>
-# define __ssp_bos(ptr) __builtin_object_size (ptr, __SSP_FORTIFY_LEVEL > 1)
-# define __ssp_bos0(ptr) __builtin_object_size (ptr, 0)
-
-# define __SSP_REDIRECT(name, proto, alias) \
-  name proto __asm__ (__SSP_ASMNAME (#alias))
-# define __SSP_ASMNAME(cname)  __SSP_ASMNAME2 (__USER_LABEL_PREFIX__, cname)
-# define __SSP_ASMNAME2(prefix, cname) __SSP_ASMNAME3 (prefix) cname
-# define __SSP_ASMNAME3(prefix) #prefix
-
-# undef __SSP_HAVE_VSNPRINTF
-
-extern void __chk_fail (void) __attribute__((__noreturn__));
-#endif
-
-#endif /* _SSP_H */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/ssp/stdio.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/ssp/stdio.h	(revision 1046)
+++ 	(revision )
@@ -1,100 +1,0 @@
-/* Checking macros for stdio functions.
-   Copyright (C) 2004, 2005, 2009 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 3, or (at your option) any later
-version.
-
-In addition to the permissions in the GNU General Public License, the
-Free Software Foundation gives you unlimited permission to link the
-compiled version of this file into combinations with other programs,
-and to distribute those combinations without any restriction coming
-from the use of this file.  (The General Public License restrictions
-do apply in other respects; for example, they cover modification of
-the file, and distribution when not linked into a combine
-executable.)
-
-GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-
-#ifndef _SSP_STDIO_H
-#define _SSP_STDIO_H 1
-
-#include <ssp.h>
-#include_next <stdio.h>
-
-#if __SSP_FORTIFY_LEVEL > 0
-
-#include <stdarg.h>
-
-#undef sprintf
-#undef vsprintf
-#undef snprintf
-#undef vsnprintf
-#undef gets
-#undef fgets
-
-extern int __sprintf_chk (char *__restrict__ __s, int __flag, size_t __slen,
-			  __const char *__restrict__ __format, ...);
-extern int __vsprintf_chk (char *__restrict__ __s, int __flag, size_t __slen,
-			   __const char *__restrict__ __format,
-			   va_list __ap);
-
-#define sprintf(str, ...) \
-  __builtin___sprintf_chk (str, 0, __ssp_bos (str), \
-			   __VA_ARGS__)
-#define vsprintf(str, fmt, ap) \
-  __builtin___vsprintf_chk (str, 0, __ssp_bos (str), fmt, ap)
-
-extern int __snprintf_chk (char *__restrict__ __s, size_t __n, int __flag,
-			   size_t __slen, __const char *__restrict__ __format,
-			   ...);
-extern int __vsnprintf_chk (char *__restrict__ __s, size_t __n, int __flag,
-			    size_t __slen, __const char *__restrict__ __format,
-			    va_list __ap);
-
-#define snprintf(str, len, ...) \
-  __builtin___snprintf_chk (str, len, 0, __ssp_bos (str), __VA_ARGS__)
-#define vsnprintf(str, len, fmt, ap) \
-  __builtin___vsnprintf_chk (str, len, 0, __ssp_bos (str), fmt, ap)
-
-extern char *__gets_chk (char *__str, size_t);
-extern char *__SSP_REDIRECT (__gets_alias, (char *__str), gets);
-
-extern inline __attribute__((__always_inline__)) char *
-gets (char *__str)
-{
-  if (__ssp_bos (__str) != (size_t) -1)
-    return __gets_chk (__str, __ssp_bos (__str));
-  return __gets_alias (__str);
-}
-
-extern char *__SSP_REDIRECT (__fgets_alias,
-			     (char *__restrict__ __s, int __n,
-			      FILE *__restrict__ __stream), fgets);
-
-extern inline __attribute__((__always_inline__)) char *
-fgets (char *__restrict__ __s, int __n, FILE *__restrict__ __stream)
-{
-  if (__ssp_bos (__s) != (size_t) -1 && (size_t) __n > __ssp_bos (__s))
-    __chk_fail ();
-  return __fgets_alias (__s, __n, __stream);
-}
-
-#endif /* __SSP_FORTIFY_LEVEL > 0 */
-#endif /* _SSP_STDIO_H */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/ssp/string.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/ssp/string.h	(revision 1046)
+++ 	(revision )
@@ -1,167 +1,0 @@
-/* Checking macros for string functions.
-   Copyright (C) 2004, 2005, 2009 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 3, or (at your option) any later
-version.
-
-In addition to the permissions in the GNU General Public License, the
-Free Software Foundation gives you unlimited permission to link the
-compiled version of this file into combinations with other programs,
-and to distribute those combinations without any restriction coming
-from the use of this file.  (The General Public License restrictions
-do apply in other respects; for example, they cover modification of
-the file, and distribution when not linked into a combine
-executable.)
-
-GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-
-#ifndef _SSP_STRING_H
-#define _SSP_STRING_H 1
-
-#include <ssp.h>
-#include_next <string.h>
-
-#if __SSP_FORTIFY_LEVEL > 0
-
-#undef memcpy
-#undef memmove
-#undef memset
-#undef strcat
-#undef strcpy
-#undef strncat
-#undef strncpy
-#undef mempcpy
-#undef stpcpy
-#undef bcopy
-#undef bzero
-
-#define memcpy(dest, src, len) \
-  ((__ssp_bos0 (dest) != (size_t) -1)					\
-   ? __builtin___memcpy_chk (dest, src, len, __ssp_bos0 (dest))		\
-   : __memcpy_ichk (dest, src, len))
-static inline __attribute__((__always_inline__)) void *
-__memcpy_ichk (void *__restrict__ __dest, const void *__restrict__ __src,
-	       size_t __len)
-{
-  return __builtin___memcpy_chk (__dest, __src, __len, __ssp_bos0 (__dest));
-}
-
-
-#define memmove(dest, src, len) \
-  ((__ssp_bos0 (dest) != (size_t) -1)					\
-   ? __builtin___memmove_chk (dest, src, len, __ssp_bos0 (dest))		\
-   : __memmove_ichk (dest, src, len))
-static inline __attribute__((__always_inline__)) void *
-__memmove_ichk (void *__dest, const void *__src, size_t __len)
-{
-  return __builtin___memmove_chk (__dest, __src, __len, __ssp_bos0 (__dest));
-}
-
-
-#define mempcpy(dest, src, len) \
-  ((__ssp_bos0 (dest) != (size_t) -1)					\
-   ? __builtin___mempcpy_chk (dest, src, len, __ssp_bos0 (dest))	\
-   : __mempcpy_ichk (dest, src, len))
-static inline __attribute__((__always_inline__)) void *
-__mempcpy_ichk (void *__restrict__ __dest, const void *__restrict__ __src,
-		size_t __len)
-{
-  return __builtin___mempcpy_chk (__dest, __src, __len, __ssp_bos0 (__dest));
-}
-
-
-#define memset(dest, ch, len) \
-  ((__ssp_bos0 (dest) != (size_t) -1)					\
-   ? __builtin___memset_chk (dest, ch, len, __ssp_bos0 (dest))		\
-   : __memset_ichk (dest, ch, len))
-static inline __attribute__((__always_inline__)) void *
-__memset_ichk (void *__dest, int __ch, size_t __len)
-{
-  return __builtin___memset_chk (__dest, __ch, __len, __ssp_bos0 (__dest));
-}
-
-#define bcopy(src, dest, len) ((void) \
- ((__ssp_bos0 (dest) != (size_t) -1)					\
-   ? __builtin___memmove_chk (dest, src, len, __ssp_bos0 (dest))	\
-   : __memmove_ichk (dest, src, len)))
-#define bzero(dest, len) ((void) \
-  ((__ssp_bos0 (dest) != (size_t) -1)					\
-   ? __builtin___memset_chk (dest, '\0', len, __ssp_bos0 (dest))	\
-   : __memset_ichk (dest, '\0', len)))
-
-
-#define strcpy(dest, src) \
-  ((__ssp_bos (dest) != (size_t) -1)					\
-   ? __builtin___strcpy_chk (dest, src, __ssp_bos (dest))		\
-   : __strcpy_ichk (dest, src))
-static inline __attribute__((__always_inline__)) char *
-__strcpy_ichk (char *__restrict__ __dest, const char *__restrict__ __src)
-{
-  return __builtin___strcpy_chk (__dest, __src, __ssp_bos (__dest));
-}
-
-
-#define stpcpy(dest, src) \
-  ((__ssp_bos (dest) != (size_t) -1)					\
-   ? __builtin___stpcpy_chk (dest, src, __ssp_bos (dest))		\
-   : __stpcpy_ichk (dest, src))
-static inline __attribute__((__always_inline__)) char *
-__stpcpy_ichk (char *__restrict__ __dest, const char *__restrict__ __src)
-{
-  return __builtin___stpcpy_chk (__dest, __src, __ssp_bos (__dest));
-}
-
-
-#define strncpy(dest, src, len) \
-  ((__ssp_bos (dest) != (size_t) -1)					\
-   ? __builtin___strncpy_chk (dest, src, len, __ssp_bos (dest))		\
-   : __strncpy_ichk (dest, src, len))
-static inline __attribute__((__always_inline__)) char *
-__strncpy_ichk (char *__restrict__ __dest, const char *__restrict__ __src,
-		size_t __len)
-{
-  return __builtin___strncpy_chk (__dest, __src, __len, __ssp_bos (__dest));
-}
-
-
-#define strcat(dest, src) \
-  ((__ssp_bos (dest) != (size_t) -1)					\
-   ? __builtin___strcat_chk (dest, src, __ssp_bos (dest))		\
-   : __strcat_ichk (dest, src))
-static inline __attribute__((__always_inline__)) char *
-__strcat_ichk (char *__restrict__ __dest, const char *__restrict__ __src)
-{
-  return __builtin___strcat_chk (__dest, __src, __ssp_bos (__dest));
-}
-
-
-#define strncat(dest, src, len) \
-  ((__ssp_bos (dest) != (size_t) -1)					\
-   ? __builtin___strncat_chk (dest, src, len, __ssp_bos (dest))		\
-   : __strncat_ichk (dest, src, len))
-static inline __attribute__((__always_inline__)) char *
-__strncat_ichk (char *__restrict__ __dest, const char *__restrict__ __src,
-		size_t __len)
-{
-  return __builtin___strncat_chk (__dest, __src, __len, __ssp_bos (__dest));
-}
-
-#endif /* __SSP_FORTIFY_LEVEL > 0 */
-#endif /* _SSP_STRING_H */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/ssp/unistd.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/ssp/unistd.h	(revision 1046)
+++ 	(revision )
@@ -1,84 +1,0 @@
-/* Checking macros for unistd functions.
-   Copyright (C) 2005, 2009 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 3, or (at your option) any later
-version.
-
-In addition to the permissions in the GNU General Public License, the
-Free Software Foundation gives you unlimited permission to link the
-compiled version of this file into combinations with other programs,
-and to distribute those combinations without any restriction coming
-from the use of this file.  (The General Public License restrictions
-do apply in other respects; for example, they cover modification of
-the file, and distribution when not linked into a combine
-executable.)
-
-GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-
-#ifndef _SSP_UNISTD_H
-#define _SSP_UNISTD_H 1
-
-#include <ssp.h>
-#include_next <unistd.h>
-
-#if __SSP_FORTIFY_LEVEL > 0
-
-#undef read
-#undef readlink
-#undef getcwd
-
-extern ssize_t __SSP_REDIRECT (__read_alias, (int __fd, void *__buf,
-					      size_t __nbytes), read);
-
-extern inline __attribute__((__always_inline__)) ssize_t
-read (int __fd, void *__buf, size_t __nbytes)
-{
-  if (__ssp_bos0 (__buf) != (size_t) -1 && __nbytes > __ssp_bos0 (__buf))
-    __chk_fail ();
-  return __read_alias (__fd, __buf, __nbytes);
-}
-
-extern int __SSP_REDIRECT (__readlink_alias,
-			   (const char *__restrict__ __path,
-			    char *__restrict__ __buf, size_t __len),
-			   readlink);
-
-extern inline __attribute__((__always_inline__)) int
-readlink (const char *__restrict__ __path, char *__restrict__ __buf,
-	  size_t __len)
-{
-  if (__ssp_bos (__buf) != (size_t) -1 && __len > __ssp_bos (__buf))
-    __chk_fail ();
-  return __readlink_alias (__path, __buf, __len);
-}
-
-extern char *__SSP_REDIRECT (__getcwd_alias,
-			     (char *__buf, size_t __size), getcwd);
-
-extern inline __attribute__((__always_inline__)) char *
-getcwd (char *__buf, size_t __size)
-{
-  if (__ssp_bos (__buf) != (size_t) -1 && __size > __ssp_bos (__buf))
-    __chk_fail ();
-  return __getcwd_alias (__buf, __size);
-}
-
-#endif /* __SSP_FORTIFY_LEVEL > 0 */
-#endif /* _SSP_UNISTD_H */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/stdalign.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/stdalign.h	(revision 1046)
+++ 	(revision )
@@ -1,39 +1,0 @@
-/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-/* ISO C1X: 7.15 Alignment <stdalign.h>.  */
-
-#ifndef _STDALIGN_H
-#define _STDALIGN_H
-
-#ifndef __cplusplus
-
-#define alignas _Alignas
-#define alignof _Alignof
-
-#define __alignas_is_defined 1
-#define __alignof_is_defined 1
-
-#endif
-
-#endif	/* stdalign.h */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/stdarg.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/stdarg.h	(revision 1046)
+++ 	(revision )
@@ -1,126 +1,0 @@
-/* Copyright (C) 1989-2015 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-/*
- * ISO C Standard:  7.15  Variable arguments  <stdarg.h>
- */
-
-#ifndef _STDARG_H
-#ifndef _ANSI_STDARG_H_
-#ifndef __need___va_list
-#define _STDARG_H
-#define _ANSI_STDARG_H_
-#endif /* not __need___va_list */
-#undef __need___va_list
-
-/* Define __gnuc_va_list.  */
-
-#ifndef __GNUC_VA_LIST
-#define __GNUC_VA_LIST
-typedef __builtin_va_list __gnuc_va_list;
-#endif
-
-/* Define the standard macros for the user,
-   if this invocation was from the user program.  */
-#ifdef _STDARG_H
-
-#define va_start(v,l)	__builtin_va_start(v,l)
-#define va_end(v)	__builtin_va_end(v)
-#define va_arg(v,l)	__builtin_va_arg(v,l)
-#if !defined(__STRICT_ANSI__) || __STDC_VERSION__ + 0 >= 199900L || defined(__GXX_EXPERIMENTAL_CXX0X__)
-#define va_copy(d,s)	__builtin_va_copy(d,s)
-#endif
-#define __va_copy(d,s)	__builtin_va_copy(d,s)
-
-/* Define va_list, if desired, from __gnuc_va_list. */
-/* We deliberately do not define va_list when called from
-   stdio.h, because ANSI C says that stdio.h is not supposed to define
-   va_list.  stdio.h needs to have access to that data type, 
-   but must not use that name.  It should use the name __gnuc_va_list,
-   which is safe because it is reserved for the implementation.  */
-
-#ifdef _BSD_VA_LIST
-#undef _BSD_VA_LIST
-#endif
-
-#if defined(__svr4__) || (defined(_SCO_DS) && !defined(__VA_LIST))
-/* SVR4.2 uses _VA_LIST for an internal alias for va_list,
-   so we must avoid testing it and setting it here.
-   SVR4 uses _VA_LIST as a flag in stdarg.h, but we should
-   have no conflict with that.  */
-#ifndef _VA_LIST_
-#define _VA_LIST_
-#ifdef __i860__
-#ifndef _VA_LIST
-#define _VA_LIST va_list
-#endif
-#endif /* __i860__ */
-typedef __gnuc_va_list va_list;
-#ifdef _SCO_DS
-#define __VA_LIST
-#endif
-#endif /* _VA_LIST_ */
-#else /* not __svr4__ || _SCO_DS */
-
-/* The macro _VA_LIST_ is the same thing used by this file in Ultrix.
-   But on BSD NET2 we must not test or define or undef it.
-   (Note that the comments in NET 2's ansi.h
-   are incorrect for _VA_LIST_--see stdio.h!)  */
-#if !defined (_VA_LIST_) || defined (__BSD_NET2__) || defined (____386BSD____) || defined (__bsdi__) || defined (__sequent__) || defined (__FreeBSD__) || defined(WINNT)
-/* The macro _VA_LIST_DEFINED is used in Windows NT 3.5  */
-#ifndef _VA_LIST_DEFINED
-/* The macro _VA_LIST is used in SCO Unix 3.2.  */
-#ifndef _VA_LIST
-/* The macro _VA_LIST_T_H is used in the Bull dpx2  */
-#ifndef _VA_LIST_T_H
-/* The macro __va_list__ is used by BeOS.  */
-#ifndef __va_list__
-typedef __gnuc_va_list va_list;
-#endif /* not __va_list__ */
-#endif /* not _VA_LIST_T_H */
-#endif /* not _VA_LIST */
-#endif /* not _VA_LIST_DEFINED */
-#if !(defined (__BSD_NET2__) || defined (____386BSD____) || defined (__bsdi__) || defined (__sequent__) || defined (__FreeBSD__))
-#define _VA_LIST_
-#endif
-#ifndef _VA_LIST
-#define _VA_LIST
-#endif
-#ifndef _VA_LIST_DEFINED
-#define _VA_LIST_DEFINED
-#endif
-#ifndef _VA_LIST_T_H
-#define _VA_LIST_T_H
-#endif
-#ifndef __va_list__
-#define __va_list__
-#endif
-
-#endif /* not _VA_LIST_, except on certain systems */
-
-#endif /* not __svr4__ */
-
-#endif /* _STDARG_H */
-
-#endif /* not _ANSI_STDARG_H_ */
-#endif /* not _STDARG_H */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/stdatomic.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/stdatomic.h	(revision 1046)
+++ 	(revision )
@@ -1,238 +1,0 @@
-/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-/* ISO C11 Standard:  7.17  Atomics <stdatomic.h>.  */
-
-#ifndef _STDATOMIC_H
-#define _STDATOMIC_H
-
-typedef enum
-  {
-    memory_order_relaxed = __ATOMIC_RELAXED,
-    memory_order_consume = __ATOMIC_CONSUME,
-    memory_order_acquire = __ATOMIC_ACQUIRE,
-    memory_order_release = __ATOMIC_RELEASE,
-    memory_order_acq_rel = __ATOMIC_ACQ_REL,
-    memory_order_seq_cst = __ATOMIC_SEQ_CST
-  } memory_order;
-
-
-typedef _Atomic _Bool atomic_bool;
-typedef _Atomic char atomic_char;
-typedef _Atomic signed char atomic_schar;
-typedef _Atomic unsigned char atomic_uchar;
-typedef _Atomic short atomic_short;
-typedef _Atomic unsigned short atomic_ushort;
-typedef _Atomic int atomic_int;
-typedef _Atomic unsigned int atomic_uint;
-typedef _Atomic long atomic_long;
-typedef _Atomic unsigned long atomic_ulong;
-typedef _Atomic long long atomic_llong;
-typedef _Atomic unsigned long long atomic_ullong;
-typedef _Atomic __CHAR16_TYPE__ atomic_char16_t;
-typedef _Atomic __CHAR32_TYPE__ atomic_char32_t;
-typedef _Atomic __WCHAR_TYPE__ atomic_wchar_t;
-typedef _Atomic __INT_LEAST8_TYPE__ atomic_int_least8_t;
-typedef _Atomic __UINT_LEAST8_TYPE__ atomic_uint_least8_t;
-typedef _Atomic __INT_LEAST16_TYPE__ atomic_int_least16_t;
-typedef _Atomic __UINT_LEAST16_TYPE__ atomic_uint_least16_t;
-typedef _Atomic __INT_LEAST32_TYPE__ atomic_int_least32_t;
-typedef _Atomic __UINT_LEAST32_TYPE__ atomic_uint_least32_t;
-typedef _Atomic __INT_LEAST64_TYPE__ atomic_int_least64_t;
-typedef _Atomic __UINT_LEAST64_TYPE__ atomic_uint_least64_t;
-typedef _Atomic __INT_FAST8_TYPE__ atomic_int_fast8_t;
-typedef _Atomic __UINT_FAST8_TYPE__ atomic_uint_fast8_t;
-typedef _Atomic __INT_FAST16_TYPE__ atomic_int_fast16_t;
-typedef _Atomic __UINT_FAST16_TYPE__ atomic_uint_fast16_t;
-typedef _Atomic __INT_FAST32_TYPE__ atomic_int_fast32_t;
-typedef _Atomic __UINT_FAST32_TYPE__ atomic_uint_fast32_t;
-typedef _Atomic __INT_FAST64_TYPE__ atomic_int_fast64_t;
-typedef _Atomic __UINT_FAST64_TYPE__ atomic_uint_fast64_t;
-typedef _Atomic __INTPTR_TYPE__ atomic_intptr_t;
-typedef _Atomic __UINTPTR_TYPE__ atomic_uintptr_t;
-typedef _Atomic __SIZE_TYPE__ atomic_size_t;
-typedef _Atomic __PTRDIFF_TYPE__ atomic_ptrdiff_t;
-typedef _Atomic __INTMAX_TYPE__ atomic_intmax_t;
-typedef _Atomic __UINTMAX_TYPE__ atomic_uintmax_t;        
-
-
-#define ATOMIC_VAR_INIT(VALUE)	(VALUE)
-#define atomic_init(PTR, VAL)			\
-  do						\
-    {						\
-      *(PTR) = (VAL);				\
-    }						\
-  while (0)
-
-#define kill_dependency(Y)			\
-  __extension__					\
-  ({						\
-    __auto_type __kill_dependency_tmp = (Y);	\
-    __kill_dependency_tmp;			\
-  })
-
-#define atomic_thread_fence(MO)	__atomic_thread_fence (MO)
-#define atomic_signal_fence(MO)	__atomic_signal_fence  (MO)
-#define atomic_is_lock_free(OBJ) __atomic_is_lock_free (sizeof (*(OBJ)), (OBJ))
-
-#define ATOMIC_BOOL_LOCK_FREE		__GCC_ATOMIC_BOOL_LOCK_FREE
-#define ATOMIC_CHAR_LOCK_FREE		__GCC_ATOMIC_CHAR_LOCK_FREE
-#define ATOMIC_CHAR16_T_LOCK_FREE	__GCC_ATOMIC_CHAR16_T_LOCK_FREE
-#define ATOMIC_CHAR32_T_LOCK_FREE	__GCC_ATOMIC_CHAR32_T_LOCK_FREE
-#define ATOMIC_WCHAR_T_LOCK_FREE	__GCC_ATOMIC_WCHAR_T_LOCK_FREE
-#define ATOMIC_SHORT_LOCK_FREE		__GCC_ATOMIC_SHORT_LOCK_FREE
-#define ATOMIC_INT_LOCK_FREE		__GCC_ATOMIC_INT_LOCK_FREE
-#define ATOMIC_LONG_LOCK_FREE		__GCC_ATOMIC_LONG_LOCK_FREE
-#define ATOMIC_LLONG_LOCK_FREE		__GCC_ATOMIC_LLONG_LOCK_FREE
-#define ATOMIC_POINTER_LOCK_FREE	__GCC_ATOMIC_POINTER_LOCK_FREE
-
-
-/* Note that these macros require __typeof__ and __auto_type to remove
-   _Atomic qualifiers (and const qualifiers, if those are valid on
-   macro operands).
-   
-   Also note that the header file uses the generic form of __atomic
-   builtins, which requires the address to be taken of the value
-   parameter, and then we pass that value on.  This allows the macros
-   to work for any type, and the compiler is smart enough to convert
-   these to lock-free _N variants if possible, and throw away the
-   temps.  */
-
-#define atomic_store_explicit(PTR, VAL, MO)				\
-  __extension__								\
-  ({									\
-    __auto_type __atomic_store_ptr = (PTR);				\
-    __typeof__ (*__atomic_store_ptr) __atomic_store_tmp = (VAL);	\
-    __atomic_store (__atomic_store_ptr, &__atomic_store_tmp, (MO));	\
-  })
-
-#define atomic_store(PTR, VAL)				\
-  atomic_store_explicit (PTR, VAL, __ATOMIC_SEQ_CST)
-
-
-#define atomic_load_explicit(PTR, MO)					\
-  __extension__								\
-  ({									\
-    __auto_type __atomic_load_ptr = (PTR);				\
-    __typeof__ (*__atomic_load_ptr) __atomic_load_tmp;			\
-    __atomic_load (__atomic_load_ptr, &__atomic_load_tmp, (MO));	\
-    __atomic_load_tmp;							\
-  })
-
-#define atomic_load(PTR)  atomic_load_explicit (PTR, __ATOMIC_SEQ_CST)
-
-
-#define atomic_exchange_explicit(PTR, VAL, MO)				\
-  __extension__								\
-  ({									\
-    __auto_type __atomic_exchange_ptr = (PTR);				\
-    __typeof__ (*__atomic_exchange_ptr) __atomic_exchange_val = (VAL);	\
-    __typeof__ (*__atomic_exchange_ptr) __atomic_exchange_tmp;		\
-    __atomic_exchange (__atomic_exchange_ptr, &__atomic_exchange_val,	\
-		       &__atomic_exchange_tmp, (MO));			\
-    __atomic_exchange_tmp;						\
-  })
-
-#define atomic_exchange(PTR, VAL) 			\
-  atomic_exchange_explicit (PTR, VAL, __ATOMIC_SEQ_CST)
-
-
-#define atomic_compare_exchange_strong_explicit(PTR, VAL, DES, SUC, FAIL) \
-  __extension__								\
-  ({									\
-    __auto_type __atomic_compare_exchange_ptr = (PTR);			\
-    __typeof__ (*__atomic_compare_exchange_ptr) __atomic_compare_exchange_tmp \
-      = (DES);								\
-    __atomic_compare_exchange (__atomic_compare_exchange_ptr, (VAL),	\
-			       &__atomic_compare_exchange_tmp, 0,	\
-			       (SUC), (FAIL));				\
-  })
-
-#define atomic_compare_exchange_strong(PTR, VAL, DES) 			   \
-  atomic_compare_exchange_strong_explicit (PTR, VAL, DES, __ATOMIC_SEQ_CST, \
-					   __ATOMIC_SEQ_CST)
-
-#define atomic_compare_exchange_weak_explicit(PTR, VAL, DES, SUC, FAIL) \
-  __extension__								\
-  ({									\
-    __auto_type __atomic_compare_exchange_ptr = (PTR);			\
-    __typeof__ (*__atomic_compare_exchange_ptr) __atomic_compare_exchange_tmp \
-      = (DES);								\
-    __atomic_compare_exchange (__atomic_compare_exchange_ptr, (VAL),	\
-			       &__atomic_compare_exchange_tmp, 1,	\
-			       (SUC), (FAIL));				\
-  })
-
-#define atomic_compare_exchange_weak(PTR, VAL, DES)			\
-  atomic_compare_exchange_weak_explicit (PTR, VAL, DES, __ATOMIC_SEQ_CST, \
-					 __ATOMIC_SEQ_CST)
-
-
-
-#define atomic_fetch_add(PTR, VAL) __atomic_fetch_add ((PTR), (VAL), 	\
-						       __ATOMIC_SEQ_CST)
-#define atomic_fetch_add_explicit(PTR, VAL, MO) 			\
-			  __atomic_fetch_add ((PTR), (VAL), (MO))
-
-#define atomic_fetch_sub(PTR, VAL) __atomic_fetch_sub ((PTR), (VAL), 	\
-						       __ATOMIC_SEQ_CST)
-#define atomic_fetch_sub_explicit(PTR, VAL, MO) 			\
-			  __atomic_fetch_sub ((PTR), (VAL), (MO))
-
-#define atomic_fetch_or(PTR, VAL) __atomic_fetch_or ((PTR), (VAL), 	\
-						       __ATOMIC_SEQ_CST)
-#define atomic_fetch_or_explicit(PTR, VAL, MO) 			\
-			  __atomic_fetch_or ((PTR), (VAL), (MO))
-
-#define atomic_fetch_xor(PTR, VAL) __atomic_fetch_xor ((PTR), (VAL), 	\
-						       __ATOMIC_SEQ_CST)
-#define atomic_fetch_xor_explicit(PTR, VAL, MO) 			\
-			  __atomic_fetch_xor ((PTR), (VAL), (MO))
-
-#define atomic_fetch_and(PTR, VAL) __atomic_fetch_and ((PTR), (VAL), 	\
-						       __ATOMIC_SEQ_CST)
-#define atomic_fetch_and_explicit(PTR, VAL, MO) 			\
-			  __atomic_fetch_and ((PTR), (VAL), (MO))
-
-
-typedef _Atomic struct
-{
-#if __GCC_ATOMIC_TEST_AND_SET_TRUEVAL == 1
-  _Bool __val;
-#else
-  unsigned char __val;
-#endif
-} atomic_flag;
-
-#define ATOMIC_FLAG_INIT	{ 0 }
-
-
-#define atomic_flag_test_and_set(PTR) 					\
-			__atomic_test_and_set ((PTR), __ATOMIC_SEQ_CST)
-#define atomic_flag_test_and_set_explicit(PTR, MO)			\
-			__atomic_test_and_set ((PTR), (MO))
-
-#define atomic_flag_clear(PTR)	__atomic_clear ((PTR), __ATOMIC_SEQ_CST)
-#define atomic_flag_clear_explicit(PTR, MO)   __atomic_clear ((PTR), (MO))
-
-#endif  /* _STDATOMIC_H */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/stdbool.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/stdbool.h	(revision 1046)
+++ 	(revision )
@@ -1,54 +1,0 @@
-/* Copyright (C) 1998-2015 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-/*
- * ISO C Standard:  7.16  Boolean type and values  <stdbool.h>
- */
-
-#ifndef _STDBOOL_H
-#define _STDBOOL_H
-
-#ifndef __cplusplus
-
-#define bool	_Bool
-#define true	1
-#define false	0
-
-#else /* __cplusplus */
-
-/* Supporting _Bool in C++ is a GCC extension.  */
-#define _Bool	bool
-
-#if __cplusplus < 201103L
-/* Defining these macros in C++98 is a GCC extension.  */
-#define bool	bool
-#define false	false
-#define true	true
-#endif
-
-#endif /* __cplusplus */
-
-/* Signal that all the definitions are present.  */
-#define __bool_true_false_are_defined	1
-
-#endif	/* stdbool.h */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/stddef.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/stddef.h	(revision 1046)
+++ 	(revision )
@@ -1,443 +1,0 @@
-/* Copyright (C) 1989-2015 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-/*
- * ISO C Standard:  7.17  Common definitions  <stddef.h>
- */
-#if (!defined(_STDDEF_H) && !defined(_STDDEF_H_) && !defined(_ANSI_STDDEF_H) \
-     && !defined(__STDDEF_H__)) \
-    || defined(__need_wchar_t) || defined(__need_size_t) \
-    || defined(__need_ptrdiff_t) || defined(__need_NULL) \
-    || defined(__need_wint_t)
-
-/* Any one of these symbols __need_* means that GNU libc
-   wants us just to define one data type.  So don't define
-   the symbols that indicate this file's entire job has been done.  */
-#if (!defined(__need_wchar_t) && !defined(__need_size_t)	\
-     && !defined(__need_ptrdiff_t) && !defined(__need_NULL)	\
-     && !defined(__need_wint_t))
-#define _STDDEF_H
-#define _STDDEF_H_
-/* snaroff@next.com says the NeXT needs this.  */
-#define _ANSI_STDDEF_H
-#endif
-
-#ifndef __sys_stdtypes_h
-/* This avoids lossage on SunOS but only if stdtypes.h comes first.
-   There's no way to win with the other order!  Sun lossage.  */
-
-/* On 4.3bsd-net2, make sure ansi.h is included, so we have
-   one less case to deal with in the following.  */
-#if defined (__BSD_NET2__) || defined (____386BSD____) || (defined (__FreeBSD__) && (__FreeBSD__ < 5)) || defined(__NetBSD__)
-#include <machine/ansi.h>
-#endif
-/* On FreeBSD 5, machine/ansi.h does not exist anymore... */
-#if defined (__FreeBSD__) && (__FreeBSD__ >= 5)
-#include <sys/_types.h>
-#endif
-
-/* In 4.3bsd-net2, machine/ansi.h defines these symbols, which are
-   defined if the corresponding type is *not* defined.
-   FreeBSD-2.1 defines _MACHINE_ANSI_H_ instead of _ANSI_H_.
-   NetBSD defines _I386_ANSI_H_ and _X86_64_ANSI_H_ instead of _ANSI_H_ */
-#if defined(_ANSI_H_) || defined(_MACHINE_ANSI_H_) || defined(_X86_64_ANSI_H_)  || defined(_I386_ANSI_H_)
-#if !defined(_SIZE_T_) && !defined(_BSD_SIZE_T_)
-#define _SIZE_T
-#endif
-#if !defined(_PTRDIFF_T_) && !defined(_BSD_PTRDIFF_T_)
-#define _PTRDIFF_T
-#endif
-/* On BSD/386 1.1, at least, machine/ansi.h defines _BSD_WCHAR_T_
-   instead of _WCHAR_T_. */
-#if !defined(_WCHAR_T_) && !defined(_BSD_WCHAR_T_)
-#ifndef _BSD_WCHAR_T_
-#define _WCHAR_T
-#endif
-#endif
-/* Undef _FOO_T_ if we are supposed to define foo_t.  */
-#if defined (__need_ptrdiff_t) || defined (_STDDEF_H_)
-#undef _PTRDIFF_T_
-#undef _BSD_PTRDIFF_T_
-#endif
-#if defined (__need_size_t) || defined (_STDDEF_H_)
-#undef _SIZE_T_
-#undef _BSD_SIZE_T_
-#endif
-#if defined (__need_wchar_t) || defined (_STDDEF_H_)
-#undef _WCHAR_T_
-#undef _BSD_WCHAR_T_
-#endif
-#endif /* defined(_ANSI_H_) || defined(_MACHINE_ANSI_H_) || defined(_X86_64_ANSI_H_) || defined(_I386_ANSI_H_) */
-
-/* Sequent's header files use _PTRDIFF_T_ in some conflicting way.
-   Just ignore it.  */
-#if defined (__sequent__) && defined (_PTRDIFF_T_)
-#undef _PTRDIFF_T_
-#endif
-
-/* On VxWorks, <type/vxTypesBase.h> may have defined macros like
-   _TYPE_size_t which will typedef size_t.  fixincludes patched the
-   vxTypesBase.h so that this macro is only defined if _GCC_SIZE_T is
-   not defined, and so that defining this macro defines _GCC_SIZE_T.
-   If we find that the macros are still defined at this point, we must
-   invoke them so that the type is defined as expected.  */
-#if defined (_TYPE_ptrdiff_t) && (defined (__need_ptrdiff_t) || defined (_STDDEF_H_))
-_TYPE_ptrdiff_t;
-#undef _TYPE_ptrdiff_t
-#endif
-#if defined (_TYPE_size_t) && (defined (__need_size_t) || defined (_STDDEF_H_))
-_TYPE_size_t;
-#undef _TYPE_size_t
-#endif
-#if defined (_TYPE_wchar_t) && (defined (__need_wchar_t) || defined (_STDDEF_H_))
-_TYPE_wchar_t;
-#undef _TYPE_wchar_t
-#endif
-
-/* In case nobody has defined these types, but we aren't running under
-   GCC 2.00, make sure that __PTRDIFF_TYPE__, __SIZE_TYPE__, and
-   __WCHAR_TYPE__ have reasonable values.  This can happen if the
-   parts of GCC is compiled by an older compiler, that actually
-   include gstddef.h, such as collect2.  */
-
-/* Signed type of difference of two pointers.  */
-
-/* Define this type if we are doing the whole job,
-   or if we want this type in particular.  */
-#if defined (_STDDEF_H) || defined (__need_ptrdiff_t)
-#ifndef _PTRDIFF_T	/* in case <sys/types.h> has defined it. */
-#ifndef _T_PTRDIFF_
-#ifndef _T_PTRDIFF
-#ifndef __PTRDIFF_T
-#ifndef _PTRDIFF_T_
-#ifndef _BSD_PTRDIFF_T_
-#ifndef ___int_ptrdiff_t_h
-#ifndef _GCC_PTRDIFF_T
-#ifndef _PTRDIFF_T_DECLARED /* DragonFly */
-#define _PTRDIFF_T
-#define _T_PTRDIFF_
-#define _T_PTRDIFF
-#define __PTRDIFF_T
-#define _PTRDIFF_T_
-#define _BSD_PTRDIFF_T_
-#define ___int_ptrdiff_t_h
-#define _GCC_PTRDIFF_T
-#define _PTRDIFF_T_DECLARED
-#ifndef __PTRDIFF_TYPE__
-#define __PTRDIFF_TYPE__ long int
-#endif
-typedef __PTRDIFF_TYPE__ ptrdiff_t;
-#endif /* _PTRDIFF_T_DECLARED */
-#endif /* _GCC_PTRDIFF_T */
-#endif /* ___int_ptrdiff_t_h */
-#endif /* _BSD_PTRDIFF_T_ */
-#endif /* _PTRDIFF_T_ */
-#endif /* __PTRDIFF_T */
-#endif /* _T_PTRDIFF */
-#endif /* _T_PTRDIFF_ */
-#endif /* _PTRDIFF_T */
-
-/* If this symbol has done its job, get rid of it.  */
-#undef	__need_ptrdiff_t
-
-#endif /* _STDDEF_H or __need_ptrdiff_t.  */
-
-/* Unsigned type of `sizeof' something.  */
-
-/* Define this type if we are doing the whole job,
-   or if we want this type in particular.  */
-#if defined (_STDDEF_H) || defined (__need_size_t)
-#ifndef __size_t__	/* BeOS */
-#ifndef __SIZE_T__	/* Cray Unicos/Mk */
-#ifndef _SIZE_T	/* in case <sys/types.h> has defined it. */
-#ifndef _SYS_SIZE_T_H
-#ifndef _T_SIZE_
-#ifndef _T_SIZE
-#ifndef __SIZE_T
-#ifndef _SIZE_T_
-#ifndef _BSD_SIZE_T_
-#ifndef _SIZE_T_DEFINED_
-#ifndef _SIZE_T_DEFINED
-#ifndef _BSD_SIZE_T_DEFINED_	/* Darwin */
-#ifndef _SIZE_T_DECLARED	/* FreeBSD 5 */
-#ifndef ___int_size_t_h
-#ifndef _GCC_SIZE_T
-#ifndef _SIZET_
-#ifndef __size_t
-#define __size_t__	/* BeOS */
-#define __SIZE_T__	/* Cray Unicos/Mk */
-#define _SIZE_T
-#define _SYS_SIZE_T_H
-#define _T_SIZE_
-#define _T_SIZE
-#define __SIZE_T
-#define _SIZE_T_
-#define _BSD_SIZE_T_
-#define _SIZE_T_DEFINED_
-#define _SIZE_T_DEFINED
-#define _BSD_SIZE_T_DEFINED_	/* Darwin */
-#define _SIZE_T_DECLARED	/* FreeBSD 5 */
-#define ___int_size_t_h
-#define _GCC_SIZE_T
-#define _SIZET_
-#if (defined (__FreeBSD__) && (__FreeBSD__ >= 5)) \
-  || defined(__DragonFly__) \
-  || defined(__FreeBSD_kernel__)
-/* __size_t is a typedef on FreeBSD 5, must not trash it. */
-#elif defined (__VMS__)
-/* __size_t is also a typedef on VMS.  */
-#else
-#define __size_t
-#endif
-#ifndef __SIZE_TYPE__
-#define __SIZE_TYPE__ long unsigned int
-#endif
-#if !(defined (__GNUG__) && defined (size_t))
-typedef __SIZE_TYPE__ size_t;
-#ifdef __BEOS__
-typedef long ssize_t;
-#endif /* __BEOS__ */
-#endif /* !(defined (__GNUG__) && defined (size_t)) */
-#endif /* __size_t */
-#endif /* _SIZET_ */
-#endif /* _GCC_SIZE_T */
-#endif /* ___int_size_t_h */
-#endif /* _SIZE_T_DECLARED */
-#endif /* _BSD_SIZE_T_DEFINED_ */
-#endif /* _SIZE_T_DEFINED */
-#endif /* _SIZE_T_DEFINED_ */
-#endif /* _BSD_SIZE_T_ */
-#endif /* _SIZE_T_ */
-#endif /* __SIZE_T */
-#endif /* _T_SIZE */
-#endif /* _T_SIZE_ */
-#endif /* _SYS_SIZE_T_H */
-#endif /* _SIZE_T */
-#endif /* __SIZE_T__ */
-#endif /* __size_t__ */
-#undef	__need_size_t
-#endif /* _STDDEF_H or __need_size_t.  */
-
-
-/* Wide character type.
-   Locale-writers should change this as necessary to
-   be big enough to hold unique values not between 0 and 127,
-   and not (wchar_t) -1, for each defined multibyte character.  */
-
-/* Define this type if we are doing the whole job,
-   or if we want this type in particular.  */
-#if defined (_STDDEF_H) || defined (__need_wchar_t)
-#ifndef __wchar_t__	/* BeOS */
-#ifndef __WCHAR_T__	/* Cray Unicos/Mk */
-#ifndef _WCHAR_T
-#ifndef _T_WCHAR_
-#ifndef _T_WCHAR
-#ifndef __WCHAR_T
-#ifndef _WCHAR_T_
-#ifndef _BSD_WCHAR_T_
-#ifndef _BSD_WCHAR_T_DEFINED_    /* Darwin */
-#ifndef _BSD_RUNE_T_DEFINED_	/* Darwin */
-#ifndef _WCHAR_T_DECLARED /* FreeBSD 5 */
-#ifndef _WCHAR_T_DEFINED_
-#ifndef _WCHAR_T_DEFINED
-#ifndef _WCHAR_T_H
-#ifndef ___int_wchar_t_h
-#ifndef __INT_WCHAR_T_H
-#ifndef _GCC_WCHAR_T
-#define __wchar_t__	/* BeOS */
-#define __WCHAR_T__	/* Cray Unicos/Mk */
-#define _WCHAR_T
-#define _T_WCHAR_
-#define _T_WCHAR
-#define __WCHAR_T
-#define _WCHAR_T_
-#define _BSD_WCHAR_T_
-#define _WCHAR_T_DEFINED_
-#define _WCHAR_T_DEFINED
-#define _WCHAR_T_H
-#define ___int_wchar_t_h
-#define __INT_WCHAR_T_H
-#define _GCC_WCHAR_T
-#define _WCHAR_T_DECLARED
-
-/* On BSD/386 1.1, at least, machine/ansi.h defines _BSD_WCHAR_T_
-   instead of _WCHAR_T_, and _BSD_RUNE_T_ (which, unlike the other
-   symbols in the _FOO_T_ family, stays defined even after its
-   corresponding type is defined).  If we define wchar_t, then we
-   must undef _WCHAR_T_; for BSD/386 1.1 (and perhaps others), if
-   we undef _WCHAR_T_, then we must also define rune_t, since 
-   headers like runetype.h assume that if machine/ansi.h is included,
-   and _BSD_WCHAR_T_ is not defined, then rune_t is available.
-   machine/ansi.h says, "Note that _WCHAR_T_ and _RUNE_T_ must be of
-   the same type." */
-#ifdef _BSD_WCHAR_T_
-#undef _BSD_WCHAR_T_
-#ifdef _BSD_RUNE_T_
-#if !defined (_ANSI_SOURCE) && !defined (_POSIX_SOURCE)
-typedef _BSD_RUNE_T_ rune_t;
-#define _BSD_WCHAR_T_DEFINED_
-#define _BSD_RUNE_T_DEFINED_	/* Darwin */
-#if defined (__FreeBSD__) && (__FreeBSD__ < 5)
-/* Why is this file so hard to maintain properly?  In contrast to
-   the comment above regarding BSD/386 1.1, on FreeBSD for as long
-   as the symbol has existed, _BSD_RUNE_T_ must not stay defined or
-   redundant typedefs will occur when stdlib.h is included after this file. */
-#undef _BSD_RUNE_T_
-#endif
-#endif
-#endif
-#endif
-/* FreeBSD 5 can't be handled well using "traditional" logic above
-   since it no longer defines _BSD_RUNE_T_ yet still desires to export
-   rune_t in some cases... */
-#if defined (__FreeBSD__) && (__FreeBSD__ >= 5)
-#if !defined (_ANSI_SOURCE) && !defined (_POSIX_SOURCE)
-#if __BSD_VISIBLE
-#ifndef _RUNE_T_DECLARED
-typedef __rune_t        rune_t;
-#define _RUNE_T_DECLARED
-#endif
-#endif
-#endif
-#endif
-
-#ifndef __WCHAR_TYPE__
-#define __WCHAR_TYPE__ int
-#endif
-#ifndef __cplusplus
-typedef __WCHAR_TYPE__ wchar_t;
-#endif
-#endif
-#endif
-#endif
-#endif
-#endif
-#endif
-#endif /* _WCHAR_T_DECLARED */
-#endif /* _BSD_RUNE_T_DEFINED_ */
-#endif
-#endif
-#endif
-#endif
-#endif
-#endif
-#endif
-#endif /* __WCHAR_T__ */
-#endif /* __wchar_t__ */
-#undef	__need_wchar_t
-#endif /* _STDDEF_H or __need_wchar_t.  */
-
-#if defined (__need_wint_t)
-#ifndef _WINT_T
-#define _WINT_T
-
-#ifndef __WINT_TYPE__
-#define __WINT_TYPE__ unsigned int
-#endif
-typedef __WINT_TYPE__ wint_t;
-#endif
-#undef __need_wint_t
-#endif
-
-/*  In 4.3bsd-net2, leave these undefined to indicate that size_t, etc.
-    are already defined.  */
-/*  BSD/OS 3.1 and FreeBSD [23].x require the MACHINE_ANSI_H check here.  */
-/*  NetBSD 5 requires the I386_ANSI_H and X86_64_ANSI_H checks here.  */
-#if defined(_ANSI_H_) || defined(_MACHINE_ANSI_H_) || defined(_X86_64_ANSI_H_) || defined(_I386_ANSI_H_)
-/*  The references to _GCC_PTRDIFF_T_, _GCC_SIZE_T_, and _GCC_WCHAR_T_
-    are probably typos and should be removed before 2.8 is released.  */
-#ifdef _GCC_PTRDIFF_T_
-#undef _PTRDIFF_T_
-#undef _BSD_PTRDIFF_T_
-#endif
-#ifdef _GCC_SIZE_T_
-#undef _SIZE_T_
-#undef _BSD_SIZE_T_
-#endif
-#ifdef _GCC_WCHAR_T_
-#undef _WCHAR_T_
-#undef _BSD_WCHAR_T_
-#endif
-/*  The following ones are the real ones.  */
-#ifdef _GCC_PTRDIFF_T
-#undef _PTRDIFF_T_
-#undef _BSD_PTRDIFF_T_
-#endif
-#ifdef _GCC_SIZE_T
-#undef _SIZE_T_
-#undef _BSD_SIZE_T_
-#endif
-#ifdef _GCC_WCHAR_T
-#undef _WCHAR_T_
-#undef _BSD_WCHAR_T_
-#endif
-#endif /* _ANSI_H_ || _MACHINE_ANSI_H_ || _X86_64_ANSI_H_ || _I386_ANSI_H_ */
-
-#endif /* __sys_stdtypes_h */
-
-/* A null pointer constant.  */
-
-#if defined (_STDDEF_H) || defined (__need_NULL)
-#undef NULL		/* in case <stdio.h> has defined it. */
-#ifdef __GNUG__
-#define NULL __null
-#else   /* G++ */
-#ifndef __cplusplus
-#define NULL ((void *)0)
-#else   /* C++ */
-#define NULL 0
-#endif  /* C++ */
-#endif  /* G++ */
-#endif	/* NULL not defined and <stddef.h> or need NULL.  */
-#undef	__need_NULL
-
-#ifdef _STDDEF_H
-
-/* Offset of member MEMBER in a struct of type TYPE. */
-#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
-
-#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) \
-  || (defined(__cplusplus) && __cplusplus >= 201103L)
-#ifndef _GCC_MAX_ALIGN_T
-#define _GCC_MAX_ALIGN_T
-/* Type whose alignment is supported in every context and is at least
-   as great as that of any standard type not using alignment
-   specifiers.  */
-typedef struct {
-  long long __max_align_ll __attribute__((__aligned__(__alignof__(long long))));
-  long double __max_align_ld __attribute__((__aligned__(__alignof__(long double))));
-} max_align_t;
-#endif
-#endif /* C11 or C++11.  */
-
-#if defined(__cplusplus) && __cplusplus >= 201103L
-#ifndef _GXX_NULLPTR_T
-#define _GXX_NULLPTR_T
-  typedef decltype(nullptr) nullptr_t;
-#endif
-#endif /* C++11.  */
-
-#endif /* _STDDEF_H was defined this time */
-
-#endif /* !_STDDEF_H && !_STDDEF_H_ && !_ANSI_STDDEF_H && !__STDDEF_H__
-	  || __need_XXX was not defined before */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/stdfix.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/stdfix.h	(revision 1046)
+++ 	(revision )
@@ -1,204 +1,0 @@
-/* Copyright (C) 2007-2015 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-/* ISO/IEC JTC1 SC22 WG14 N1169
- * Date: 2006-04-04
- * ISO/IEC TR 18037
- * Programming languages - C - Extensions to support embedded processors
- */
-
-#ifndef _STDFIX_H
-#define _STDFIX_H
-
-/* 7.18a.1 Introduction.  */
-
-#undef fract
-#undef accum
-#undef sat
-#define fract		_Fract
-#define accum		_Accum
-#define sat		_Sat
-
-/* 7.18a.3 Precision macros.  */
-
-#undef SFRACT_FBIT
-#undef SFRACT_MIN
-#undef SFRACT_MAX
-#undef SFRACT_EPSILON
-#define SFRACT_FBIT	__SFRACT_FBIT__
-#define SFRACT_MIN	__SFRACT_MIN__
-#define SFRACT_MAX	__SFRACT_MAX__
-#define SFRACT_EPSILON	__SFRACT_EPSILON__
-
-#undef USFRACT_FBIT
-#undef USFRACT_MIN
-#undef USFRACT_MAX
-#undef USFRACT_EPSILON
-#define USFRACT_FBIT	__USFRACT_FBIT__
-#define USFRACT_MIN	__USFRACT_MIN__		/* GCC extension.  */
-#define USFRACT_MAX	__USFRACT_MAX__
-#define USFRACT_EPSILON	__USFRACT_EPSILON__
-
-#undef FRACT_FBIT
-#undef FRACT_MIN
-#undef FRACT_MAX
-#undef FRACT_EPSILON
-#define FRACT_FBIT	__FRACT_FBIT__
-#define FRACT_MIN	__FRACT_MIN__
-#define FRACT_MAX	__FRACT_MAX__
-#define FRACT_EPSILON	__FRACT_EPSILON__
-
-#undef UFRACT_FBIT
-#undef UFRACT_MIN
-#undef UFRACT_MAX
-#undef UFRACT_EPSILON
-#define UFRACT_FBIT	__UFRACT_FBIT__
-#define UFRACT_MIN	__UFRACT_MIN__		/* GCC extension.  */
-#define UFRACT_MAX	__UFRACT_MAX__
-#define UFRACT_EPSILON	__UFRACT_EPSILON__
-
-#undef LFRACT_FBIT
-#undef LFRACT_MIN
-#undef LFRACT_MAX
-#undef LFRACT_EPSILON
-#define LFRACT_FBIT	__LFRACT_FBIT__
-#define LFRACT_MIN	__LFRACT_MIN__
-#define LFRACT_MAX	__LFRACT_MAX__
-#define LFRACT_EPSILON	__LFRACT_EPSILON__
-
-#undef ULFRACT_FBIT
-#undef ULFRACT_MIN
-#undef ULFRACT_MAX
-#undef ULFRACT_EPSILON
-#define ULFRACT_FBIT	__ULFRACT_FBIT__
-#define ULFRACT_MIN	__ULFRACT_MIN__		/* GCC extension.  */
-#define ULFRACT_MAX	__ULFRACT_MAX__
-#define ULFRACT_EPSILON	__ULFRACT_EPSILON__
-
-#undef LLFRACT_FBIT
-#undef LLFRACT_MIN
-#undef LLFRACT_MAX
-#undef LLFRACT_EPSILON
-#define LLFRACT_FBIT	__LLFRACT_FBIT__	/* GCC extension.  */
-#define LLFRACT_MIN	__LLFRACT_MIN__		/* GCC extension.  */
-#define LLFRACT_MAX	__LLFRACT_MAX__		/* GCC extension.  */
-#define LLFRACT_EPSILON	__LLFRACT_EPSILON__	/* GCC extension.  */
-
-#undef ULLFRACT_FBIT
-#undef ULLFRACT_MIN
-#undef ULLFRACT_MAX
-#undef ULLFRACT_EPSILON
-#define ULLFRACT_FBIT	__ULLFRACT_FBIT__	/* GCC extension.  */
-#define ULLFRACT_MIN	__ULLFRACT_MIN__	/* GCC extension.  */
-#define ULLFRACT_MAX	__ULLFRACT_MAX__	/* GCC extension.  */
-#define ULLFRACT_EPSILON	__ULLFRACT_EPSILON__	/* GCC extension.  */
-
-#undef SACCUM_FBIT
-#undef SACCUM_IBIT
-#undef SACCUM_MIN
-#undef SACCUM_MAX
-#undef SACCUM_EPSILON
-#define SACCUM_FBIT	__SACCUM_FBIT__
-#define SACCUM_IBIT	__SACCUM_IBIT__
-#define SACCUM_MIN	__SACCUM_MIN__
-#define SACCUM_MAX	__SACCUM_MAX__
-#define SACCUM_EPSILON	__SACCUM_EPSILON__
-
-#undef USACCUM_FBIT
-#undef USACCUM_IBIT
-#undef USACCUM_MIN
-#undef USACCUM_MAX
-#undef USACCUM_EPSILON
-#define USACCUM_FBIT	__USACCUM_FBIT__
-#define USACCUM_IBIT	__USACCUM_IBIT__
-#define USACCUM_MIN	__USACCUM_MIN__		/* GCC extension.  */
-#define USACCUM_MAX	__USACCUM_MAX__
-#define USACCUM_EPSILON	__USACCUM_EPSILON__
-
-#undef ACCUM_FBIT
-#undef ACCUM_IBIT
-#undef ACCUM_MIN
-#undef ACCUM_MAX
-#undef ACCUM_EPSILON
-#define ACCUM_FBIT	__ACCUM_FBIT__
-#define ACCUM_IBIT	__ACCUM_IBIT__
-#define ACCUM_MIN	__ACCUM_MIN__
-#define ACCUM_MAX	__ACCUM_MAX__
-#define ACCUM_EPSILON	__ACCUM_EPSILON__
-
-#undef UACCUM_FBIT
-#undef UACCUM_IBIT
-#undef UACCUM_MIN
-#undef UACCUM_MAX
-#undef UACCUM_EPSILON
-#define UACCUM_FBIT	__UACCUM_FBIT__
-#define UACCUM_IBIT	__UACCUM_IBIT__
-#define UACCUM_MIN	__UACCUM_MIN__		/* GCC extension.  */
-#define UACCUM_MAX	__UACCUM_MAX__
-#define UACCUM_EPSILON	__UACCUM_EPSILON__
-
-#undef LACCUM_FBIT
-#undef LACCUM_IBIT
-#undef LACCUM_MIN
-#undef LACCUM_MAX
-#undef LACCUM_EPSILON
-#define LACCUM_FBIT	__LACCUM_FBIT__
-#define LACCUM_IBIT	__LACCUM_IBIT__
-#define LACCUM_MIN	__LACCUM_MIN__
-#define LACCUM_MAX	__LACCUM_MAX__
-#define LACCUM_EPSILON	__LACCUM_EPSILON__
-
-#undef ULACCUM_FBIT
-#undef ULACCUM_IBIT
-#undef ULACCUM_MIN
-#undef ULACCUM_MAX
-#undef ULACCUM_EPSILON
-#define ULACCUM_FBIT	__ULACCUM_FBIT__
-#define ULACCUM_IBIT	__ULACCUM_IBIT__
-#define ULACCUM_MIN	__ULACCUM_MIN__		/* GCC extension.  */
-#define ULACCUM_MAX	__ULACCUM_MAX__
-#define ULACCUM_EPSILON	__ULACCUM_EPSILON__
-
-#undef LLACCUM_FBIT
-#undef LLACCUM_IBIT
-#undef LLACCUM_MIN
-#undef LLACCUM_MAX
-#undef LLACCUM_EPSILON
-#define LLACCUM_FBIT	__LLACCUM_FBIT__	/* GCC extension.  */
-#define LLACCUM_IBIT	__LLACCUM_IBIT__	/* GCC extension.  */
-#define LLACCUM_MIN	__LLACCUM_MIN__		/* GCC extension.  */
-#define LLACCUM_MAX	__LLACCUM_MAX__		/* GCC extension.  */
-#define LLACCUM_EPSILON	__LLACCUM_EPSILON__	/* GCC extension.  */
-
-#undef ULLACCUM_FBIT
-#undef ULLACCUM_IBIT
-#undef ULLACCUM_MIN
-#undef ULLACCUM_MAX
-#undef ULLACCUM_EPSILON
-#define ULLACCUM_FBIT	__ULLACCUM_FBIT__	/* GCC extension.  */
-#define ULLACCUM_IBIT	__ULLACCUM_IBIT__	/* GCC extension.  */
-#define ULLACCUM_MIN	__ULLACCUM_MIN__	/* GCC extension.  */
-#define ULLACCUM_MAX	__ULLACCUM_MAX__	/* GCC extension.  */
-#define ULLACCUM_EPSILON	__ULLACCUM_EPSILON__	/* GCC extension.  */
-
-#endif /* _STDFIX_H */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/stdint-gcc.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/stdint-gcc.h	(revision 1046)
+++ 	(revision )
@@ -1,263 +1,0 @@
-/* Copyright (C) 2008-2015 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-/*
- * ISO C Standard:  7.18  Integer types  <stdint.h>
- */
-
-#ifndef _GCC_STDINT_H
-#define _GCC_STDINT_H
-
-/* 7.8.1.1 Exact-width integer types */
-
-#ifdef __INT8_TYPE__
-typedef __INT8_TYPE__ int8_t;
-#endif
-#ifdef __INT16_TYPE__
-typedef __INT16_TYPE__ int16_t;
-#endif
-#ifdef __INT32_TYPE__
-typedef __INT32_TYPE__ int32_t;
-#endif
-#ifdef __INT64_TYPE__
-typedef __INT64_TYPE__ int64_t;
-#endif
-#ifdef __UINT8_TYPE__
-typedef __UINT8_TYPE__ uint8_t;
-#endif
-#ifdef __UINT16_TYPE__
-typedef __UINT16_TYPE__ uint16_t;
-#endif
-#ifdef __UINT32_TYPE__
-typedef __UINT32_TYPE__ uint32_t;
-#endif
-#ifdef __UINT64_TYPE__
-typedef __UINT64_TYPE__ uint64_t;
-#endif
-
-/* 7.8.1.2 Minimum-width integer types */
-
-typedef __INT_LEAST8_TYPE__ int_least8_t;
-typedef __INT_LEAST16_TYPE__ int_least16_t;
-typedef __INT_LEAST32_TYPE__ int_least32_t;
-typedef __INT_LEAST64_TYPE__ int_least64_t;
-typedef __UINT_LEAST8_TYPE__ uint_least8_t;
-typedef __UINT_LEAST16_TYPE__ uint_least16_t;
-typedef __UINT_LEAST32_TYPE__ uint_least32_t;
-typedef __UINT_LEAST64_TYPE__ uint_least64_t;
-
-/* 7.8.1.3 Fastest minimum-width integer types */
-
-typedef __INT_FAST8_TYPE__ int_fast8_t;
-typedef __INT_FAST16_TYPE__ int_fast16_t;
-typedef __INT_FAST32_TYPE__ int_fast32_t;
-typedef __INT_FAST64_TYPE__ int_fast64_t;
-typedef __UINT_FAST8_TYPE__ uint_fast8_t;
-typedef __UINT_FAST16_TYPE__ uint_fast16_t;
-typedef __UINT_FAST32_TYPE__ uint_fast32_t;
-typedef __UINT_FAST64_TYPE__ uint_fast64_t;
-
-/* 7.8.1.4 Integer types capable of holding object pointers */
-
-#ifdef __INTPTR_TYPE__
-typedef __INTPTR_TYPE__ intptr_t;
-#endif
-#ifdef __UINTPTR_TYPE__
-typedef __UINTPTR_TYPE__ uintptr_t;
-#endif
-
-/* 7.8.1.5 Greatest-width integer types */
-
-typedef __INTMAX_TYPE__ intmax_t;
-typedef __UINTMAX_TYPE__ uintmax_t;
-
-#if (!defined __cplusplus || __cplusplus >= 201103L \
-     || defined __STDC_LIMIT_MACROS)
-
-/* 7.18.2 Limits of specified-width integer types */
-
-#ifdef __INT8_MAX__
-# undef INT8_MAX
-# define INT8_MAX __INT8_MAX__
-# undef INT8_MIN
-# define INT8_MIN (-INT8_MAX - 1)
-#endif
-#ifdef __UINT8_MAX__
-# undef UINT8_MAX
-# define UINT8_MAX __UINT8_MAX__
-#endif
-#ifdef __INT16_MAX__
-# undef INT16_MAX
-# define INT16_MAX __INT16_MAX__
-# undef INT16_MIN
-# define INT16_MIN (-INT16_MAX - 1)
-#endif
-#ifdef __UINT16_MAX__
-# undef UINT16_MAX
-# define UINT16_MAX __UINT16_MAX__
-#endif
-#ifdef __INT32_MAX__
-# undef INT32_MAX
-# define INT32_MAX __INT32_MAX__
-# undef INT32_MIN
-# define INT32_MIN (-INT32_MAX - 1)
-#endif
-#ifdef __UINT32_MAX__
-# undef UINT32_MAX
-# define UINT32_MAX __UINT32_MAX__
-#endif
-#ifdef __INT64_MAX__
-# undef INT64_MAX
-# define INT64_MAX __INT64_MAX__
-# undef INT64_MIN
-# define INT64_MIN (-INT64_MAX - 1)
-#endif
-#ifdef __UINT64_MAX__
-# undef UINT64_MAX
-# define UINT64_MAX __UINT64_MAX__
-#endif
-
-#undef INT_LEAST8_MAX
-#define INT_LEAST8_MAX __INT_LEAST8_MAX__
-#undef INT_LEAST8_MIN
-#define INT_LEAST8_MIN (-INT_LEAST8_MAX - 1)
-#undef UINT_LEAST8_MAX
-#define UINT_LEAST8_MAX __UINT_LEAST8_MAX__
-#undef INT_LEAST16_MAX
-#define INT_LEAST16_MAX __INT_LEAST16_MAX__
-#undef INT_LEAST16_MIN
-#define INT_LEAST16_MIN (-INT_LEAST16_MAX - 1)
-#undef UINT_LEAST16_MAX
-#define UINT_LEAST16_MAX __UINT_LEAST16_MAX__
-#undef INT_LEAST32_MAX
-#define INT_LEAST32_MAX __INT_LEAST32_MAX__
-#undef INT_LEAST32_MIN
-#define INT_LEAST32_MIN (-INT_LEAST32_MAX - 1)
-#undef UINT_LEAST32_MAX
-#define UINT_LEAST32_MAX __UINT_LEAST32_MAX__
-#undef INT_LEAST64_MAX
-#define INT_LEAST64_MAX __INT_LEAST64_MAX__
-#undef INT_LEAST64_MIN
-#define INT_LEAST64_MIN (-INT_LEAST64_MAX - 1)
-#undef UINT_LEAST64_MAX
-#define UINT_LEAST64_MAX __UINT_LEAST64_MAX__
-
-#undef INT_FAST8_MAX
-#define INT_FAST8_MAX __INT_FAST8_MAX__
-#undef INT_FAST8_MIN
-#define INT_FAST8_MIN (-INT_FAST8_MAX - 1)
-#undef UINT_FAST8_MAX
-#define UINT_FAST8_MAX __UINT_FAST8_MAX__
-#undef INT_FAST16_MAX
-#define INT_FAST16_MAX __INT_FAST16_MAX__
-#undef INT_FAST16_MIN
-#define INT_FAST16_MIN (-INT_FAST16_MAX - 1)
-#undef UINT_FAST16_MAX
-#define UINT_FAST16_MAX __UINT_FAST16_MAX__
-#undef INT_FAST32_MAX
-#define INT_FAST32_MAX __INT_FAST32_MAX__
-#undef INT_FAST32_MIN
-#define INT_FAST32_MIN (-INT_FAST32_MAX - 1)
-#undef UINT_FAST32_MAX
-#define UINT_FAST32_MAX __UINT_FAST32_MAX__
-#undef INT_FAST64_MAX
-#define INT_FAST64_MAX __INT_FAST64_MAX__
-#undef INT_FAST64_MIN
-#define INT_FAST64_MIN (-INT_FAST64_MAX - 1)
-#undef UINT_FAST64_MAX
-#define UINT_FAST64_MAX __UINT_FAST64_MAX__
-
-#ifdef __INTPTR_MAX__
-# undef INTPTR_MAX
-# define INTPTR_MAX __INTPTR_MAX__
-# undef INTPTR_MIN
-# define INTPTR_MIN (-INTPTR_MAX - 1)
-#endif
-#ifdef __UINTPTR_MAX__
-# undef UINTPTR_MAX
-# define UINTPTR_MAX __UINTPTR_MAX__
-#endif
-
-#undef INTMAX_MAX
-#define INTMAX_MAX __INTMAX_MAX__
-#undef INTMAX_MIN
-#define INTMAX_MIN (-INTMAX_MAX - 1)
-#undef UINTMAX_MAX
-#define UINTMAX_MAX __UINTMAX_MAX__
-
-/* 7.18.3 Limits of other integer types */
-
-#undef PTRDIFF_MAX
-#define PTRDIFF_MAX __PTRDIFF_MAX__
-#undef PTRDIFF_MIN
-#define PTRDIFF_MIN (-PTRDIFF_MAX - 1)
-
-#undef SIG_ATOMIC_MAX
-#define SIG_ATOMIC_MAX __SIG_ATOMIC_MAX__
-#undef SIG_ATOMIC_MIN
-#define SIG_ATOMIC_MIN __SIG_ATOMIC_MIN__
-
-#undef SIZE_MAX
-#define SIZE_MAX __SIZE_MAX__
-
-#undef WCHAR_MAX
-#define WCHAR_MAX __WCHAR_MAX__
-#undef WCHAR_MIN
-#define WCHAR_MIN __WCHAR_MIN__
-
-#undef WINT_MAX
-#define WINT_MAX __WINT_MAX__
-#undef WINT_MIN
-#define WINT_MIN __WINT_MIN__
-
-#endif /* (!defined __cplusplus || __cplusplus >= 201103L
-	   || defined __STDC_LIMIT_MACROS)  */
-
-#if (!defined __cplusplus || __cplusplus >= 201103L \
-     || defined __STDC_CONSTANT_MACROS)
-
-#undef INT8_C
-#define INT8_C(c) __INT8_C(c)
-#undef INT16_C
-#define INT16_C(c) __INT16_C(c)
-#undef INT32_C
-#define INT32_C(c) __INT32_C(c)
-#undef INT64_C
-#define INT64_C(c) __INT64_C(c)
-#undef UINT8_C
-#define UINT8_C(c) __UINT8_C(c)
-#undef UINT16_C
-#define UINT16_C(c) __UINT16_C(c)
-#undef UINT32_C
-#define UINT32_C(c) __UINT32_C(c)
-#undef UINT64_C
-#define UINT64_C(c) __UINT64_C(c)
-#undef INTMAX_C
-#define INTMAX_C(c) __INTMAX_C(c)
-#undef UINTMAX_C
-#define UINTMAX_C(c) __UINTMAX_C(c)
-
-#endif /* (!defined __cplusplus || __cplusplus >= 201103L
-	   || defined __STDC_CONSTANT_MACROS) */
-
-#endif /* _GCC_STDINT_H */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/stdint.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/stdint.h	(revision 1046)
+++ 	(revision )
@@ -1,14 +1,0 @@
-#ifndef _GCC_WRAP_STDINT_H
-#if __STDC_HOSTED__
-# if defined __cplusplus && __cplusplus >= 201103L
-#  undef __STDC_LIMIT_MACROS
-#  define __STDC_LIMIT_MACROS
-#  undef __STDC_CONSTANT_MACROS
-#  define __STDC_CONSTANT_MACROS
-# endif
-# include_next <stdint.h>
-#else
-# include "stdint-gcc.h"
-#endif
-#define _GCC_WRAP_STDINT_H
-#endif
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/stdnoreturn.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/stdnoreturn.h	(revision 1046)
+++ 	(revision )
@@ -1,35 +1,0 @@
-/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-/* ISO C1X: 7.23 _Noreturn <stdnoreturn.h>.  */
-
-#ifndef _STDNORETURN_H
-#define _STDNORETURN_H
-
-#ifndef __cplusplus
-
-#define noreturn _Noreturn
-
-#endif
-
-#endif	/* stdnoreturn.h */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/tbmintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/tbmintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,180 +1,0 @@
-/* Copyright (C) 2010-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _X86INTRIN_H_INCLUDED
-# error "Never use <tbmintrin.h> directly; include <x86intrin.h> instead."
-#endif
-
-#ifndef _TBMINTRIN_H_INCLUDED
-#define _TBMINTRIN_H_INCLUDED
-
-#ifndef __TBM__
-#pragma GCC push_options
-#pragma GCC target("tbm")
-#define __DISABLE_TBM__
-#endif /* __TBM__ */
-
-#ifdef __OPTIMIZE__
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__bextri_u32 (unsigned int __X, const unsigned int __I)
-{
-	return __builtin_ia32_bextri_u32 (__X, __I);
-}
-#else
-#define __bextri_u32(X, I)                                           \
-        ((unsigned int)__builtin_ia32_bextri_u32 ((unsigned int)(X), \
-	                                          (unsigned int)(I)))
-#endif /*__OPTIMIZE__ */
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blcfill_u32 (unsigned int __X)
-{
-  return __X & (__X + 1);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blci_u32 (unsigned int __X)
-{
-  return __X | ~(__X + 1);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blcic_u32 (unsigned int __X)
-{
-  return ~__X & (__X + 1);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blcmsk_u32 (unsigned int __X)
-{
-  return __X ^ (__X + 1);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blcs_u32 (unsigned int __X)
-{
-  return __X | (__X + 1);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blsfill_u32 (unsigned int __X)
-{
-  return __X | (__X - 1);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blsic_u32 (unsigned int __X)
-{
-  return ~__X | (__X - 1);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__t1mskc_u32 (unsigned int __X)
-{
-  return ~__X | (__X + 1);
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__tzmsk_u32 (unsigned int __X)
-{
-  return ~__X & (__X - 1);
-}
-
-
-
-#ifdef __x86_64__
-#ifdef __OPTIMIZE__
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__bextri_u64 (unsigned long long __X, const unsigned int __I)
-{
-  return __builtin_ia32_bextri_u64 (__X, __I);
-}
-#else
-#define __bextri_u64(X, I)						   \
-  ((unsigned long long)__builtin_ia32_bextri_u64 ((unsigned long long)(X), \
-						  (unsigned long long)(I)))
-#endif /*__OPTIMIZE__ */
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blcfill_u64 (unsigned long long __X)
-{
-  return __X & (__X + 1);
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blci_u64 (unsigned long long __X)
-{
-  return __X | ~(__X + 1);
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blcic_u64 (unsigned long long __X)
-{
-  return ~__X & (__X + 1);
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blcmsk_u64 (unsigned long long __X)
-{
-  return __X ^ (__X + 1);
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blcs_u64 (unsigned long long __X)
-{
-  return __X | (__X + 1);
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blsfill_u64 (unsigned long long __X)
-{
-  return __X | (__X - 1);
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__blsic_u64 (unsigned long long __X)
-{
-  return ~__X | (__X - 1);
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__t1mskc_u64 (unsigned long long __X)
-{
-  return ~__X | (__X + 1);
-}
-
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__tzmsk_u64 (unsigned long long __X)
-{
-  return ~__X & (__X - 1);
-}
-
-
-#endif /* __x86_64__  */
-
-#ifdef __DISABLE_TBM__
-#undef __DISABLE_TBM__
-#pragma GCC pop_options
-#endif /* __DISABLE_TBM__ */
-
-#endif /* _TBMINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/tgmath.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/tgmath.h	(revision 1046)
+++ 	(revision )
@@ -1,171 +1,0 @@
-/* Copyright (C) 2004-2015 Free Software Foundation, Inc.
-   Contributed by Apple, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
-
-/*
- * ISO C Standard:  7.22  Type-generic math <tgmath.h>
- */
-
-#ifndef _TGMATH_H
-#define _TGMATH_H
-
-#include <math.h>
-
-#ifndef __cplusplus
-#include <complex.h>
-
-/* Naming convention: generic macros are defining using
-   __TGMATH_CPLX*, __TGMATH_REAL*, and __TGMATH_CPLX_ONLY.  _CPLX
-   means the generic argument(s) may be real or complex, _REAL means
-   real only, _CPLX means complex only.  If there is no suffix, we are
-   defining a function of one generic argument.  If the suffix is _n
-   it is a function of n generic arguments.  If the suffix is _m_n it
-   is a function of n arguments, the first m of which are generic.  We
-   only define these macros for values of n and/or m that are needed. */
-
-/* The general rules for generic macros are given in 7.22 paragraphs 1 and 2.
-   If any generic parameter is complex, we use a complex version.  Otherwise
-   we use a real version.  If the real part of any generic parameter is long
-   double, we use the long double version.  Otherwise if the real part of any
-   generic parameter is double or of integer type, we use the double version.
-   Otherwise we use the float version. */
-
-#define __tg_cplx(expr) \
-  __builtin_classify_type(expr) == 9
-
-#define __tg_ldbl(expr) \
-  __builtin_types_compatible_p(__typeof__(expr), long double)
-
-#define __tg_dbl(expr)                                       \
-  (__builtin_types_compatible_p(__typeof__(expr), double)    \
-   || __builtin_classify_type(expr) == 1)
-
-#define __tg_choose(x,f,d,l)                                  \
-  __builtin_choose_expr(__tg_ldbl(x), l,                      \
-                        __builtin_choose_expr(__tg_dbl(x), d, \
-                                              f))
-
-#define __tg_choose_2(x,y,f,d,l)                                             \
-  __builtin_choose_expr(__tg_ldbl(x) || __tg_ldbl(y), l,                     \
-                        __builtin_choose_expr(__tg_dbl(x) || __tg_dbl(y), d, \
-                                              f))
-
-#define __tg_choose_3(x,y,z,f,d,l)                                        \
-   __builtin_choose_expr(__tg_ldbl(x) || __tg_ldbl(y) || __tg_ldbl(z), l, \
-                        __builtin_choose_expr(__tg_dbl(x) || __tg_dbl(y)  \
-                                              || __tg_dbl(z), d,          \
-                                              f))
-
-#define __TGMATH_CPLX(z,R,C)                                                  \
-  __builtin_choose_expr (__tg_cplx(z),                                        \
-                         __tg_choose (__real__(z), C##f(z), (C)(z), C##l(z)), \
-                         __tg_choose (z, R##f(z), (R)(z), R##l(z)))
-
-#define __TGMATH_CPLX_2(z1,z2,R,C)                                             \
-  __builtin_choose_expr (__tg_cplx(z1) || __tg_cplx(z2),                       \
-                         __tg_choose_2 (__real__(z1), __real__(z2),            \
-                                        C##f(z1,z2), (C)(z1,z2), C##l(z1,z2)), \
-                         __tg_choose_2 (z1, z2,                                \
-                                        R##f(z1,z2), (R)(z1,z2), R##l(z1,z2)))
-
-#define __TGMATH_REAL(x,R) \
-  __tg_choose (x, R##f(x), (R)(x), R##l(x))
-#define __TGMATH_REAL_2(x,y,R) \
-  __tg_choose_2 (x, y, R##f(x,y), (R)(x,y), R##l(x,y))
-#define __TGMATH_REAL_3(x,y,z,R) \
-  __tg_choose_3 (x, y, z, R##f(x,y,z), (R)(x,y,z), R##l(x,y,z))
-#define __TGMATH_REAL_1_2(x,y,R) \
-  __tg_choose (x, R##f(x,y), (R)(x,y), R##l(x,y))
-#define __TGMATH_REAL_2_3(x,y,z,R) \
-  __tg_choose_2 (x, y, R##f(x,y,z), (R)(x,y,z), R##l(x,y,z))
-#define __TGMATH_CPLX_ONLY(z,C) \
-  __tg_choose (__real__(z), C##f(z), (C)(z), C##l(z))
-
-/* Functions defined in both <math.h> and <complex.h> (7.22p4) */
-#define acos(z)          __TGMATH_CPLX(z, acos, cacos)
-#define asin(z)          __TGMATH_CPLX(z, asin, casin)
-#define atan(z)          __TGMATH_CPLX(z, atan, catan)
-#define acosh(z)         __TGMATH_CPLX(z, acosh, cacosh)
-#define asinh(z)         __TGMATH_CPLX(z, asinh, casinh)
-#define atanh(z)         __TGMATH_CPLX(z, atanh, catanh)
-#define cos(z)           __TGMATH_CPLX(z, cos, ccos)
-#define sin(z)           __TGMATH_CPLX(z, sin, csin)
-#define tan(z)           __TGMATH_CPLX(z, tan, ctan)
-#define cosh(z)          __TGMATH_CPLX(z, cosh, ccosh)
-#define sinh(z)          __TGMATH_CPLX(z, sinh, csinh)
-#define tanh(z)          __TGMATH_CPLX(z, tanh, ctanh)
-#define exp(z)           __TGMATH_CPLX(z, exp, cexp)
-#define log(z)           __TGMATH_CPLX(z, log, clog)
-#define pow(z1,z2)       __TGMATH_CPLX_2(z1, z2, pow, cpow)
-#define sqrt(z)          __TGMATH_CPLX(z, sqrt, csqrt)
-#define fabs(z)          __TGMATH_CPLX(z, fabs, cabs)
-
-/* Functions defined in <math.h> only (7.22p5) */
-#define atan2(x,y)       __TGMATH_REAL_2(x, y, atan2)
-#define cbrt(x)          __TGMATH_REAL(x, cbrt)
-#define ceil(x)          __TGMATH_REAL(x, ceil)
-#define copysign(x,y)    __TGMATH_REAL_2(x, y, copysign)
-#define erf(x)           __TGMATH_REAL(x, erf)
-#define erfc(x)          __TGMATH_REAL(x, erfc)
-#define exp2(x)          __TGMATH_REAL(x, exp2)
-#define expm1(x)         __TGMATH_REAL(x, expm1)
-#define fdim(x,y)        __TGMATH_REAL_2(x, y, fdim)
-#define floor(x)         __TGMATH_REAL(x, floor)
-#define fma(x,y,z)       __TGMATH_REAL_3(x, y, z, fma)
-#define fmax(x,y)        __TGMATH_REAL_2(x, y, fmax)
-#define fmin(x,y)        __TGMATH_REAL_2(x, y, fmin)
-#define fmod(x,y)        __TGMATH_REAL_2(x, y, fmod)
-#define frexp(x,y)       __TGMATH_REAL_1_2(x, y, frexp)
-#define hypot(x,y)       __TGMATH_REAL_2(x, y, hypot)
-#define ilogb(x)         __TGMATH_REAL(x, ilogb)
-#define ldexp(x,y)       __TGMATH_REAL_1_2(x, y, ldexp)
-#define lgamma(x)        __TGMATH_REAL(x, lgamma)
-#define llrint(x)        __TGMATH_REAL(x, llrint)
-#define llround(x)       __TGMATH_REAL(x, llround)
-#define log10(x)         __TGMATH_REAL(x, log10)
-#define log1p(x)         __TGMATH_REAL(x, log1p)
-#define log2(x)          __TGMATH_REAL(x, log2)
-#define logb(x)          __TGMATH_REAL(x, logb)
-#define lrint(x)         __TGMATH_REAL(x, lrint)
-#define lround(x)        __TGMATH_REAL(x, lround)
-#define nearbyint(x)     __TGMATH_REAL(x, nearbyint)
-#define nextafter(x,y)   __TGMATH_REAL_2(x, y, nextafter)
-#define nexttoward(x,y)  __TGMATH_REAL_1_2(x, y, nexttoward)
-#define remainder(x,y)   __TGMATH_REAL_2(x, y, remainder)
-#define remquo(x,y,z)    __TGMATH_REAL_2_3(x, y, z, remquo)
-#define rint(x)          __TGMATH_REAL(x, rint)
-#define round(x)         __TGMATH_REAL(x, round)
-#define scalbn(x,y)      __TGMATH_REAL_1_2(x, y, scalbn)
-#define scalbln(x,y)     __TGMATH_REAL_1_2(x, y, scalbln)
-#define tgamma(x)        __TGMATH_REAL(x, tgamma)
-#define trunc(x)         __TGMATH_REAL(x, trunc)
-
-/* Functions defined in <complex.h> only (7.22p6) */
-#define carg(z)          __TGMATH_CPLX_ONLY(z, carg)
-#define cimag(z)         __TGMATH_CPLX_ONLY(z, cimag)
-#define conj(z)          __TGMATH_CPLX_ONLY(z, conj)
-#define cproj(z)         __TGMATH_CPLX_ONLY(z, cproj)
-#define creal(z)         __TGMATH_CPLX_ONLY(z, creal)
-
-#endif /* __cplusplus */
-#endif /* _TGMATH_H */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/tmmintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/tmmintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,249 +1,0 @@
-/* Copyright (C) 2006-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* Implemented from the specification included in the Intel C++ Compiler
-   User Guide and Reference, version 9.1.  */
-
-#ifndef _TMMINTRIN_H_INCLUDED
-#define _TMMINTRIN_H_INCLUDED
-
-/* We need definitions from the SSE3, SSE2 and SSE header files*/
-#include <pmmintrin.h>
-
-#ifndef __SSSE3__
-#pragma GCC push_options
-#pragma GCC target("ssse3")
-#define __DISABLE_SSSE3__
-#endif /* __SSSE3__ */
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_hadd_epi16 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_hadd_epi32 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_hadds_epi16 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_hadd_pi16 (__m64 __X, __m64 __Y)
-{
-  return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_hadd_pi32 (__m64 __X, __m64 __Y)
-{
-  return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_hadds_pi16 (__m64 __X, __m64 __Y)
-{
-  return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_hsub_epi16 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_hsub_epi32 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_hsubs_epi16 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_hsub_pi16 (__m64 __X, __m64 __Y)
-{
-  return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_hsub_pi32 (__m64 __X, __m64 __Y)
-{
-  return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_hsubs_pi16 (__m64 __X, __m64 __Y)
-{
-  return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maddubs_epi16 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maddubs_pi16 (__m64 __X, __m64 __Y)
-{
-  return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mulhrs_epi16 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mulhrs_pi16 (__m64 __X, __m64 __Y)
-{
-  return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_shuffle_epi8 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_shuffle_pi8 (__m64 __X, __m64 __Y)
-{
-  return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sign_epi8 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sign_epi16 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sign_epi32 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sign_pi8 (__m64 __X, __m64 __Y)
-{
-  return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sign_pi16 (__m64 __X, __m64 __Y)
-{
-  return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sign_pi32 (__m64 __X, __m64 __Y)
-{
-  return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_alignr_epi8(__m128i __X, __m128i __Y, const int __N)
-{
-  return (__m128i) __builtin_ia32_palignr128 ((__v2di)__X,
-					      (__v2di)__Y, __N * 8);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_alignr_pi8(__m64 __X, __m64 __Y, const int __N)
-{
-  return (__m64) __builtin_ia32_palignr ((__v1di)__X,
-					 (__v1di)__Y, __N * 8);
-}
-#else
-#define _mm_alignr_epi8(X, Y, N)					\
-  ((__m128i) __builtin_ia32_palignr128 ((__v2di)(__m128i)(X),		\
-					(__v2di)(__m128i)(Y),		\
-					(int)(N) * 8))
-#define _mm_alignr_pi8(X, Y, N)						\
-  ((__m64) __builtin_ia32_palignr ((__v1di)(__m64)(X),			\
-				   (__v1di)(__m64)(Y),			\
-				   (int)(N) * 8))
-#endif
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_abs_epi8 (__m128i __X)
-{
-  return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_abs_epi16 (__m128i __X)
-{
-  return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_abs_epi32 (__m128i __X)
-{
-  return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_abs_pi8 (__m64 __X)
-{
-  return (__m64) __builtin_ia32_pabsb ((__v8qi)__X);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_abs_pi16 (__m64 __X)
-{
-  return (__m64) __builtin_ia32_pabsw ((__v4hi)__X);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_abs_pi32 (__m64 __X)
-{
-  return (__m64) __builtin_ia32_pabsd ((__v2si)__X);
-}
-
-#ifdef __DISABLE_SSSE3__
-#undef __DISABLE_SSSE3__
-#pragma GCC pop_options
-#endif /* __DISABLE_SSSE3__ */
-
-#endif /* _TMMINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/unwind.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/unwind.h	(revision 1046)
+++ 	(revision )
@@ -1,293 +1,0 @@
-/* Exception handling and frame unwind runtime interface routines.
-   Copyright (C) 2001-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful, but WITHOUT
-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
-   License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* This is derived from the C++ ABI for IA-64.  Where we diverge
-   for cross-architecture compatibility are noted with "@@@".  */
-
-#ifndef _UNWIND_H
-#define _UNWIND_H
-
-#if defined (__SEH__) && !defined (__USING_SJLJ_EXCEPTIONS__)
-/* Only for _GCC_specific_handler.  */
-#include <windows.h>
-#endif
-
-#ifndef HIDE_EXPORTS
-#pragma GCC visibility push(default)
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Level 1: Base ABI  */
-
-/* @@@ The IA-64 ABI uses uint64 throughout.  Most places this is
-   inefficient for 32-bit and smaller machines.  */
-typedef unsigned _Unwind_Word __attribute__((__mode__(__unwind_word__)));
-typedef signed _Unwind_Sword __attribute__((__mode__(__unwind_word__)));
-#if defined(__ia64__) && defined(__hpux__)
-typedef unsigned _Unwind_Ptr __attribute__((__mode__(__word__)));
-#else
-typedef unsigned _Unwind_Ptr __attribute__((__mode__(__pointer__)));
-#endif
-typedef unsigned _Unwind_Internal_Ptr __attribute__((__mode__(__pointer__)));
-
-/* @@@ The IA-64 ABI uses a 64-bit word to identify the producer and
-   consumer of an exception.  We'll go along with this for now even on
-   32-bit machines.  We'll need to provide some other option for
-   16-bit machines and for machines with > 8 bits per byte.  */
-typedef unsigned _Unwind_Exception_Class __attribute__((__mode__(__DI__)));
-
-/* The unwind interface uses reason codes in several contexts to
-   identify the reasons for failures or other actions.  */
-typedef enum
-{
-  _URC_NO_REASON = 0,
-  _URC_FOREIGN_EXCEPTION_CAUGHT = 1,
-  _URC_FATAL_PHASE2_ERROR = 2,
-  _URC_FATAL_PHASE1_ERROR = 3,
-  _URC_NORMAL_STOP = 4,
-  _URC_END_OF_STACK = 5,
-  _URC_HANDLER_FOUND = 6,
-  _URC_INSTALL_CONTEXT = 7,
-  _URC_CONTINUE_UNWIND = 8
-} _Unwind_Reason_Code;
-
-
-/* The unwind interface uses a pointer to an exception header object
-   as its representation of an exception being thrown. In general, the
-   full representation of an exception object is language- and
-   implementation-specific, but it will be prefixed by a header
-   understood by the unwind interface.  */
-
-struct _Unwind_Exception;
-
-typedef void (*_Unwind_Exception_Cleanup_Fn) (_Unwind_Reason_Code,
-					      struct _Unwind_Exception *);
-
-struct _Unwind_Exception
-{
-  _Unwind_Exception_Class exception_class;
-  _Unwind_Exception_Cleanup_Fn exception_cleanup;
-
-#if !defined (__USING_SJLJ_EXCEPTIONS__) && defined (__SEH__)
-  _Unwind_Word private_[6];
-#else
-  _Unwind_Word private_1;
-  _Unwind_Word private_2;
-#endif
-
-  /* @@@ The IA-64 ABI says that this structure must be double-word aligned.
-     Taking that literally does not make much sense generically.  Instead we
-     provide the maximum alignment required by any type for the machine.  */
-} __attribute__((__aligned__));
-
-
-/* The ACTIONS argument to the personality routine is a bitwise OR of one
-   or more of the following constants.  */
-typedef int _Unwind_Action;
-
-#define _UA_SEARCH_PHASE	1
-#define _UA_CLEANUP_PHASE	2
-#define _UA_HANDLER_FRAME	4
-#define _UA_FORCE_UNWIND	8
-#define _UA_END_OF_STACK	16
-
-/* The target can override this macro to define any back-end-specific
-   attributes required for the lowest-level stack frame.  */
-#ifndef LIBGCC2_UNWIND_ATTRIBUTE
-#define LIBGCC2_UNWIND_ATTRIBUTE
-#endif
-
-/* This is an opaque type used to refer to a system-specific data
-   structure used by the system unwinder. This context is created and
-   destroyed by the system, and passed to the personality routine
-   during unwinding.  */
-struct _Unwind_Context;
-
-/* Raise an exception, passing along the given exception object.  */
-extern _Unwind_Reason_Code LIBGCC2_UNWIND_ATTRIBUTE
-_Unwind_RaiseException (struct _Unwind_Exception *);
-
-/* Raise an exception for forced unwinding.  */
-
-typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)
-     (int, _Unwind_Action, _Unwind_Exception_Class,
-      struct _Unwind_Exception *, struct _Unwind_Context *, void *);
-
-extern _Unwind_Reason_Code LIBGCC2_UNWIND_ATTRIBUTE
-_Unwind_ForcedUnwind (struct _Unwind_Exception *, _Unwind_Stop_Fn, void *);
-
-/* Helper to invoke the exception_cleanup routine.  */
-extern void _Unwind_DeleteException (struct _Unwind_Exception *);
-
-/* Resume propagation of an existing exception.  This is used after
-   e.g. executing cleanup code, and not to implement rethrowing.  */
-extern void LIBGCC2_UNWIND_ATTRIBUTE
-_Unwind_Resume (struct _Unwind_Exception *);
-
-/* @@@ Resume propagation of a FORCE_UNWIND exception, or to rethrow
-   a normal exception that was handled.  */
-extern _Unwind_Reason_Code LIBGCC2_UNWIND_ATTRIBUTE
-_Unwind_Resume_or_Rethrow (struct _Unwind_Exception *);
-
-/* @@@ Use unwind data to perform a stack backtrace.  The trace callback
-   is called for every stack frame in the call chain, but no cleanup
-   actions are performed.  */
-typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)
-     (struct _Unwind_Context *, void *);
-
-extern _Unwind_Reason_Code LIBGCC2_UNWIND_ATTRIBUTE
-_Unwind_Backtrace (_Unwind_Trace_Fn, void *);
-
-/* These functions are used for communicating information about the unwind
-   context (i.e. the unwind descriptors and the user register state) between
-   the unwind library and the personality routine and landing pad.  Only
-   selected registers may be manipulated.  */
-
-extern _Unwind_Word _Unwind_GetGR (struct _Unwind_Context *, int);
-extern void _Unwind_SetGR (struct _Unwind_Context *, int, _Unwind_Word);
-
-extern _Unwind_Ptr _Unwind_GetIP (struct _Unwind_Context *);
-extern _Unwind_Ptr _Unwind_GetIPInfo (struct _Unwind_Context *, int *);
-extern void _Unwind_SetIP (struct _Unwind_Context *, _Unwind_Ptr);
-
-/* @@@ Retrieve the CFA of the given context.  */
-extern _Unwind_Word _Unwind_GetCFA (struct _Unwind_Context *);
-
-extern void *_Unwind_GetLanguageSpecificData (struct _Unwind_Context *);
-
-extern _Unwind_Ptr _Unwind_GetRegionStart (struct _Unwind_Context *);
-
-
-/* The personality routine is the function in the C++ (or other language)
-   runtime library which serves as an interface between the system unwind
-   library and language-specific exception handling semantics.  It is
-   specific to the code fragment described by an unwind info block, and
-   it is always referenced via the pointer in the unwind info block, and
-   hence it has no ABI-specified name.
-
-   Note that this implies that two different C++ implementations can
-   use different names, and have different contents in the language
-   specific data area.  Moreover, that the language specific data
-   area contains no version info because name of the function invoked
-   provides more effective versioning by detecting at link time the
-   lack of code to handle the different data format.  */
-
-typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)
-     (int, _Unwind_Action, _Unwind_Exception_Class,
-      struct _Unwind_Exception *, struct _Unwind_Context *);
-
-/* @@@ The following alternate entry points are for setjmp/longjmp
-   based unwinding.  */
-
-struct SjLj_Function_Context;
-extern void _Unwind_SjLj_Register (struct SjLj_Function_Context *);
-extern void _Unwind_SjLj_Unregister (struct SjLj_Function_Context *);
-
-extern _Unwind_Reason_Code LIBGCC2_UNWIND_ATTRIBUTE
-_Unwind_SjLj_RaiseException (struct _Unwind_Exception *);
-extern _Unwind_Reason_Code LIBGCC2_UNWIND_ATTRIBUTE
-_Unwind_SjLj_ForcedUnwind (struct _Unwind_Exception *, _Unwind_Stop_Fn, void *);
-extern void LIBGCC2_UNWIND_ATTRIBUTE
-_Unwind_SjLj_Resume (struct _Unwind_Exception *);
-extern _Unwind_Reason_Code LIBGCC2_UNWIND_ATTRIBUTE
-_Unwind_SjLj_Resume_or_Rethrow (struct _Unwind_Exception *);
-
-/* @@@ The following provide access to the base addresses for text
-   and data-relative addressing in the LDSA.  In order to stay link
-   compatible with the standard ABI for IA-64, we inline these.  */
-
-#ifdef __ia64__
-#include <stdlib.h>
-
-static inline _Unwind_Ptr
-_Unwind_GetDataRelBase (struct _Unwind_Context *_C)
-{
-  /* The GP is stored in R1.  */
-  return _Unwind_GetGR (_C, 1);
-}
-
-static inline _Unwind_Ptr
-_Unwind_GetTextRelBase (struct _Unwind_Context *_C __attribute__ ((__unused__)))
-{
-  abort ();
-  return 0;
-}
-
-/* @@@ Retrieve the Backing Store Pointer of the given context.  */
-extern _Unwind_Word _Unwind_GetBSP (struct _Unwind_Context *);
-#else
-extern _Unwind_Ptr _Unwind_GetDataRelBase (struct _Unwind_Context *);
-extern _Unwind_Ptr _Unwind_GetTextRelBase (struct _Unwind_Context *);
-#endif
-
-/* @@@ Given an address, return the entry point of the function that
-   contains it.  */
-extern void * _Unwind_FindEnclosingFunction (void *pc);
-
-#ifndef __SIZEOF_LONG__
-  #error "__SIZEOF_LONG__ macro not defined"
-#endif
-
-#ifndef __SIZEOF_POINTER__
-  #error "__SIZEOF_POINTER__ macro not defined"
-#endif
-
-
-/* leb128 type numbers have a potentially unlimited size.
-   The target of the following definitions of _sleb128_t and _uleb128_t
-   is to have efficient data types large enough to hold the leb128 type
-   numbers used in the unwind code.
-   Mostly these types will simply be defined to long and unsigned long
-   except when a unsigned long data type on the target machine is not
-   capable of storing a pointer.  */
-
-#if __SIZEOF_LONG__ >= __SIZEOF_POINTER__
-  typedef long _sleb128_t;
-  typedef unsigned long _uleb128_t;
-#elif __SIZEOF_LONG_LONG__ >= __SIZEOF_POINTER__
-  typedef long long _sleb128_t;
-  typedef unsigned long long _uleb128_t;
-#else
-# error "What type shall we use for _sleb128_t?"
-#endif
-
-#if defined (__SEH__) && !defined (__USING_SJLJ_EXCEPTIONS__)
-/* Handles the mapping from SEH to GCC interfaces.  */
-EXCEPTION_DISPOSITION _GCC_specific_handler (PEXCEPTION_RECORD, void *,
-					     PCONTEXT, PDISPATCHER_CONTEXT,
-					     _Unwind_Personality_Fn);
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#ifndef HIDE_EXPORTS
-#pragma GCC visibility pop
-#endif
-
-#endif /* unwind.h */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/varargs.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/varargs.h	(revision 1046)
+++ 	(revision )
@@ -1,7 +1,0 @@
-#ifndef _VARARGS_H
-#define _VARARGS_H
-
-#error "GCC no longer implements <varargs.h>."
-#error "Revise your code to use <stdarg.h>."
-
-#endif
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/wmmintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/wmmintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,132 +1,0 @@
-/* Copyright (C) 2008-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* Implemented from the specification included in the Intel C++ Compiler
-   User Guide and Reference, version 10.1.  */
-
-#ifndef _WMMINTRIN_H_INCLUDED
-#define _WMMINTRIN_H_INCLUDED
-
-/* We need definitions from the SSE2 header file.  */
-#include <emmintrin.h>
-
-/* AES */
-
-#ifndef __AES__
-#pragma GCC push_options
-#pragma GCC target("aes")
-#define __DISABLE_AES__
-#endif /* __AES__ */
-
-/* Performs 1 round of AES decryption of the first m128i using 
-   the second m128i as a round key.  */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_aesdec_si128 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_aesdec128 ((__v2di)__X, (__v2di)__Y);
-}
-
-/* Performs the last round of AES decryption of the first m128i 
-   using the second m128i as a round key.  */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_aesdeclast_si128 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_aesdeclast128 ((__v2di)__X,
-						 (__v2di)__Y);
-}
-
-/* Performs 1 round of AES encryption of the first m128i using 
-   the second m128i as a round key.  */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_aesenc_si128 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_aesenc128 ((__v2di)__X, (__v2di)__Y);
-}
-
-/* Performs the last round of AES encryption of the first m128i
-   using the second m128i as a round key.  */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_aesenclast_si128 (__m128i __X, __m128i __Y)
-{
-  return (__m128i) __builtin_ia32_aesenclast128 ((__v2di)__X, (__v2di)__Y);
-}
-
-/* Performs the InverseMixColumn operation on the source m128i 
-   and stores the result into m128i destination.  */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_aesimc_si128 (__m128i __X)
-{
-  return (__m128i) __builtin_ia32_aesimc128 ((__v2di)__X);
-}
-
-/* Generates a m128i round key for the input m128i AES cipher key and
-   byte round constant.  The second parameter must be a compile time
-   constant.  */
-#ifdef __OPTIMIZE__
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_aeskeygenassist_si128 (__m128i __X, const int __C)
-{
-  return (__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)__X, __C);
-}
-#else
-#define _mm_aeskeygenassist_si128(X, C)					\
-  ((__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)(__m128i)(X),	\
-						(int)(C)))
-#endif
-
-#ifdef __DISABLE_AES__
-#undef __DISABLE_AES__
-#pragma GCC pop_options
-#endif /* __DISABLE_AES__ */
-
-/* PCLMUL */
-
-#ifndef __PCLMUL__
-#pragma GCC push_options
-#pragma GCC target("pclmul")
-#define __DISABLE_PCLMUL__
-#endif /* __PCLMUL__ */
-
-/* Performs carry-less integer multiplication of 64-bit halves of
-   128-bit input operands.  The third parameter inducates which 64-bit
-   haves of the input parameters v1 and v2 should be used. It must be
-   a compile time constant.  */
-#ifdef __OPTIMIZE__
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_clmulepi64_si128 (__m128i __X, __m128i __Y, const int __I)
-{
-  return (__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)__X,
-						(__v2di)__Y, __I);
-}
-#else
-#define _mm_clmulepi64_si128(X, Y, I)					\
-  ((__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)(__m128i)(X),		\
-					  (__v2di)(__m128i)(Y), (int)(I)))
-#endif
-
-#ifdef __DISABLE_PCLMUL__
-#undef __DISABLE_PCLMUL__
-#pragma GCC pop_options
-#endif /* __DISABLE_PCLMUL__ */
-
-#endif /* _WMMINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/x86intrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/x86intrin.h	(revision 1046)
+++ 	(revision )
@@ -1,89 +1,0 @@
-/* Copyright (C) 2008-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _X86INTRIN_H_INCLUDED
-#define _X86INTRIN_H_INCLUDED
-
-#include <ia32intrin.h>
-
-#include <mmintrin.h>
-
-#include <xmmintrin.h>
-
-#include <emmintrin.h>
-
-#include <pmmintrin.h>
-
-#include <tmmintrin.h>
-
-#include <ammintrin.h>
-
-#include <smmintrin.h>
-
-#include <wmmintrin.h>
-
-/* For including AVX instructions */
-#include <immintrin.h>
-
-#include <mm3dnow.h>
-
-#include <fma4intrin.h>
-
-#include <xopintrin.h>
-
-#include <lwpintrin.h>
-
-#include <bmiintrin.h>
-
-#include <bmi2intrin.h>
-
-#include <tbmintrin.h>
-
-#include <lzcntintrin.h>
-
-#include <popcntintrin.h>
-
-#include <rdseedintrin.h>
-
-#include <prfchwintrin.h>
-
-#include <fxsrintrin.h>
-
-#include <xsaveintrin.h>
-
-#include <xsaveoptintrin.h>
-
-#include <adxintrin.h>
-
-#include <clwbintrin.h>
-
-#include <pcommitintrin.h>
-
-#include <clflushoptintrin.h>
-
-#include <xsavesintrin.h>
-
-#include <xsavecintrin.h>
-
-#include <mwaitxintrin.h>
-#endif /* _X86INTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/xmmintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/xmmintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,1267 +1,0 @@
-/* Copyright (C) 2002-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* Implemented from the specification included in the Intel C++ Compiler
-   User Guide and Reference, version 9.0.  */
-
-#ifndef _XMMINTRIN_H_INCLUDED
-#define _XMMINTRIN_H_INCLUDED
-
-/* We need type definitions from the MMX header file.  */
-#include <mmintrin.h>
-
-/* Get _mm_malloc () and _mm_free ().  */
-#include <mm_malloc.h>
-
-/* Constants for use with _mm_prefetch.  */
-enum _mm_hint
-{
-  /* _MM_HINT_ET is _MM_HINT_T with set 3rd bit.  */
-  _MM_HINT_ET0 = 7,
-  _MM_HINT_ET1 = 6,
-  _MM_HINT_T0 = 3,
-  _MM_HINT_T1 = 2,
-  _MM_HINT_T2 = 1,
-  _MM_HINT_NTA = 0
-};
-
-/* Loads one cache line from address P to a location "closer" to the
-   processor.  The selector I specifies the type of prefetch operation.  */
-#ifdef __OPTIMIZE__
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_prefetch (const void *__P, enum _mm_hint __I)
-{
-  __builtin_prefetch (__P, (__I & 0x4) >> 2, __I & 0x3);
-}
-#else
-#define _mm_prefetch(P, I) \
-  __builtin_prefetch ((P), ((I & 0x4) >> 2), (I & 0x3))
-#endif
-
-#ifndef __SSE__
-#pragma GCC push_options
-#pragma GCC target("sse")
-#define __DISABLE_SSE__
-#endif /* __SSE__ */
-
-/* The Intel API is flexible enough that we must allow aliasing with other
-   vector types, and their scalar components.  */
-typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
-
-/* Internal data types for implementing the intrinsics.  */
-typedef float __v4sf __attribute__ ((__vector_size__ (16)));
-
-/* Create a selector for use with the SHUFPS instruction.  */
-#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
- (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
-
-/* Bits in the MXCSR.  */
-#define _MM_EXCEPT_MASK       0x003f
-#define _MM_EXCEPT_INVALID    0x0001
-#define _MM_EXCEPT_DENORM     0x0002
-#define _MM_EXCEPT_DIV_ZERO   0x0004
-#define _MM_EXCEPT_OVERFLOW   0x0008
-#define _MM_EXCEPT_UNDERFLOW  0x0010
-#define _MM_EXCEPT_INEXACT    0x0020
-
-#define _MM_MASK_MASK         0x1f80
-#define _MM_MASK_INVALID      0x0080
-#define _MM_MASK_DENORM       0x0100
-#define _MM_MASK_DIV_ZERO     0x0200
-#define _MM_MASK_OVERFLOW     0x0400
-#define _MM_MASK_UNDERFLOW    0x0800
-#define _MM_MASK_INEXACT      0x1000
-
-#define _MM_ROUND_MASK        0x6000
-#define _MM_ROUND_NEAREST     0x0000
-#define _MM_ROUND_DOWN        0x2000
-#define _MM_ROUND_UP          0x4000
-#define _MM_ROUND_TOWARD_ZERO 0x6000
-
-#define _MM_FLUSH_ZERO_MASK   0x8000
-#define _MM_FLUSH_ZERO_ON     0x8000
-#define _MM_FLUSH_ZERO_OFF    0x0000
-
-/* Create an undefined vector.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_undefined_ps (void)
-{
-  __m128 __Y = __Y;
-  return __Y;
-}
-
-/* Create a vector of zeros.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setzero_ps (void)
-{
-  return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
-}
-
-/* Perform the respective operation on the lower SPFP (single-precision
-   floating-point) values of A and B; the upper three SPFP values are
-   passed through from A.  */
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_add_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_addss ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sub_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_subss ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mul_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_mulss ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_div_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_divss ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sqrt_ss (__m128 __A)
-{
-  return (__m128) __builtin_ia32_sqrtss ((__v4sf)__A);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rcp_ss (__m128 __A)
-{
-  return (__m128) __builtin_ia32_rcpss ((__v4sf)__A);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rsqrt_ss (__m128 __A)
-{
-  return (__m128) __builtin_ia32_rsqrtss ((__v4sf)__A);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_minss ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_max_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_maxss ((__v4sf)__A, (__v4sf)__B);
-}
-
-/* Perform the respective operation on the four SPFP values in A and B.  */
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_add_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) ((__v4sf)__A + (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sub_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) ((__v4sf)__A - (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mul_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) ((__v4sf)__A * (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_div_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) ((__v4sf)__A / (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sqrt_ps (__m128 __A)
-{
-  return (__m128) __builtin_ia32_sqrtps ((__v4sf)__A);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rcp_ps (__m128 __A)
-{
-  return (__m128) __builtin_ia32_rcpps ((__v4sf)__A);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rsqrt_ps (__m128 __A)
-{
-  return (__m128) __builtin_ia32_rsqrtps ((__v4sf)__A);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_minps ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_max_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_maxps ((__v4sf)__A, (__v4sf)__B);
-}
-
-/* Perform logical bit-wise operations on 128-bit values.  */
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_and_ps (__m128 __A, __m128 __B)
-{
-  return __builtin_ia32_andps (__A, __B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_andnot_ps (__m128 __A, __m128 __B)
-{
-  return __builtin_ia32_andnps (__A, __B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_or_ps (__m128 __A, __m128 __B)
-{
-  return __builtin_ia32_orps (__A, __B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_xor_ps (__m128 __A, __m128 __B)
-{
-  return __builtin_ia32_xorps (__A, __B);
-}
-
-/* Perform a comparison on the lower SPFP values of A and B.  If the
-   comparison is true, place a mask of all ones in the result, otherwise a
-   mask of zeros.  The upper three SPFP values are passed through from A.  */
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_cmpeqss ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmplt_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_cmpltss ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmple_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_cmpless ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpgt_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_movss ((__v4sf) __A,
-					(__v4sf)
-					__builtin_ia32_cmpltss ((__v4sf) __B,
-								(__v4sf)
-								__A));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpge_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_movss ((__v4sf) __A,
-					(__v4sf)
-					__builtin_ia32_cmpless ((__v4sf) __B,
-								(__v4sf)
-								__A));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpneq_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_cmpneqss ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpnlt_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_cmpnltss ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpnle_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_cmpnless ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpngt_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_movss ((__v4sf) __A,
-					(__v4sf)
-					__builtin_ia32_cmpnltss ((__v4sf) __B,
-								 (__v4sf)
-								 __A));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpnge_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_movss ((__v4sf) __A,
-					(__v4sf)
-					__builtin_ia32_cmpnless ((__v4sf) __B,
-								 (__v4sf)
-								 __A));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpord_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_cmpordss ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpunord_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_cmpunordss ((__v4sf)__A, (__v4sf)__B);
-}
-
-/* Perform a comparison on the four SPFP values of A and B.  For each
-   element, if the comparison is true, place a mask of all ones in the
-   result, otherwise a mask of zeros.  */
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmplt_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_cmpltps ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmple_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_cmpleps ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpgt_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_cmpgtps ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpge_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_cmpgeps ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpneq_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_cmpneqps ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpnlt_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_cmpnltps ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpnle_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_cmpnleps ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpngt_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_cmpngtps ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpnge_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_cmpngeps ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpord_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_cmpordps ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpunord_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_cmpunordps ((__v4sf)__A, (__v4sf)__B);
-}
-
-/* Compare the lower SPFP values of A and B and return 1 if true
-   and 0 if false.  */
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comieq_ss (__m128 __A, __m128 __B)
-{
-  return __builtin_ia32_comieq ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comilt_ss (__m128 __A, __m128 __B)
-{
-  return __builtin_ia32_comilt ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comile_ss (__m128 __A, __m128 __B)
-{
-  return __builtin_ia32_comile ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comigt_ss (__m128 __A, __m128 __B)
-{
-  return __builtin_ia32_comigt ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comige_ss (__m128 __A, __m128 __B)
-{
-  return __builtin_ia32_comige ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comineq_ss (__m128 __A, __m128 __B)
-{
-  return __builtin_ia32_comineq ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ucomieq_ss (__m128 __A, __m128 __B)
-{
-  return __builtin_ia32_ucomieq ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ucomilt_ss (__m128 __A, __m128 __B)
-{
-  return __builtin_ia32_ucomilt ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ucomile_ss (__m128 __A, __m128 __B)
-{
-  return __builtin_ia32_ucomile ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ucomigt_ss (__m128 __A, __m128 __B)
-{
-  return __builtin_ia32_ucomigt ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ucomige_ss (__m128 __A, __m128 __B)
-{
-  return __builtin_ia32_ucomige ((__v4sf)__A, (__v4sf)__B);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ucomineq_ss (__m128 __A, __m128 __B)
-{
-  return __builtin_ia32_ucomineq ((__v4sf)__A, (__v4sf)__B);
-}
-
-/* Convert the lower SPFP value to a 32-bit integer according to the current
-   rounding mode.  */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtss_si32 (__m128 __A)
-{
-  return __builtin_ia32_cvtss2si ((__v4sf) __A);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_ss2si (__m128 __A)
-{
-  return _mm_cvtss_si32 (__A);
-}
-
-#ifdef __x86_64__
-/* Convert the lower SPFP value to a 32-bit integer according to the
-   current rounding mode.  */
-
-/* Intel intrinsic.  */
-extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtss_si64 (__m128 __A)
-{
-  return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
-}
-
-/* Microsoft intrinsic.  */
-extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtss_si64x (__m128 __A)
-{
-  return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
-}
-#endif
-
-/* Convert the two lower SPFP values to 32-bit integers according to the
-   current rounding mode.  Return the integers in packed form.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtps_pi32 (__m128 __A)
-{
-  return (__m64) __builtin_ia32_cvtps2pi ((__v4sf) __A);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_ps2pi (__m128 __A)
-{
-  return _mm_cvtps_pi32 (__A);
-}
-
-/* Truncate the lower SPFP value to a 32-bit integer.  */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttss_si32 (__m128 __A)
-{
-  return __builtin_ia32_cvttss2si ((__v4sf) __A);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_ss2si (__m128 __A)
-{
-  return _mm_cvttss_si32 (__A);
-}
-
-#ifdef __x86_64__
-/* Truncate the lower SPFP value to a 32-bit integer.  */
-
-/* Intel intrinsic.  */
-extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttss_si64 (__m128 __A)
-{
-  return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
-}
-
-/* Microsoft intrinsic.  */
-extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttss_si64x (__m128 __A)
-{
-  return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
-}
-#endif
-
-/* Truncate the two lower SPFP values to 32-bit integers.  Return the
-   integers in packed form.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttps_pi32 (__m128 __A)
-{
-  return (__m64) __builtin_ia32_cvttps2pi ((__v4sf) __A);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_ps2pi (__m128 __A)
-{
-  return _mm_cvttps_pi32 (__A);
-}
-
-/* Convert B to a SPFP value and insert it as element zero in A.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsi32_ss (__m128 __A, int __B)
-{
-  return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_si2ss (__m128 __A, int __B)
-{
-  return _mm_cvtsi32_ss (__A, __B);
-}
-
-#ifdef __x86_64__
-/* Convert B to a SPFP value and insert it as element zero in A.  */
-
-/* Intel intrinsic.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsi64_ss (__m128 __A, long long __B)
-{
-  return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
-}
-
-/* Microsoft intrinsic.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsi64x_ss (__m128 __A, long long __B)
-{
-  return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
-}
-#endif
-
-/* Convert the two 32-bit values in B to SPFP form and insert them
-   as the two lower elements in A.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtpi32_ps (__m128 __A, __m64 __B)
-{
-  return (__m128) __builtin_ia32_cvtpi2ps ((__v4sf) __A, (__v2si)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_pi2ps (__m128 __A, __m64 __B)
-{
-  return _mm_cvtpi32_ps (__A, __B);
-}
-
-/* Convert the four signed 16-bit values in A to SPFP form.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtpi16_ps (__m64 __A)
-{
-  __v4hi __sign;
-  __v2si __hisi, __losi;
-  __v4sf __zero, __ra, __rb;
-
-  /* This comparison against zero gives us a mask that can be used to
-     fill in the missing sign bits in the unpack operations below, so
-     that we get signed values after unpacking.  */
-  __sign = __builtin_ia32_pcmpgtw ((__v4hi)0LL, (__v4hi)__A);
-
-  /* Convert the four words to doublewords.  */
-  __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __sign);
-  __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, __sign);
-
-  /* Convert the doublewords to floating point two at a time.  */
-  __zero = (__v4sf) _mm_setzero_ps ();
-  __ra = __builtin_ia32_cvtpi2ps (__zero, __losi);
-  __rb = __builtin_ia32_cvtpi2ps (__ra, __hisi);
-
-  return (__m128) __builtin_ia32_movlhps (__ra, __rb);
-}
-
-/* Convert the four unsigned 16-bit values in A to SPFP form.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtpu16_ps (__m64 __A)
-{
-  __v2si __hisi, __losi;
-  __v4sf __zero, __ra, __rb;
-
-  /* Convert the four words to doublewords.  */
-  __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, (__v4hi)0LL);
-  __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, (__v4hi)0LL);
-
-  /* Convert the doublewords to floating point two at a time.  */
-  __zero = (__v4sf) _mm_setzero_ps ();
-  __ra = __builtin_ia32_cvtpi2ps (__zero, __losi);
-  __rb = __builtin_ia32_cvtpi2ps (__ra, __hisi);
-
-  return (__m128) __builtin_ia32_movlhps (__ra, __rb);
-}
-
-/* Convert the low four signed 8-bit values in A to SPFP form.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtpi8_ps (__m64 __A)
-{
-  __v8qi __sign;
-
-  /* This comparison against zero gives us a mask that can be used to
-     fill in the missing sign bits in the unpack operations below, so
-     that we get signed values after unpacking.  */
-  __sign = __builtin_ia32_pcmpgtb ((__v8qi)0LL, (__v8qi)__A);
-
-  /* Convert the four low bytes to words.  */
-  __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, __sign);
-
-  return _mm_cvtpi16_ps(__A);
-}
-
-/* Convert the low four unsigned 8-bit values in A to SPFP form.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtpu8_ps(__m64 __A)
-{
-  __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, (__v8qi)0LL);
-  return _mm_cvtpu16_ps(__A);
-}
-
-/* Convert the four signed 32-bit values in A and B to SPFP form.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtpi32x2_ps(__m64 __A, __m64 __B)
-{
-  __v4sf __zero = (__v4sf) _mm_setzero_ps ();
-  __v4sf __sfa = __builtin_ia32_cvtpi2ps (__zero, (__v2si)__A);
-  __v4sf __sfb = __builtin_ia32_cvtpi2ps (__sfa, (__v2si)__B);
-  return (__m128) __builtin_ia32_movlhps (__sfa, __sfb);
-}
-
-/* Convert the four SPFP values in A to four signed 16-bit integers.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtps_pi16(__m128 __A)
-{
-  __v4sf __hisf = (__v4sf)__A;
-  __v4sf __losf = __builtin_ia32_movhlps (__hisf, __hisf);
-  __v2si __hisi = __builtin_ia32_cvtps2pi (__hisf);
-  __v2si __losi = __builtin_ia32_cvtps2pi (__losf);
-  return (__m64) __builtin_ia32_packssdw (__hisi, __losi);
-}
-
-/* Convert the four SPFP values in A to four signed 8-bit integers.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtps_pi8(__m128 __A)
-{
-  __v4hi __tmp = (__v4hi) _mm_cvtps_pi16 (__A);
-  return (__m64) __builtin_ia32_packsswb (__tmp, (__v4hi)0LL);
-}
-
-/* Selects four specific SPFP values from A and B based on MASK.  */
-#ifdef __OPTIMIZE__
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_shuffle_ps (__m128 __A, __m128 __B, int const __mask)
-{
-  return (__m128) __builtin_ia32_shufps ((__v4sf)__A, (__v4sf)__B, __mask);
-}
-#else
-#define _mm_shuffle_ps(A, B, MASK)					\
-  ((__m128) __builtin_ia32_shufps ((__v4sf)(__m128)(A),			\
-				   (__v4sf)(__m128)(B), (int)(MASK)))
-#endif
-
-/* Selects and interleaves the upper two SPFP values from A and B.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpackhi_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_unpckhps ((__v4sf)__A, (__v4sf)__B);
-}
-
-/* Selects and interleaves the lower two SPFP values from A and B.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpacklo_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_unpcklps ((__v4sf)__A, (__v4sf)__B);
-}
-
-/* Sets the upper two SPFP values with 64-bits of data loaded from P;
-   the lower two values are passed through from A.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_loadh_pi (__m128 __A, __m64 const *__P)
-{
-  return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (const __v2sf *)__P);
-}
-
-/* Stores the upper two SPFP values of A into P.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_storeh_pi (__m64 *__P, __m128 __A)
-{
-  __builtin_ia32_storehps ((__v2sf *)__P, (__v4sf)__A);
-}
-
-/* Moves the upper two values of B into the lower two values of A.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movehl_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_movhlps ((__v4sf)__A, (__v4sf)__B);
-}
-
-/* Moves the lower two values of B into the upper two values of A.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movelh_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_movlhps ((__v4sf)__A, (__v4sf)__B);
-}
-
-/* Sets the lower two SPFP values with 64-bits of data loaded from P;
-   the upper two values are passed through from A.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_loadl_pi (__m128 __A, __m64 const *__P)
-{
-  return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (const __v2sf *)__P);
-}
-
-/* Stores the lower two SPFP values of A into P.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_storel_pi (__m64 *__P, __m128 __A)
-{
-  __builtin_ia32_storelps ((__v2sf *)__P, (__v4sf)__A);
-}
-
-/* Creates a 4-bit mask from the most significant bits of the SPFP values.  */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movemask_ps (__m128 __A)
-{
-  return __builtin_ia32_movmskps ((__v4sf)__A);
-}
-
-/* Return the contents of the control register.  */
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_getcsr (void)
-{
-  return __builtin_ia32_stmxcsr ();
-}
-
-/* Read exception bits from the control register.  */
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_MM_GET_EXCEPTION_STATE (void)
-{
-  return _mm_getcsr() & _MM_EXCEPT_MASK;
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_MM_GET_EXCEPTION_MASK (void)
-{
-  return _mm_getcsr() & _MM_MASK_MASK;
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_MM_GET_ROUNDING_MODE (void)
-{
-  return _mm_getcsr() & _MM_ROUND_MASK;
-}
-
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_MM_GET_FLUSH_ZERO_MODE (void)
-{
-  return _mm_getcsr() & _MM_FLUSH_ZERO_MASK;
-}
-
-/* Set the control register to I.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setcsr (unsigned int __I)
-{
-  __builtin_ia32_ldmxcsr (__I);
-}
-
-/* Set exception bits in the control register.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_MM_SET_EXCEPTION_STATE(unsigned int __mask)
-{
-  _mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | __mask);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_MM_SET_EXCEPTION_MASK (unsigned int __mask)
-{
-  _mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | __mask);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_MM_SET_ROUNDING_MODE (unsigned int __mode)
-{
-  _mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | __mode);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_MM_SET_FLUSH_ZERO_MODE (unsigned int __mode)
-{
-  _mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | __mode);
-}
-
-/* Create a vector with element 0 as F and the rest zero.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set_ss (float __F)
-{
-  return __extension__ (__m128)(__v4sf){ __F, 0.0f, 0.0f, 0.0f };
-}
-
-/* Create a vector with all four elements equal to F.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set1_ps (float __F)
-{
-  return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F };
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set_ps1 (float __F)
-{
-  return _mm_set1_ps (__F);
-}
-
-/* Create a vector with element 0 as *P and the rest zero.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_load_ss (float const *__P)
-{
-  return _mm_set_ss (*__P);
-}
-
-/* Create a vector with all four elements equal to *P.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_load1_ps (float const *__P)
-{
-  return _mm_set1_ps (*__P);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_load_ps1 (float const *__P)
-{
-  return _mm_load1_ps (__P);
-}
-
-/* Load four SPFP values from P.  The address must be 16-byte aligned.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_load_ps (float const *__P)
-{
-  return (__m128) *(__v4sf *)__P;
-}
-
-/* Load four SPFP values from P.  The address need not be 16-byte aligned.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_loadu_ps (float const *__P)
-{
-  return (__m128) __builtin_ia32_loadups (__P);
-}
-
-/* Load four SPFP values in reverse order.  The address must be aligned.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_loadr_ps (float const *__P)
-{
-  __v4sf __tmp = *(__v4sf *)__P;
-  return (__m128) __builtin_ia32_shufps (__tmp, __tmp, _MM_SHUFFLE (0,1,2,3));
-}
-
-/* Create the vector [Z Y X W].  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W)
-{
-  return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z };
-}
-
-/* Create the vector [W X Y Z].  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setr_ps (float __Z, float __Y, float __X, float __W)
-{
-  return __extension__ (__m128)(__v4sf){ __Z, __Y, __X, __W };
-}
-
-/* Stores the lower SPFP value.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_store_ss (float *__P, __m128 __A)
-{
-  *__P = ((__v4sf)__A)[0];
-}
-
-extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtss_f32 (__m128 __A)
-{
-  return ((__v4sf)__A)[0];
-}
-
-/* Store four SPFP values.  The address must be 16-byte aligned.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_store_ps (float *__P, __m128 __A)
-{
-  *(__v4sf *)__P = (__v4sf)__A;
-}
-
-/* Store four SPFP values.  The address need not be 16-byte aligned.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_storeu_ps (float *__P, __m128 __A)
-{
-  __builtin_ia32_storeups (__P, (__v4sf)__A);
-}
-
-/* Store the lower SPFP value across four words.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_store1_ps (float *__P, __m128 __A)
-{
-  __v4sf __va = (__v4sf)__A;
-  __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,0,0,0));
-  _mm_storeu_ps (__P, __tmp);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_store_ps1 (float *__P, __m128 __A)
-{
-  _mm_store1_ps (__P, __A);
-}
-
-/* Store four SPFP values in reverse order.  The address must be aligned.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_storer_ps (float *__P, __m128 __A)
-{
-  __v4sf __va = (__v4sf)__A;
-  __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,1,2,3));
-  _mm_store_ps (__P, __tmp);
-}
-
-/* Sets the low SPFP value of A from the low value of B.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_move_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_movss ((__v4sf)__A, (__v4sf)__B);
-}
-
-/* Extracts one of the four words of A.  The selector N must be immediate.  */
-#ifdef __OPTIMIZE__
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_extract_pi16 (__m64 const __A, int const __N)
-{
-  return __builtin_ia32_vec_ext_v4hi ((__v4hi)__A, __N);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pextrw (__m64 const __A, int const __N)
-{
-  return _mm_extract_pi16 (__A, __N);
-}
-#else
-#define _mm_extract_pi16(A, N)	\
-  ((int) __builtin_ia32_vec_ext_v4hi ((__v4hi)(__m64)(A), (int)(N)))
-
-#define _m_pextrw(A, N) _mm_extract_pi16(A, N)
-#endif
-
-/* Inserts word D into one of four words of A.  The selector N must be
-   immediate.  */
-#ifdef __OPTIMIZE__
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_insert_pi16 (__m64 const __A, int const __D, int const __N)
-{
-  return (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)__A, __D, __N);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pinsrw (__m64 const __A, int const __D, int const __N)
-{
-  return _mm_insert_pi16 (__A, __D, __N);
-}
-#else
-#define _mm_insert_pi16(A, D, N)				\
-  ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(__m64)(A),	\
-					(int)(D), (int)(N)))
-
-#define _m_pinsrw(A, D, N) _mm_insert_pi16(A, D, N)
-#endif
-
-/* Compute the element-wise maximum of signed 16-bit values.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_max_pi16 (__m64 __A, __m64 __B)
-{
-  return (__m64) __builtin_ia32_pmaxsw ((__v4hi)__A, (__v4hi)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pmaxsw (__m64 __A, __m64 __B)
-{
-  return _mm_max_pi16 (__A, __B);
-}
-
-/* Compute the element-wise maximum of unsigned 8-bit values.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_max_pu8 (__m64 __A, __m64 __B)
-{
-  return (__m64) __builtin_ia32_pmaxub ((__v8qi)__A, (__v8qi)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pmaxub (__m64 __A, __m64 __B)
-{
-  return _mm_max_pu8 (__A, __B);
-}
-
-/* Compute the element-wise minimum of signed 16-bit values.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_pi16 (__m64 __A, __m64 __B)
-{
-  return (__m64) __builtin_ia32_pminsw ((__v4hi)__A, (__v4hi)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pminsw (__m64 __A, __m64 __B)
-{
-  return _mm_min_pi16 (__A, __B);
-}
-
-/* Compute the element-wise minimum of unsigned 8-bit values.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_pu8 (__m64 __A, __m64 __B)
-{
-  return (__m64) __builtin_ia32_pminub ((__v8qi)__A, (__v8qi)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pminub (__m64 __A, __m64 __B)
-{
-  return _mm_min_pu8 (__A, __B);
-}
-
-/* Create an 8-bit mask of the signs of 8-bit values.  */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movemask_pi8 (__m64 __A)
-{
-  return __builtin_ia32_pmovmskb ((__v8qi)__A);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pmovmskb (__m64 __A)
-{
-  return _mm_movemask_pi8 (__A);
-}
-
-/* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values
-   in B and produce the high 16 bits of the 32-bit results.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mulhi_pu16 (__m64 __A, __m64 __B)
-{
-  return (__m64) __builtin_ia32_pmulhuw ((__v4hi)__A, (__v4hi)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pmulhuw (__m64 __A, __m64 __B)
-{
-  return _mm_mulhi_pu16 (__A, __B);
-}
-
-/* Return a combination of the four 16-bit values in A.  The selector
-   must be an immediate.  */
-#ifdef __OPTIMIZE__
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_shuffle_pi16 (__m64 __A, int const __N)
-{
-  return (__m64) __builtin_ia32_pshufw ((__v4hi)__A, __N);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pshufw (__m64 __A, int const __N)
-{
-  return _mm_shuffle_pi16 (__A, __N);
-}
-#else
-#define _mm_shuffle_pi16(A, N) \
-  ((__m64) __builtin_ia32_pshufw ((__v4hi)(__m64)(A), (int)(N)))
-
-#define _m_pshufw(A, N) _mm_shuffle_pi16 (A, N)
-#endif
-
-/* Conditionally store byte elements of A into P.  The high bit of each
-   byte in the selector N determines whether the corresponding byte from
-   A is stored.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
-{
-  __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_maskmovq (__m64 __A, __m64 __N, char *__P)
-{
-  _mm_maskmove_si64 (__A, __N, __P);
-}
-
-/* Compute the rounded averages of the unsigned 8-bit values in A and B.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_avg_pu8 (__m64 __A, __m64 __B)
-{
-  return (__m64) __builtin_ia32_pavgb ((__v8qi)__A, (__v8qi)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pavgb (__m64 __A, __m64 __B)
-{
-  return _mm_avg_pu8 (__A, __B);
-}
-
-/* Compute the rounded averages of the unsigned 16-bit values in A and B.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_avg_pu16 (__m64 __A, __m64 __B)
-{
-  return (__m64) __builtin_ia32_pavgw ((__v4hi)__A, (__v4hi)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pavgw (__m64 __A, __m64 __B)
-{
-  return _mm_avg_pu16 (__A, __B);
-}
-
-/* Compute the sum of the absolute differences of the unsigned 8-bit
-   values in A and B.  Return the value in the lower 16-bit word; the
-   upper words are cleared.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sad_pu8 (__m64 __A, __m64 __B)
-{
-  return (__m64) __builtin_ia32_psadbw ((__v8qi)__A, (__v8qi)__B);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psadbw (__m64 __A, __m64 __B)
-{
-  return _mm_sad_pu8 (__A, __B);
-}
-
-/* Stores the data in A to the address P without polluting the caches.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_stream_pi (__m64 *__P, __m64 __A)
-{
-  __builtin_ia32_movntq ((unsigned long long *)__P, (unsigned long long)__A);
-}
-
-/* Likewise.  The address must be 16-byte aligned.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_stream_ps (float *__P, __m128 __A)
-{
-  __builtin_ia32_movntps (__P, (__v4sf)__A);
-}
-
-/* Guarantees that every preceding store is globally visible before
-   any subsequent store.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sfence (void)
-{
-  __builtin_ia32_sfence ();
-}
-
-/* Transpose the 4x4 matrix composed of row[0-3].  */
-#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3)			\
-do {									\
-  __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3);	\
-  __v4sf __t0 = __builtin_ia32_unpcklps (__r0, __r1);			\
-  __v4sf __t1 = __builtin_ia32_unpcklps (__r2, __r3);			\
-  __v4sf __t2 = __builtin_ia32_unpckhps (__r0, __r1);			\
-  __v4sf __t3 = __builtin_ia32_unpckhps (__r2, __r3);			\
-  (row0) = __builtin_ia32_movlhps (__t0, __t1);				\
-  (row1) = __builtin_ia32_movhlps (__t1, __t0);				\
-  (row2) = __builtin_ia32_movlhps (__t2, __t3);				\
-  (row3) = __builtin_ia32_movhlps (__t3, __t2);				\
-} while (0)
-
-/* For backward source compatibility.  */
-# include <emmintrin.h>
-
-#ifdef __DISABLE_SSE__
-#undef __DISABLE_SSE__
-#pragma GCC pop_options
-#endif /* __DISABLE_SSE__ */
-
-/* The execution of the next instruction is delayed by an implementation
-   specific amount of time.  The instruction does not modify the
-   architectural state.  This is after the pop_options pragma because
-   it does not require SSE support in the processor--the encoding is a
-   nop on processors that do not support it.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_pause (void)
-{
-  __builtin_ia32_pause ();
-}
-
-#endif /* _XMMINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/xopintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/xopintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,844 +1,0 @@
-/* Copyright (C) 2007-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _X86INTRIN_H_INCLUDED
-# error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
-#endif
-
-#ifndef _XOPMMINTRIN_H_INCLUDED
-#define _XOPMMINTRIN_H_INCLUDED
-
-#include <fma4intrin.h>
-
-#ifndef __XOP__
-#pragma GCC push_options
-#pragma GCC target("xop")
-#define __DISABLE_XOP__
-#endif /* __XOP__ */
-
-/* Integer multiply/add intructions. */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
-{
-  return (__m128i) __builtin_ia32_vpmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
-{
-  return (__m128i) __builtin_ia32_vpmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
-{
-  return  (__m128i) __builtin_ia32_vpmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
-{
-  return  (__m128i) __builtin_ia32_vpmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
-{
-  return  (__m128i) __builtin_ia32_vpmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
-{
-  return  (__m128i) __builtin_ia32_vpmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
-{
-  return  (__m128i) __builtin_ia32_vpmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
-{
-  return  (__m128i) __builtin_ia32_vpmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
-{
-  return  (__m128i) __builtin_ia32_vpmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
-{
-  return  (__m128i) __builtin_ia32_vpmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
-{
-  return  (__m128i) __builtin_ia32_vpmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
-{
-  return  (__m128i) __builtin_ia32_vpmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
-}
-
-/* Packed Integer Horizontal Add and Subtract */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_haddw_epi8(__m128i __A)
-{
-  return  (__m128i) __builtin_ia32_vphaddbw ((__v16qi)__A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_haddd_epi8(__m128i __A)
-{
-  return  (__m128i) __builtin_ia32_vphaddbd ((__v16qi)__A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_haddq_epi8(__m128i __A)
-{
-  return  (__m128i) __builtin_ia32_vphaddbq ((__v16qi)__A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_haddd_epi16(__m128i __A)
-{
-  return  (__m128i) __builtin_ia32_vphaddwd ((__v8hi)__A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_haddq_epi16(__m128i __A)
-{
-  return  (__m128i) __builtin_ia32_vphaddwq ((__v8hi)__A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_haddq_epi32(__m128i __A)
-{
-  return  (__m128i) __builtin_ia32_vphadddq ((__v4si)__A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_haddw_epu8(__m128i __A)
-{
-  return  (__m128i) __builtin_ia32_vphaddubw ((__v16qi)__A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_haddd_epu8(__m128i __A)
-{
-  return  (__m128i) __builtin_ia32_vphaddubd ((__v16qi)__A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_haddq_epu8(__m128i __A)
-{
-  return  (__m128i) __builtin_ia32_vphaddubq ((__v16qi)__A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_haddd_epu16(__m128i __A)
-{
-  return  (__m128i) __builtin_ia32_vphadduwd ((__v8hi)__A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_haddq_epu16(__m128i __A)
-{
-  return  (__m128i) __builtin_ia32_vphadduwq ((__v8hi)__A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_haddq_epu32(__m128i __A)
-{
-  return  (__m128i) __builtin_ia32_vphaddudq ((__v4si)__A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_hsubw_epi8(__m128i __A)
-{
-  return  (__m128i) __builtin_ia32_vphsubbw ((__v16qi)__A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_hsubd_epi16(__m128i __A)
-{
-  return  (__m128i) __builtin_ia32_vphsubwd ((__v8hi)__A);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_hsubq_epi32(__m128i __A)
-{
-  return  (__m128i) __builtin_ia32_vphsubdq ((__v4si)__A);
-}
-
-/* Vector conditional move and permute */
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
-{
-  return  (__m128i) __builtin_ia32_vpcmov (__A, __B, __C);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
-{
-  return  (__m128i) __builtin_ia32_vpperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
-}
-
-/* Packed Integer Rotates and Shifts
-   Rotates - Non-Immediate form */
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rot_epi8(__m128i __A,  __m128i __B)
-{
-  return  (__m128i) __builtin_ia32_vprotb ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rot_epi16(__m128i __A,  __m128i __B)
-{
-  return  (__m128i) __builtin_ia32_vprotw ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rot_epi32(__m128i __A,  __m128i __B)
-{
-  return  (__m128i) __builtin_ia32_vprotd ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rot_epi64(__m128i __A,  __m128i __B)
-{
-  return (__m128i)  __builtin_ia32_vprotq ((__v2di)__A, (__v2di)__B);
-}
-
-/* Rotates - Immediate form */
-
-#ifdef __OPTIMIZE__
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_roti_epi8(__m128i __A, const int __B)
-{
-  return  (__m128i) __builtin_ia32_vprotbi ((__v16qi)__A, __B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_roti_epi16(__m128i __A, const int __B)
-{
-  return  (__m128i) __builtin_ia32_vprotwi ((__v8hi)__A, __B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_roti_epi32(__m128i __A, const int __B)
-{
-  return  (__m128i) __builtin_ia32_vprotdi ((__v4si)__A, __B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_roti_epi64(__m128i __A, const int __B)
-{
-  return  (__m128i) __builtin_ia32_vprotqi ((__v2di)__A, __B);
-}
-#else
-#define _mm_roti_epi8(A, N) \
-  ((__m128i) __builtin_ia32_vprotbi ((__v16qi)(__m128i)(A), (int)(N)))
-#define _mm_roti_epi16(A, N) \
-  ((__m128i) __builtin_ia32_vprotwi ((__v8hi)(__m128i)(A), (int)(N)))
-#define _mm_roti_epi32(A, N) \
-  ((__m128i) __builtin_ia32_vprotdi ((__v4si)(__m128i)(A), (int)(N)))
-#define _mm_roti_epi64(A, N) \
-  ((__m128i) __builtin_ia32_vprotqi ((__v2di)(__m128i)(A), (int)(N)))
-#endif
-
-/* Shifts */
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_shl_epi8(__m128i __A,  __m128i __B)
-{
-  return  (__m128i) __builtin_ia32_vpshlb ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_shl_epi16(__m128i __A,  __m128i __B)
-{
-  return  (__m128i) __builtin_ia32_vpshlw ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_shl_epi32(__m128i __A,  __m128i __B)
-{
-  return  (__m128i) __builtin_ia32_vpshld ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_shl_epi64(__m128i __A,  __m128i __B)
-{
-  return  (__m128i) __builtin_ia32_vpshlq ((__v2di)__A, (__v2di)__B);
-}
-
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sha_epi8(__m128i __A,  __m128i __B)
-{
-  return  (__m128i) __builtin_ia32_vpshab ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sha_epi16(__m128i __A,  __m128i __B)
-{
-  return  (__m128i) __builtin_ia32_vpshaw ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sha_epi32(__m128i __A,  __m128i __B)
-{
-  return  (__m128i) __builtin_ia32_vpshad ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sha_epi64(__m128i __A,  __m128i __B)
-{
-  return  (__m128i) __builtin_ia32_vpshaq ((__v2di)__A, (__v2di)__B);
-}
-
-/* Compare and Predicate Generation
-   pcom (integer, unsinged bytes) */
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comlt_epu8(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomltub ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comle_epu8(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomleub ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comgt_epu8(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomgtub ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comge_epu8(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomgeub ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comeq_epu8(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomequb ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comneq_epu8(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomnequb ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comfalse_epu8(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomfalseub ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comtrue_epu8(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomtrueub ((__v16qi)__A, (__v16qi)__B);
-}
-
-/*pcom (integer, unsinged words) */
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comlt_epu16(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomltuw ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comle_epu16(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomleuw ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comgt_epu16(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomgtuw ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comge_epu16(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomgeuw ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comeq_epu16(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomequw ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comneq_epu16(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomnequw ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comfalse_epu16(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomfalseuw ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comtrue_epu16(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomtrueuw ((__v8hi)__A, (__v8hi)__B);
-}
-
-/*pcom (integer, unsinged double words) */
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comlt_epu32(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomltud ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comle_epu32(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomleud ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comgt_epu32(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomgtud ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comge_epu32(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomgeud ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comeq_epu32(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomequd ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comneq_epu32(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomnequd ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comfalse_epu32(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomfalseud ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comtrue_epu32(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomtrueud ((__v4si)__A, (__v4si)__B);
-}
-
-/*pcom (integer, unsinged quad words) */
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comlt_epu64(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomltuq ((__v2di)__A, (__v2di)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comle_epu64(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomleuq ((__v2di)__A, (__v2di)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comgt_epu64(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomgtuq ((__v2di)__A, (__v2di)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comge_epu64(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomgeuq ((__v2di)__A, (__v2di)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comeq_epu64(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomequq ((__v2di)__A, (__v2di)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comneq_epu64(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomnequq ((__v2di)__A, (__v2di)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comfalse_epu64(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomfalseuq ((__v2di)__A, (__v2di)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comtrue_epu64(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomtrueuq ((__v2di)__A, (__v2di)__B);
-}
-
-/*pcom (integer, signed bytes) */
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comlt_epi8(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomltb ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comle_epi8(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomleb ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comgt_epi8(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomgtb ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comge_epi8(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomgeb ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comeq_epi8(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomeqb ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comneq_epi8(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomneqb ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comfalse_epi8(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomfalseb ((__v16qi)__A, (__v16qi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comtrue_epi8(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomtrueb ((__v16qi)__A, (__v16qi)__B);
-}
-
-/*pcom (integer, signed words) */
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comlt_epi16(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomltw ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comle_epi16(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomlew ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comgt_epi16(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomgtw ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comge_epi16(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomgew ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comeq_epi16(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomeqw ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comneq_epi16(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomneqw ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comfalse_epi16(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomfalsew ((__v8hi)__A, (__v8hi)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comtrue_epi16(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomtruew ((__v8hi)__A, (__v8hi)__B);
-}
-
-/*pcom (integer, signed double words) */
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comlt_epi32(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomltd ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comle_epi32(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomled ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comgt_epi32(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomgtd ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comge_epi32(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomged ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comeq_epi32(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomeqd ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comneq_epi32(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomneqd ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comfalse_epi32(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomfalsed ((__v4si)__A, (__v4si)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comtrue_epi32(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomtrued ((__v4si)__A, (__v4si)__B);
-}
-
-/*pcom (integer, signed quad words) */
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comlt_epi64(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomltq ((__v2di)__A, (__v2di)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comle_epi64(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomleq ((__v2di)__A, (__v2di)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comgt_epi64(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomgtq ((__v2di)__A, (__v2di)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comge_epi64(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomgeq ((__v2di)__A, (__v2di)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comeq_epi64(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomeqq ((__v2di)__A, (__v2di)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comneq_epi64(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomneqq ((__v2di)__A, (__v2di)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comfalse_epi64(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomfalseq ((__v2di)__A, (__v2di)__B);
-}
-
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comtrue_epi64(__m128i __A, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_vpcomtrueq ((__v2di)__A, (__v2di)__B);
-}
-
-/* FRCZ */
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_frcz_ps (__m128 __A)
-{
-  return (__m128) __builtin_ia32_vfrczps ((__v4sf)__A);
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_frcz_pd (__m128d __A)
-{
-  return (__m128d) __builtin_ia32_vfrczpd ((__v2df)__A);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_frcz_ss (__m128 __A, __m128 __B)
-{
-  return (__m128) __builtin_ia32_movss ((__v4sf)__A,
-					(__v4sf)
-					__builtin_ia32_vfrczss ((__v4sf)__B));
-}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_frcz_sd (__m128d __A, __m128d __B)
-{
-  return (__m128d) __builtin_ia32_movsd ((__v2df)__A,
-					 (__v2df)
-					 __builtin_ia32_vfrczsd ((__v2df)__B));
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_frcz_ps (__m256 __A)
-{
-  return (__m256) __builtin_ia32_vfrczps256 ((__v8sf)__A);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_frcz_pd (__m256d __A)
-{
-  return (__m256d) __builtin_ia32_vfrczpd256 ((__v4df)__A);
-}
-
-/* PERMIL2 */
-
-#ifdef __OPTIMIZE__
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_permute2_pd (__m128d __X, __m128d __Y, __m128i __C, const int __I)
-{
-  return (__m128d) __builtin_ia32_vpermil2pd ((__v2df)__X,
-					      (__v2df)__Y,
-					      (__v2di)__C,
-					      __I);
-}
-
-extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permute2_pd (__m256d __X, __m256d __Y, __m256i __C, const int __I)
-{
-  return (__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)__X,
-						 (__v4df)__Y,
-						 (__v4di)__C,
-						 __I);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_permute2_ps (__m128 __X, __m128 __Y, __m128i __C, const int __I)
-{
-  return (__m128) __builtin_ia32_vpermil2ps ((__v4sf)__X,
-					     (__v4sf)__Y,
-					     (__v4si)__C,
-					     __I);
-}
-
-extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I)
-{
-  return (__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)__X,
-						(__v8sf)__Y,
-						(__v8si)__C,
-						__I);
-}
-#else
-#define _mm_permute2_pd(X, Y, C, I)					\
-  ((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X),		\
-					(__v2df)(__m128d)(Y),		\
-					(__v2di)(__m128d)(C),		\
-					(int)(I)))
-
-#define _mm256_permute2_pd(X, Y, C, I)					\
-  ((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X),	\
-					   (__v4df)(__m256d)(Y),	\
-					   (__v4di)(__m256d)(C),	\
-					   (int)(I)))
-
-#define _mm_permute2_ps(X, Y, C, I)					\
-  ((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X),		\
-				       (__v4sf)(__m128)(Y),		\
-				       (__v4si)(__m128)(C),		\
-				       (int)(I)))
-
-#define _mm256_permute2_ps(X, Y, C, I)					\
-  ((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X),		\
-					  (__v8sf)(__m256)(Y),  	\
-					  (__v8si)(__m256)(C),		\
- 					  (int)(I)))
-#endif /* __OPTIMIZE__ */
-
-#ifdef __DISABLE_XOP__
-#undef __DISABLE_XOP__
-#pragma GCC pop_options
-#endif /* __DISABLE_XOP__ */
-
-#endif /* _XOPMMINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/xsavecintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/xsavecintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,58 +1,0 @@
-/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if !defined _X86INTRIN_H_INCLUDED
-# error "Never use <xsavecintrin.h> directly; include <x86intrin.h> instead."
-#endif
-
-#ifndef _XSAVECINTRIN_H_INCLUDED
-#define _XSAVECINTRIN_H_INCLUDED
-
-#ifndef __XSAVEC__
-#pragma GCC push_options
-#pragma GCC target("xsavec")
-#define __DISABLE_XSAVEC__
-#endif /* __XSAVEC__ */
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_xsavec (void *__P, long long __M)
-{
-  __builtin_ia32_xsavec (__P, __M);
-}
-
-#ifdef __x86_64__
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_xsavec64 (void *__P, long long __M)
-{
-  __builtin_ia32_xsavec64 (__P, __M);
-}
-#endif
-
-#ifdef __DISABLE_XSAVEC__
-#undef __DISABLE_XSAVEC__
-#pragma GCC pop_options
-#endif /* __DISABLE_XSAVEC__ */
-
-#endif /* _XSAVECINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/xsaveintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/xsaveintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,72 +1,0 @@
-/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* #if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED */
-/* # error "Never use <xsaveintrin.h> directly; include <x86intrin.h> instead." */
-/* #endif */
-
-#ifndef _XSAVEINTRIN_H_INCLUDED
-#define _XSAVEINTRIN_H_INCLUDED
-
-#ifndef __XSAVE__
-#pragma GCC push_options
-#pragma GCC target("xsave")
-#define __DISABLE_XSAVE__
-#endif /* __XSAVE__ */
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_xsave (void *__P, long long __M)
-{
-  return __builtin_ia32_xsave (__P, __M);
-}
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_xrstor (void *__P, long long __M)
-{
-  return __builtin_ia32_xrstor (__P, __M);
-}
-
-#ifdef __x86_64__
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_xsave64 (void *__P, long long __M)
-{
-  return __builtin_ia32_xsave64 (__P, __M);
-}
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_xrstor64 (void *__P, long long __M)
-{
-  return __builtin_ia32_xrstor64 (__P, __M);
-}
-#endif
-
-#ifdef __DISABLE_XSAVE__
-#undef __DISABLE_XSAVE__
-#pragma GCC pop_options
-#endif /* __DISABLE_XSAVE__ */
-
-#endif /* _XSAVEINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/xsaveoptintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/xsaveoptintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,58 +1,0 @@
-/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* #if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED */
-/* # error "Never use <xsaveoptintrin.h> directly; include <x86intrin.h> instead." */
-/* #endif */
-
-#ifndef _XSAVEOPTINTRIN_H_INCLUDED
-#define _XSAVEOPTINTRIN_H_INCLUDED
-
-#ifndef __XSAVEOPT__
-#pragma GCC push_options
-#pragma GCC target("xsaveopt")
-#define __DISABLE_XSAVEOPT__
-#endif /* __XSAVEOPT__ */
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_xsaveopt (void *__P, long long __M)
-{
-  return __builtin_ia32_xsaveopt (__P, __M);
-}
-
-#ifdef __x86_64__
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_xsaveopt64 (void *__P, long long __M)
-{
-  return __builtin_ia32_xsaveopt64 (__P, __M);
-}
-#endif
-
-#ifdef __DISABLE_XSAVEOPT__
-#undef __DISABLE_XSAVEOPT__
-#pragma GCC pop_options
-#endif /* __DISABLE_XSAVEOPT__ */
-
-#endif /* _XSAVEOPTINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/xsavesintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/xsavesintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,72 +1,0 @@
-/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if !defined _X86INTRIN_H_INCLUDED
-# error "Never use <xsavesintrin.h> directly; include <x86intrin.h> instead."
-#endif
-
-#ifndef _XSAVESINTRIN_H_INCLUDED
-#define _XSAVESINTRIN_H_INCLUDED
-
-#ifndef __XSAVES__
-#pragma GCC push_options
-#pragma GCC target("xsaves")
-#define __DISABLE_XSAVES__
-#endif /* __XSAVES__ */
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_xsaves (void *__P, long long __M)
-{
-  __builtin_ia32_xsaves (__P, __M);
-}
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_xrstors (void *__P, long long __M)
-{
-  __builtin_ia32_xrstors (__P, __M);
-}
-
-#ifdef __x86_64__
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_xrstors64 (void *__P, long long __M)
-{
-  __builtin_ia32_xrstors64 (__P, __M);
-}
-
-extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_xsaves64 (void *__P, long long __M)
-{
-  __builtin_ia32_xsaves64 (__P, __M);
-}
-#endif
-
-#ifdef __DISABLE_XSAVES__
-#undef __DISABLE_XSAVES__
-#pragma GCC pop_options
-#endif /* __DISABLE_XSAVES__ */
-
-#endif /* _XSAVESINTRIN_H_INCLUDED */
Index: Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/xtestintrin.h
===================================================================
--- Daodan/MinGW/lib/gcc/mingw32/5.3.0/include/xtestintrin.h	(revision 1046)
+++ 	(revision )
@@ -1,51 +1,0 @@
-/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   GCC is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-# error "Never use <xtestintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _XTESTINTRIN_H_INCLUDED
-#define _XTESTINTRIN_H_INCLUDED
-
-#ifndef __RTM__
-#pragma GCC push_options
-#pragma GCC target("rtm")
-#define __DISABLE_RTM__
-#endif /* __RTM__ */
-
-/* Return non-zero if the instruction executes inside an RTM or HLE code
-   region.  Return zero otherwise.   */
-extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_xtest (void)
-{
-  return __builtin_ia32_xtest ();
-}
-
-#ifdef __DISABLE_RTM__
-#undef __DISABLE_RTM__
-#pragma GCC pop_options
-#endif /* __DISABLE_RTM__ */
-
-#endif /* _XTESTINTRIN_H_INCLUDED */
