| 1 | // Optimizations for random number extensions, x86 version -*- C++ -*-
 | 
|---|
| 2 | 
 | 
|---|
| 3 | // Copyright (C) 2012-2021 Free Software Foundation, Inc.
 | 
|---|
| 4 | //
 | 
|---|
| 5 | // This file is part of the GNU ISO C++ Library.  This library is free
 | 
|---|
| 6 | // software; you can redistribute it and/or modify it under the
 | 
|---|
| 7 | // terms of the GNU General Public License as published by the
 | 
|---|
| 8 | // Free Software Foundation; either version 3, or (at your option)
 | 
|---|
| 9 | // any later version.
 | 
|---|
| 10 | 
 | 
|---|
| 11 | // This library is distributed in the hope that it will be useful,
 | 
|---|
| 12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|---|
| 13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|---|
| 14 | // GNU General Public License for more details.
 | 
|---|
| 15 | 
 | 
|---|
| 16 | // Under Section 7 of GPL version 3, you are granted additional
 | 
|---|
| 17 | // permissions described in the GCC Runtime Library Exception, version
 | 
|---|
| 18 | // 3.1, as published by the Free Software Foundation.
 | 
|---|
| 19 | 
 | 
|---|
| 20 | // You should have received a copy of the GNU General Public License and
 | 
|---|
| 21 | // a copy of the GCC Runtime Library Exception along with this program;
 | 
|---|
| 22 | // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 | 
|---|
| 23 | // <http://www.gnu.org/licenses/>.
 | 
|---|
| 24 | 
 | 
|---|
| 25 | /** @file ext/opt_random.h
 | 
|---|
| 26 |  *  This is an internal header file, included by other library headers.
 | 
|---|
| 27 |  *  Do not attempt to use it directly. @headername{ext/random}
 | 
|---|
| 28 |  */
 | 
|---|
| 29 | 
 | 
|---|
| 30 | #ifndef _EXT_OPT_RANDOM_H
 | 
|---|
| 31 | #define _EXT_OPT_RANDOM_H 1
 | 
|---|
| 32 | 
 | 
|---|
| 33 | #pragma GCC system_header
 | 
|---|
| 34 | 
 | 
|---|
| 35 | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 | 
|---|
| 36 | 
 | 
|---|
| 37 | #ifdef __SSE2__
 | 
|---|
| 38 | 
 | 
|---|
| 39 | namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
 | 
|---|
| 40 | {
 | 
|---|
| 41 | _GLIBCXX_BEGIN_NAMESPACE_VERSION
 | 
|---|
| 42 | 
 | 
|---|
| 43 |   namespace {
 | 
|---|
| 44 | 
 | 
|---|
| 45 |     template<size_t __sl1, size_t __sl2, size_t __sr1, size_t __sr2,
 | 
|---|
| 46 |              uint32_t __msk1, uint32_t __msk2, uint32_t __msk3, uint32_t __msk4>
 | 
|---|
| 47 |       inline __m128i __sse2_recursion(__m128i __a, __m128i __b,
 | 
|---|
| 48 |                                       __m128i __c, __m128i __d)
 | 
|---|
| 49 |       {
 | 
|---|
| 50 |         __m128i __y = _mm_srli_epi32(__b, __sr1);
 | 
|---|
| 51 |         __m128i __z = _mm_srli_si128(__c, __sr2);
 | 
|---|
| 52 |         __m128i __v = _mm_slli_epi32(__d, __sl1);
 | 
|---|
| 53 |         __z = _mm_xor_si128(__z, __a);
 | 
|---|
| 54 |         __z = _mm_xor_si128(__z, __v);
 | 
|---|
| 55 |         __m128i __x = _mm_slli_si128(__a, __sl2);
 | 
|---|
| 56 |         __y = _mm_and_si128(__y, _mm_set_epi32(__msk4, __msk3, __msk2, __msk1));
 | 
|---|
| 57 |         __z = _mm_xor_si128(__z, __x);
 | 
|---|
| 58 |         return _mm_xor_si128(__z, __y);
 | 
|---|
| 59 |       }
 | 
|---|
| 60 | 
 | 
|---|
| 61 |   }
 | 
|---|
| 62 | 
 | 
|---|
| 63 | 
 | 
|---|
| 64 | #define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_GEN_READ  1
 | 
|---|
| 65 |   template<typename _UIntType, size_t __m,
 | 
|---|
| 66 |            size_t __pos1, size_t __sl1, size_t __sl2,
 | 
|---|
| 67 |            size_t __sr1, size_t __sr2,
 | 
|---|
| 68 |            uint32_t __msk1, uint32_t __msk2,
 | 
|---|
| 69 |            uint32_t __msk3, uint32_t __msk4,
 | 
|---|
| 70 |            uint32_t __parity1, uint32_t __parity2,
 | 
|---|
| 71 |            uint32_t __parity3, uint32_t __parity4>
 | 
|---|
| 72 |     void simd_fast_mersenne_twister_engine<_UIntType, __m,
 | 
|---|
| 73 |                                            __pos1, __sl1, __sl2, __sr1, __sr2,
 | 
|---|
| 74 |                                            __msk1, __msk2, __msk3, __msk4,
 | 
|---|
| 75 |                                            __parity1, __parity2, __parity3,
 | 
|---|
| 76 |                                            __parity4>::
 | 
|---|
| 77 |     _M_gen_rand(void)
 | 
|---|
| 78 |     {
 | 
|---|
| 79 |       __m128i __r1 = _mm_load_si128(&_M_state[_M_nstate - 2]);
 | 
|---|
| 80 |       __m128i __r2 = _mm_load_si128(&_M_state[_M_nstate - 1]);
 | 
|---|
| 81 | 
 | 
|---|
| 82 |       size_t __i;
 | 
|---|
| 83 |       for (__i = 0; __i < _M_nstate - __pos1; ++__i)
 | 
|---|
| 84 |         {
 | 
|---|
| 85 |           __m128i __r = __sse2_recursion<__sl1, __sl2, __sr1, __sr2,
 | 
|---|
| 86 |                                          __msk1, __msk2, __msk3, __msk4>
 | 
|---|
| 87 |             (_M_state[__i], _M_state[__i + __pos1], __r1, __r2);
 | 
|---|
| 88 |           _mm_store_si128(&_M_state[__i], __r);
 | 
|---|
| 89 |           __r1 = __r2;
 | 
|---|
| 90 |           __r2 = __r;
 | 
|---|
| 91 |         }
 | 
|---|
| 92 |       for (; __i < _M_nstate; ++__i)
 | 
|---|
| 93 |         {
 | 
|---|
| 94 |           __m128i __r = __sse2_recursion<__sl1, __sl2, __sr1, __sr2,
 | 
|---|
| 95 |                                          __msk1, __msk2, __msk3, __msk4>
 | 
|---|
| 96 |             (_M_state[__i], _M_state[__i + __pos1 - _M_nstate], __r1, __r2);
 | 
|---|
| 97 |           _mm_store_si128(&_M_state[__i], __r);
 | 
|---|
| 98 |           __r1 = __r2;
 | 
|---|
| 99 |           __r2 = __r;
 | 
|---|
| 100 |         }
 | 
|---|
| 101 | 
 | 
|---|
| 102 |       _M_pos = 0;
 | 
|---|
| 103 |     }
 | 
|---|
| 104 | 
 | 
|---|
| 105 | 
 | 
|---|
| 106 | #define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_OPERATOREQUAL     1
 | 
|---|
| 107 |   template<typename _UIntType, size_t __m,
 | 
|---|
| 108 |            size_t __pos1, size_t __sl1, size_t __sl2,
 | 
|---|
| 109 |            size_t __sr1, size_t __sr2,
 | 
|---|
| 110 |            uint32_t __msk1, uint32_t __msk2,
 | 
|---|
| 111 |            uint32_t __msk3, uint32_t __msk4,
 | 
|---|
| 112 |            uint32_t __parity1, uint32_t __parity2,
 | 
|---|
| 113 |            uint32_t __parity3, uint32_t __parity4>
 | 
|---|
| 114 |     bool
 | 
|---|
| 115 |     operator==(const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType,
 | 
|---|
| 116 |                __m, __pos1, __sl1, __sl2, __sr1, __sr2,
 | 
|---|
| 117 |                __msk1, __msk2, __msk3, __msk4,
 | 
|---|
| 118 |                __parity1, __parity2, __parity3, __parity4>& __lhs,
 | 
|---|
| 119 |                const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType,
 | 
|---|
| 120 |                __m, __pos1, __sl1, __sl2, __sr1, __sr2,
 | 
|---|
| 121 |                __msk1, __msk2, __msk3, __msk4,
 | 
|---|
| 122 |                __parity1, __parity2, __parity3, __parity4>& __rhs)
 | 
|---|
| 123 |     {
 | 
|---|
| 124 |       __m128i __res = _mm_cmpeq_epi8(__lhs._M_state[0], __rhs._M_state[0]);
 | 
|---|
| 125 |       for (size_t __i = 1; __i < __lhs._M_nstate; ++__i)
 | 
|---|
| 126 |         __res = _mm_and_si128(__res, _mm_cmpeq_epi8(__lhs._M_state[__i],
 | 
|---|
| 127 |                                                     __rhs._M_state[__i]));
 | 
|---|
| 128 |       return (_mm_movemask_epi8(__res) == 0xffff
 | 
|---|
| 129 |               && __lhs._M_pos == __rhs._M_pos);
 | 
|---|
| 130 |     }
 | 
|---|
| 131 | 
 | 
|---|
| 132 | 
 | 
|---|
| 133 | _GLIBCXX_END_NAMESPACE_VERSION
 | 
|---|
| 134 | } // namespace
 | 
|---|
| 135 | 
 | 
|---|
| 136 | #endif // __SSE2__
 | 
|---|
| 137 | 
 | 
|---|
| 138 | #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 | 
|---|
| 139 | 
 | 
|---|
| 140 | #endif // _EXT_OPT_RANDOM_H
 | 
|---|