[567] | 1 | /**
|
---|
| 2 | * @file dSFMT.c
|
---|
| 3 | * @brief double precision SIMD-oriented Fast Mersenne Twister (dSFMT)
|
---|
| 4 | * based on IEEE 754 format.
|
---|
| 5 | *
|
---|
| 6 | * @author Mutsuo Saito (Hiroshima University)
|
---|
| 7 | * @author Makoto Matsumoto (Hiroshima University)
|
---|
| 8 | *
|
---|
| 9 | * Copyright (C) 2007,2008 Mutsuo Saito, Makoto Matsumoto and Hiroshima
|
---|
| 10 | * University. All rights reserved.
|
---|
| 11 | *
|
---|
| 12 | * The new BSD License is applied to this software, see LICENSE.txt
|
---|
| 13 | */
|
---|
| 14 | #include <stdio.h>
|
---|
| 15 | #include <string.h>
|
---|
| 16 | #include <stdlib.h>
|
---|
| 17 | #include "dSFMT-params.h"
|
---|
| 18 |
|
---|
| 19 | /** dsfmt internal state vector */
|
---|
| 20 | dsfmt_t dsfmt_global_data;
|
---|
| 21 | /** dsfmt mexp for check */
|
---|
| 22 | static const int dsfmt_mexp = DSFMT_MEXP;
|
---|
| 23 |
|
---|
| 24 | /*----------------
|
---|
| 25 | STATIC FUNCTIONS
|
---|
| 26 | ----------------*/
|
---|
| 27 | inline static uint32_t ini_func1(uint32_t x);
|
---|
| 28 | inline static uint32_t ini_func2(uint32_t x);
|
---|
| 29 | inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array,
|
---|
| 30 | int size);
|
---|
| 31 | inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array,
|
---|
| 32 | int size);
|
---|
| 33 | inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array,
|
---|
| 34 | int size);
|
---|
| 35 | inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array,
|
---|
| 36 | int size);
|
---|
| 37 | inline static int idxof(int i);
|
---|
| 38 | static void initial_mask(dsfmt_t *dsfmt);
|
---|
| 39 | static void period_certification(dsfmt_t *dsfmt);
|
---|
| 40 |
|
---|
| 41 | #if defined(HAVE_SSE2)
|
---|
| 42 | # include <emmintrin.h>
|
---|
| 43 | /** mask data for sse2 */
|
---|
| 44 | static __m128i sse2_param_mask;
|
---|
| 45 | /** 1 in 64bit for sse2 */
|
---|
| 46 | static __m128i sse2_int_one;
|
---|
| 47 | /** 2.0 double for sse2 */
|
---|
| 48 | static __m128d sse2_double_two;
|
---|
| 49 | /** -1.0 double for sse2 */
|
---|
| 50 | static __m128d sse2_double_m_one;
|
---|
| 51 |
|
---|
| 52 | static void setup_const(void);
|
---|
| 53 | #endif
|
---|
| 54 |
|
---|
/**
 * Maps an index into the state, viewed as an array of 32-bit words, to the
 * index that is actually used for the access. When DSFMT_BIG_ENDIAN is
 * defined the lowest bit is flipped, which swaps the two 32-bit halves of
 * each 64-bit word so that the layout mimics a little-endian machine;
 * otherwise the index is returned unchanged.
 * @param i logical 32-bit word index
 * @return index to use for the access
 */
inline static int idxof(int i) {
#if defined(DSFMT_BIG_ENDIAN)
    return i ^ 1;
#else
    return i;
#endif
}
| 68 |
|
---|
| 69 | /**
|
---|
| 70 | * This function represents the recursion formula.
|
---|
| 71 | * @param r output
|
---|
| 72 | * @param a a 128-bit part of the internal state array
|
---|
| 73 | * @param b a 128-bit part of the internal state array
|
---|
| 74 | * @param lung a 128-bit part of the internal state array
|
---|
| 75 | */
|
---|
| 76 | #if defined(HAVE_ALTIVEC)
|
---|
| 77 | inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b,
|
---|
| 78 | w128_t *lung) {
|
---|
| 79 | const vector unsigned char sl1 = ALTI_SL1;
|
---|
| 80 | const vector unsigned char sl1_perm = ALTI_SL1_PERM;
|
---|
| 81 | const vector unsigned int sl1_msk = ALTI_SL1_MSK;
|
---|
| 82 | const vector unsigned char sr1 = ALTI_SR;
|
---|
| 83 | const vector unsigned char sr1_perm = ALTI_SR_PERM;
|
---|
| 84 | const vector unsigned int sr1_msk = ALTI_SR_MSK;
|
---|
| 85 | const vector unsigned char perm = ALTI_PERM;
|
---|
| 86 | const vector unsigned int msk1 = ALTI_MSK;
|
---|
| 87 | vector unsigned int w, x, y, z;
|
---|
| 88 |
|
---|
| 89 | z = a->s;
|
---|
| 90 | w = lung->s;
|
---|
| 91 | x = vec_perm(w, (vector unsigned int)perm, perm);
|
---|
| 92 | y = vec_perm(z, sl1_perm, sl1_perm);
|
---|
| 93 | y = vec_sll(y, sl1);
|
---|
| 94 | y = vec_and(y, sl1_msk);
|
---|
| 95 | w = vec_xor(x, b->s);
|
---|
| 96 | w = vec_xor(w, y);
|
---|
| 97 | x = vec_perm(w, (vector unsigned int)sr1_perm, sr1_perm);
|
---|
| 98 | x = vec_srl(x, sr1);
|
---|
| 99 | x = vec_and(x, sr1_msk);
|
---|
| 100 | y = vec_and(w, msk1);
|
---|
| 101 | z = vec_xor(z, y);
|
---|
| 102 | r->s = vec_xor(z, x);
|
---|
| 103 | lung->s = w;
|
---|
| 104 | }
|
---|
| 105 | #elif defined(HAVE_SSE2)
|
---|
| 106 | /**
|
---|
| 107 | * This function setup some constant variables for SSE2.
|
---|
| 108 | */
|
---|
| 109 | static void setup_const(void) {
|
---|
| 110 | static int first = 1;
|
---|
| 111 | if (!first) {
|
---|
| 112 | return;
|
---|
| 113 | }
|
---|
| 114 | sse2_param_mask = _mm_set_epi32(DSFMT_MSK32_3, DSFMT_MSK32_4,
|
---|
| 115 | DSFMT_MSK32_1, DSFMT_MSK32_2);
|
---|
| 116 | sse2_int_one = _mm_set_epi32(0, 1, 0, 1);
|
---|
| 117 | sse2_double_two = _mm_set_pd(2.0, 2.0);
|
---|
| 118 | sse2_double_m_one = _mm_set_pd(-1.0, -1.0);
|
---|
| 119 | first = 0;
|
---|
| 120 | }
|
---|
| 121 |
|
---|
| 122 | /**
|
---|
| 123 | * This function represents the recursion formula.
|
---|
| 124 | * @param r output 128-bit
|
---|
| 125 | * @param a a 128-bit part of the internal state array
|
---|
| 126 | * @param b a 128-bit part of the internal state array
|
---|
| 127 | * @param d a 128-bit part of the internal state array (I/O)
|
---|
| 128 | */
|
---|
| 129 | inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *u) {
|
---|
| 130 | __m128i v, w, x, y, z;
|
---|
| 131 |
|
---|
| 132 | x = a->si;
|
---|
| 133 | z = _mm_slli_epi64(x, DSFMT_SL1);
|
---|
| 134 | y = _mm_shuffle_epi32(u->si, SSE2_SHUFF);
|
---|
| 135 | z = _mm_xor_si128(z, b->si);
|
---|
| 136 | y = _mm_xor_si128(y, z);
|
---|
| 137 |
|
---|
| 138 | v = _mm_srli_epi64(y, DSFMT_SR);
|
---|
| 139 | w = _mm_and_si128(y, sse2_param_mask);
|
---|
| 140 | v = _mm_xor_si128(v, x);
|
---|
| 141 | v = _mm_xor_si128(v, w);
|
---|
| 142 | r->si = v;
|
---|
| 143 | u->si = y;
|
---|
| 144 | }
|
---|
| 145 | #else /* standard C */
|
---|
| 146 | /**
|
---|
| 147 | * This function represents the recursion formula.
|
---|
| 148 | * @param r output 128-bit
|
---|
| 149 | * @param a a 128-bit part of the internal state array
|
---|
| 150 | * @param b a 128-bit part of the internal state array
|
---|
| 151 | * @param lung a 128-bit part of the internal state array (I/O)
|
---|
| 152 | */
|
---|
| 153 | inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b,
|
---|
| 154 | w128_t *lung) {
|
---|
| 155 | uint64_t t0, t1, L0, L1;
|
---|
| 156 |
|
---|
| 157 | t0 = a->u[0];
|
---|
| 158 | t1 = a->u[1];
|
---|
| 159 | L0 = lung->u[0];
|
---|
| 160 | L1 = lung->u[1];
|
---|
| 161 | lung->u[0] = (t0 << DSFMT_SL1) ^ (L1 >> 32) ^ (L1 << 32) ^ b->u[0];
|
---|
| 162 | lung->u[1] = (t1 << DSFMT_SL1) ^ (L0 >> 32) ^ (L0 << 32) ^ b->u[1];
|
---|
| 163 | r->u[0] = (lung->u[0] >> DSFMT_SR) ^ (lung->u[0] & DSFMT_MSK1) ^ t0;
|
---|
| 164 | r->u[1] = (lung->u[1] >> DSFMT_SR) ^ (lung->u[1] & DSFMT_MSK2) ^ t1;
|
---|
| 165 | }
|
---|
| 166 | #endif
|
---|
| 167 |
|
---|
| 168 | #if defined(HAVE_SSE2)
|
---|
| 169 | /**
|
---|
| 170 | * This function converts the double precision floating point numbers which
|
---|
| 171 | * distribute uniformly in the range [1, 2) to those which distribute uniformly
|
---|
| 172 | * in the range [0, 1).
|
---|
| 173 | * @param w 128bit stracture of double precision floating point numbers (I/O)
|
---|
| 174 | */
|
---|
| 175 | inline static void convert_c0o1(w128_t *w) {
|
---|
| 176 | w->sd = _mm_add_pd(w->sd, sse2_double_m_one);
|
---|
| 177 | }
|
---|
| 178 |
|
---|
| 179 | /**
|
---|
| 180 | * This function converts the double precision floating point numbers which
|
---|
| 181 | * distribute uniformly in the range [1, 2) to those which distribute uniformly
|
---|
| 182 | * in the range (0, 1].
|
---|
| 183 | * @param w 128bit stracture of double precision floating point numbers (I/O)
|
---|
| 184 | */
|
---|
| 185 | inline static void convert_o0c1(w128_t *w) {
|
---|
| 186 | w->sd = _mm_sub_pd(sse2_double_two, w->sd);
|
---|
| 187 | }
|
---|
| 188 |
|
---|
| 189 | /**
|
---|
| 190 | * This function converts the double precision floating point numbers which
|
---|
| 191 | * distribute uniformly in the range [1, 2) to those which distribute uniformly
|
---|
| 192 | * in the range (0, 1).
|
---|
| 193 | * @param w 128bit stracture of double precision floating point numbers (I/O)
|
---|
| 194 | */
|
---|
| 195 | inline static void convert_o0o1(w128_t *w) {
|
---|
| 196 | w->si = _mm_or_si128(w->si, sse2_int_one);
|
---|
| 197 | w->sd = _mm_add_pd(w->sd, sse2_double_m_one);
|
---|
| 198 | }
|
---|
| 199 | #else /* standard C and altivec */
|
---|
| 200 | /**
|
---|
| 201 | * This function converts the double precision floating point numbers which
|
---|
| 202 | * distribute uniformly in the range [1, 2) to those which distribute uniformly
|
---|
| 203 | * in the range [0, 1).
|
---|
| 204 | * @param w 128bit stracture of double precision floating point numbers (I/O)
|
---|
| 205 | */
|
---|
| 206 | inline static void convert_c0o1(w128_t *w) {
|
---|
| 207 | w->d[0] -= 1.0;
|
---|
| 208 | w->d[1] -= 1.0;
|
---|
| 209 | }
|
---|
| 210 |
|
---|
| 211 | /**
|
---|
| 212 | * This function converts the double precision floating point numbers which
|
---|
| 213 | * distribute uniformly in the range [1, 2) to those which distribute uniformly
|
---|
| 214 | * in the range (0, 1].
|
---|
| 215 | * @param w 128bit stracture of double precision floating point numbers (I/O)
|
---|
| 216 | */
|
---|
| 217 | inline static void convert_o0c1(w128_t *w) {
|
---|
| 218 | w->d[0] = 2.0 - w->d[0];
|
---|
| 219 | w->d[1] = 2.0 - w->d[1];
|
---|
| 220 | }
|
---|
| 221 |
|
---|
| 222 | /**
|
---|
| 223 | * This function converts the double precision floating point numbers which
|
---|
| 224 | * distribute uniformly in the range [1, 2) to those which distribute uniformly
|
---|
| 225 | * in the range (0, 1).
|
---|
| 226 | * @param w 128bit stracture of double precision floating point numbers (I/O)
|
---|
| 227 | */
|
---|
| 228 | inline static void convert_o0o1(w128_t *w) {
|
---|
| 229 | w->u[0] |= 1;
|
---|
| 230 | w->u[1] |= 1;
|
---|
| 231 | w->d[0] -= 1.0;
|
---|
| 232 | w->d[1] -= 1.0;
|
---|
| 233 | }
|
---|
| 234 | #endif
|
---|
| 235 |
|
---|
| 236 | /**
|
---|
| 237 | * This function fills the user-specified array with double precision
|
---|
| 238 | * floating point pseudorandom numbers of the IEEE 754 format.
|
---|
| 239 | * @param dsfmt dsfmt state vector.
|
---|
| 240 | * @param array an 128-bit array to be filled by pseudorandom numbers.
|
---|
| 241 | * @param size number of 128-bit pseudorandom numbers to be generated.
|
---|
| 242 | */
|
---|
| 243 | inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array,
|
---|
| 244 | int size) {
|
---|
| 245 | int i, j;
|
---|
| 246 | w128_t lung;
|
---|
| 247 |
|
---|
| 248 | lung = dsfmt->status[DSFMT_N];
|
---|
| 249 | do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
|
---|
| 250 | &lung);
|
---|
| 251 | for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
|
---|
| 252 | do_recursion(&array[i], &dsfmt->status[i],
|
---|
| 253 | &dsfmt->status[i + DSFMT_POS1], &lung);
|
---|
| 254 | }
|
---|
| 255 | for (; i < DSFMT_N; i++) {
|
---|
| 256 | do_recursion(&array[i], &dsfmt->status[i],
|
---|
| 257 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
| 258 | }
|
---|
| 259 | for (; i < size - DSFMT_N; i++) {
|
---|
| 260 | do_recursion(&array[i], &array[i - DSFMT_N],
|
---|
| 261 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
| 262 | }
|
---|
| 263 | for (j = 0; j < 2 * DSFMT_N - size; j++) {
|
---|
| 264 | dsfmt->status[j] = array[j + size - DSFMT_N];
|
---|
| 265 | }
|
---|
| 266 | for (; i < size; i++, j++) {
|
---|
| 267 | do_recursion(&array[i], &array[i - DSFMT_N],
|
---|
| 268 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
| 269 | dsfmt->status[j] = array[i];
|
---|
| 270 | }
|
---|
| 271 | dsfmt->status[DSFMT_N] = lung;
|
---|
| 272 | }
|
---|
| 273 |
|
---|
| 274 | /**
|
---|
| 275 | * This function fills the user-specified array with double precision
|
---|
| 276 | * floating point pseudorandom numbers of the IEEE 754 format.
|
---|
| 277 | * @param dsfmt dsfmt state vector.
|
---|
| 278 | * @param array an 128-bit array to be filled by pseudorandom numbers.
|
---|
| 279 | * @param size number of 128-bit pseudorandom numbers to be generated.
|
---|
| 280 | */
|
---|
| 281 | inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array,
|
---|
| 282 | int size) {
|
---|
| 283 | int i, j;
|
---|
| 284 | w128_t lung;
|
---|
| 285 |
|
---|
| 286 | lung = dsfmt->status[DSFMT_N];
|
---|
| 287 | do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
|
---|
| 288 | &lung);
|
---|
| 289 | for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
|
---|
| 290 | do_recursion(&array[i], &dsfmt->status[i],
|
---|
| 291 | &dsfmt->status[i + DSFMT_POS1], &lung);
|
---|
| 292 | }
|
---|
| 293 | for (; i < DSFMT_N; i++) {
|
---|
| 294 | do_recursion(&array[i], &dsfmt->status[i],
|
---|
| 295 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
| 296 | }
|
---|
| 297 | for (; i < size - DSFMT_N; i++) {
|
---|
| 298 | do_recursion(&array[i], &array[i - DSFMT_N],
|
---|
| 299 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
| 300 | convert_c0o1(&array[i - DSFMT_N]);
|
---|
| 301 | }
|
---|
| 302 | for (j = 0; j < 2 * DSFMT_N - size; j++) {
|
---|
| 303 | dsfmt->status[j] = array[j + size - DSFMT_N];
|
---|
| 304 | }
|
---|
| 305 | for (; i < size; i++, j++) {
|
---|
| 306 | do_recursion(&array[i], &array[i - DSFMT_N],
|
---|
| 307 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
| 308 | dsfmt->status[j] = array[i];
|
---|
| 309 | convert_c0o1(&array[i - DSFMT_N]);
|
---|
| 310 | }
|
---|
| 311 | for (i = size - DSFMT_N; i < size; i++) {
|
---|
| 312 | convert_c0o1(&array[i]);
|
---|
| 313 | }
|
---|
| 314 | dsfmt->status[DSFMT_N] = lung;
|
---|
| 315 | }
|
---|
| 316 |
|
---|
| 317 | /**
|
---|
| 318 | * This function fills the user-specified array with double precision
|
---|
| 319 | * floating point pseudorandom numbers of the IEEE 754 format.
|
---|
| 320 | * @param dsfmt dsfmt state vector.
|
---|
| 321 | * @param array an 128-bit array to be filled by pseudorandom numbers.
|
---|
| 322 | * @param size number of 128-bit pseudorandom numbers to be generated.
|
---|
| 323 | */
|
---|
| 324 | inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array,
|
---|
| 325 | int size) {
|
---|
| 326 | int i, j;
|
---|
| 327 | w128_t lung;
|
---|
| 328 |
|
---|
| 329 | lung = dsfmt->status[DSFMT_N];
|
---|
| 330 | do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
|
---|
| 331 | &lung);
|
---|
| 332 | for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
|
---|
| 333 | do_recursion(&array[i], &dsfmt->status[i],
|
---|
| 334 | &dsfmt->status[i + DSFMT_POS1], &lung);
|
---|
| 335 | }
|
---|
| 336 | for (; i < DSFMT_N; i++) {
|
---|
| 337 | do_recursion(&array[i], &dsfmt->status[i],
|
---|
| 338 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
| 339 | }
|
---|
| 340 | for (; i < size - DSFMT_N; i++) {
|
---|
| 341 | do_recursion(&array[i], &array[i - DSFMT_N],
|
---|
| 342 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
| 343 | convert_o0o1(&array[i - DSFMT_N]);
|
---|
| 344 | }
|
---|
| 345 | for (j = 0; j < 2 * DSFMT_N - size; j++) {
|
---|
| 346 | dsfmt->status[j] = array[j + size - DSFMT_N];
|
---|
| 347 | }
|
---|
| 348 | for (; i < size; i++, j++) {
|
---|
| 349 | do_recursion(&array[i], &array[i - DSFMT_N],
|
---|
| 350 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
| 351 | dsfmt->status[j] = array[i];
|
---|
| 352 | convert_o0o1(&array[i - DSFMT_N]);
|
---|
| 353 | }
|
---|
| 354 | for (i = size - DSFMT_N; i < size; i++) {
|
---|
| 355 | convert_o0o1(&array[i]);
|
---|
| 356 | }
|
---|
| 357 | dsfmt->status[DSFMT_N] = lung;
|
---|
| 358 | }
|
---|
| 359 |
|
---|
| 360 | /**
|
---|
| 361 | * This function fills the user-specified array with double precision
|
---|
| 362 | * floating point pseudorandom numbers of the IEEE 754 format.
|
---|
| 363 | * @param dsfmt dsfmt state vector.
|
---|
| 364 | * @param array an 128-bit array to be filled by pseudorandom numbers.
|
---|
| 365 | * @param size number of 128-bit pseudorandom numbers to be generated.
|
---|
| 366 | */
|
---|
| 367 | inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array,
|
---|
| 368 | int size) {
|
---|
| 369 | int i, j;
|
---|
| 370 | w128_t lung;
|
---|
| 371 |
|
---|
| 372 | lung = dsfmt->status[DSFMT_N];
|
---|
| 373 | do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
|
---|
| 374 | &lung);
|
---|
| 375 | for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
|
---|
| 376 | do_recursion(&array[i], &dsfmt->status[i],
|
---|
| 377 | &dsfmt->status[i + DSFMT_POS1], &lung);
|
---|
| 378 | }
|
---|
| 379 | for (; i < DSFMT_N; i++) {
|
---|
| 380 | do_recursion(&array[i], &dsfmt->status[i],
|
---|
| 381 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
| 382 | }
|
---|
| 383 | for (; i < size - DSFMT_N; i++) {
|
---|
| 384 | do_recursion(&array[i], &array[i - DSFMT_N],
|
---|
| 385 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
| 386 | convert_o0c1(&array[i - DSFMT_N]);
|
---|
| 387 | }
|
---|
| 388 | for (j = 0; j < 2 * DSFMT_N - size; j++) {
|
---|
| 389 | dsfmt->status[j] = array[j + size - DSFMT_N];
|
---|
| 390 | }
|
---|
| 391 | for (; i < size; i++, j++) {
|
---|
| 392 | do_recursion(&array[i], &array[i - DSFMT_N],
|
---|
| 393 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
| 394 | dsfmt->status[j] = array[i];
|
---|
| 395 | convert_o0c1(&array[i - DSFMT_N]);
|
---|
| 396 | }
|
---|
| 397 | for (i = size - DSFMT_N; i < size; i++) {
|
---|
| 398 | convert_o0c1(&array[i]);
|
---|
| 399 | }
|
---|
| 400 | dsfmt->status[DSFMT_N] = lung;
|
---|
| 401 | }
|
---|
| 402 |
|
---|
/**
 * First mixing function used by the array-based initialization: xors the
 * argument with itself shifted right by 27 bits, then multiplies by
 * 1664525 modulo 2^32.
 * @param x 32-bit integer
 * @return 32-bit integer
 */
static uint32_t ini_func1(uint32_t x) {
    const uint32_t folded = x ^ (x >> 27);

    return folded * (uint32_t)1664525UL;
}
| 412 |
|
---|
/**
 * Second mixing function used by the array-based initialization: xors the
 * argument with itself shifted right by 27 bits, then multiplies by
 * 1566083941 modulo 2^32.
 * @param x 32-bit integer
 * @return 32-bit integer
 */
static uint32_t ini_func2(uint32_t x) {
    const uint32_t folded = x ^ (x >> 27);

    return folded * (uint32_t)1566083941UL;
}
| 422 |
|
---|
| 423 | /**
|
---|
| 424 | * This function initializes the internal state array to fit the IEEE
|
---|
| 425 | * 754 format.
|
---|
| 426 | * @param dsfmt dsfmt state vector.
|
---|
| 427 | */
|
---|
| 428 | static void initial_mask(dsfmt_t *dsfmt) {
|
---|
| 429 | int i;
|
---|
| 430 | uint64_t *psfmt;
|
---|
| 431 |
|
---|
| 432 | psfmt = &dsfmt->status[0].u[0];
|
---|
| 433 | for (i = 0; i < DSFMT_N * 2; i++) {
|
---|
| 434 | psfmt[i] = (psfmt[i] & DSFMT_LOW_MASK) | DSFMT_HIGH_CONST;
|
---|
| 435 | }
|
---|
| 436 | }
|
---|
| 437 |
|
---|
| 438 | /**
|
---|
| 439 | * This function certificate the period of 2^{SFMT_MEXP}-1.
|
---|
| 440 | * @param dsfmt dsfmt state vector.
|
---|
| 441 | */
|
---|
| 442 | static void period_certification(dsfmt_t *dsfmt) {
|
---|
| 443 | uint64_t pcv[2] = {DSFMT_PCV1, DSFMT_PCV2};
|
---|
| 444 | uint64_t tmp[2];
|
---|
| 445 | uint64_t inner;
|
---|
| 446 | int i;
|
---|
| 447 | #if (DSFMT_PCV2 & 1) != 1
|
---|
| 448 | int j;
|
---|
| 449 | uint64_t work;
|
---|
| 450 | #endif
|
---|
| 451 |
|
---|
| 452 | tmp[0] = (dsfmt->status[DSFMT_N].u[0] ^ DSFMT_FIX1);
|
---|
| 453 | tmp[1] = (dsfmt->status[DSFMT_N].u[1] ^ DSFMT_FIX2);
|
---|
| 454 |
|
---|
| 455 | inner = tmp[0] & pcv[0];
|
---|
| 456 | inner ^= tmp[1] & pcv[1];
|
---|
| 457 | for (i = 32; i > 0; i >>= 1) {
|
---|
| 458 | inner ^= inner >> i;
|
---|
| 459 | }
|
---|
| 460 | inner &= 1;
|
---|
| 461 | /* check OK */
|
---|
| 462 | if (inner == 1) {
|
---|
| 463 | return;
|
---|
| 464 | }
|
---|
| 465 | /* check NG, and modification */
|
---|
| 466 | #if (DSFMT_PCV2 & 1) == 1
|
---|
| 467 | dsfmt->status[DSFMT_N].u[1] ^= 1;
|
---|
| 468 | #else
|
---|
| 469 | for (i = 1; i >= 0; i--) {
|
---|
| 470 | work = 1;
|
---|
| 471 | for (j = 0; j < 64; j++) {
|
---|
| 472 | if ((work & pcv[i]) != 0) {
|
---|
| 473 | dsfmt->status[DSFMT_N].u[i] ^= work;
|
---|
| 474 | return;
|
---|
| 475 | }
|
---|
| 476 | work = work << 1;
|
---|
| 477 | }
|
---|
| 478 | }
|
---|
| 479 | #endif
|
---|
| 480 | return;
|
---|
| 481 | }
|
---|
| 482 |
|
---|
| 483 | /*----------------
|
---|
| 484 | PUBLIC FUNCTIONS
|
---|
| 485 | ----------------*/
|
---|
| 486 | /**
|
---|
| 487 | * This function returns the identification string. The string shows
|
---|
| 488 | * the Mersenne exponent, and all parameters of this generator.
|
---|
| 489 | * @return id string.
|
---|
| 490 | */
|
---|
| 491 | const char *dsfmt_get_idstring(void) {
|
---|
| 492 | return DSFMT_IDSTR;
|
---|
| 493 | }
|
---|
| 494 |
|
---|
| 495 | /**
|
---|
| 496 | * This function returns the minimum size of array used for \b
|
---|
| 497 | * fill_array functions.
|
---|
| 498 | * @return minimum size of array used for fill_array functions.
|
---|
| 499 | */
|
---|
| 500 | int dsfmt_get_min_array_size(void) {
|
---|
| 501 | return DSFMT_N64;
|
---|
| 502 | }
|
---|
| 503 |
|
---|
| 504 | /**
|
---|
| 505 | * This function fills the internal state array with double precision
|
---|
| 506 | * floating point pseudorandom numbers of the IEEE 754 format.
|
---|
| 507 | * @param dsfmt dsfmt state vector.
|
---|
| 508 | */
|
---|
| 509 | void dsfmt_gen_rand_all(dsfmt_t *dsfmt) {
|
---|
| 510 | int i;
|
---|
| 511 | w128_t lung;
|
---|
| 512 |
|
---|
| 513 | lung = dsfmt->status[DSFMT_N];
|
---|
| 514 | do_recursion(&dsfmt->status[0], &dsfmt->status[0],
|
---|
| 515 | &dsfmt->status[DSFMT_POS1], &lung);
|
---|
| 516 | for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
|
---|
| 517 | do_recursion(&dsfmt->status[i], &dsfmt->status[i],
|
---|
| 518 | &dsfmt->status[i + DSFMT_POS1], &lung);
|
---|
| 519 | }
|
---|
| 520 | for (; i < DSFMT_N; i++) {
|
---|
| 521 | do_recursion(&dsfmt->status[i], &dsfmt->status[i],
|
---|
| 522 | &dsfmt->status[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
| 523 | }
|
---|
| 524 | dsfmt->status[DSFMT_N] = lung;
|
---|
| 525 | }
|
---|
| 526 |
|
---|
| 527 | /**
|
---|
| 528 | * This function generates double precision floating point
|
---|
| 529 | * pseudorandom numbers which distribute in the range [1, 2) to the
|
---|
| 530 | * specified array[] by one call. The number of pseudorandom numbers
|
---|
| 531 | * is specified by the argument \b size, which must be at least (SFMT_MEXP
|
---|
| 532 | * / 128) * 2 and a multiple of two. The function
|
---|
| 533 | * get_min_array_size() returns this minimum size. The generation by
|
---|
| 534 | * this function is much faster than the following fill_array_xxx functions.
|
---|
| 535 | *
|
---|
| 536 | * For initialization, init_gen_rand() or init_by_array() must be called
|
---|
| 537 | * before the first call of this function. This function can not be
|
---|
| 538 | * used after calling genrand_xxx functions, without initialization.
|
---|
| 539 | *
|
---|
| 540 | * @param dsfmt dsfmt state vector.
|
---|
| 541 | * @param array an array where pseudorandom numbers are filled
|
---|
| 542 | * by this function. The pointer to the array must be "aligned"
|
---|
| 543 | * (namely, must be a multiple of 16) in the SIMD version, since it
|
---|
| 544 | * refers to the address of a 128-bit integer. In the standard C
|
---|
| 545 | * version, the pointer is arbitrary.
|
---|
| 546 | *
|
---|
| 547 | * @param size the number of 64-bit pseudorandom integers to be
|
---|
| 548 | * generated. size must be a multiple of 2, and greater than or equal
|
---|
| 549 | * to (SFMT_MEXP / 128) * 2.
|
---|
| 550 | *
|
---|
| 551 | * @note \b memalign or \b posix_memalign is available to get aligned
|
---|
| 552 | * memory. Mac OSX doesn't have these functions, but \b malloc of OSX
|
---|
| 553 | * returns the pointer to the aligned memory block.
|
---|
| 554 | */
|
---|
| 555 | void dsfmt_fill_array_close1_open2(dsfmt_t *dsfmt, double array[], int size) {
|
---|
| 556 | assert(size % 2 == 0);
|
---|
| 557 | assert(size >= DSFMT_N64);
|
---|
| 558 | gen_rand_array_c1o2(dsfmt, (w128_t *)array, size / 2);
|
---|
| 559 | }
|
---|
| 560 |
|
---|
| 561 | /**
|
---|
| 562 | * This function generates double precision floating point
|
---|
| 563 | * pseudorandom numbers which distribute in the range (0, 1] to the
|
---|
| 564 | * specified array[] by one call. This function is the same as
|
---|
| 565 | * fill_array_close1_open2() except the distribution range.
|
---|
| 566 | *
|
---|
| 567 | * @param dsfmt dsfmt state vector.
|
---|
| 568 | * @param array an array where pseudorandom numbers are filled
|
---|
| 569 | * by this function.
|
---|
| 570 | * @param size the number of pseudorandom numbers to be generated.
|
---|
| 571 | * see also \sa fill_array_close1_open2()
|
---|
| 572 | */
|
---|
| 573 | void dsfmt_fill_array_open_close(dsfmt_t *dsfmt, double array[], int size) {
|
---|
| 574 | assert(size % 2 == 0);
|
---|
| 575 | assert(size >= DSFMT_N64);
|
---|
| 576 | gen_rand_array_o0c1(dsfmt, (w128_t *)array, size / 2);
|
---|
| 577 | }
|
---|
| 578 |
|
---|
| 579 | /**
|
---|
| 580 | * This function generates double precision floating point
|
---|
| 581 | * pseudorandom numbers which distribute in the range [0, 1) to the
|
---|
| 582 | * specified array[] by one call. This function is the same as
|
---|
| 583 | * fill_array_close1_open2() except the distribution range.
|
---|
| 584 | *
|
---|
| 585 | * @param array an array where pseudorandom numbers are filled
|
---|
| 586 | * by this function.
|
---|
| 587 | * @param dsfmt dsfmt state vector.
|
---|
| 588 | * @param size the number of pseudorandom numbers to be generated.
|
---|
| 589 | * see also \sa fill_array_close1_open2()
|
---|
| 590 | */
|
---|
| 591 | void dsfmt_fill_array_close_open(dsfmt_t *dsfmt, double array[], int size) {
|
---|
| 592 | assert(size % 2 == 0);
|
---|
| 593 | assert(size >= DSFMT_N64);
|
---|
| 594 | gen_rand_array_c0o1(dsfmt, (w128_t *)array, size / 2);
|
---|
| 595 | }
|
---|
| 596 |
|
---|
| 597 | /**
|
---|
| 598 | * This function generates double precision floating point
|
---|
| 599 | * pseudorandom numbers which distribute in the range (0, 1) to the
|
---|
| 600 | * specified array[] by one call. This function is the same as
|
---|
| 601 | * fill_array_close1_open2() except the distribution range.
|
---|
| 602 | *
|
---|
| 603 | * @param dsfmt dsfmt state vector.
|
---|
| 604 | * @param array an array where pseudorandom numbers are filled
|
---|
| 605 | * by this function.
|
---|
| 606 | * @param size the number of pseudorandom numbers to be generated.
|
---|
| 607 | * see also \sa fill_array_close1_open2()
|
---|
| 608 | */
|
---|
| 609 | void dsfmt_fill_array_open_open(dsfmt_t *dsfmt, double array[], int size) {
|
---|
| 610 | assert(size % 2 == 0);
|
---|
| 611 | assert(size >= DSFMT_N64);
|
---|
| 612 | gen_rand_array_o0o1(dsfmt, (w128_t *)array, size / 2);
|
---|
| 613 | }
|
---|
| 614 |
|
---|
| 615 | #if defined(__INTEL_COMPILER)
|
---|
| 616 | # pragma warning(disable:981)
|
---|
| 617 | #endif
|
---|
| 618 | /**
|
---|
| 619 | * This function initializes the internal state array with a 32-bit
|
---|
| 620 | * integer seed.
|
---|
| 621 | * @param dsfmt dsfmt state vector.
|
---|
| 622 | * @param seed a 32-bit integer used as the seed.
|
---|
| 623 | * @param mexp caller's mersenne expornent
|
---|
| 624 | */
|
---|
| 625 | void dsfmt_chk_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed, int mexp) {
|
---|
| 626 | int i;
|
---|
| 627 | uint32_t *psfmt;
|
---|
| 628 |
|
---|
| 629 | /* make sure caller program is compiled with the same MEXP */
|
---|
| 630 | if (mexp != dsfmt_mexp) {
|
---|
| 631 | fprintf(stderr, "DSFMT_MEXP doesn't match with dSFMT.c\n");
|
---|
| 632 | exit(1);
|
---|
| 633 | }
|
---|
| 634 | psfmt = &dsfmt->status[0].u32[0];
|
---|
| 635 | psfmt[idxof(0)] = seed;
|
---|
| 636 | for (i = 1; i < (DSFMT_N + 1) * 4; i++) {
|
---|
| 637 | psfmt[idxof(i)] = 1812433253UL
|
---|
| 638 | * (psfmt[idxof(i - 1)] ^ (psfmt[idxof(i - 1)] >> 30)) + i;
|
---|
| 639 | }
|
---|
| 640 | initial_mask(dsfmt);
|
---|
| 641 | period_certification(dsfmt);
|
---|
| 642 | dsfmt->idx = DSFMT_N64;
|
---|
| 643 | #if defined(HAVE_SSE2)
|
---|
| 644 | setup_const();
|
---|
| 645 | #endif
|
---|
| 646 | }
|
---|
| 647 |
|
---|
| 648 | /**
|
---|
| 649 | * This function initializes the internal state array,
|
---|
| 650 | * with an array of 32-bit integers used as the seeds
|
---|
| 651 | * @param dsfmt dsfmt state vector.
|
---|
| 652 | * @param init_key the array of 32-bit integers, used as a seed.
|
---|
| 653 | * @param key_length the length of init_key.
|
---|
| 654 | * @param mexp caller's mersenne expornent
|
---|
| 655 | */
|
---|
| 656 | void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[],
|
---|
| 657 | int key_length, int mexp) {
|
---|
| 658 | int i, j, count;
|
---|
| 659 | uint32_t r;
|
---|
| 660 | uint32_t *psfmt32;
|
---|
| 661 | int lag;
|
---|
| 662 | int mid;
|
---|
| 663 | int size = (DSFMT_N + 1) * 4; /* pulmonary */
|
---|
| 664 |
|
---|
| 665 | /* make sure caller program is compiled with the same MEXP */
|
---|
| 666 | if (mexp != dsfmt_mexp) {
|
---|
| 667 | fprintf(stderr, "DSFMT_MEXP doesn't match with dSFMT.c\n");
|
---|
| 668 | exit(1);
|
---|
| 669 | }
|
---|
| 670 | if (size >= 623) {
|
---|
| 671 | lag = 11;
|
---|
| 672 | } else if (size >= 68) {
|
---|
| 673 | lag = 7;
|
---|
| 674 | } else if (size >= 39) {
|
---|
| 675 | lag = 5;
|
---|
| 676 | } else {
|
---|
| 677 | lag = 3;
|
---|
| 678 | }
|
---|
| 679 | mid = (size - lag) / 2;
|
---|
| 680 |
|
---|
| 681 | psfmt32 = &dsfmt->status[0].u32[0];
|
---|
| 682 | memset(dsfmt->status, 0x8b, sizeof(dsfmt->status));
|
---|
| 683 | if (key_length + 1 > size) {
|
---|
| 684 | count = key_length + 1;
|
---|
| 685 | } else {
|
---|
| 686 | count = size;
|
---|
| 687 | }
|
---|
| 688 | r = ini_func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid % size)]
|
---|
| 689 | ^ psfmt32[idxof((size - 1) % size)]);
|
---|
| 690 | psfmt32[idxof(mid % size)] += r;
|
---|
| 691 | r += key_length;
|
---|
| 692 | psfmt32[idxof((mid + lag) % size)] += r;
|
---|
| 693 | psfmt32[idxof(0)] = r;
|
---|
| 694 | count--;
|
---|
| 695 | for (i = 1, j = 0; (j < count) && (j < key_length); j++) {
|
---|
| 696 | r = ini_func1(psfmt32[idxof(i)]
|
---|
| 697 | ^ psfmt32[idxof((i + mid) % size)]
|
---|
| 698 | ^ psfmt32[idxof((i + size - 1) % size)]);
|
---|
| 699 | psfmt32[idxof((i + mid) % size)] += r;
|
---|
| 700 | r += init_key[j] + i;
|
---|
| 701 | psfmt32[idxof((i + mid + lag) % size)] += r;
|
---|
| 702 | psfmt32[idxof(i)] = r;
|
---|
| 703 | i = (i + 1) % size;
|
---|
| 704 | }
|
---|
| 705 | for (; j < count; j++) {
|
---|
| 706 | r = ini_func1(psfmt32[idxof(i)]
|
---|
| 707 | ^ psfmt32[idxof((i + mid) % size)]
|
---|
| 708 | ^ psfmt32[idxof((i + size - 1) % size)]);
|
---|
| 709 | psfmt32[idxof((i + mid) % size)] += r;
|
---|
| 710 | r += i;
|
---|
| 711 | psfmt32[idxof((i + mid + lag) % size)] += r;
|
---|
| 712 | psfmt32[idxof(i)] = r;
|
---|
| 713 | i = (i + 1) % size;
|
---|
| 714 | }
|
---|
| 715 | for (j = 0; j < size; j++) {
|
---|
| 716 | r = ini_func2(psfmt32[idxof(i)]
|
---|
| 717 | + psfmt32[idxof((i + mid) % size)]
|
---|
| 718 | + psfmt32[idxof((i + size - 1) % size)]);
|
---|
| 719 | psfmt32[idxof((i + mid) % size)] ^= r;
|
---|
| 720 | r -= i;
|
---|
| 721 | psfmt32[idxof((i + mid + lag) % size)] ^= r;
|
---|
| 722 | psfmt32[idxof(i)] = r;
|
---|
| 723 | i = (i + 1) % size;
|
---|
| 724 | }
|
---|
| 725 | initial_mask(dsfmt);
|
---|
| 726 | period_certification(dsfmt);
|
---|
| 727 | dsfmt->idx = DSFMT_N64;
|
---|
| 728 | #if defined(HAVE_SSE2)
|
---|
| 729 | setup_const();
|
---|
| 730 | #endif
|
---|
| 731 | }
|
---|
| 732 | #if defined(__INTEL_COMPILER)
|
---|
| 733 | # pragma warning(default:981)
|
---|
| 734 | #endif
|
---|