1 | /**
|
---|
2 | * @file dSFMT.c
|
---|
3 | * @brief double precision SIMD-oriented Fast Mersenne Twister (dSFMT)
|
---|
4 | * based on IEEE 754 format.
|
---|
5 | *
|
---|
6 | * @author Mutsuo Saito (Hiroshima University)
|
---|
7 | * @author Makoto Matsumoto (Hiroshima University)
|
---|
8 | *
|
---|
9 | * Copyright (C) 2007,2008 Mutsuo Saito, Makoto Matsumoto and Hiroshima
|
---|
10 | * University. All rights reserved.
|
---|
11 | *
|
---|
12 | * The new BSD License is applied to this software, see LICENSE.txt
|
---|
13 | */
|
---|
14 | #include <stdio.h>
|
---|
15 | #include <string.h>
|
---|
16 | #include <stdlib.h>
|
---|
17 | #include "dSFMT-params.h"
|
---|
18 |
|
---|
19 | /** dsfmt internal state vector */
|
---|
20 | dsfmt_t dsfmt_global_data;
|
---|
21 | /** dsfmt mexp for check */
|
---|
22 | static const int dsfmt_mexp = DSFMT_MEXP;
|
---|
23 |
|
---|
24 | /*----------------
|
---|
25 | STATIC FUNCTIONS
|
---|
26 | ----------------*/
|
---|
27 | inline static uint32_t ini_func1(uint32_t x);
|
---|
28 | inline static uint32_t ini_func2(uint32_t x);
|
---|
29 | inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array,
|
---|
30 | int size);
|
---|
31 | inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array,
|
---|
32 | int size);
|
---|
33 | inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array,
|
---|
34 | int size);
|
---|
35 | inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array,
|
---|
36 | int size);
|
---|
37 | inline static int idxof(int i);
|
---|
38 | static void initial_mask(dsfmt_t *dsfmt);
|
---|
39 | static void period_certification(dsfmt_t *dsfmt);
|
---|
40 |
|
---|
#if defined(HAVE_SSE2)
#  include <emmintrin.h>
/** mask data for sse2; filled by setup_const() from the DSFMT_MSK32_* values */
static __m128i sse2_param_mask;
/** the integer 1 in each 64-bit lane, for sse2; filled by setup_const() */
static __m128i sse2_int_one;
/** 2.0 in both double lanes, for sse2; filled by setup_const() */
static __m128d sse2_double_two;
/** -1.0 in both double lanes, for sse2; filled by setup_const() */
static __m128d sse2_double_m_one;

static void setup_const(void);
#endif
|
---|
54 |
|
---|
/**
 * Translates an index into the 32-bit view of the state array.
 * When DSFMT_BIG_ENDIAN is defined the lowest bit of the index is flipped,
 * which swaps the two 32-bit halves of every 64-bit word (simulating a
 * LITTLE ENDIAN layout on a BIG ENDIAN machine); otherwise the index is
 * returned unchanged.
 * @param i index into the 32-bit view
 * @return index to use for the actual array access
 */
inline static int idxof(int i) {
#if defined(DSFMT_BIG_ENDIAN)
    return i ^ 1;
#else
    return i;
#endif
}
|
---|
68 |
|
---|
69 | /**
|
---|
70 | * This function represents the recursion formula.
|
---|
71 | * @param r output
|
---|
72 | * @param a a 128-bit part of the internal state array
|
---|
73 | * @param b a 128-bit part of the internal state array
|
---|
74 | * @param lung a 128-bit part of the internal state array
|
---|
75 | */
|
---|
76 | #if defined(HAVE_ALTIVEC)
|
---|
77 | inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b,
|
---|
78 | w128_t *lung) {
|
---|
79 | const vector unsigned char sl1 = ALTI_SL1;
|
---|
80 | const vector unsigned char sl1_perm = ALTI_SL1_PERM;
|
---|
81 | const vector unsigned int sl1_msk = ALTI_SL1_MSK;
|
---|
82 | const vector unsigned char sr1 = ALTI_SR;
|
---|
83 | const vector unsigned char sr1_perm = ALTI_SR_PERM;
|
---|
84 | const vector unsigned int sr1_msk = ALTI_SR_MSK;
|
---|
85 | const vector unsigned char perm = ALTI_PERM;
|
---|
86 | const vector unsigned int msk1 = ALTI_MSK;
|
---|
87 | vector unsigned int w, x, y, z;
|
---|
88 |
|
---|
89 | z = a->s;
|
---|
90 | w = lung->s;
|
---|
91 | x = vec_perm(w, (vector unsigned int)perm, perm);
|
---|
92 | y = vec_perm(z, sl1_perm, sl1_perm);
|
---|
93 | y = vec_sll(y, sl1);
|
---|
94 | y = vec_and(y, sl1_msk);
|
---|
95 | w = vec_xor(x, b->s);
|
---|
96 | w = vec_xor(w, y);
|
---|
97 | x = vec_perm(w, (vector unsigned int)sr1_perm, sr1_perm);
|
---|
98 | x = vec_srl(x, sr1);
|
---|
99 | x = vec_and(x, sr1_msk);
|
---|
100 | y = vec_and(w, msk1);
|
---|
101 | z = vec_xor(z, y);
|
---|
102 | r->s = vec_xor(z, x);
|
---|
103 | lung->s = w;
|
---|
104 | }
|
---|
105 | #elif defined(HAVE_SSE2)
|
---|
106 | /**
|
---|
107 | * This function setup some constant variables for SSE2.
|
---|
108 | */
|
---|
109 | static void setup_const(void) {
|
---|
110 | static int first = 1;
|
---|
111 | if (!first) {
|
---|
112 | return;
|
---|
113 | }
|
---|
114 | sse2_param_mask = _mm_set_epi32(DSFMT_MSK32_3, DSFMT_MSK32_4,
|
---|
115 | DSFMT_MSK32_1, DSFMT_MSK32_2);
|
---|
116 | sse2_int_one = _mm_set_epi32(0, 1, 0, 1);
|
---|
117 | sse2_double_two = _mm_set_pd(2.0, 2.0);
|
---|
118 | sse2_double_m_one = _mm_set_pd(-1.0, -1.0);
|
---|
119 | first = 0;
|
---|
120 | }
|
---|
121 |
|
---|
122 | /**
|
---|
123 | * This function represents the recursion formula.
|
---|
124 | * @param r output 128-bit
|
---|
125 | * @param a a 128-bit part of the internal state array
|
---|
126 | * @param b a 128-bit part of the internal state array
|
---|
127 | * @param d a 128-bit part of the internal state array (I/O)
|
---|
128 | */
|
---|
129 | inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *u) {
|
---|
130 | __m128i v, w, x, y, z;
|
---|
131 |
|
---|
132 | x = a->si;
|
---|
133 | z = _mm_slli_epi64(x, DSFMT_SL1);
|
---|
134 | y = _mm_shuffle_epi32(u->si, SSE2_SHUFF);
|
---|
135 | z = _mm_xor_si128(z, b->si);
|
---|
136 | y = _mm_xor_si128(y, z);
|
---|
137 |
|
---|
138 | v = _mm_srli_epi64(y, DSFMT_SR);
|
---|
139 | w = _mm_and_si128(y, sse2_param_mask);
|
---|
140 | v = _mm_xor_si128(v, x);
|
---|
141 | v = _mm_xor_si128(v, w);
|
---|
142 | r->si = v;
|
---|
143 | u->si = y;
|
---|
144 | }
|
---|
145 | #else /* standard C */
|
---|
146 | /**
|
---|
147 | * This function represents the recursion formula.
|
---|
148 | * @param r output 128-bit
|
---|
149 | * @param a a 128-bit part of the internal state array
|
---|
150 | * @param b a 128-bit part of the internal state array
|
---|
151 | * @param lung a 128-bit part of the internal state array (I/O)
|
---|
152 | */
|
---|
153 | inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b,
|
---|
154 | w128_t *lung) {
|
---|
155 | uint64_t t0, t1, L0, L1;
|
---|
156 |
|
---|
157 | t0 = a->u[0];
|
---|
158 | t1 = a->u[1];
|
---|
159 | L0 = lung->u[0];
|
---|
160 | L1 = lung->u[1];
|
---|
161 | lung->u[0] = (t0 << DSFMT_SL1) ^ (L1 >> 32) ^ (L1 << 32) ^ b->u[0];
|
---|
162 | lung->u[1] = (t1 << DSFMT_SL1) ^ (L0 >> 32) ^ (L0 << 32) ^ b->u[1];
|
---|
163 | r->u[0] = (lung->u[0] >> DSFMT_SR) ^ (lung->u[0] & DSFMT_MSK1) ^ t0;
|
---|
164 | r->u[1] = (lung->u[1] >> DSFMT_SR) ^ (lung->u[1] & DSFMT_MSK2) ^ t1;
|
---|
165 | }
|
---|
166 | #endif
|
---|
167 |
|
---|
168 | #if defined(HAVE_SSE2)
|
---|
169 | /**
|
---|
170 | * This function converts the double precision floating point numbers which
|
---|
171 | * distribute uniformly in the range [1, 2) to those which distribute uniformly
|
---|
172 | * in the range [0, 1).
|
---|
173 | * @param w 128bit stracture of double precision floating point numbers (I/O)
|
---|
174 | */
|
---|
175 | inline static void convert_c0o1(w128_t *w) {
|
---|
176 | w->sd = _mm_add_pd(w->sd, sse2_double_m_one);
|
---|
177 | }
|
---|
178 |
|
---|
179 | /**
|
---|
180 | * This function converts the double precision floating point numbers which
|
---|
181 | * distribute uniformly in the range [1, 2) to those which distribute uniformly
|
---|
182 | * in the range (0, 1].
|
---|
183 | * @param w 128bit stracture of double precision floating point numbers (I/O)
|
---|
184 | */
|
---|
185 | inline static void convert_o0c1(w128_t *w) {
|
---|
186 | w->sd = _mm_sub_pd(sse2_double_two, w->sd);
|
---|
187 | }
|
---|
188 |
|
---|
189 | /**
|
---|
190 | * This function converts the double precision floating point numbers which
|
---|
191 | * distribute uniformly in the range [1, 2) to those which distribute uniformly
|
---|
192 | * in the range (0, 1).
|
---|
193 | * @param w 128bit stracture of double precision floating point numbers (I/O)
|
---|
194 | */
|
---|
195 | inline static void convert_o0o1(w128_t *w) {
|
---|
196 | w->si = _mm_or_si128(w->si, sse2_int_one);
|
---|
197 | w->sd = _mm_add_pd(w->sd, sse2_double_m_one);
|
---|
198 | }
|
---|
199 | #else /* standard C and altivec */
|
---|
200 | /**
|
---|
201 | * This function converts the double precision floating point numbers which
|
---|
202 | * distribute uniformly in the range [1, 2) to those which distribute uniformly
|
---|
203 | * in the range [0, 1).
|
---|
204 | * @param w 128bit stracture of double precision floating point numbers (I/O)
|
---|
205 | */
|
---|
206 | inline static void convert_c0o1(w128_t *w) {
|
---|
207 | w->d[0] -= 1.0;
|
---|
208 | w->d[1] -= 1.0;
|
---|
209 | }
|
---|
210 |
|
---|
211 | /**
|
---|
212 | * This function converts the double precision floating point numbers which
|
---|
213 | * distribute uniformly in the range [1, 2) to those which distribute uniformly
|
---|
214 | * in the range (0, 1].
|
---|
215 | * @param w 128bit stracture of double precision floating point numbers (I/O)
|
---|
216 | */
|
---|
217 | inline static void convert_o0c1(w128_t *w) {
|
---|
218 | w->d[0] = 2.0 - w->d[0];
|
---|
219 | w->d[1] = 2.0 - w->d[1];
|
---|
220 | }
|
---|
221 |
|
---|
222 | /**
|
---|
223 | * This function converts the double precision floating point numbers which
|
---|
224 | * distribute uniformly in the range [1, 2) to those which distribute uniformly
|
---|
225 | * in the range (0, 1).
|
---|
226 | * @param w 128bit stracture of double precision floating point numbers (I/O)
|
---|
227 | */
|
---|
228 | inline static void convert_o0o1(w128_t *w) {
|
---|
229 | w->u[0] |= 1;
|
---|
230 | w->u[1] |= 1;
|
---|
231 | w->d[0] -= 1.0;
|
---|
232 | w->d[1] -= 1.0;
|
---|
233 | }
|
---|
234 | #endif
|
---|
235 |
|
---|
236 | /**
|
---|
237 | * This function fills the user-specified array with double precision
|
---|
238 | * floating point pseudorandom numbers of the IEEE 754 format.
|
---|
239 | * @param dsfmt dsfmt state vector.
|
---|
240 | * @param array an 128-bit array to be filled by pseudorandom numbers.
|
---|
241 | * @param size number of 128-bit pseudorandom numbers to be generated.
|
---|
242 | */
|
---|
243 | inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array,
|
---|
244 | int size) {
|
---|
245 | int i, j;
|
---|
246 | w128_t lung;
|
---|
247 |
|
---|
248 | lung = dsfmt->status[DSFMT_N];
|
---|
249 | do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
|
---|
250 | &lung);
|
---|
251 | for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
|
---|
252 | do_recursion(&array[i], &dsfmt->status[i],
|
---|
253 | &dsfmt->status[i + DSFMT_POS1], &lung);
|
---|
254 | }
|
---|
255 | for (; i < DSFMT_N; i++) {
|
---|
256 | do_recursion(&array[i], &dsfmt->status[i],
|
---|
257 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
258 | }
|
---|
259 | for (; i < size - DSFMT_N; i++) {
|
---|
260 | do_recursion(&array[i], &array[i - DSFMT_N],
|
---|
261 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
262 | }
|
---|
263 | for (j = 0; j < 2 * DSFMT_N - size; j++) {
|
---|
264 | dsfmt->status[j] = array[j + size - DSFMT_N];
|
---|
265 | }
|
---|
266 | for (; i < size; i++, j++) {
|
---|
267 | do_recursion(&array[i], &array[i - DSFMT_N],
|
---|
268 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
269 | dsfmt->status[j] = array[i];
|
---|
270 | }
|
---|
271 | dsfmt->status[DSFMT_N] = lung;
|
---|
272 | }
|
---|
273 |
|
---|
274 | /**
|
---|
275 | * This function fills the user-specified array with double precision
|
---|
276 | * floating point pseudorandom numbers of the IEEE 754 format.
|
---|
277 | * @param dsfmt dsfmt state vector.
|
---|
278 | * @param array an 128-bit array to be filled by pseudorandom numbers.
|
---|
279 | * @param size number of 128-bit pseudorandom numbers to be generated.
|
---|
280 | */
|
---|
281 | inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array,
|
---|
282 | int size) {
|
---|
283 | int i, j;
|
---|
284 | w128_t lung;
|
---|
285 |
|
---|
286 | lung = dsfmt->status[DSFMT_N];
|
---|
287 | do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
|
---|
288 | &lung);
|
---|
289 | for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
|
---|
290 | do_recursion(&array[i], &dsfmt->status[i],
|
---|
291 | &dsfmt->status[i + DSFMT_POS1], &lung);
|
---|
292 | }
|
---|
293 | for (; i < DSFMT_N; i++) {
|
---|
294 | do_recursion(&array[i], &dsfmt->status[i],
|
---|
295 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
296 | }
|
---|
297 | for (; i < size - DSFMT_N; i++) {
|
---|
298 | do_recursion(&array[i], &array[i - DSFMT_N],
|
---|
299 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
300 | convert_c0o1(&array[i - DSFMT_N]);
|
---|
301 | }
|
---|
302 | for (j = 0; j < 2 * DSFMT_N - size; j++) {
|
---|
303 | dsfmt->status[j] = array[j + size - DSFMT_N];
|
---|
304 | }
|
---|
305 | for (; i < size; i++, j++) {
|
---|
306 | do_recursion(&array[i], &array[i - DSFMT_N],
|
---|
307 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
308 | dsfmt->status[j] = array[i];
|
---|
309 | convert_c0o1(&array[i - DSFMT_N]);
|
---|
310 | }
|
---|
311 | for (i = size - DSFMT_N; i < size; i++) {
|
---|
312 | convert_c0o1(&array[i]);
|
---|
313 | }
|
---|
314 | dsfmt->status[DSFMT_N] = lung;
|
---|
315 | }
|
---|
316 |
|
---|
317 | /**
|
---|
318 | * This function fills the user-specified array with double precision
|
---|
319 | * floating point pseudorandom numbers of the IEEE 754 format.
|
---|
320 | * @param dsfmt dsfmt state vector.
|
---|
321 | * @param array an 128-bit array to be filled by pseudorandom numbers.
|
---|
322 | * @param size number of 128-bit pseudorandom numbers to be generated.
|
---|
323 | */
|
---|
324 | inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array,
|
---|
325 | int size) {
|
---|
326 | int i, j;
|
---|
327 | w128_t lung;
|
---|
328 |
|
---|
329 | lung = dsfmt->status[DSFMT_N];
|
---|
330 | do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
|
---|
331 | &lung);
|
---|
332 | for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
|
---|
333 | do_recursion(&array[i], &dsfmt->status[i],
|
---|
334 | &dsfmt->status[i + DSFMT_POS1], &lung);
|
---|
335 | }
|
---|
336 | for (; i < DSFMT_N; i++) {
|
---|
337 | do_recursion(&array[i], &dsfmt->status[i],
|
---|
338 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
339 | }
|
---|
340 | for (; i < size - DSFMT_N; i++) {
|
---|
341 | do_recursion(&array[i], &array[i - DSFMT_N],
|
---|
342 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
343 | convert_o0o1(&array[i - DSFMT_N]);
|
---|
344 | }
|
---|
345 | for (j = 0; j < 2 * DSFMT_N - size; j++) {
|
---|
346 | dsfmt->status[j] = array[j + size - DSFMT_N];
|
---|
347 | }
|
---|
348 | for (; i < size; i++, j++) {
|
---|
349 | do_recursion(&array[i], &array[i - DSFMT_N],
|
---|
350 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
351 | dsfmt->status[j] = array[i];
|
---|
352 | convert_o0o1(&array[i - DSFMT_N]);
|
---|
353 | }
|
---|
354 | for (i = size - DSFMT_N; i < size; i++) {
|
---|
355 | convert_o0o1(&array[i]);
|
---|
356 | }
|
---|
357 | dsfmt->status[DSFMT_N] = lung;
|
---|
358 | }
|
---|
359 |
|
---|
360 | /**
|
---|
361 | * This function fills the user-specified array with double precision
|
---|
362 | * floating point pseudorandom numbers of the IEEE 754 format.
|
---|
363 | * @param dsfmt dsfmt state vector.
|
---|
364 | * @param array an 128-bit array to be filled by pseudorandom numbers.
|
---|
365 | * @param size number of 128-bit pseudorandom numbers to be generated.
|
---|
366 | */
|
---|
367 | inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array,
|
---|
368 | int size) {
|
---|
369 | int i, j;
|
---|
370 | w128_t lung;
|
---|
371 |
|
---|
372 | lung = dsfmt->status[DSFMT_N];
|
---|
373 | do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
|
---|
374 | &lung);
|
---|
375 | for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
|
---|
376 | do_recursion(&array[i], &dsfmt->status[i],
|
---|
377 | &dsfmt->status[i + DSFMT_POS1], &lung);
|
---|
378 | }
|
---|
379 | for (; i < DSFMT_N; i++) {
|
---|
380 | do_recursion(&array[i], &dsfmt->status[i],
|
---|
381 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
382 | }
|
---|
383 | for (; i < size - DSFMT_N; i++) {
|
---|
384 | do_recursion(&array[i], &array[i - DSFMT_N],
|
---|
385 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
386 | convert_o0c1(&array[i - DSFMT_N]);
|
---|
387 | }
|
---|
388 | for (j = 0; j < 2 * DSFMT_N - size; j++) {
|
---|
389 | dsfmt->status[j] = array[j + size - DSFMT_N];
|
---|
390 | }
|
---|
391 | for (; i < size; i++, j++) {
|
---|
392 | do_recursion(&array[i], &array[i - DSFMT_N],
|
---|
393 | &array[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
394 | dsfmt->status[j] = array[i];
|
---|
395 | convert_o0c1(&array[i - DSFMT_N]);
|
---|
396 | }
|
---|
397 | for (i = size - DSFMT_N; i < size; i++) {
|
---|
398 | convert_o0c1(&array[i]);
|
---|
399 | }
|
---|
400 | dsfmt->status[DSFMT_N] = lung;
|
---|
401 | }
|
---|
402 |
|
---|
/**
 * This function represents a function used in the initialization
 * by init_by_array.  The definition carries the same \c inline specifier
 * as its forward declaration at the top of the file.
 * @param x 32-bit integer
 * @return (x ^ (x >> 27)) * 1664525, truncated to 32 bits
 */
inline static uint32_t ini_func1(uint32_t x) {
    return (x ^ (x >> 27)) * (uint32_t)1664525UL;
}
|
---|
412 |
|
---|
/**
 * This function represents a function used in the initialization
 * by init_by_array.  The definition carries the same \c inline specifier
 * as its forward declaration at the top of the file.
 * @param x 32-bit integer
 * @return (x ^ (x >> 27)) * 1566083941, truncated to 32 bits
 */
inline static uint32_t ini_func2(uint32_t x) {
    return (x ^ (x >> 27)) * (uint32_t)1566083941UL;
}
|
---|
422 |
|
---|
423 | /**
|
---|
424 | * This function initializes the internal state array to fit the IEEE
|
---|
425 | * 754 format.
|
---|
426 | * @param dsfmt dsfmt state vector.
|
---|
427 | */
|
---|
428 | static void initial_mask(dsfmt_t *dsfmt) {
|
---|
429 | int i;
|
---|
430 | uint64_t *psfmt;
|
---|
431 |
|
---|
432 | psfmt = &dsfmt->status[0].u[0];
|
---|
433 | for (i = 0; i < DSFMT_N * 2; i++) {
|
---|
434 | psfmt[i] = (psfmt[i] & DSFMT_LOW_MASK) | DSFMT_HIGH_CONST;
|
---|
435 | }
|
---|
436 | }
|
---|
437 |
|
---|
438 | /**
|
---|
439 | * This function certificate the period of 2^{SFMT_MEXP}-1.
|
---|
440 | * @param dsfmt dsfmt state vector.
|
---|
441 | */
|
---|
442 | static void period_certification(dsfmt_t *dsfmt) {
|
---|
443 | uint64_t pcv[2] = {DSFMT_PCV1, DSFMT_PCV2};
|
---|
444 | uint64_t tmp[2];
|
---|
445 | uint64_t inner;
|
---|
446 | int i;
|
---|
447 | #if (DSFMT_PCV2 & 1) != 1
|
---|
448 | int j;
|
---|
449 | uint64_t work;
|
---|
450 | #endif
|
---|
451 |
|
---|
452 | tmp[0] = (dsfmt->status[DSFMT_N].u[0] ^ DSFMT_FIX1);
|
---|
453 | tmp[1] = (dsfmt->status[DSFMT_N].u[1] ^ DSFMT_FIX2);
|
---|
454 |
|
---|
455 | inner = tmp[0] & pcv[0];
|
---|
456 | inner ^= tmp[1] & pcv[1];
|
---|
457 | for (i = 32; i > 0; i >>= 1) {
|
---|
458 | inner ^= inner >> i;
|
---|
459 | }
|
---|
460 | inner &= 1;
|
---|
461 | /* check OK */
|
---|
462 | if (inner == 1) {
|
---|
463 | return;
|
---|
464 | }
|
---|
465 | /* check NG, and modification */
|
---|
466 | #if (DSFMT_PCV2 & 1) == 1
|
---|
467 | dsfmt->status[DSFMT_N].u[1] ^= 1;
|
---|
468 | #else
|
---|
469 | for (i = 1; i >= 0; i--) {
|
---|
470 | work = 1;
|
---|
471 | for (j = 0; j < 64; j++) {
|
---|
472 | if ((work & pcv[i]) != 0) {
|
---|
473 | dsfmt->status[DSFMT_N].u[i] ^= work;
|
---|
474 | return;
|
---|
475 | }
|
---|
476 | work = work << 1;
|
---|
477 | }
|
---|
478 | }
|
---|
479 | #endif
|
---|
480 | return;
|
---|
481 | }
|
---|
482 |
|
---|
483 | /*----------------
|
---|
484 | PUBLIC FUNCTIONS
|
---|
485 | ----------------*/
|
---|
486 | /**
|
---|
487 | * This function returns the identification string. The string shows
|
---|
488 | * the Mersenne exponent, and all parameters of this generator.
|
---|
489 | * @return id string.
|
---|
490 | */
|
---|
491 | const char *dsfmt_get_idstring(void) {
|
---|
492 | return DSFMT_IDSTR;
|
---|
493 | }
|
---|
494 |
|
---|
495 | /**
|
---|
496 | * This function returns the minimum size of array used for \b
|
---|
497 | * fill_array functions.
|
---|
498 | * @return minimum size of array used for fill_array functions.
|
---|
499 | */
|
---|
500 | int dsfmt_get_min_array_size(void) {
|
---|
501 | return DSFMT_N64;
|
---|
502 | }
|
---|
503 |
|
---|
504 | /**
|
---|
505 | * This function fills the internal state array with double precision
|
---|
506 | * floating point pseudorandom numbers of the IEEE 754 format.
|
---|
507 | * @param dsfmt dsfmt state vector.
|
---|
508 | */
|
---|
509 | void dsfmt_gen_rand_all(dsfmt_t *dsfmt) {
|
---|
510 | int i;
|
---|
511 | w128_t lung;
|
---|
512 |
|
---|
513 | lung = dsfmt->status[DSFMT_N];
|
---|
514 | do_recursion(&dsfmt->status[0], &dsfmt->status[0],
|
---|
515 | &dsfmt->status[DSFMT_POS1], &lung);
|
---|
516 | for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) {
|
---|
517 | do_recursion(&dsfmt->status[i], &dsfmt->status[i],
|
---|
518 | &dsfmt->status[i + DSFMT_POS1], &lung);
|
---|
519 | }
|
---|
520 | for (; i < DSFMT_N; i++) {
|
---|
521 | do_recursion(&dsfmt->status[i], &dsfmt->status[i],
|
---|
522 | &dsfmt->status[i + DSFMT_POS1 - DSFMT_N], &lung);
|
---|
523 | }
|
---|
524 | dsfmt->status[DSFMT_N] = lung;
|
---|
525 | }
|
---|
526 |
|
---|
527 | /**
|
---|
528 | * This function generates double precision floating point
|
---|
529 | * pseudorandom numbers which distribute in the range [1, 2) to the
|
---|
530 | * specified array[] by one call. The number of pseudorandom numbers
|
---|
531 | * is specified by the argument \b size, which must be at least (SFMT_MEXP
|
---|
532 | * / 128) * 2 and a multiple of two. The function
|
---|
533 | * get_min_array_size() returns this minimum size. The generation by
|
---|
534 | * this function is much faster than the following fill_array_xxx functions.
|
---|
535 | *
|
---|
536 | * For initialization, init_gen_rand() or init_by_array() must be called
|
---|
537 | * before the first call of this function. This function can not be
|
---|
538 | * used after calling genrand_xxx functions, without initialization.
|
---|
539 | *
|
---|
540 | * @param dsfmt dsfmt state vector.
|
---|
541 | * @param array an array where pseudorandom numbers are filled
|
---|
542 | * by this function. The pointer to the array must be "aligned"
|
---|
543 | * (namely, must be a multiple of 16) in the SIMD version, since it
|
---|
544 | * refers to the address of a 128-bit integer. In the standard C
|
---|
545 | * version, the pointer is arbitrary.
|
---|
546 | *
|
---|
547 | * @param size the number of 64-bit pseudorandom integers to be
|
---|
548 | * generated. size must be a multiple of 2, and greater than or equal
|
---|
549 | * to (SFMT_MEXP / 128) * 2.
|
---|
550 | *
|
---|
551 | * @note \b memalign or \b posix_memalign is available to get aligned
|
---|
552 | * memory. Mac OSX doesn't have these functions, but \b malloc of OSX
|
---|
553 | * returns the pointer to the aligned memory block.
|
---|
554 | */
|
---|
555 | void dsfmt_fill_array_close1_open2(dsfmt_t *dsfmt, double array[], int size) {
|
---|
556 | assert(size % 2 == 0);
|
---|
557 | assert(size >= DSFMT_N64);
|
---|
558 | gen_rand_array_c1o2(dsfmt, (w128_t *)array, size / 2);
|
---|
559 | }
|
---|
560 |
|
---|
561 | /**
|
---|
562 | * This function generates double precision floating point
|
---|
563 | * pseudorandom numbers which distribute in the range (0, 1] to the
|
---|
564 | * specified array[] by one call. This function is the same as
|
---|
565 | * fill_array_close1_open2() except the distribution range.
|
---|
566 | *
|
---|
567 | * @param dsfmt dsfmt state vector.
|
---|
568 | * @param array an array where pseudorandom numbers are filled
|
---|
569 | * by this function.
|
---|
570 | * @param size the number of pseudorandom numbers to be generated.
|
---|
571 | * see also \sa fill_array_close1_open2()
|
---|
572 | */
|
---|
573 | void dsfmt_fill_array_open_close(dsfmt_t *dsfmt, double array[], int size) {
|
---|
574 | assert(size % 2 == 0);
|
---|
575 | assert(size >= DSFMT_N64);
|
---|
576 | gen_rand_array_o0c1(dsfmt, (w128_t *)array, size / 2);
|
---|
577 | }
|
---|
578 |
|
---|
579 | /**
|
---|
580 | * This function generates double precision floating point
|
---|
581 | * pseudorandom numbers which distribute in the range [0, 1) to the
|
---|
582 | * specified array[] by one call. This function is the same as
|
---|
583 | * fill_array_close1_open2() except the distribution range.
|
---|
584 | *
|
---|
585 | * @param array an array where pseudorandom numbers are filled
|
---|
586 | * by this function.
|
---|
587 | * @param dsfmt dsfmt state vector.
|
---|
588 | * @param size the number of pseudorandom numbers to be generated.
|
---|
589 | * see also \sa fill_array_close1_open2()
|
---|
590 | */
|
---|
591 | void dsfmt_fill_array_close_open(dsfmt_t *dsfmt, double array[], int size) {
|
---|
592 | assert(size % 2 == 0);
|
---|
593 | assert(size >= DSFMT_N64);
|
---|
594 | gen_rand_array_c0o1(dsfmt, (w128_t *)array, size / 2);
|
---|
595 | }
|
---|
596 |
|
---|
597 | /**
|
---|
598 | * This function generates double precision floating point
|
---|
599 | * pseudorandom numbers which distribute in the range (0, 1) to the
|
---|
600 | * specified array[] by one call. This function is the same as
|
---|
601 | * fill_array_close1_open2() except the distribution range.
|
---|
602 | *
|
---|
603 | * @param dsfmt dsfmt state vector.
|
---|
604 | * @param array an array where pseudorandom numbers are filled
|
---|
605 | * by this function.
|
---|
606 | * @param size the number of pseudorandom numbers to be generated.
|
---|
607 | * see also \sa fill_array_close1_open2()
|
---|
608 | */
|
---|
609 | void dsfmt_fill_array_open_open(dsfmt_t *dsfmt, double array[], int size) {
|
---|
610 | assert(size % 2 == 0);
|
---|
611 | assert(size >= DSFMT_N64);
|
---|
612 | gen_rand_array_o0o1(dsfmt, (w128_t *)array, size / 2);
|
---|
613 | }
|
---|
614 |
|
---|
615 | #if defined(__INTEL_COMPILER)
|
---|
616 | # pragma warning(disable:981)
|
---|
617 | #endif
|
---|
618 | /**
|
---|
619 | * This function initializes the internal state array with a 32-bit
|
---|
620 | * integer seed.
|
---|
621 | * @param dsfmt dsfmt state vector.
|
---|
622 | * @param seed a 32-bit integer used as the seed.
|
---|
623 | * @param mexp caller's mersenne expornent
|
---|
624 | */
|
---|
625 | void dsfmt_chk_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed, int mexp) {
|
---|
626 | int i;
|
---|
627 | uint32_t *psfmt;
|
---|
628 |
|
---|
629 | /* make sure caller program is compiled with the same MEXP */
|
---|
630 | if (mexp != dsfmt_mexp) {
|
---|
631 | fprintf(stderr, "DSFMT_MEXP doesn't match with dSFMT.c\n");
|
---|
632 | exit(1);
|
---|
633 | }
|
---|
634 | psfmt = &dsfmt->status[0].u32[0];
|
---|
635 | psfmt[idxof(0)] = seed;
|
---|
636 | for (i = 1; i < (DSFMT_N + 1) * 4; i++) {
|
---|
637 | psfmt[idxof(i)] = 1812433253UL
|
---|
638 | * (psfmt[idxof(i - 1)] ^ (psfmt[idxof(i - 1)] >> 30)) + i;
|
---|
639 | }
|
---|
640 | initial_mask(dsfmt);
|
---|
641 | period_certification(dsfmt);
|
---|
642 | dsfmt->idx = DSFMT_N64;
|
---|
643 | #if defined(HAVE_SSE2)
|
---|
644 | setup_const();
|
---|
645 | #endif
|
---|
646 | }
|
---|
647 |
|
---|
648 | /**
|
---|
649 | * This function initializes the internal state array,
|
---|
650 | * with an array of 32-bit integers used as the seeds
|
---|
651 | * @param dsfmt dsfmt state vector.
|
---|
652 | * @param init_key the array of 32-bit integers, used as a seed.
|
---|
653 | * @param key_length the length of init_key.
|
---|
654 | * @param mexp caller's mersenne expornent
|
---|
655 | */
|
---|
656 | void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[],
|
---|
657 | int key_length, int mexp) {
|
---|
658 | int i, j, count;
|
---|
659 | uint32_t r;
|
---|
660 | uint32_t *psfmt32;
|
---|
661 | int lag;
|
---|
662 | int mid;
|
---|
663 | int size = (DSFMT_N + 1) * 4; /* pulmonary */
|
---|
664 |
|
---|
665 | /* make sure caller program is compiled with the same MEXP */
|
---|
666 | if (mexp != dsfmt_mexp) {
|
---|
667 | fprintf(stderr, "DSFMT_MEXP doesn't match with dSFMT.c\n");
|
---|
668 | exit(1);
|
---|
669 | }
|
---|
670 | if (size >= 623) {
|
---|
671 | lag = 11;
|
---|
672 | } else if (size >= 68) {
|
---|
673 | lag = 7;
|
---|
674 | } else if (size >= 39) {
|
---|
675 | lag = 5;
|
---|
676 | } else {
|
---|
677 | lag = 3;
|
---|
678 | }
|
---|
679 | mid = (size - lag) / 2;
|
---|
680 |
|
---|
681 | psfmt32 = &dsfmt->status[0].u32[0];
|
---|
682 | memset(dsfmt->status, 0x8b, sizeof(dsfmt->status));
|
---|
683 | if (key_length + 1 > size) {
|
---|
684 | count = key_length + 1;
|
---|
685 | } else {
|
---|
686 | count = size;
|
---|
687 | }
|
---|
688 | r = ini_func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid % size)]
|
---|
689 | ^ psfmt32[idxof((size - 1) % size)]);
|
---|
690 | psfmt32[idxof(mid % size)] += r;
|
---|
691 | r += key_length;
|
---|
692 | psfmt32[idxof((mid + lag) % size)] += r;
|
---|
693 | psfmt32[idxof(0)] = r;
|
---|
694 | count--;
|
---|
695 | for (i = 1, j = 0; (j < count) && (j < key_length); j++) {
|
---|
696 | r = ini_func1(psfmt32[idxof(i)]
|
---|
697 | ^ psfmt32[idxof((i + mid) % size)]
|
---|
698 | ^ psfmt32[idxof((i + size - 1) % size)]);
|
---|
699 | psfmt32[idxof((i + mid) % size)] += r;
|
---|
700 | r += init_key[j] + i;
|
---|
701 | psfmt32[idxof((i + mid + lag) % size)] += r;
|
---|
702 | psfmt32[idxof(i)] = r;
|
---|
703 | i = (i + 1) % size;
|
---|
704 | }
|
---|
705 | for (; j < count; j++) {
|
---|
706 | r = ini_func1(psfmt32[idxof(i)]
|
---|
707 | ^ psfmt32[idxof((i + mid) % size)]
|
---|
708 | ^ psfmt32[idxof((i + size - 1) % size)]);
|
---|
709 | psfmt32[idxof((i + mid) % size)] += r;
|
---|
710 | r += i;
|
---|
711 | psfmt32[idxof((i + mid + lag) % size)] += r;
|
---|
712 | psfmt32[idxof(i)] = r;
|
---|
713 | i = (i + 1) % size;
|
---|
714 | }
|
---|
715 | for (j = 0; j < size; j++) {
|
---|
716 | r = ini_func2(psfmt32[idxof(i)]
|
---|
717 | + psfmt32[idxof((i + mid) % size)]
|
---|
718 | + psfmt32[idxof((i + size - 1) % size)]);
|
---|
719 | psfmt32[idxof((i + mid) % size)] ^= r;
|
---|
720 | r -= i;
|
---|
721 | psfmt32[idxof((i + mid + lag) % size)] ^= r;
|
---|
722 | psfmt32[idxof(i)] = r;
|
---|
723 | i = (i + 1) % size;
|
---|
724 | }
|
---|
725 | initial_mask(dsfmt);
|
---|
726 | period_certification(dsfmt);
|
---|
727 | dsfmt->idx = DSFMT_N64;
|
---|
728 | #if defined(HAVE_SSE2)
|
---|
729 | setup_const();
|
---|
730 | #endif
|
---|
731 | }
|
---|
732 | #if defined(__INTEL_COMPILER)
|
---|
733 | # pragma warning(default:981)
|
---|
734 | #endif
|
---|