cloudy  trunk
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TestVectorize.cpp
Go to the documentation of this file.
1 #include "cdstd.h"
2 #include <UnitTest++.h>
3 #include "cddefines.h"
4 #include "thirdparty.h"
5 #include "vectorize.h"
6 
7 namespace {
8 
9  TEST(TestAvxPool)
10  {
11  // there is not a lot we can check here since the interesting bits are hidden
12  // behind the interface, so the real testing will be done with valgrind-test
13  t_avx_pool pool;
14  init_genrand( 376356017L );
15  for( int i=0; i < 2048; ++i )
16  {
17  size_t sz = genrand_int32()%200000;
18  char *p = static_cast<char*>(pool.avx_alloc(sz));
19  size_t ip = reinterpret_cast<size_t>(p);
20  // test for correct alignment
21  CHECK( p != NULL && (ip/CD_ALIGN)*CD_ALIGN == ip );
22  for( size_t j=0; j < sz; ++j )
23  p[j] = 'c';
24  pool.avx_free(p);
25  }
26  // check that many simultaneous requests work correctly
27  const int ntest = 200;
28  char* pp[ntest];
29  for( int i=0; i < ntest; ++i )
30  {
31  pp[i] = static_cast<char*>(pool.avx_alloc(4000));
32  size_t ip = reinterpret_cast<size_t>(pp[i]);
33  CHECK( pp[i] != NULL && (ip/CD_ALIGN)*CD_ALIGN == ip );
34  for( size_t j=0; j < 4000; ++j )
35  pp[i][j] = 'c';
36  }
37  for( int i=0; i < ntest; ++i )
38  pool.avx_free(pp[i]);
39  }
40 
41  TEST(TestAvxPtr)
42  {
43  avx_ptr<double,true> p1(100);
44  CHECK( p1.data() != NULL && p1.data() == p1.ptr0() );
45  for( int i=0; i < 100; ++i )
46  p1[i] = 1.;
47  CHECK_THROW(( p1[-1] = 1. ),out_of_range);
48  CHECK_THROW(( p1[100] = 1. ),out_of_range);
49  avx_ptr<double> p2(0);
50  CHECK( p2.data() == NULL );
51  avx_ptr<double> p3(-10);
52  CHECK( p3.data() == NULL );
53  avx_ptr<double,true> p4(-10,100);
54  CHECK( p4.data() != NULL && p4.data() == p4.ptr0()-10 );
55  for( int i=-10; i < 100; ++i )
56  p4[i] = 1.;
57  CHECK_THROW(( p4[-11] = 1. ),out_of_range);
58  CHECK_THROW(( p1[100] = 1. ),out_of_range);
59  avx_ptr<double> p5(10,10);
60  CHECK( p5.data() == NULL );
61  avx_ptr<double> p6(10,-10);
62  CHECK( p6.data() == NULL );
63  }
64 
65  TEST(TestAllocatorAvx)
66  {
67  // there is not a lot we can check here since the interesting bits are hidden
68  // behind the interface, so the real testing will be done with valgrind-test
69  init_genrand( 376833017L );
70  for( int i=0; i < 2048; ++i )
71  {
72  size_t sz = genrand_int32()%200000;
73  vector< int, allocator_avx<int> > a(sz);
74  size_t ip = reinterpret_cast<size_t>(get_ptr(a));
75  CHECK( ip != 0 && (ip/CD_ALIGN)*CD_ALIGN == ip );
76  for( size_t j=0; j < sz; ++j )
77  a[j] = 3;
78  }
79  }
80 
81  TEST(TestMallocAvx)
82  {
83  // there is not a lot we can check here since the interesting bits are hidden
84  // behind the interface, so the real testing will be done with valgrind-test
85  init_genrand( 376115017L );
86  for( int i=0; i < 2048; ++i )
87  {
88  size_t sz = genrand_int32()%200000;
89  int* a = (int*)MALLOC_AVX(sz*sizeof(int));
90  size_t ip = reinterpret_cast<size_t>(a);
91  CHECK( ip != 0 && (ip/CD_ALIGN)*CD_ALIGN == ip );
92  for( size_t j=0; j < sz; ++j )
93  a[j] = 3;
94  free(a);
95  }
96  }
97 
98  TEST(TestReduceAd)
99  {
100  double a[32];
101  for( int i=0; i < 32; ++i )
102  a[i] = double(i);
103  for( int ilo=0; ilo < 32; ilo++ )
104  {
105  for( int ihi=ilo+1; ihi <= 32; ihi++ )
106  {
107  double val = reduce_a( a, ilo, ihi );
108  double expect = double(ihi*(ihi-1)/2 - ilo*(ilo-1)/2);
109  CHECK( fp_equal( val, expect ) );
110  }
111  }
112  }
113 
114  TEST(TestReduceAf)
115  {
116  sys_float a[32];
117  for( int i=0; i < 32; ++i )
118  a[i] = sys_float(i);
119  for( int ilo=0; ilo < 32; ilo++ )
120  {
121  for( int ihi=ilo+1; ihi <= 32; ihi++ )
122  {
123  sys_float val = reduce_a( a, ilo, ihi );
124  sys_float expect = sys_float(ihi*(ihi-1)/2 - ilo*(ilo-1)/2);
125  CHECK( fp_equal( val, expect ) );
126  }
127  }
128  }
129 
130  TEST(TestReduceABd)
131  {
132  double a[32], b[32];
133  for( int i=0; i < 32; ++i )
134  {
135  a[i] = double(i);
136  b[i] = double(i);
137  }
138  for( int ilo=0; ilo < 32; ilo++ )
139  {
140  for( int ihi=ilo+1; ihi <= 32; ihi++ )
141  {
142  double val = reduce_ab( a, b, ilo, ihi );
143  double expect = double(ihi*(ihi-1)*(2*ihi-1)/6 - ilo*(ilo-1)*(2*ilo-1)/6);
144  CHECK( fp_equal( val, expect ) );
145  }
146  }
147  }
148 
149  TEST(TestReduceABdf)
150  {
151  double a[32];
152  sys_float b[32];
153  for( int i=0; i < 32; ++i )
154  {
155  a[i] = double(i);
156  b[i] = sys_float(i);
157  }
158  for( int ilo=0; ilo < 32; ilo++ )
159  {
160  for( int ihi=ilo+1; ihi <= 32; ihi++ )
161  {
162  double val = reduce_ab( a, b, ilo, ihi );
163  double expect = double(ihi*(ihi-1)*(2*ihi-1)/6 - ilo*(ilo-1)*(2*ilo-1)/6);
164  CHECK( fp_equal( val, expect ) );
165  }
166  }
167  }
168 
169  TEST(TestReduceABf)
170  {
171  sys_float a[32], b[32];
172  for( int i=0; i < 32; ++i )
173  {
174  a[i] = sys_float(i);
175  b[i] = sys_float(i);
176  }
177  for( int ilo=0; ilo < 32; ilo++ )
178  {
179  for( int ihi=ilo+1; ihi <= 32; ihi++ )
180  {
181  sys_float val = reduce_ab( a, b, ilo, ihi );
182  sys_float expect = sys_float(ihi*(ihi-1)*(2*ihi-1)/6 - ilo*(ilo-1)*(2*ilo-1)/6);
183  CHECK( fp_equal( val, expect ) );
184  }
185  }
186  }
187 
188  TEST(TestReduceABCd)
189  {
190  double a[32], b[32], c[32];
191  for( int i=0; i < 32; ++i )
192  {
193  a[i] = double(i);
194  b[i] = double(i);
195  c[i] = double(i);
196  }
197  for( int ilo=0; ilo < 32; ilo++ )
198  {
199  for( int ihi=ilo+1; ihi <= 32; ihi++ )
200  {
201  double val = reduce_abc( a, b, c, ilo, ihi );
202  double expect = double(pow2(ihi*(ihi-1)/2) - pow2(ilo*(ilo-1)/2));
203  CHECK( fp_equal( val, expect ) );
204  }
205  }
206  }
207 
208  TEST(TestReduceABCddf)
209  {
210  double a[32], b[32];
211  sys_float c[32];
212  for( int i=0; i < 32; ++i )
213  {
214  a[i] = double(i);
215  b[i] = double(i);
216  c[i] = sys_float(i);
217  }
218  for( int ilo=0; ilo < 32; ilo++ )
219  {
220  for( int ihi=ilo+1; ihi <= 32; ihi++ )
221  {
222  double val = reduce_abc( a, b, c, ilo, ihi );
223  double expect = double(pow2(ihi*(ihi-1)/2) - pow2(ilo*(ilo-1)/2));
224  CHECK( fp_equal( val, expect ) );
225  }
226  }
227  }
228 
229  TEST(TestReduceABCdff)
230  {
231  double a[32];
232  sys_float b[32], c[32];
233  for( int i=0; i < 32; ++i )
234  {
235  a[i] = double(i);
236  b[i] = sys_float(i);
237  c[i] = sys_float(i);
238  }
239  for( int ilo=0; ilo < 32; ilo++ )
240  {
241  for( int ihi=ilo+1; ihi <= 32; ihi++ )
242  {
243  double val = reduce_abc( a, b, c, ilo, ihi );
244  double expect = double(pow2(ihi*(ihi-1)/2) - pow2(ilo*(ilo-1)/2));
245  CHECK( fp_equal( val, expect ) );
246  }
247  }
248  }
249 
250  TEST(TestReduceABCf)
251  {
252  sys_float a[32], b[32], c[32];
253  for( int i=0; i < 32; ++i )
254  {
255  a[i] = sys_float(i);
256  b[i] = sys_float(i);
257  c[i] = sys_float(i);
258  }
259  for( int ilo=0; ilo < 32; ilo++ )
260  {
261  for( int ihi=ilo+1; ihi <= 32; ihi++ )
262  {
263  sys_float val = reduce_abc( a, b, c, ilo, ihi );
264  sys_float expect = sys_float(pow2(ihi*(ihi-1)/2) - pow2(ilo*(ilo-1)/2));
265  CHECK( fp_equal( val, expect ) );
266  }
267  }
268  }
269 
270  TEST(TestReduceAB_Ad)
271  {
272  double a[32], b[32];
273  for( int i=0; i < 32; ++i )
274  {
275  a[i] = double(i);
276  b[i] = double(i);
277  }
278  for( int ilo=0; ilo < 32; ilo++ )
279  {
280  for( int ihi=ilo+1; ihi <= 32; ihi++ )
281  {
282  double val2, val = reduce_ab_a( a, b, ilo, ihi, &val2 );
283  double expect = double(ihi*(ihi-1)*(2*ihi-1)/6 - ilo*(ilo-1)*(2*ilo-1)/6);
284  double expect2 = double(ihi*(ihi-1)/2 - ilo*(ilo-1)/2);
285  CHECK( fp_equal( val, expect ) );
286  CHECK( fp_equal( val2, expect2 ) );
287  }
288  }
289  }
290 
291  TEST(TestReduceAB_Adf)
292  {
293  double a[32];
294  sys_float b[32];
295  for( int i=0; i < 32; ++i )
296  {
297  a[i] = double(i);
298  b[i] = sys_float(i);
299  }
300  for( int ilo=0; ilo < 32; ilo++ )
301  {
302  for( int ihi=ilo+1; ihi <= 32; ihi++ )
303  {
304  double val2, val = reduce_ab_a( a, b, ilo, ihi, &val2 );
305  double expect = double(ihi*(ihi-1)*(2*ihi-1)/6 - ilo*(ilo-1)*(2*ilo-1)/6);
306  double expect2 = double(ihi*(ihi-1)/2 - ilo*(ilo-1)/2);
307  CHECK( fp_equal( val, expect ) );
308  CHECK( fp_equal( val2, expect2 ) );
309  val = reduce_ab_a( b, a, ilo, ihi, &val2 );
310  CHECK( fp_equal( val, expect ) );
311  CHECK( fp_equal( val2, expect2 ) );
312  }
313  }
314  }
315 
316  TEST(TestReduceAB_Af)
317  {
318  double a[32], b[32];
319  for( int i=0; i < 32; ++i )
320  {
321  a[i] = double(i);
322  b[i] = double(i);
323  }
324  for( int ilo=0; ilo < 32; ilo++ )
325  {
326  for( int ihi=ilo+1; ihi <= 32; ihi++ )
327  {
328  double val2, val = reduce_ab_a( a, b, ilo, ihi, &val2 );
329  double expect = double(ihi*(ihi-1)*(2*ihi-1)/6 - ilo*(ilo-1)*(2*ilo-1)/6);
330  double expect2 = double(ihi*(ihi-1)/2 - ilo*(ilo-1)/2);
331  CHECK( fp_equal( val, expect ) );
332  CHECK( fp_equal( val2, expect2 ) );
333  }
334  }
335  }
336 
337  TEST(TestReduceABC_ABd)
338  {
339  double a[32], b[32], c[32];
340  for( int i=0; i < 32; ++i )
341  {
342  a[i] = double(i);
343  b[i] = double(i);
344  c[i] = double(i);
345  }
346  for( int ilo=0; ilo < 32; ilo++ )
347  {
348  for( int ihi=ilo+1; ihi <= 32; ihi++ )
349  {
350  double val2, val = reduce_abc_ab( a, b, c, ilo, ihi, &val2 );
351  double expect = double(pow2(ihi*(ihi-1)/2) - pow2(ilo*(ilo-1)/2));
352  double expect2 = double(ihi*(ihi-1)*(2*ihi-1)/6 - ilo*(ilo-1)*(2*ilo-1)/6);
353  CHECK( fp_equal( val, expect ) );
354  CHECK( fp_equal( val2, expect2 ) );
355  }
356  }
357  }
358 
359  TEST(TestExp10d)
360  {
361  // exp10(double) is defined in cddefines.h
362  double maxarg = log10(DBL_MAX);
363  init_genrand( 392916017L );
364  for( int i=0; i < 2048; ++i )
365  {
366  double arg = genrand_real1()*633. - 324.;
367  if( arg < maxarg )
368  {
369  double val1 = exp10(arg);
370  double val2 = pow(10.,arg);
371  // inaccuracies in pow(10.,x) will contribute to the error as well...
372  // so this test may differ a bit from one glibc version to another.
373  CHECK( fp_equal( val1, val2, 10 ) );
374  }
375  }
376  }
377 
378  TEST(TestExp10f)
379  {
380  // exp10(sys_float) is defined in cddefines.h
381  sys_float maxarg = log10f(FLT_MAX);
382  init_genrand( 392977017L );
383  for( int i=0; i < 2048; ++i )
384  {
385  sys_float arg = sys_float(genrand_real1())*84.f - 45.f;
386  if( arg < maxarg )
387  {
388  sys_float val1 = exp10(arg);
389  sys_float val2 = sys_float(pow(10.,double(arg)));
390  CHECK( fp_equal( val1, val2, 6 ) );
391  }
392  }
393  }
394 
395  TEST(TestVexpd)
396  {
397  const int sz = 2048;
398  ALIGNED(CD_ALIGN) double arg[sz];
399  ALIGNED(CD_ALIGN) double val[sz];
400 #ifdef __AVX__
401  double minarg = log(DBL_MIN);
402  // check correct behavior near the underflow margin
403  // our implementation doesn't support gradual underflow
404  double a1 = nextafter(minarg,-DBL_MAX);
405  vexp(val,a1,a1,a1,a1);
406  CHECK( val[0] == 0. );
407  double a2 = nextafter(minarg,DBL_MAX);
408  vexp(val,a2,a2,a2,a2);
409  CHECK( val[0] > 0. );
410  // check correct behavior near the overflow margin
411  double maxarg = log(DBL_MAX);
412  a1 = nextafter(maxarg,-DBL_MAX);
413  vexp(val,a1,a1,a1,a1);
414  CHECK( val[0] < DBL_MAX );
415  a2 = nextafter(maxarg,DBL_MAX);
416  CHECK_THROW( vexp(val,a2,a2,a2,a2), domain_error );
417  a2 = -numeric_limits<double>().infinity();
418  vexp(val,a2,a2,a2,a2);
419  CHECK( val[0] == 0. );
420  a2 = numeric_limits<double>().infinity();
421  CHECK_THROW( vexp(val,a2,a2,a2,a2), domain_error );
422  a2 = numeric_limits<double>().quiet_NaN();
423  CHECK_THROW( vexp(val,a2,a2,a2,a2), domain_error );
424  // now check results over the entire range
425  init_genrand( 395256017L );
426  for( int i=0; i < sz; )
427  {
428  double x = (genrand_real1()-0.5)*1420.;
429  if( x < maxarg )
430  arg[i++] = x;
431  }
432  vexp( arg, val, 0, sz );
433  for( int i=0; i < sz; ++i )
434  {
435  if( arg[i] < minarg )
436  CHECK( val[i] == 0. );
437  else
438  CHECK( fp_equal( val[i], exp(arg[i]) ) );
439  }
440 #endif
441  for( int i=0; i < 32; ++i )
442  arg[i] = double(i);
443  // finally check that non-aligned arrays are treated correctly
444  for( int i=0; i < 32; ++i )
445  {
446  for( int j=i+1; j <= 32; ++j )
447  {
448  invalidate_array( val, 32*sizeof(double) );
449  vexp( arg, val, i, j );
450  for( int k=0; k < 32; ++k )
451  {
452  if( k < i || k >= j )
453  CHECK( isnan(val[k]) );
454  else
455  CHECK( fp_equal( val[k], exp(arg[k]) ) );
456  }
457  invalidate_array( val, 32*sizeof(double) );
458  vexp( &arg[i], val, 0, j-i );
459  for( int k=0; k < 32; ++k )
460  {
461  if( k >= j-i )
462  CHECK( isnan(val[k]) );
463  else
464  CHECK( fp_equal( val[k], exp(arg[k+i]) ) );
465  }
466  }
467  }
468  }
469 
470  TEST(TestVexp10d)
471  {
472 #ifdef __AVX__
473  const int sz = 2048;
474  ALIGNED(CD_ALIGN) double arg[sz];
475  ALIGNED(CD_ALIGN) double val[sz];
476 
477  double minarg = log10(DBL_MIN);
478  double a1 = nextafter(minarg,-DBL_MAX);
479  vexp10(val,a1,a1,a1,a1);
480  CHECK( val[0] == 0. );
481  double a2 = nextafter(minarg,DBL_MAX);
482  vexp10(val,a2,a2,a2,a2);
483  CHECK( val[0] > 0. );
484  double maxarg = log10(DBL_MAX);
485  a1 = nextafter(maxarg,-DBL_MAX);
486  vexp10(val,a1,a1,a1,a1);
487  CHECK( val[0] < DBL_MAX );
488  a2 = nextafter(maxarg,DBL_MAX);
489  CHECK_THROW( vexp10(val,a2,a2,a2,a2), domain_error );
490  a2 = -numeric_limits<double>().infinity();
491  vexp10(val,a2,a2,a2,a2);
492  CHECK( val[0] == 0. );
493  a2 = numeric_limits<double>().infinity();
494  CHECK_THROW( vexp10(val,a2,a2,a2,a2), domain_error );
495  a2 = numeric_limits<double>().quiet_NaN();
496  CHECK_THROW( vexp10(val,a2,a2,a2,a2), domain_error );
497  init_genrand( 395316017L );
498  for( int i=0; i < sz; )
499  {
500  double x = (genrand_real1()-0.5)*617.;
501  if( x < maxarg )
502  arg[i++] = x;
503  }
504  vexp10( arg, val, 0, sz );
505  for( int i=0; i < sz; ++i )
506  {
507  if( arg[i] < minarg )
508  CHECK( val[i] == 0. );
509  else
510  CHECK( fp_equal( val[i], pow(10.,arg[i]), 10 ) );
511  }
512 #else
513  CHECK( fp_equal( 1., 1. ) );
514 #endif
515  }
516 
517  TEST(TestVexpm1d)
518  {
519 #ifdef __AVX__
520  const int sz = 2048;
521  ALIGNED(CD_ALIGN) double arg[sz];
522  ALIGNED(CD_ALIGN) double val[sz];
523 
524  double maxarg = log(DBL_MAX);
525  double a1 = nextafter(maxarg,-DBL_MAX);
526  vexpm1(val,a1,a1,a1,a1);
527  CHECK( val[0] < DBL_MAX );
528  double a2 = nextafter(maxarg,DBL_MAX);
529  CHECK_THROW( vexpm1(val,a2,a2,a2,a2), domain_error );
530  a2 = -numeric_limits<double>().infinity();
531  vexpm1(val,a2,a2,a2,a2);
532  CHECK( val[0] == -1. );
533  a2 = numeric_limits<double>().infinity();
534  CHECK_THROW( vexpm1(val,a2,a2,a2,a2), domain_error );
535  a2 = numeric_limits<double>().quiet_NaN();
536  CHECK_THROW( vexpm1(val,a2,a2,a2,a2), domain_error );
537  double y[8];
538  vexpm1( y, 1.e-10, 8.e-8, 4.e-5, 6.e-3, 2.e-1, 1., 1., 1. );
539  // constants below were derived from Abramowitz & Stegun, Handbook of Mathematical Functions
540  CHECK( fp_equal( y[0], 1.000000000050000e-10 ) );
541  CHECK( fp_equal( y[1], 8.00000032000000853e-8 ) );
542  CHECK( fp_equal( y[2], 4.00008000106667733342e-5 ) );
543  CHECK( fp_equal( y[3], 6.0180360540648648555845e-3 ) );
544  CHECK( fp_equal( y[4], 2.214027581601698339210720e-1 ) );
545  CHECK( fp_equal( y[5], 1.7182818284590452353602875 ) );
546  init_genrand( 377216017L );
547  for( int i=0; i < sz; )
548  {
549  double x = genrand_real1()*749.-39.;
550  if( x < maxarg )
551  arg[i++] = x;
552  }
553  vexpm1( arg, val, 0, sz );
554  for( int i=0; i < sz; ++i )
555  CHECK( fp_equal( val[i], expm1(arg[i]) ) );
556 #else
557  CHECK( fp_equal( 1., 1. ) );
558 #endif
559  }
560 
561  TEST(TestVexpf)
562  {
563  const int sz = 2048;
564  ALIGNED(CD_ALIGN) sys_float arg[sz];
565  ALIGNED(CD_ALIGN) sys_float val[sz];
566 #ifdef __AVX__
567  sys_float minarg = logf(FLT_MIN);
568  sys_float a1 = nextafterf(minarg,-FLT_MAX);
569  vexp(val,a1,a1,a1,a1);
570  CHECK( val[0] == 0.f );
571  sys_float a2 = nextafterf(minarg,FLT_MAX);
572  vexp(val,a2,a2,a2,a2);
573  CHECK( val[0] > 0.f );
574  sys_float maxarg = logf(FLT_MAX);
575  a1 = nextafterf(maxarg,-FLT_MAX);
576  vexp(val,a1,a1,a1,a1);
577  CHECK( val[0] < FLT_MAX );
578  a2 = nextafterf(maxarg,FLT_MAX);
579  CHECK_THROW( vexp(val,a2,a2,a2,a2), domain_error );
580  a2 = -numeric_limits<sys_float>().infinity();
581  vexp(val,a2,a2,a2,a2);
582  CHECK( val[0] == 0.f );
583  a2 = numeric_limits<sys_float>().infinity();
584  CHECK_THROW( vexp(val,a2,a2,a2,a2), domain_error );
585  a2 = numeric_limits<sys_float>().quiet_NaN();
586  CHECK_THROW( vexp(val,a2,a2,a2,a2), domain_error );
587  init_genrand( 395288017L );
588  for( int i=0; i < sz; )
589  {
590  sys_float x = sys_float(genrand_real1()-0.5)*178.f;
591  if( x < maxarg )
592  arg[i++] = x;
593  }
594  vexp( arg, val, 0, sz );
595  for( int i=0; i < sz; ++i )
596  {
597  if( arg[i] < minarg )
598  CHECK( val[i] == 0.f );
599  else
600  CHECK( fp_equal( val[i], expf(arg[i]) ) );
601  }
602 #endif
603  for( int i=0; i < 32; ++i )
604  arg[i] = sys_float(i);
605  for( int i=0; i < 32; ++i )
606  {
607  for( int j=i+1; j <= 32; ++j )
608  {
609  invalidate_array( val, 32*sizeof(sys_float) );
610  vexp( arg, val, i, j );
611  for( int k=0; k < 32; ++k )
612  {
613  if( k < i || k >= j )
614  CHECK( isnan(val[k]) );
615  else
616  CHECK( fp_equal( val[k], expf(arg[k]) ) );
617  }
618  invalidate_array( val, 32*sizeof(sys_float) );
619  vexp( &arg[i], val, 0, j-i );
620  for( int k=0; k < 32; ++k )
621  {
622  if( k >= j-i )
623  CHECK( isnan(val[k]) );
624  else
625  CHECK( fp_equal( val[k], expf(arg[k+i]) ) );
626  }
627  }
628  }
629  }
630 
631  TEST(TestVexp10f)
632  {
633 #ifdef __AVX__
634  const int sz = 2048;
635  ALIGNED(CD_ALIGN) sys_float arg[sz];
636  ALIGNED(CD_ALIGN) sys_float val[sz];
637 
638  sys_float minarg = log10f(FLT_MIN);
639  sys_float a1 = nextafterf(minarg,-FLT_MAX);
640  vexp10(val,a1,a1,a1,a1);
641  CHECK( val[0] == 0.f );
642  sys_float a2 = nextafterf(minarg,FLT_MAX);
643  vexp10(val,a2,a2,a2,a2);
644  CHECK( val[0] > 0.f );
645  sys_float maxarg = log10f(FLT_MAX);
646  a1 = nextafterf(maxarg,-FLT_MAX);
647  vexp10(val,a1,a1,a1,a1);
648  CHECK( val[0] < FLT_MAX );
649  a2 = nextafterf(maxarg,FLT_MAX);
650  CHECK_THROW( vexp10(val,a2,a2,a2,a2), domain_error );
651  a2 = -numeric_limits<sys_float>().infinity();
652  vexp10(val,a2,a2,a2,a2);
653  CHECK( val[0] == 0.f );
654  a2 = numeric_limits<sys_float>().infinity();
655  CHECK_THROW( vexp10(val,a2,a2,a2,a2), domain_error );
656  a2 = numeric_limits<sys_float>().quiet_NaN();
657  CHECK_THROW( vexp10(val,a2,a2,a2,a2), domain_error );
658  init_genrand( 395313517L );
659  for( int i=0; i < sz; )
660  {
661  sys_float x = sys_float(genrand_real1()-0.5)*78.f;
662  if( x < maxarg )
663  arg[i++] = x;
664  }
665  vexp10( arg, val, 0, sz );
666  for( int i=0; i < sz; ++i )
667  {
668  if( arg[i] < minarg )
669  CHECK( val[i] == 0.f );
670  else
671  CHECK( fp_equal( val[i], powf(10.f,arg[i]), 8 ) );
672  }
673 #else
674  CHECK( fp_equal( 1., 1. ) );
675 #endif
676  }
677 
678  TEST(TestVexpm1f)
679  {
680 #ifdef __AVX__
681  const int sz = 2048;
682  ALIGNED(CD_ALIGN) sys_float arg[sz];
683  ALIGNED(CD_ALIGN) sys_float val[sz];
684 
685  sys_float maxarg = logf(FLT_MAX);
686  sys_float a1 = nextafterf(maxarg,-FLT_MAX);
687  vexpm1(val,a1,a1,a1,a1);
688  CHECK( val[0] < FLT_MAX );
689  sys_float a2 = nextafterf(maxarg,FLT_MAX);
690  CHECK_THROW( vexpm1(val,a2,a2,a2,a2), domain_error );
691  a2 = -numeric_limits<sys_float>().infinity();
692  vexpm1(val,a2,a2,a2,a2);
693  CHECK( val[0] == -1.f );
694  a2 = numeric_limits<sys_float>().infinity();
695  CHECK_THROW( vexpm1(val,a2,a2,a2,a2), domain_error );
696  a2 = numeric_limits<sys_float>().quiet_NaN();
697  CHECK_THROW( vexpm1(val,a2,a2,a2,a2), domain_error );
698  sys_float y[8];
699  vexpm1( y, 1.e-10f, 8.e-8f, 4.e-5f, 6.e-3f, 2.e-1f, 1.f, 1.f, 1.f );
700  // constants below were derived from Abramowitz & Stegun, Handbook of Mathematical Functions
701  CHECK( fp_equal( y[0], 1.000000000050000e-10f ) );
702  CHECK( fp_equal( y[1], 8.00000032000000853e-8f ) );
703  CHECK( fp_equal( y[2], 4.00008000106667733342e-5f ) );
704  CHECK( fp_equal( y[3], 6.0180360540648648555845e-3f ) );
705  CHECK( fp_equal( y[4], 2.214027581601698339210720e-1f ) );
706  CHECK( fp_equal( y[5], 1.7182818284590452353602875f ) );
707  init_genrand( 395324417L );
708  for( int i=0; i < sz; )
709  {
710  sys_float x = sys_float(genrand_real1())*108.f-19.f;
711  if( x < maxarg )
712  arg[i++] = x;
713  }
714  vexpm1( arg, val, 0, sz );
715  for( int i=0; i < sz; ++i )
716  CHECK( fp_equal( val[i], expm1f(arg[i]) ) );
717 #else
718  CHECK( fp_equal( 1., 1. ) );
719 #endif
720  }
721 
722  TEST(TestVexpdx4)
723  {
724  double y[4];
725  vexp(y,1.1,1.2,1.3,1.4);
726  CHECK( fp_equal( y[0], exp(1.1) ) );
727  CHECK( fp_equal( y[1], exp(1.2) ) );
728  CHECK( fp_equal( y[2], exp(1.3) ) );
729  CHECK( fp_equal( y[3], exp(1.4) ) );
730  }
731 
732  TEST(TestVexp10dx4)
733  {
734  double y[4];
735  vexp10(y,1.1,1.2,1.3,1.4);
736  CHECK( fp_equal( y[0], pow(10.,1.1) ) );
737  CHECK( fp_equal( y[1], pow(10.,1.2) ) );
738  CHECK( fp_equal( y[2], pow(10.,1.3) ) );
739  CHECK( fp_equal( y[3], pow(10.,1.4) ) );
740  }
741 
742  TEST(TestVexpm1dx4)
743  {
744  double y[4];
745  vexpm1(y,1.e-10,1.2,1.3,1.4);
746  CHECK( fp_equal( y[0], expm1(1.e-10) ) );
747  CHECK( fp_equal( y[1], expm1(1.2) ) );
748  CHECK( fp_equal( y[2], expm1(1.3) ) );
749  CHECK( fp_equal( y[3], expm1(1.4) ) );
750  }
751 
752  TEST(TestVexpdx8)
753  {
754  double y[8];
755  vexp(y,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8);
756  CHECK( fp_equal( y[0], exp(1.1) ) );
757  CHECK( fp_equal( y[1], exp(1.2) ) );
758  CHECK( fp_equal( y[2], exp(1.3) ) );
759  CHECK( fp_equal( y[3], exp(1.4) ) );
760  CHECK( fp_equal( y[4], exp(1.5) ) );
761  CHECK( fp_equal( y[5], exp(1.6) ) );
762  CHECK( fp_equal( y[6], exp(1.7) ) );
763  CHECK( fp_equal( y[7], exp(1.8) ) );
764  }
765 
766  TEST(TestVexp10dx8)
767  {
768  double y[8];
769  vexp10(y,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8);
770  CHECK( fp_equal( y[0], pow(10.,1.1) ) );
771  CHECK( fp_equal( y[1], pow(10.,1.2) ) );
772  CHECK( fp_equal( y[2], pow(10.,1.3) ) );
773  CHECK( fp_equal( y[3], pow(10.,1.4) ) );
774  CHECK( fp_equal( y[4], pow(10.,1.5) ) );
775  CHECK( fp_equal( y[5], pow(10.,1.6) ) );
776  CHECK( fp_equal( y[6], pow(10.,1.7) ) );
777  CHECK( fp_equal( y[7], pow(10.,1.8) ) );
778  }
779 
780  TEST(TestVexpm1dx8)
781  {
782  double y[8];
783  vexpm1(y,1.e-10,1.2,1.3,1.4,1.5,1.6,1.7,1.8);
784  CHECK( fp_equal( y[0], expm1(1.e-10) ) );
785  CHECK( fp_equal( y[1], expm1(1.2) ) );
786  CHECK( fp_equal( y[2], expm1(1.3) ) );
787  CHECK( fp_equal( y[3], expm1(1.4) ) );
788  CHECK( fp_equal( y[4], expm1(1.5) ) );
789  CHECK( fp_equal( y[5], expm1(1.6) ) );
790  CHECK( fp_equal( y[6], expm1(1.7) ) );
791  CHECK( fp_equal( y[7], expm1(1.8) ) );
792  }
793 
794  TEST(TestVexpfx4)
795  {
796  sys_float y[4];
797  vexp(y,1.1f,1.2f,1.3f,1.4f);
798  CHECK( fp_equal( y[0], expf(1.1f) ) );
799  CHECK( fp_equal( y[1], expf(1.2f) ) );
800  CHECK( fp_equal( y[2], expf(1.3f) ) );
801  CHECK( fp_equal( y[3], expf(1.4f) ) );
802  }
803 
804  TEST(TestVexp10fx4)
805  {
806  sys_float y[4];
807  vexp10(y,1.1f,1.2f,1.3f,1.4f);
808  CHECK( fp_equal( y[0], powf(10.f,1.1f) ) );
809  CHECK( fp_equal( y[1], powf(10.f,1.2f) ) );
810  CHECK( fp_equal( y[2], powf(10.f,1.3f) ) );
811  CHECK( fp_equal( y[3], powf(10.f,1.4f) ) );
812  }
813 
814  TEST(TestVexpm1fx4)
815  {
816  sys_float y[4];
817  vexpm1(y,1.e-10f,1.2f,1.3f,1.4f);
818  CHECK( fp_equal( y[0], expm1f(1.e-10f) ) );
819  CHECK( fp_equal( y[1], expm1f(1.2f) ) );
820  CHECK( fp_equal( y[2], expm1f(1.3f) ) );
821  CHECK( fp_equal( y[3], expm1f(1.4f) ) );
822  }
823 
824  TEST(TestVexpfx8)
825  {
826  sys_float y[8];
827  vexp(y,1.1f,1.2f,1.3f,1.4f,1.5f,1.6f,1.7f,1.8f);
828  CHECK( fp_equal( y[0], expf(1.1f) ) );
829  CHECK( fp_equal( y[1], expf(1.2f) ) );
830  CHECK( fp_equal( y[2], expf(1.3f) ) );
831  CHECK( fp_equal( y[3], expf(1.4f) ) );
832  CHECK( fp_equal( y[4], expf(1.5f) ) );
833  CHECK( fp_equal( y[5], expf(1.6f) ) );
834  CHECK( fp_equal( y[6], expf(1.7f) ) );
835  CHECK( fp_equal( y[7], expf(1.8f) ) );
836  }
837 
838  TEST(TestVexp10fx8)
839  {
840  sys_float y[8];
841  vexp10(y,1.1f,1.2f,1.3f,1.4f,1.5f,1.6f,1.7f,1.8f);
842  CHECK( fp_equal( y[0], powf(10.f,1.1f) ) );
843  CHECK( fp_equal( y[1], powf(10.f,1.2f) ) );
844  CHECK( fp_equal( y[2], powf(10.f,1.3f) ) );
845  CHECK( fp_equal( y[3], powf(10.f,1.4f) ) );
846  CHECK( fp_equal( y[4], powf(10.f,1.5f) ) );
847  CHECK( fp_equal( y[5], powf(10.f,1.6f) ) );
848  CHECK( fp_equal( y[6], powf(10.f,1.7f) ) );
849  CHECK( fp_equal( y[7], powf(10.f,1.8f) ) );
850  }
851 
852  TEST(TestVexpm1fx8)
853  {
854  sys_float y[8];
855  vexpm1(y,1.e-10f,1.2f,1.3f,1.4f,1.5f,1.6f,1.7f,1.8f);
856  CHECK( fp_equal( y[0], expm1f(1.e-10f) ) );
857  CHECK( fp_equal( y[1], expm1f(1.2f) ) );
858  CHECK( fp_equal( y[2], expm1f(1.3f) ) );
859  CHECK( fp_equal( y[3], expm1f(1.4f) ) );
860  CHECK( fp_equal( y[4], expm1f(1.5f) ) );
861  CHECK( fp_equal( y[5], expm1f(1.6f) ) );
862  CHECK( fp_equal( y[6], expm1f(1.7f) ) );
863  CHECK( fp_equal( y[7], expm1f(1.8f) ) );
864  }
865 
866  TEST(TestVexpfx16)
867  {
868  sys_float y[16];
869  vexp(y,1.1f,1.2f,1.3f,1.4f,1.5f,1.6f,1.7f,1.8f,2.11f,2.22f,2.33f,2.44f,2.55f,2.66f,2.77f,2.88f);
870  CHECK( fp_equal( y[0], expf(1.1f) ) );
871  CHECK( fp_equal( y[1], expf(1.2f) ) );
872  CHECK( fp_equal( y[2], expf(1.3f) ) );
873  CHECK( fp_equal( y[3], expf(1.4f) ) );
874  CHECK( fp_equal( y[4], expf(1.5f) ) );
875  CHECK( fp_equal( y[5], expf(1.6f) ) );
876  CHECK( fp_equal( y[6], expf(1.7f) ) );
877  CHECK( fp_equal( y[7], expf(1.8f) ) );
878  CHECK( fp_equal( y[8], expf(2.11f) ) );
879  CHECK( fp_equal( y[9], expf(2.22f) ) );
880  CHECK( fp_equal( y[10], expf(2.33f) ) );
881  CHECK( fp_equal( y[11], expf(2.44f) ) );
882  CHECK( fp_equal( y[12], expf(2.55f) ) );
883  CHECK( fp_equal( y[13], expf(2.66f) ) );
884  CHECK( fp_equal( y[14], expf(2.77f) ) );
885  CHECK( fp_equal( y[15], expf(2.88f) ) );
886  }
887 
888  TEST(TestVexp10fx16)
889  {
890  sys_float y[16];
891  vexp10(y,1.1f,1.2f,1.3f,1.4f,1.5f,1.6f,1.7f,1.8f,2.11f,2.22f,2.33f,2.44f,2.55f,2.66f,2.77f,2.88f);
892  CHECK( fp_equal( y[0], powf(10.f,1.1f) ) );
893  CHECK( fp_equal( y[1], powf(10.f,1.2f) ) );
894  CHECK( fp_equal( y[2], powf(10.f,1.3f) ) );
895  CHECK( fp_equal( y[3], powf(10.f,1.4f) ) );
896  CHECK( fp_equal( y[4], powf(10.f,1.5f) ) );
897  CHECK( fp_equal( y[5], powf(10.f,1.6f) ) );
898  CHECK( fp_equal( y[6], powf(10.f,1.7f) ) );
899  CHECK( fp_equal( y[7], powf(10.f,1.8f) ) );
900  CHECK( fp_equal( y[8], powf(10.f,2.11f) ) );
901  CHECK( fp_equal( y[9], powf(10.f,2.22f) ) );
902  CHECK( fp_equal( y[10], powf(10.f,2.33f) ) );
903  CHECK( fp_equal( y[11], powf(10.f,2.44f) ) );
904  CHECK( fp_equal( y[12], powf(10.f,2.55f) ) );
905  CHECK( fp_equal( y[13], powf(10.f,2.66f) ) );
906  CHECK( fp_equal( y[14], powf(10.f,2.77f) ) );
907  CHECK( fp_equal( y[15], powf(10.f,2.88f) ) );
908  }
909 
910  TEST(TestVexpm1fx16)
911  {
912  sys_float y[16];
913  vexpm1(y,1.e-10f,1.2f,1.3f,1.4f,1.5f,1.6f,1.7f,1.8f,2.11f,2.22f,2.33f,2.44f,2.55f,2.66f,2.77f,2.88f);
914  CHECK( fp_equal( y[0], expm1f(1.e-10f) ) );
915  CHECK( fp_equal( y[1], expm1f(1.2f) ) );
916  CHECK( fp_equal( y[2], expm1f(1.3f) ) );
917  CHECK( fp_equal( y[3], expm1f(1.4f) ) );
918  CHECK( fp_equal( y[4], expm1f(1.5f) ) );
919  CHECK( fp_equal( y[5], expm1f(1.6f) ) );
920  CHECK( fp_equal( y[6], expm1f(1.7f) ) );
921  CHECK( fp_equal( y[7], expm1f(1.8f) ) );
922  CHECK( fp_equal( y[8], expm1f(2.11f) ) );
923  CHECK( fp_equal( y[9], expm1f(2.22f) ) );
924  CHECK( fp_equal( y[10], expm1f(2.33f) ) );
925  CHECK( fp_equal( y[11], expm1f(2.44f) ) );
926  CHECK( fp_equal( y[12], expm1f(2.55f) ) );
927  CHECK( fp_equal( y[13], expm1f(2.66f) ) );
928  CHECK( fp_equal( y[14], expm1f(2.77f) ) );
929  CHECK( fp_equal( y[15], expm1f(2.88f) ) );
930  }
931 
932  TEST(TestVlogd)
933  {
934 #ifdef __AVX__
935  double x, y[4];
936  x = DBL_MIN/10.;
937  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
938  x = 0.;
939  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
940  x = -1.;
941  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
942  x = numeric_limits<double>().infinity();
943  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
944  x = -numeric_limits<double>().infinity();
945  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
946  x = numeric_limits<double>().quiet_NaN();
947  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
948 
949  const int sz = 2048;
950  ALIGNED(CD_ALIGN) double arg[sz];
951  ALIGNED(CD_ALIGN) double val[sz];
952  init_genrand( 395317717L );
953  for( int i=0; i < sz; ++i )
954  arg[i] = exp(genrand_real1()*1418.17-708.39);
955  vlog( arg, val, 0, sz );
956  for( int i=0; i < sz; ++i )
957  CHECK( fp_equal( val[i], log(arg[i]) ) );
958 #else
959  CHECK( fp_equal( 1., 1. ) );
960 #endif
961  }
962 
963  TEST(TestVlog10d)
964  {
965 #ifdef __AVX__
966  double x, y[4];
967  x = DBL_MIN/10.;
968  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
969  x = 0.;
970  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
971  x = -1.;
972  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
973  x = numeric_limits<double>().infinity();
974  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
975  x = -numeric_limits<double>().infinity();
976  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
977  x = numeric_limits<double>().quiet_NaN();
978  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
979 
980  const int sz = 2048;
981  ALIGNED(CD_ALIGN) double arg[sz];
982  ALIGNED(CD_ALIGN) double val[sz];
983  init_genrand( 395288717L );
984  for( int i=0; i < sz; ++i )
985  arg[i] = exp(genrand_real1()*1418.17-708.39);
986  vlog10( arg, val, 0, sz );
987  for( int i=0; i < sz; ++i )
988  CHECK( fp_equal( val[i], log10(arg[i]) ) );
989 #else
990  CHECK( fp_equal( 1., 1. ) );
991 #endif
992  }
993 
994  TEST(TestVlog1pd)
995  {
996 #ifdef __AVX__
997  double x, y[4];
998  x = -1.;
999  CHECK_THROW( vlog1p(y,x,x,x,x), domain_error );
1000  x = -2.;
1001  CHECK_THROW( vlog1p(y,x,x,x,x), domain_error );
1002  x = numeric_limits<double>().infinity();
1003  CHECK_THROW( vlog1p(y,x,x,x,x), domain_error );
1004  x = -numeric_limits<double>().infinity();
1005  CHECK_THROW( vlog1p(y,x,x,x,x), domain_error );
1006  x = numeric_limits<double>().quiet_NaN();
1007  CHECK_THROW( vlog1p(y,x,x,x,x), domain_error );
1008 
1009  const int sz = 2048;
1010  ALIGNED(CD_ALIGN) double arg[sz];
1011  ALIGNED(CD_ALIGN) double val[sz];
1012  init_genrand( 337288717L );
1013  // first test arguments close to zero
1014  for( int i=0; i < sz; ++i )
1015  {
1016  arg[i] = exp(genrand_real1()*258.-260.);
1017  if( (i%2) == 1 )
1018  arg[i] *= -1.;
1019  }
1020  vlog1p( arg, val, 0, sz );
1021  for( int i=0; i < sz; ++i )
1022  CHECK( fp_equal( val[i], log1p(arg[i]) ) );
1023  // next test full range
1024  for( int i=0; i < sz; ++i )
1025  arg[i] = exp(genrand_real1()*1418.17-708.39) - 0.9999999999;
1026  vlog1p( arg, val, 0, sz );
1027  for( int i=0; i < sz; ++i )
1028  CHECK( fp_equal( val[i], log1p(arg[i]) ) );
1029 #else
1030  CHECK( fp_equal( 1., 1. ) );
1031 #endif
1032  }
1033 
1034  TEST(TestVlogf)
1035  {
1036 #ifdef __AVX__
1037  sys_float x, y[4];
1038  x = FLT_MIN/10.f;
1039  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
1040  x = 0.f;
1041  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
1042  x = -1.f;
1043  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
1044  x = numeric_limits<sys_float>().infinity();
1045  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
1046  x = -numeric_limits<sys_float>().infinity();
1047  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
1048  x = numeric_limits<sys_float>().quiet_NaN();
1049  CHECK_THROW( vlog(y,x,x,x,x), domain_error );
1050 
1051  const int sz = 2048;
1052  ALIGNED(CD_ALIGN) sys_float arg[sz];
1053  ALIGNED(CD_ALIGN) sys_float val[sz];
1054  init_genrand( 395955717L );
1055  for( int i=0; i < sz; ++i )
1056  arg[i] = exp(genrand_real1()*176.05-87.33);
1057  vlog( arg, val, 0, sz );
1058  for( int i=0; i < sz; ++i )
1059  CHECK( fp_equal( val[i], logf(arg[i]) ) );
1060 #else
1061  CHECK( fp_equal( 1., 1. ) );
1062 #endif
1063  }
1064 
1065  TEST(TestVlog10f)
1066  {
1067 #ifdef __AVX__
1068  sys_float x, y[4];
1069  x = FLT_MIN/10.f;
1070  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
1071  x = 0.f;
1072  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
1073  x = -1.f;
1074  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
1075  x = numeric_limits<sys_float>().infinity();
1076  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
1077  x = -numeric_limits<sys_float>().infinity();
1078  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
1079  x = numeric_limits<sys_float>().quiet_NaN();
1080  CHECK_THROW( vlog10(y,x,x,x,x), domain_error );
1081 
1082  const int sz = 2048;
1083  ALIGNED(CD_ALIGN) sys_float arg[sz];
1084  ALIGNED(CD_ALIGN) sys_float val[sz];
1085  init_genrand( 395281237L );
1086  for( int i=0; i < sz; ++i )
1087  arg[i] = exp(genrand_real1()*176.05-87.33);
1088  vlog10( arg, val, 0, sz );
1089  for( int i=0; i < sz; ++i )
1090  CHECK( fp_equal( val[i], log10f(arg[i]) ) );
1091 #else
1092  CHECK( fp_equal( 1., 1. ) );
1093 #endif
1094  }
1095 
1096  TEST(TestVlog1pf)
1097  {
1098 #ifdef __AVX__
1099  sys_float x, y[4];
1100  x = -1.f;
1101  CHECK_THROW( vlog1p(y,x,x,x,x), domain_error );
1102  x = -2.f;
1103  CHECK_THROW( vlog1p(y,x,x,x,x), domain_error );
1104  x = numeric_limits<sys_float>().infinity();
1105  CHECK_THROW( vlog1p(y,x,x,x,x), domain_error );
1106  x = -numeric_limits<sys_float>().infinity();
1107  CHECK_THROW( vlog1p(y,x,x,x,x), domain_error );
1108  x = numeric_limits<sys_float>().quiet_NaN();
1109  CHECK_THROW( vlog1p(y,x,x,x,x), domain_error );
1110 
1111  const int sz = 2048;
1112  ALIGNED(CD_ALIGN) sys_float arg[sz];
1113  ALIGNED(CD_ALIGN) sys_float val[sz];
1114  init_genrand( 337898717L );
1115  // first test arguments close to zero
1116  for( int i=0; i < sz; ++i )
1117  {
1118  arg[i] = exp(genrand_real1()*78.-80.);
1119  if( (i%2) == 1 )
1120  arg[i] *= -1.f;
1121  }
1122  vlog1p( arg, val, 0, sz );
1123  for( int i=0; i < sz; ++i )
1124  CHECK( fp_equal( val[i], log1pf(arg[i]) ) );
1125  // next test full range
1126  for( int i=0; i < sz; ++i )
1127  arg[i] = exp(genrand_real1()*176.05-87.33) - 0.99999;
1128  vlog1p( arg, val, 0, sz );
1129  for( int i=0; i < sz; ++i )
1130  CHECK( fp_equal( val[i], log1pf(arg[i]) ) );
1131 #else
1132  CHECK( fp_equal( 1., 1. ) );
1133 #endif
1134  }
1135 
1136  TEST(TestVlogdx4)
1137  {
1138  double y[4];
1139  vlog(y,11.,22.,33.,44.);
1140  CHECK( fp_equal( y[0], log(11.) ) );
1141  CHECK( fp_equal( y[1], log(22.) ) );
1142  CHECK( fp_equal( y[2], log(33.) ) );
1143  CHECK( fp_equal( y[3], log(44.) ) );
1144  }
1145 
1146  TEST(TestVlog10dx4)
1147  {
1148  double y[4];
1149  vlog10(y,11.,22.,33.,44.);
1150  CHECK( fp_equal( y[0], log10(11.) ) );
1151  CHECK( fp_equal( y[1], log10(22.) ) );
1152  CHECK( fp_equal( y[2], log10(33.) ) );
1153  CHECK( fp_equal( y[3], log10(44.) ) );
1154  }
1155 
1156  TEST(TestVlog1pdx4)
1157  {
1158  double y[4];
1159  vlog1p(y,11.,22.,33.,44.);
1160  CHECK( fp_equal( y[0], log1p(11.) ) );
1161  CHECK( fp_equal( y[1], log1p(22.) ) );
1162  CHECK( fp_equal( y[2], log1p(33.) ) );
1163  CHECK( fp_equal( y[3], log1p(44.) ) );
1164  }
1165 
1166  TEST(TestVlogdx8)
1167  {
1168  double y[8];
1169  vlog(y,11.,22.,33.,44.,55.,66.,77.,88.);
1170  CHECK( fp_equal( y[0], log(11.) ) );
1171  CHECK( fp_equal( y[1], log(22.) ) );
1172  CHECK( fp_equal( y[2], log(33.) ) );
1173  CHECK( fp_equal( y[3], log(44.) ) );
1174  CHECK( fp_equal( y[4], log(55.) ) );
1175  CHECK( fp_equal( y[5], log(66.) ) );
1176  CHECK( fp_equal( y[6], log(77.) ) );
1177  CHECK( fp_equal( y[7], log(88.) ) );
1178  }
1179 
1180  TEST(TestVlog10dx8)
1181  {
1182  double y[8];
1183  vlog10(y,11.,22.,33.,44.,55.,66.,77.,88.);
1184  CHECK( fp_equal( y[0], log10(11.) ) );
1185  CHECK( fp_equal( y[1], log10(22.) ) );
1186  CHECK( fp_equal( y[2], log10(33.) ) );
1187  CHECK( fp_equal( y[3], log10(44.) ) );
1188  CHECK( fp_equal( y[4], log10(55.) ) );
1189  CHECK( fp_equal( y[5], log10(66.) ) );
1190  CHECK( fp_equal( y[6], log10(77.) ) );
1191  CHECK( fp_equal( y[7], log10(88.) ) );
1192  }
1193 
1194  TEST(TestVlog1pdx8)
1195  {
1196  double y[8];
1197  vlog1p(y,11.,22.,33.,44.,55.,66.,77.,88.);
1198  CHECK( fp_equal( y[0], log1p(11.) ) );
1199  CHECK( fp_equal( y[1], log1p(22.) ) );
1200  CHECK( fp_equal( y[2], log1p(33.) ) );
1201  CHECK( fp_equal( y[3], log1p(44.) ) );
1202  CHECK( fp_equal( y[4], log1p(55.) ) );
1203  CHECK( fp_equal( y[5], log1p(66.) ) );
1204  CHECK( fp_equal( y[6], log1p(77.) ) );
1205  CHECK( fp_equal( y[7], log1p(88.) ) );
1206  }
1207 
1208  TEST(TestVlogfx4)
1209  {
1210  sys_float y[4];
1211  vlog(y,11.f,22.f,33.f,44.f);
1212  CHECK( fp_equal( y[0], logf(11.f) ) );
1213  CHECK( fp_equal( y[1], logf(22.f) ) );
1214  CHECK( fp_equal( y[2], logf(33.f) ) );
1215  CHECK( fp_equal( y[3], logf(44.f) ) );
1216  }
1217 
1218  TEST(TestVlog10fx4)
1219  {
1220  sys_float y[4];
1221  vlog10(y,11.f,22.f,33.f,44.f);
1222  CHECK( fp_equal( y[0], log10f(11.f) ) );
1223  CHECK( fp_equal( y[1], log10f(22.f) ) );
1224  CHECK( fp_equal( y[2], log10f(33.f) ) );
1225  CHECK( fp_equal( y[3], log10f(44.f) ) );
1226  }
1227 
1228  TEST(TestVlog1pfx4)
1229  {
1230  sys_float y[4];
1231  vlog1p(y,11.f,22.f,33.f,44.f);
1232  CHECK( fp_equal( y[0], log1pf(11.f) ) );
1233  CHECK( fp_equal( y[1], log1pf(22.f) ) );
1234  CHECK( fp_equal( y[2], log1pf(33.f) ) );
1235  CHECK( fp_equal( y[3], log1pf(44.f) ) );
1236  }
1237 
1238  TEST(TestVlogfx8)
1239  {
1240  sys_float y[8];
1241  vlog(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f);
1242  CHECK( fp_equal( y[0], logf(11.f) ) );
1243  CHECK( fp_equal( y[1], logf(22.f) ) );
1244  CHECK( fp_equal( y[2], logf(33.f) ) );
1245  CHECK( fp_equal( y[3], logf(44.f) ) );
1246  CHECK( fp_equal( y[4], logf(55.f) ) );
1247  CHECK( fp_equal( y[5], logf(66.f) ) );
1248  CHECK( fp_equal( y[6], logf(77.f) ) );
1249  CHECK( fp_equal( y[7], logf(88.f) ) );
1250  }
1251 
1252  TEST(TestVlog10fx8)
1253  {
1254  sys_float y[8];
1255  vlog10(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f);
1256  CHECK( fp_equal( y[0], log10f(11.f) ) );
1257  CHECK( fp_equal( y[1], log10f(22.f) ) );
1258  CHECK( fp_equal( y[2], log10f(33.f) ) );
1259  CHECK( fp_equal( y[3], log10f(44.f) ) );
1260  CHECK( fp_equal( y[4], log10f(55.f) ) );
1261  CHECK( fp_equal( y[5], log10f(66.f) ) );
1262  CHECK( fp_equal( y[6], log10f(77.f) ) );
1263  CHECK( fp_equal( y[7], log10f(88.f) ) );
1264  }
1265 
1266  TEST(TestVlog1pfx8)
1267  {
1268  sys_float y[8];
1269  vlog1p(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f);
1270  CHECK( fp_equal( y[0], log1pf(11.f) ) );
1271  CHECK( fp_equal( y[1], log1pf(22.f) ) );
1272  CHECK( fp_equal( y[2], log1pf(33.f) ) );
1273  CHECK( fp_equal( y[3], log1pf(44.f) ) );
1274  CHECK( fp_equal( y[4], log1pf(55.f) ) );
1275  CHECK( fp_equal( y[5], log1pf(66.f) ) );
1276  CHECK( fp_equal( y[6], log1pf(77.f) ) );
1277  CHECK( fp_equal( y[7], log1pf(88.f) ) );
1278  }
1279 
1280  TEST(TestVlogfx16)
1281  {
1282  sys_float y[16];
1283  vlog(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f,111.f,122.f,133.f,144.f,155.f,166.f,177.f,188.f);
1284  CHECK( fp_equal( y[0], logf(11.f) ) );
1285  CHECK( fp_equal( y[1], logf(22.f) ) );
1286  CHECK( fp_equal( y[2], logf(33.f) ) );
1287  CHECK( fp_equal( y[3], logf(44.f) ) );
1288  CHECK( fp_equal( y[4], logf(55.f) ) );
1289  CHECK( fp_equal( y[5], logf(66.f) ) );
1290  CHECK( fp_equal( y[6], logf(77.f) ) );
1291  CHECK( fp_equal( y[7], logf(88.f) ) );
1292  CHECK( fp_equal( y[8], logf(111.f) ) );
1293  CHECK( fp_equal( y[9], logf(122.f) ) );
1294  CHECK( fp_equal( y[10], logf(133.f) ) );
1295  CHECK( fp_equal( y[11], logf(144.f) ) );
1296  CHECK( fp_equal( y[12], logf(155.f) ) );
1297  CHECK( fp_equal( y[13], logf(166.f) ) );
1298  CHECK( fp_equal( y[14], logf(177.f) ) );
1299  CHECK( fp_equal( y[15], logf(188.f) ) );
1300  }
1301 
1302  TEST(TestVlog10fx16)
1303  {
1304  sys_float y[16];
1305  vlog10(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f,111.f,122.f,133.f,144.f,155.f,166.f,177.f,188.f);
1306  CHECK( fp_equal( y[0], log10f(11.f) ) );
1307  CHECK( fp_equal( y[1], log10f(22.f) ) );
1308  CHECK( fp_equal( y[2], log10f(33.f) ) );
1309  CHECK( fp_equal( y[3], log10f(44.f) ) );
1310  CHECK( fp_equal( y[4], log10f(55.f) ) );
1311  CHECK( fp_equal( y[5], log10f(66.f) ) );
1312  CHECK( fp_equal( y[6], log10f(77.f) ) );
1313  CHECK( fp_equal( y[7], log10f(88.f) ) );
1314  CHECK( fp_equal( y[8], log10f(111.f) ) );
1315  CHECK( fp_equal( y[9], log10f(122.f) ) );
1316  CHECK( fp_equal( y[10], log10f(133.f) ) );
1317  CHECK( fp_equal( y[11], log10f(144.f) ) );
1318  CHECK( fp_equal( y[12], log10f(155.f) ) );
1319  CHECK( fp_equal( y[13], log10f(166.f) ) );
1320  CHECK( fp_equal( y[14], log10f(177.f) ) );
1321  CHECK( fp_equal( y[15], log10f(188.f) ) );
1322  }
1323 
1324  TEST(TestVlog1pfx16)
1325  {
1326  sys_float y[16];
1327  vlog1p(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f,111.f,122.f,133.f,144.f,155.f,166.f,177.f,188.f);
1328  CHECK( fp_equal( y[0], log1pf(11.f) ) );
1329  CHECK( fp_equal( y[1], log1pf(22.f) ) );
1330  CHECK( fp_equal( y[2], log1pf(33.f) ) );
1331  CHECK( fp_equal( y[3], log1pf(44.f) ) );
1332  CHECK( fp_equal( y[4], log1pf(55.f) ) );
1333  CHECK( fp_equal( y[5], log1pf(66.f) ) );
1334  CHECK( fp_equal( y[6], log1pf(77.f) ) );
1335  CHECK( fp_equal( y[7], log1pf(88.f) ) );
1336  CHECK( fp_equal( y[8], log1pf(111.f) ) );
1337  CHECK( fp_equal( y[9], log1pf(122.f) ) );
1338  CHECK( fp_equal( y[10], log1pf(133.f) ) );
1339  CHECK( fp_equal( y[11], log1pf(144.f) ) );
1340  CHECK( fp_equal( y[12], log1pf(155.f) ) );
1341  CHECK( fp_equal( y[13], log1pf(166.f) ) );
1342  CHECK( fp_equal( y[14], log1pf(177.f) ) );
1343  CHECK( fp_equal( y[15], log1pf(188.f) ) );
1344  }
1345 
1346  TEST(TestVsqrtd)
1347  {
1348 #ifdef __AVX__
1349  double x, y[4];
1350  x = -1.;
1351  CHECK_THROW( vsqrt(y,x,x,x,x), domain_error );
1352  x = -numeric_limits<double>().infinity();
1353  CHECK_THROW( vsqrt(y,x,x,x,x), domain_error );
1354  x = numeric_limits<double>().infinity();
1355  CHECK_THROW( vsqrt(y,x,x,x,x), domain_error );
1356  x = numeric_limits<double>().quiet_NaN();
1357  CHECK_THROW( vsqrt(y,x,x,x,x), domain_error );
1358  x = 0.;
1359  vsqrt(y,x,x,x,x);
1360  CHECK( y[0] == 0. );
1361 
1362  const int sz = 2048;
1363  ALIGNED(CD_ALIGN) double arg[sz];
1364  ALIGNED(CD_ALIGN) double val[sz];
1365  init_genrand( 335098717L );
1366  for( int i=0; i < sz; ++i )
1367  arg[i] = exp(genrand_real1()*1418.17-708.39);
1368  vsqrt( arg, val, 0, sz );
1369  for( int i=0; i < sz; ++i )
1370  CHECK( fp_equal( val[i], sqrt(arg[i]) ) );
1371 #else
1372  CHECK( fp_equal( 1., 1. ) );
1373 #endif
1374  }
1375 
1376  TEST(TestVhypotd)
1377  {
1378  const int sz = 2048;
1379  ALIGNED(CD_ALIGN) double arg1[sz];
1380  ALIGNED(CD_ALIGN) double arg2[sz];
1381  ALIGNED(CD_ALIGN) double val[sz];
1382 #ifdef __AVX__
1383  double y[4], x = -numeric_limits<double>().infinity();
1384  CHECK_THROW( vhypot(y,x,1.,x,1.,x,1.,x,1.), domain_error );
1385  CHECK_THROW( vhypot(y,1.,x,1.,x,1.,x,1.,x), domain_error );
1386  x = numeric_limits<double>().infinity();
1387  CHECK_THROW( vhypot(y,x,1.,x,1.,x,1.,x,1.), domain_error );
1388  CHECK_THROW( vhypot(y,1.,x,1.,x,1.,x,1.,x), domain_error );
1389  x = numeric_limits<double>().quiet_NaN();
1390  CHECK_THROW( vhypot(y,x,1.,x,1.,x,1.,x,1.), domain_error );
1391  CHECK_THROW( vhypot(y,1.,x,1.,x,1.,x,1.,x), domain_error );
1392  vhypot(y,0.,0.,1.,0.,0.,1.,DBL_MAX/2.,DBL_MAX/2.);
1393  CHECK( fp_equal( y[0], 0. ) );
1394  CHECK( fp_equal( y[1], 1. ) );
1395  CHECK( fp_equal( y[2], 1. ) );
1396  CHECK( fp_equal( y[3], DBL_MAX/sqrt(2.) ) );
1397  // now check results over the entire range
1398  init_genrand( 395933017L );
1399  for( int i=0; i < sz; ++i )
1400  {
1401  arg1[i] = exp(genrand_real1()*1417.82-708.39);
1402  if( (i%4) < 2 )
1403  arg1[i] = -arg1[i];
1404  arg2[i] = exp(genrand_real1()*1417.82-708.39);
1405  if( (i%2) == 1 )
1406  arg2[i] = -arg2[i];
1407  }
1408  vhypot( arg1, arg2, val, 0, sz );
1409  for( int i=0; i < sz; ++i )
1410  {
1411  CHECK( fp_equal( val[i], hypot(arg1[i], arg2[i]) ) );
1412  }
1413 #endif
1414  for( int i=0; i < 32; ++i )
1415  {
1416  arg1[i] = double(i);
1417  arg2[i] = double(i);
1418  }
1419  // finally check that non-aligned arrays are treated correctly
1420  for( int i=0; i < 32; ++i )
1421  {
1422  for( int j=i+1; j <= 32; ++j )
1423  {
1424  invalidate_array( val, 32*sizeof(double) );
1425  vhypot( arg1, arg2, val, i, j );
1426  for( int k=0; k < 32; ++k )
1427  {
1428  if( k < i || k >= j )
1429  CHECK( isnan(val[k]) );
1430  else
1431  CHECK( fp_equal( val[k], double(k)*sqrt(2.) ) );
1432  }
1433  invalidate_array( val, 32*sizeof(double) );
1434  vhypot( &arg1[i], &arg2[i], val, 0, j-i );
1435  for( int k=0; k < 32; ++k )
1436  {
1437  if( k >= j-i )
1438  CHECK( isnan(val[k]) );
1439  else
1440  CHECK( fp_equal( val[k], double(k+i)*sqrt(2.) ) );
1441  }
1442  invalidate_array( val, 32*sizeof(double) );
1443  vhypot( &arg1[i], arg2, &val[i], 0, j-i );
1444  for( int k=0; k < 32; ++k )
1445  {
1446  if( k < i || k >= j )
1447  CHECK( isnan(val[k]) );
1448  else
1449  CHECK( fp_equal( val[k], sqrt(double((k-i)*(k-i)+k*k)) ) );
1450  }
1451  invalidate_array( val, 32*sizeof(double) );
1452  vhypot( arg1, &arg2[i], &val[i], 0, j-i );
1453  for( int k=0; k < 32; ++k )
1454  {
1455  if( k < i || k >= j )
1456  CHECK( isnan(val[k]) );
1457  else
1458  CHECK( fp_equal( val[k], sqrt(double((k-i)*(k-i)+k*k)) ) );
1459  }
1460  invalidate_array( val, 32*sizeof(double) );
1461  vhypot( &arg1[i], &arg2[i/2], val, 0, j-i );
1462  for( int k=0; k < 32; ++k )
1463  {
1464  if( k >= j-i )
1465  CHECK( isnan(val[k]) );
1466  else
1467  {
1468  double ref2 = double((k+i)*(k+i) + (k+i/2)*(k+i/2));
1469  CHECK( fp_equal( val[k], sqrt(ref2) ) );
1470  }
1471  }
1472  }
1473  }
1474  }
1475 
1476  TEST(TestVsqrtf)
1477  {
1478 #ifdef __AVX__
1479  sys_float x, y[4];
1480  x = -1.f;
1481  CHECK_THROW( vsqrt(y,x,x,x,x), domain_error );
1482  x = -numeric_limits<sys_float>().infinity();
1483  CHECK_THROW( vsqrt(y,x,x,x,x), domain_error );
1484  x = numeric_limits<sys_float>().infinity();
1485  CHECK_THROW( vsqrt(y,x,x,x,x), domain_error );
1486  x = numeric_limits<sys_float>().quiet_NaN();
1487  CHECK_THROW( vsqrt(y,x,x,x,x), domain_error );
1488  x = 0.f;
1489  vsqrt(y,x,x,x,x);
1490  CHECK( y[0] == 0.f );
1491 
1492  const int sz = 2048;
1493  ALIGNED(CD_ALIGN) sys_float arg[sz];
1494  ALIGNED(CD_ALIGN) sys_float val[sz];
1495  init_genrand( 335036617L );
1496  for( int i=0; i < sz; ++i )
1497  arg[i] = exp(genrand_real1()*176.05-87.33);
1498  vsqrt( arg, val, 0, sz );
1499  for( int i=0; i < sz; ++i )
1500  CHECK( fp_equal( val[i], sqrtf(arg[i]) ) );
1501 #else
1502  CHECK( fp_equal( 1., 1. ) );
1503 #endif
1504  }
1505 
1506  TEST(TestVhypotf)
1507  {
1508  const int sz = 2048;
1509  ALIGNED(CD_ALIGN) sys_float arg1[sz];
1510  ALIGNED(CD_ALIGN) sys_float arg2[sz];
1511  ALIGNED(CD_ALIGN) sys_float val[sz];
1512 #ifdef __AVX__
1513  sys_float y[4], x = -numeric_limits<sys_float>().infinity();
1514  CHECK_THROW( vhypot(y,x,1.f,x,1.f,x,1.f,x,1.f), domain_error );
1515  CHECK_THROW( vhypot(y,1.f,x,1.f,x,1.f,x,1.f,x), domain_error );
1516  x = numeric_limits<sys_float>().infinity();
1517  CHECK_THROW( vhypot(y,x,1.f,x,1.f,x,1.f,x,1.f), domain_error );
1518  CHECK_THROW( vhypot(y,1.f,x,1.f,x,1.f,x,1.f,x), domain_error );
1519  x = numeric_limits<sys_float>().quiet_NaN();
1520  CHECK_THROW( vhypot(y,x,1.f,x,1.f,x,1.f,x,1.f), domain_error );
1521  CHECK_THROW( vhypot(y,1.f,x,1.f,x,1.f,x,1.f,x), domain_error );
1522  vhypot(y,0.f,0.f,1.f,0.f,0.f,1.f,FLT_MAX/2.f,FLT_MAX/2.f);
1523  CHECK( fp_equal( y[0], 0.f ) );
1524  CHECK( fp_equal( y[1], 1.f ) );
1525  CHECK( fp_equal( y[2], 1.f ) );
1526  CHECK( fp_equal( y[3], FLT_MAX/sqrtf(2.f) ) );
1527  // now check results over the entire range
1528  init_genrand( 395912717L );
1529  for( int i=0; i < sz; ++i )
1530  {
1531  arg1[i] = exp(genrand_real1()*175.70-87.33);
1532  if( (i%4) < 2 )
1533  arg1[i] = -arg1[i];
1534  arg2[i] = exp(genrand_real1()*175.70-87.33);
1535  if( (i%2) == 1 )
1536  arg2[i] = -arg2[i];
1537  }
1538  vhypot( arg1, arg2, val, 0, sz );
1539  for( int i=0; i < sz; ++i )
1540  {
1541  CHECK( fp_equal( val[i], hypotf(arg1[i], arg2[i]) ) );
1542  }
1543 #endif
1544  for( int i=0; i < 32; ++i )
1545  {
1546  arg1[i] = sys_float(i);
1547  arg2[i] = sys_float(i);
1548  }
1549  // finally check that non-aligned arrays are treated correctly
1550  for( int i=0; i < 32; ++i )
1551  {
1552  for( int j=i+1; j <= 32; ++j )
1553  {
1554  invalidate_array( val, 32*sizeof(sys_float) );
1555  vhypot( arg1, arg2, val, i, j );
1556  for( int k=0; k < 32; ++k )
1557  {
1558  if( k < i || k >= j )
1559  CHECK( isnanf(val[k]) );
1560  else
1561  CHECK( fp_equal( val[k], sys_float(k)*sqrtf(2.f) ) );
1562  }
1563  invalidate_array( val, 32*sizeof(sys_float) );
1564  vhypot( &arg1[i], &arg2[i], val, 0, j-i );
1565  for( int k=0; k < 32; ++k )
1566  {
1567  if( k >= j-i )
1568  CHECK( isnanf(val[k]) );
1569  else
1570  CHECK( fp_equal( val[k], sys_float(k+i)*sqrtf(2.f) ) );
1571  }
1572  invalidate_array( val, 32*sizeof(sys_float) );
1573  vhypot( &arg1[i], arg2, &val[i], 0, j-i );
1574  for( int k=0; k < 32; ++k )
1575  {
1576  if( k < i || k >= j )
1577  CHECK( isnanf(val[k]) );
1578  else
1579  CHECK( fp_equal( val[k], sqrtf(sys_float((k-i)*(k-i)+k*k)) ) );
1580  }
1581  invalidate_array( val, 32*sizeof(sys_float) );
1582  vhypot( arg1, &arg2[i], &val[i], 0, j-i );
1583  for( int k=0; k < 32; ++k )
1584  {
1585  if( k < i || k >= j )
1586  CHECK( isnanf(val[k]) );
1587  else
1588  CHECK( fp_equal( val[k], sqrtf(sys_float((k-i)*(k-i)+k*k)) ) );
1589  }
1590  invalidate_array( val, 32*sizeof(sys_float) );
1591  vhypot( &arg1[i], &arg2[i/2], val, 0, j-i );
1592  for( int k=0; k < 32; ++k )
1593  {
1594  if( k >= j-i )
1595  CHECK( isnanf(val[k]) );
1596  else
1597  {
1598  sys_float ref2 = sys_float((k+i)*(k+i) + (k+i/2)*(k+i/2));
1599  CHECK( fp_equal( val[k], sqrtf(ref2) ) );
1600  }
1601  }
1602  }
1603  }
1604  }
1605 
1606  TEST(TestVsqrtdx4)
1607  {
1608  double y[4];
1609  vsqrt(y,11.,22.,33.,44.);
1610  CHECK( fp_equal( y[0], sqrt(11.) ) );
1611  CHECK( fp_equal( y[1], sqrt(22.) ) );
1612  CHECK( fp_equal( y[2], sqrt(33.) ) );
1613  CHECK( fp_equal( y[3], sqrt(44.) ) );
1614  }
1615 
1616  TEST(TestVhypotdx4)
1617  {
1618  double y[4];
1619  vhypot(y,11.,22.,33.,44.,55.,66.,77.,88.);
1620  CHECK( fp_equal( y[0], hypot(11.,22.) ) );
1621  CHECK( fp_equal( y[1], hypot(33.,44.) ) );
1622  CHECK( fp_equal( y[2], hypot(55.,66.) ) );
1623  CHECK( fp_equal( y[3], hypot(77.,88.) ) );
1624  }
1625 
1626  TEST(TestVsqrtdx8)
1627  {
1628  double y[8];
1629  vsqrt(y,11.,22.,33.,44.,55.,66.,77.,88.);
1630  CHECK( fp_equal( y[0], sqrt(11.) ) );
1631  CHECK( fp_equal( y[1], sqrt(22.) ) );
1632  CHECK( fp_equal( y[2], sqrt(33.) ) );
1633  CHECK( fp_equal( y[3], sqrt(44.) ) );
1634  CHECK( fp_equal( y[4], sqrt(55.) ) );
1635  CHECK( fp_equal( y[5], sqrt(66.) ) );
1636  CHECK( fp_equal( y[6], sqrt(77.) ) );
1637  CHECK( fp_equal( y[7], sqrt(88.) ) );
1638  }
1639 
1640  TEST(TestVsqrtfx4)
1641  {
1642  sys_float y[4];
1643  vsqrt(y,11.f,22.f,33.f,44.f);
1644  CHECK( fp_equal( y[0], sqrtf(11.f) ) );
1645  CHECK( fp_equal( y[1], sqrtf(22.f) ) );
1646  CHECK( fp_equal( y[2], sqrtf(33.f) ) );
1647  CHECK( fp_equal( y[3], sqrtf(44.f) ) );
1648  }
1649 
1650  TEST(TestVhypotfx4)
1651  {
1652  sys_float y[4];
1653  vhypot(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f);
1654  CHECK( fp_equal( y[0], hypotf(11.f,22.f) ) );
1655  CHECK( fp_equal( y[1], hypotf(33.f,44.f) ) );
1656  CHECK( fp_equal( y[2], hypotf(55.f,66.f) ) );
1657  CHECK( fp_equal( y[3], hypotf(77.f,88.f) ) );
1658  }
1659 
1660  TEST(TestVsqrtfx8)
1661  {
1662  sys_float y[8];
1663  vsqrt(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f);
1664  CHECK( fp_equal( y[0], sqrtf(11.f) ) );
1665  CHECK( fp_equal( y[1], sqrtf(22.f) ) );
1666  CHECK( fp_equal( y[2], sqrtf(33.f) ) );
1667  CHECK( fp_equal( y[3], sqrtf(44.f) ) );
1668  CHECK( fp_equal( y[4], sqrtf(55.f) ) );
1669  CHECK( fp_equal( y[5], sqrtf(66.f) ) );
1670  CHECK( fp_equal( y[6], sqrtf(77.f) ) );
1671  CHECK( fp_equal( y[7], sqrtf(88.f) ) );
1672  }
1673 
1674  TEST(TestVhypotfx8)
1675  {
1676  sys_float y[8];
1677  vhypot(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f,111.f,122.f,133.f,144.f,155.f,166.f,177.f,188.f);
1678  CHECK( fp_equal( y[0], hypotf(11.f,22.f) ) );
1679  CHECK( fp_equal( y[1], hypotf(33.f,44.f) ) );
1680  CHECK( fp_equal( y[2], hypotf(55.f,66.f) ) );
1681  CHECK( fp_equal( y[3], hypotf(77.f,88.f) ) );
1682  CHECK( fp_equal( y[4], hypotf(111.f,122.f) ) );
1683  CHECK( fp_equal( y[5], hypotf(133.f,144.f) ) );
1684  CHECK( fp_equal( y[6], hypotf(155.f,166.f) ) );
1685  CHECK( fp_equal( y[7], hypotf(177.f,188.f) ) );
1686  }
1687 
1688  TEST(TestVsqrtfx16)
1689  {
1690  sys_float y[16];
1691  vsqrt(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f,111.f,122.f,133.f,144.f,155.f,166.f,177.f,188.f);
1692  CHECK( fp_equal( y[0], sqrtf(11.f) ) );
1693  CHECK( fp_equal( y[1], sqrtf(22.f) ) );
1694  CHECK( fp_equal( y[2], sqrtf(33.f) ) );
1695  CHECK( fp_equal( y[3], sqrtf(44.f) ) );
1696  CHECK( fp_equal( y[4], sqrtf(55.f) ) );
1697  CHECK( fp_equal( y[5], sqrtf(66.f) ) );
1698  CHECK( fp_equal( y[6], sqrtf(77.f) ) );
1699  CHECK( fp_equal( y[7], sqrtf(88.f) ) );
1700  CHECK( fp_equal( y[8], sqrtf(111.f) ) );
1701  CHECK( fp_equal( y[9], sqrtf(122.f) ) );
1702  CHECK( fp_equal( y[10], sqrtf(133.f) ) );
1703  CHECK( fp_equal( y[11], sqrtf(144.f) ) );
1704  CHECK( fp_equal( y[12], sqrtf(155.f) ) );
1705  CHECK( fp_equal( y[13], sqrtf(166.f) ) );
1706  CHECK( fp_equal( y[14], sqrtf(177.f) ) );
1707  CHECK( fp_equal( y[15], sqrtf(188.f) ) );
1708  }
1709 
1710  TEST(TestVasinhd)
1711  {
1712 #ifdef __AVX__
1713  double x, y[4];
1714  x = -numeric_limits<double>().infinity();
1715  CHECK_THROW( vasinh(y,x,x,x,x), domain_error );
1716  x = numeric_limits<double>().infinity();
1717  CHECK_THROW( vasinh(y,x,x,x,x), domain_error );
1718  x = -numeric_limits<double>().quiet_NaN();
1719  CHECK_THROW( vasinh(y,x,x,x,x), domain_error );
1720  x = 0.;
1721  vasinh(y,x,x,x,x);
1722  CHECK( y[0] == 0. );
1723 
1724  const int sz = 2048;
1725  ALIGNED(CD_ALIGN) double arg[sz];
1726  ALIGNED(CD_ALIGN) double val[sz];
1727  init_genrand( 335366717L );
1728  for( int i=0; i < sz; ++i )
1729  {
1730  arg[i] = exp(genrand_real1()*1418.17-708.39);
1731  if( (i%2) == 1 )
1732  arg[i] = -arg[i];
1733  }
1734  vasinh( arg, val, 0, sz );
1735  for( int i=0; i < sz; ++i )
1736  CHECK( fp_equal( val[i], asinh(arg[i]) ) );
1737 #else
1738  CHECK( fp_equal( 1., 1. ) );
1739 #endif
1740  }
1741 
1742  TEST(TestVasinhdFast)
1743  {
1744 #ifdef __AVX__
1745  double x, y[4];
1746  x = -numeric_limits<double>().infinity();
1747  CHECK_THROW( vfast_asinh(y,x,x,x,x), domain_error );
1748  x = numeric_limits<double>().infinity();
1749  CHECK_THROW( vfast_asinh(y,x,x,x,x), domain_error );
1750  x = numeric_limits<double>().quiet_NaN();
1751  CHECK_THROW( vfast_asinh(y,x,x,x,x), domain_error );
1752  x = -1.;
1753  CHECK_THROW( vfast_asinh(y,x,x,x,x), domain_error );
1754  double z = sqrt(DBL_MAX);
1755  x = nextafter(z,DBL_MAX);
1756  CHECK_THROW( vfast_asinh(y,x,x,x,x), domain_error );
1757  x = nextafter(z,-DBL_MAX);
1758  vfast_asinh(y,x,x,x,x);
1759  CHECK( y[0] > 0. && y[0] < DBL_MAX );
1760  x = 0.;
1761  vfast_asinh(y,x,x,x,x);
1762  CHECK( y[0] == 0. );
1763 
1764  const int sz = 2048;
1765  ALIGNED(CD_ALIGN) double arg[sz];
1766  ALIGNED(CD_ALIGN) double val[sz];
1767  init_genrand( 335777717L );
1768  for( int i=0; i < sz; ++i )
1769  arg[i] = exp(genrand_real1()*1063.28-708.39);
1770  vfast_asinh( arg, val, 0, sz );
1771  for( int i=0; i < sz; ++i )
1772  CHECK( fp_equal( val[i], asinh(arg[i]) ) );
1773 #else
1774  CHECK( fp_equal( 1., 1. ) );
1775 #endif
1776  }
1777 
1778  TEST(TestVasinhf)
1779  {
1780 #ifdef __AVX__
1781  sys_float x, y[4];
1782  x = -numeric_limits<sys_float>().infinity();
1783  CHECK_THROW( vasinh(y,x,x,x,x), domain_error );
1784  x = numeric_limits<sys_float>().infinity();
1785  CHECK_THROW( vasinh(y,x,x,x,x), domain_error );
1786  x = numeric_limits<sys_float>().quiet_NaN();
1787  CHECK_THROW( vasinh(y,x,x,x,x), domain_error );
1788  x = 0.f;
1789  vasinh(y,x,x,x,x);
1790  CHECK( y[0] == 0.f );
1791 
1792  const int sz = 2048;
1793  ALIGNED(CD_ALIGN) sys_float arg[sz];
1794  ALIGNED(CD_ALIGN) sys_float val[sz];
1795  init_genrand( 335391417L );
1796  for( int i=0; i < sz; ++i )
1797  {
1798  arg[i] = exp(genrand_real1()*176.05-87.33);
1799  if( (i%2) == 1 )
1800  arg[i] = -arg[i];
1801  }
1802  vasinh( arg, val, 0, sz );
1803  for( int i=0; i < sz; ++i )
1804  CHECK( fp_equal( val[i], asinhf(arg[i]) ) );
1805 #else
1806  CHECK( fp_equal( 1., 1. ) );
1807 #endif
1808  }
1809 
1810  TEST(TestVasinhfFast)
1811  {
1812 #ifdef __AVX__
1813  sys_float x, y[4];
1814  x = -numeric_limits<sys_float>().infinity();
1815  CHECK_THROW( vfast_asinh(y,x,x,x,x), domain_error );
1816  x = numeric_limits<sys_float>().infinity();
1817  CHECK_THROW( vfast_asinh(y,x,x,x,x), domain_error );
1818  x = numeric_limits<sys_float>().quiet_NaN();
1819  CHECK_THROW( vfast_asinh(y,x,x,x,x), domain_error );
1820  x = -1.f;
1821  CHECK_THROW( vfast_asinh(y,x,x,x,x), domain_error );
1822  sys_float z = sqrt(FLT_MAX);
1823  x = nextafterf(z,FLT_MAX);
1824  CHECK_THROW( vfast_asinh(y,x,x,x,x), domain_error );
1825  x = nextafterf(z,-FLT_MAX);
1826  vfast_asinh(y,x,x,x,x);
1827  CHECK( y[0] > 0.f && y[0] < FLT_MAX );
1828  x = 0.f;
1829  vfast_asinh(y,x,x,x,x);
1830  CHECK( y[0] == 0.f );
1831 
1832  const int sz = 2048;
1833  ALIGNED(CD_ALIGN) sys_float arg[sz];
1834  ALIGNED(CD_ALIGN) sys_float val[sz];
1835  init_genrand( 330286717L );
1836  for( int i=0; i < sz; ++i )
1837  arg[i] = exp(genrand_real1()*131.69-87.33);
1838  vfast_asinh( arg, val, 0, sz );
1839  for( int i=0; i < sz; ++i )
1840  CHECK( fp_equal( val[i], asinhf(arg[i]) ) );
1841 #else
1842  CHECK( fp_equal( 1., 1. ) );
1843 #endif
1844  }
1845 
1846  TEST(TestVasinhdx4)
1847  {
1848  double y[4];
1849  vasinh(y,11.,22.,33.,44.);
1850  CHECK( fp_equal( y[0], asinh(11.) ) );
1851  CHECK( fp_equal( y[1], asinh(22.) ) );
1852  CHECK( fp_equal( y[2], asinh(33.) ) );
1853  CHECK( fp_equal( y[3], asinh(44.) ) );
1854  }
1855 
1856  TEST(TestVasinhdFastx4)
1857  {
1858  double y[4];
1859  vfast_asinh(y,11.,22.,33.,44.);
1860  CHECK( fp_equal( y[0], asinh(11.) ) );
1861  CHECK( fp_equal( y[1], asinh(22.) ) );
1862  CHECK( fp_equal( y[2], asinh(33.) ) );
1863  CHECK( fp_equal( y[3], asinh(44.) ) );
1864  }
1865 
1866  TEST(TestVasinhdx8)
1867  {
1868  double y[8];
1869  vasinh(y,11.,22.,33.,44.,55.,66.,77.,88.);
1870  CHECK( fp_equal( y[0], asinh(11.) ) );
1871  CHECK( fp_equal( y[1], asinh(22.) ) );
1872  CHECK( fp_equal( y[2], asinh(33.) ) );
1873  CHECK( fp_equal( y[3], asinh(44.) ) );
1874  CHECK( fp_equal( y[4], asinh(55.) ) );
1875  CHECK( fp_equal( y[5], asinh(66.) ) );
1876  CHECK( fp_equal( y[6], asinh(77.) ) );
1877  CHECK( fp_equal( y[7], asinh(88.) ) );
1878  }
1879 
1880  TEST(TestVasinhdFastx8)
1881  {
1882  double y[8];
1883  vfast_asinh(y,11.,22.,33.,44.,55.,66.,77.,88.);
1884  CHECK( fp_equal( y[0], asinh(11.) ) );
1885  CHECK( fp_equal( y[1], asinh(22.) ) );
1886  CHECK( fp_equal( y[2], asinh(33.) ) );
1887  CHECK( fp_equal( y[3], asinh(44.) ) );
1888  CHECK( fp_equal( y[4], asinh(55.) ) );
1889  CHECK( fp_equal( y[5], asinh(66.) ) );
1890  CHECK( fp_equal( y[6], asinh(77.) ) );
1891  CHECK( fp_equal( y[7], asinh(88.) ) );
1892  }
1893 
1894  TEST(TestVasinhfx4)
1895  {
1896  sys_float y[4];
1897  vasinh(y,11.f,22.f,33.f,44.f);
1898  CHECK( fp_equal( y[0], asinhf(11.f) ) );
1899  CHECK( fp_equal( y[1], asinhf(22.f) ) );
1900  CHECK( fp_equal( y[2], asinhf(33.f) ) );
1901  CHECK( fp_equal( y[3], asinhf(44.f) ) );
1902  }
1903 
1904  TEST(TestVasinhfFastx4)
1905  {
1906  sys_float y[4];
1907  vfast_asinh(y,11.f,22.f,33.f,44.f);
1908  CHECK( fp_equal( y[0], asinhf(11.f) ) );
1909  CHECK( fp_equal( y[1], asinhf(22.f) ) );
1910  CHECK( fp_equal( y[2], asinhf(33.f) ) );
1911  CHECK( fp_equal( y[3], asinhf(44.f) ) );
1912  }
1913 
1914  TEST(TestVasinhfx8)
1915  {
1916  sys_float y[8];
1917  vasinh(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f);
1918  CHECK( fp_equal( y[0], asinhf(11.f) ) );
1919  CHECK( fp_equal( y[1], asinhf(22.f) ) );
1920  CHECK( fp_equal( y[2], asinhf(33.f) ) );
1921  CHECK( fp_equal( y[3], asinhf(44.f) ) );
1922  CHECK( fp_equal( y[4], asinhf(55.f) ) );
1923  CHECK( fp_equal( y[5], asinhf(66.f) ) );
1924  CHECK( fp_equal( y[6], asinhf(77.f) ) );
1925  CHECK( fp_equal( y[7], asinhf(88.f) ) );
1926  }
1927 
1928  TEST(TestVasinhfFastx8)
1929  {
1930  sys_float y[8];
1931  vfast_asinh(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f);
1932  CHECK( fp_equal( y[0], asinhf(11.f) ) );
1933  CHECK( fp_equal( y[1], asinhf(22.f) ) );
1934  CHECK( fp_equal( y[2], asinhf(33.f) ) );
1935  CHECK( fp_equal( y[3], asinhf(44.f) ) );
1936  CHECK( fp_equal( y[4], asinhf(55.f) ) );
1937  CHECK( fp_equal( y[5], asinhf(66.f) ) );
1938  CHECK( fp_equal( y[6], asinhf(77.f) ) );
1939  CHECK( fp_equal( y[7], asinhf(88.f) ) );
1940  }
1941 
1942  TEST(TestVasinhfx16)
1943  {
1944  sys_float y[16];
1945  vasinh(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f,111.f,122.f,133.f,144.f,155.f,166.f,177.f,188.f);
1946  CHECK( fp_equal( y[0], asinhf(11.f) ) );
1947  CHECK( fp_equal( y[1], asinhf(22.f) ) );
1948  CHECK( fp_equal( y[2], asinhf(33.f) ) );
1949  CHECK( fp_equal( y[3], asinhf(44.f) ) );
1950  CHECK( fp_equal( y[4], asinhf(55.f) ) );
1951  CHECK( fp_equal( y[5], asinhf(66.f) ) );
1952  CHECK( fp_equal( y[6], asinhf(77.f) ) );
1953  CHECK( fp_equal( y[7], asinhf(88.f) ) );
1954  CHECK( fp_equal( y[8], asinhf(111.f) ) );
1955  CHECK( fp_equal( y[9], asinhf(122.f) ) );
1956  CHECK( fp_equal( y[10], asinhf(133.f) ) );
1957  CHECK( fp_equal( y[11], asinhf(144.f) ) );
1958  CHECK( fp_equal( y[12], asinhf(155.f) ) );
1959  CHECK( fp_equal( y[13], asinhf(166.f) ) );
1960  CHECK( fp_equal( y[14], asinhf(177.f) ) );
1961  CHECK( fp_equal( y[15], asinhf(188.f) ) );
1962  }
1963 
1964  TEST(TestVasinhfFastx16)
1965  {
1966  sys_float y[16];
1967  vfast_asinh(y,11.f,22.f,33.f,44.f,55.f,66.f,77.f,88.f,111.f,122.f,133.f,144.f,155.f,166.f,177.f,188.f);
1968  CHECK( fp_equal( y[0], asinhf(11.f) ) );
1969  CHECK( fp_equal( y[1], asinhf(22.f) ) );
1970  CHECK( fp_equal( y[2], asinhf(33.f) ) );
1971  CHECK( fp_equal( y[3], asinhf(44.f) ) );
1972  CHECK( fp_equal( y[4], asinhf(55.f) ) );
1973  CHECK( fp_equal( y[5], asinhf(66.f) ) );
1974  CHECK( fp_equal( y[6], asinhf(77.f) ) );
1975  CHECK( fp_equal( y[7], asinhf(88.f) ) );
1976  CHECK( fp_equal( y[8], asinhf(111.f) ) );
1977  CHECK( fp_equal( y[9], asinhf(122.f) ) );
1978  CHECK( fp_equal( y[10], asinhf(133.f) ) );
1979  CHECK( fp_equal( y[11], asinhf(144.f) ) );
1980  CHECK( fp_equal( y[12], asinhf(155.f) ) );
1981  CHECK( fp_equal( y[13], asinhf(166.f) ) );
1982  CHECK( fp_equal( y[14], asinhf(177.f) ) );
1983  CHECK( fp_equal( y[15], asinhf(188.f) ) );
1984  }
1985 }
double exp10(double x)
Definition: cddefines.h:1383
void avx_free(void *p_ptr_alloc)
Definition: vectorize.h:105
T * get_ptr(T *v)
Definition: cddefines.h:1113
double genrand_real1()
void vlog10(const double x[], double y[], long nlo, long nhi)
void vfast_asinh(const double x[], double y[], long nlo, long nhi)
void invalidate_array(T *p, size_t size)
Definition: cddefines.h:1095
ALIGNED(CD_ALIGN) static const double qg32_w[numPoints]
void vlog1p(const double x[], double y[], long nlo, long nhi)
void vhypot(const double x1[], const double x2[], double y[], long nlo, long nhi)
unsigned long genrand_int32()
void vlog(const double x[], double y[], long nlo, long nhi)
double reduce_a(const double *a, long ilo, long ihi)
void * avx_alloc(size_t sz)
Definition: vectorize.h:86
void vexp(const double x[], double y[], long nlo, long nhi)
bool fp_equal(sys_float x, sys_float y, int n=3)
Definition: cddefines.h:858
#define MALLOC_AVX(exp)
Definition: cddefines.h:559
float sys_float
Definition: cddefines.h:127
static double a1[83]
void vexp10(const double x[], double y[], long nlo, long nhi)
double reduce_abc_ab(const double *a, const double *b, const double *c, long ilo, long ihi, double *sum_ab)
void vasinh(const double x[], double y[], long nlo, long nhi)
double reduce_ab(const double *a, const double *b, long ilo, long ihi)
double reduce_ab_a(const double *a, const double *b, long ilo, long ihi, double *sum_a)
void vsqrt(const double x[], double y[], long nlo, long nhi)
T pow2(T a)
Definition: cddefines.h:985
#define isnan
Definition: cddefines.h:663
double reduce_abc(const double *a, const double *b, const double *c, long ilo, long ihi)
double pow(double x, int i)
Definition: cddefines.h:782
void init_genrand(unsigned long s)
#define CD_ALIGN
Definition: cpu.h:156
void vexpm1(const double x[], double y[], long nlo, long nhi)
static double a2[63]