cloudy  trunk
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
vectorize_hyper_core.h
Go to the documentation of this file.
1 /* This file is part of Cloudy and is copyright (C)1978-2017 by Gary J. Ferland and
2  * others. For conditions of distribution and use see copyright notice in license.txt */
3 
4 #ifndef VECTORIZE_HYPER_CORE_H
5 #define VECTORIZE_HYPER_CORE_H
6 
7 #include "vectorize_math.h"
8 #include "vectorize_sqrt_core.h"
9 #include "vectorize_log_core.h"
10 
11 //
12 // Written by Peter A.M. van Hoof, Royal Observatory of Belgium, Brussels
13 //
14 // this file contains vectorized versions of the single and double variants of the asinh()
15 // function. They are vectorized using AVX instructions, but also make use of AVX2, FMA,
16 // and AVX512 instructions when available. The basic algorithms for calculating the asinh()
17 // functions were somewhat simplified from the openlibm library versions available at
18 // http://openlibm.org/ which is subject to the following copyright:
19 //
20 // ====================================================
21 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
22 //
23 // Developed at SunSoft, a Sun Microsystems, Inc. business.
24 // Permission to use, copy, modify, and distribute this
25 // software is freely granted, provided that this notice
26 // is preserved.
27 // ====================================================
28 //
29 
30 #ifdef __AVX__
31 
32 VECLL_CONST(asinh_mask1,0x7fffffffffffffff); // 64-bit pattern with every bit set except bit 63; NOTE(review): not referenced in this header, presumably used by callers to strip the sign of a double -- confirm
33 VECLL_CONST(asinh_mask2,0x8000000000000000); // 64-bit pattern with only bit 63 set; NOTE(review): presumably used by callers to isolate/restore the sign of a double -- confirm
34 
35 VECDI_CONST(asinh_2p28,0x41b0000000000000); // 2^28, given as the IEEE-754 binary64 bit pattern (biased exponent 0x41b, zero mantissa); not referenced by the core routines in this header
36 
37 #ifdef __AVX512F__
38 
39 inline v8df v1asinhd_core(v8df x)
40 {
41  v8df x2 = _mm512_mul_pd(x, x);
42  v8df arg = _mm512_add_pd(x2, one);
43  arg = v1sqrtd_core(arg);
44  arg = _mm512_add_pd(arg, one);
45  arg = _mm512_div_pd(x2, arg);
46  arg = _mm512_add_pd(arg, x);
47  return v1log1pd_core(arg);
48 }
49 
50 #else
51 
52 inline v4df v1asinhd_core(v4df x)
53 {
54  v4df x2 = _mm256_mul_pd(x, x);
55  v4df arg = _mm256_add_pd(x2, one);
56  arg = v1sqrtd_core(arg);
57  arg = _mm256_add_pd(arg, one);
58  arg = _mm256_div_pd(x2, arg);
59  arg = _mm256_add_pd(arg, x);
60  return v1log1pd_core(arg);
61 }
62 
63 #endif // __AVX512F__
64 
65 VECII_CONST(asinh_mask1f,0x7fffffff); // 32-bit pattern with every bit set except bit 31; NOTE(review): not referenced in this header, presumably used by callers to strip the sign of a float -- confirm
66 VECII_CONST(asinh_mask2f,0x80000000); // 32-bit pattern with only bit 31 set; NOTE(review): presumably used by callers to isolate/restore the sign of a float -- confirm
67 
68 VECFI_CONST(asinhf_2p28,0x4d800000); // 2^28, given as the IEEE-754 binary32 bit pattern (biased exponent 155, zero mantissa); not referenced by the core routines in this header
69 
70 #ifdef __AVX512F__
71 
72 inline v16sf v1asinhf_core(v16sf x)
73 {
74  v16sf x2 = _mm512_mul_ps(x, x);
75  v16sf arg = _mm512_add_ps(x2, onef);
76  arg = v1sqrtf_core(arg);
77  arg = _mm512_add_ps(arg, onef);
78  arg = _mm512_div_ps(x2, arg);
79  arg = _mm512_add_ps(arg, x);
80  return v1log1pf_core(arg);
81 }
82 
83 #else
84 
85 inline v8sf v1asinhf_core(v8sf x)
86 {
87  v8sf x2 = _mm256_mul_ps(x, x);
88  v8sf arg = _mm256_add_ps(x2, onef);
89  arg = v1sqrtf_core(arg);
90  arg = _mm256_add_ps(arg, onef);
91  arg = _mm256_div_ps(x2, arg);
92  arg = _mm256_add_ps(arg, x);
93  return v1log1pf_core(arg);
94 }
95 
96 #endif // __AVX512F__
97 
98 #endif // __AVX__
99 
100 #endif
static double x2[63]