cloudy  trunk
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
vectorize_log.cpp
Go to the documentation of this file.
1 /* This file is part of Cloudy and is copyright (C)1978-2017 by Gary J. Ferland and
2  * others. For conditions of distribution and use see copyright notice in license.txt */
3 #include "cddefines.h"
4 #include "vectorize.h"
5 #include "vectorize_math.h"
6 #include "vectorize_log_core.h"
7 
8 //
9 // Written by Peter A.M. van Hoof, Royal Observatory of Belgium, Brussels
10 //
11 // this file contains vectorized versions of the single and double variants of the log()
12 // function. They are vectorized using AVX instructions, but also make use of AVX2, FMA,
13 // and AVX512 instructions when available. The basic algorithms for calculating the log()
14 // functions were slightly simplified from the openlibm library versions available at
15 // http://openlibm.org/ which is subject to the following copyright:
16 //
17 // ====================================================
18 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
19 //
20 // Developed at SunSoft, a Sun Microsystems, Inc. business.
21 // Permission to use, copy, modify, and distribute this
22 // software is freely granted, provided that this notice
23 // is preserved.
24 // ====================================================
25 //
26 // This work was also inspired by similar routines written by Giovanni Garberoglio which
27 // are available at: http://software-lisc.fbk.eu/avx_mathfun/
28 //
29 
30 #ifdef __AVX__
31 
32 #ifdef __AVX512F__
33 
34 inline v8df v1logd(v8df x)
35 {
36  __mmask8 invalid1 = _mm512_cmp_pd_mask(x, dbl_min, _CMP_LT_OQ);
37  __mmask8 invalid2 = _mm512_cmp_pd_mask(x, dbl_max, _CMP_NLE_UQ);
38  if( _mm512_kortestz(invalid1, invalid2) )
39  {
40  __mmask8 invalid = invalid1 | invalid2;
41  throw domain_error( DEMsg("v1logd", x, invalid) );
42  }
43  return v1logd_core(x);
44 }
45 
46 inline v8df v1log10d(v8df x)
47 {
48  __mmask8 invalid1 = _mm512_cmp_pd_mask(x, dbl_min, _CMP_LT_OQ);
49  __mmask8 invalid2 = _mm512_cmp_pd_mask(x, dbl_max, _CMP_NLE_UQ);
50  if( _mm512_kortestz(invalid1, invalid2) )
51  {
52  __mmask8 invalid = invalid1 | invalid2;
53  throw domain_error( DEMsg("v1log10d", x, invalid) );
54  }
55  x = v1logd_core(x);
56  return _mm512_mul_pd(x, log10e);
57 }
58 
59 inline v8df v1log1pd(v8df x)
60 {
61  __mmask8 invalid1 = _mm512_cmp_pd_mask(x, mone, _CMP_LE_OQ);
62  __mmask8 invalid2 = _mm512_cmp_pd_mask(x, dbl_max, _CMP_NLE_UQ);
63  if( _mm512_kortestz(invalid1, invalid2) )
64  {
65  __mmask8 invalid = invalid1 | invalid2;
66  throw domain_error( DEMsg("v1log1pd", x, invalid) );
67  }
68  return v1log1pd_core(x);
69 }
70 
71 inline v16sf v1logf(v16sf x)
72 {
73  __mmask16 invalid1 = _mm512_cmp_ps_mask(x, flt_min, _CMP_LT_OQ);
74  __mmask16 invalid2 = _mm512_cmp_ps_mask(x, flt_max, _CMP_NLE_UQ);
75  if( _mm512_kortestz(invalid1, invalid2) )
76  {
77  __mmask16 invalid = invalid1 | invalid2;
78  throw domain_error( DEMsg("v1logf", x, invalid) );
79  }
80  return v1logf_core(x);
81 }
82 
83 inline v16sf v1log10f(v16sf x)
84 {
85  __mmask16 invalid1 = _mm512_cmp_ps_mask(x, flt_min, _CMP_LT_OQ);
86  __mmask16 invalid2 = _mm512_cmp_ps_mask(x, flt_max, _CMP_NLE_UQ);
87  if( _mm512_kortestz(invalid1, invalid2) )
88  {
89  __mmask16 invalid = invalid1 | invalid2;
90  throw domain_error( DEMsg("v1log10f", x, invalid) );
91  }
92  x = v1logf_core(x);
93  return _mm512_mul_ps(x, log10ef);
94 }
95 
96 inline v16sf v1log1pf(v16sf x)
97 {
98  __mmask16 invalid1 = _mm512_cmp_ps_mask(x, monef, _CMP_LE_OQ);
99  __mmask16 invalid2 = _mm512_cmp_ps_mask(x, flt_max, _CMP_NLE_UQ);
100  if( _mm512_kortestz(invalid1, invalid2) )
101  {
102  __mmask16 invalid = invalid1 | invalid2;
103  throw domain_error( DEMsg("v1log1pf", x, invalid) );
104  }
105  return v1log1pf_core(x);
106 }
107 
108 #else
109 
110 inline v4df v1logd(v4df x)
111 {
112  v4df invalid1 = _mm256_cmp_pd(x, dbl_min, _CMP_LT_OQ);
113  v4df invalid2 = _mm256_cmp_pd(x, dbl_max, _CMP_NLE_UQ);
114  v4df invalid = _mm256_or_pd(invalid1, invalid2);
115  if( ! _mm256_testz_pd(invalid, invalid) )
116  throw domain_error( DEMsg("v1logd", x, invalid) );
117  return v1logd_core(x);
118 }
119 
120 inline v4df v1log10d(v4df x)
121 {
122  v4df invalid1 = _mm256_cmp_pd(x, dbl_min, _CMP_LT_OQ);
123  v4df invalid2 = _mm256_cmp_pd(x, dbl_max, _CMP_NLE_UQ);
124  v4df invalid = _mm256_or_pd(invalid1, invalid2);
125  if( ! _mm256_testz_pd(invalid, invalid) )
126  throw domain_error( DEMsg("v1log10d", x, invalid) );
127  x = v1logd_core(x);
128  return _mm256_mul_pd(x, log10e);
129 }
130 
131 inline v4df v1log1pd(v4df x)
132 {
133  v4df invalid1 = _mm256_cmp_pd(x, mone, _CMP_LE_OQ);
134  v4df invalid2 = _mm256_cmp_pd(x, dbl_max, _CMP_NLE_UQ);
135  v4df invalid = _mm256_or_pd(invalid1, invalid2);
136  if( ! _mm256_testz_pd(invalid, invalid) )
137  throw domain_error( DEMsg("v1log1pd", x, invalid) );
138  return v1log1pd_core(x);
139 }
140 
141 inline v8sf v1logf(v8sf x)
142 {
143  v8sf invalid1 = _mm256_cmp_ps(x, flt_min, _CMP_LT_OQ);
144  v8sf invalid2 = _mm256_cmp_ps(x, flt_max, _CMP_NLE_UQ);
145  v8sf invalid = _mm256_or_ps(invalid1, invalid2);
146  if( ! _mm256_testz_ps(invalid, invalid) )
147  throw domain_error( DEMsg("v1logf", x, invalid) );
148  return v1logf_core(x);
149 }
150 
151 inline v8sf v1log10f(v8sf x)
152 {
153  v8sf invalid1 = _mm256_cmp_ps(x, flt_min, _CMP_LT_OQ);
154  v8sf invalid2 = _mm256_cmp_ps(x, flt_max, _CMP_NLE_UQ);
155  v8sf invalid = _mm256_or_ps(invalid1, invalid2);
156  if( ! _mm256_testz_ps(invalid, invalid) )
157  throw domain_error( DEMsg("v1log10f", x, invalid) );
158  x = v1logf_core(x);
159  return _mm256_mul_ps(x, log10ef);
160 }
161 
162 inline v8sf v1log1pf(v8sf x)
163 {
164  v8sf invalid1 = _mm256_cmp_ps(x, monef, _CMP_LE_OQ);
165  v8sf invalid2 = _mm256_cmp_ps(x, flt_max, _CMP_NLE_UQ);
166  v8sf invalid = _mm256_or_ps(invalid1, invalid2);
167  if( ! _mm256_testz_ps(invalid, invalid) )
168  throw domain_error( DEMsg("v1log1pf", x, invalid) );
169  return v1log1pf_core(x);
170 }
171 
172 #endif // __AVX512F__
173 
174 #else
175 
176 // stub routines, should never be called
177 inline int v1logd(int) { return 0; }
178 inline int v1log10d(int) { return 0; }
179 inline int v1log1pd(int) { return 0; }
180 inline int v1logf(int) { return 0; }
181 inline int v1log10f(int) { return 0; }
182 inline int v1log1pf(int) { return 0; }
183 
184 #endif // __AVX__
185 
186 // wrapper routines to give math functions C++ linkage
187 // this prevents warnings from the Oracle Studio compiler
188 inline double wr_logd(double x)
189 {
190  return log(x);
191 }
192 
194 {
195  return logf(x);
196 }
197 
198 inline double wr_log10d(double x)
199 {
200  return log10(x);
201 }
202 
204 {
205  return log10f(x);
206 }
207 
208 inline double wr_log1pd(double x)
209 {
210  return log1p(x);
211 }
212 
214 {
215  return log1pf(x);
216 }
217 
218 
219 void vlog(const double x[], double y[], long nlo, long nhi)
220 {
221  DEBUG_ENTRY( "vlog()" );
222 
223  vecfun( x, y, nlo, nhi, wr_logd, v1logd );
224 }
225 
226 void vlog10(const double x[], double y[], long nlo, long nhi)
227 {
228  DEBUG_ENTRY( "vlog10()" );
229 
230  vecfun( x, y, nlo, nhi, wr_log10d, v1log10d );
231 }
232 
233 void vlog1p(const double x[], double y[], long nlo, long nhi)
234 {
235  DEBUG_ENTRY( "vlog1p()" );
236 
237  vecfun( x, y, nlo, nhi, wr_log1pd, v1log1pd );
238 }
239 
240 void vlog(const sys_float x[], sys_float y[], long nlo, long nhi)
241 {
242  DEBUG_ENTRY( "vlog()" );
243 
244  vecfun( x, y, nlo, nhi, wr_logf, v1logf );
245 }
246 
247 void vlog10(const sys_float x[], sys_float y[], long nlo, long nhi)
248 {
249  DEBUG_ENTRY( "vlog10()" );
250 
251  vecfun( x, y, nlo, nhi, wr_log10f, v1log10f );
252 }
253 
254 void vlog1p(const sys_float x[], sys_float y[], long nlo, long nhi)
255 {
256  DEBUG_ENTRY( "vlog1p()" );
257 
258  vecfun( x, y, nlo, nhi, wr_log1pf, v1log1pf );
259 }
260 
261 void vlog(double *y, double x0, double x1, double x2, double x3)
262 {
263  V1FUN_PD_4(log, 1.);
264 }
265 
266 void vlog10(double *y, double x0, double x1, double x2, double x3)
267 {
268  V1FUN_PD_4(log10, 1.);
269 }
270 
271 void vlog1p(double *y, double x0, double x1, double x2, double x3)
272 {
273  V1FUN_PD_4(log1p, 1.);
274 }
275 
276 void vlog(double *y, double x0, double x1, double x2, double x3, double x4, double x5, double x6, double x7)
277 {
278  V1FUN_PD_8(log, 1.);
279 }
280 
281 void vlog10(double *y, double x0, double x1, double x2, double x3, double x4, double x5, double x6, double x7)
282 {
283  V1FUN_PD_8(log10, 1.);
284 }
285 
286 void vlog1p(double *y, double x0, double x1, double x2, double x3, double x4, double x5, double x6, double x7)
287 {
288  V1FUN_PD_8(log1p, 1.);
289 }
290 
292 {
293  V1FUN_PS_4(log, 1.f);
294 }
295 
297 {
298  V1FUN_PS_4(log10, 1.f);
299 }
300 
302 {
303  V1FUN_PS_4(log1p, 1.f);
304 }
305 
307  sys_float x6, sys_float x7)
308 {
309  V1FUN_PS_8(log, 1.f);
310 }
311 
313  sys_float x6, sys_float x7)
314 {
315  V1FUN_PS_8(log10, 1.f);
316 }
317 
319  sys_float x6, sys_float x7)
320 {
321  V1FUN_PS_8(log1p, 1.f);
322 }
323 
325  sys_float x6, sys_float x7, sys_float x8, sys_float x9, sys_float x10, sys_float x11, sys_float x12,
326  sys_float x13, sys_float x14, sys_float x15)
327 {
328  V1FUN_PS_16(log, 1.f);
329 }
330 
332  sys_float x6, sys_float x7, sys_float x8, sys_float x9, sys_float x10, sys_float x11, sys_float x12,
333  sys_float x13, sys_float x14, sys_float x15)
334 {
335  V1FUN_PS_16(log10, 1.f);
336 }
337 
339  sys_float x6, sys_float x7, sys_float x8, sys_float x9, sys_float x10, sys_float x11, sys_float x12,
340  sys_float x13, sys_float x14, sys_float x15)
341 {
342  V1FUN_PS_16(log1p, 1.f);
343 }
#define V1FUN_PD_8(FUN, V)
sys_float wr_log1pf(sys_float x)
static double x2[63]
void vlog10(const double x[], double y[], long nlo, long nhi)
static double x1[83]
int v1logf(int)
sys_float wr_log10f(sys_float x)
void vlog1p(const double x[], double y[], long nlo, long nhi)
double wr_log10d(double x)
void vlog(const double x[], double y[], long nlo, long nhi)
sys_float wr_logf(sys_float x)
double wr_logd(double x)
void vecfun(const T x[], T y[], long nlo, long nhi, T(*scalfun1)(T), V(*)(V))
static double x0[83]
float sys_float
Definition: cddefines.h:127
#define V1FUN_PS_4(FUN, V)
#define V1FUN_PD_4(FUN, V)
int v1log10f(int)
#define DEBUG_ENTRY(funcname)
Definition: cddefines.h:729
int v1log10d(int)
double wr_log1pd(double x)
int v1logd(int)
#define V1FUN_PS_8(FUN, V)
#define V1FUN_PS_16(FUN, V)
int v1log1pf(int)
int v1log1pd(int)