Skip to content

Commit 51ea65b

Browse files
grey-eminence authored and sys_zuul committed
IMF LA open-sourcing. FP64 acospi.
Change-Id: I9c950fe7d747129c713a45ca40c5bbc00df8f36d
1 parent 1c3ebeb commit 51ea65b

File tree

2 files changed

+184
-4
lines changed

2 files changed

+184
-4
lines changed
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
/*===================== begin_copyright_notice ==================================
2+
3+
Copyright (c) 2017 Intel Corporation
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a
6+
copy of this software and associated documentation files (the
7+
"Software"), to deal in the Software without restriction, including
8+
without limitation the rights to use, copy, modify, merge, publish,
9+
distribute, sublicense, and/or sell copies of the Software, and to
10+
permit persons to whom the Software is furnished to do so, subject to
11+
the following conditions:
12+
13+
The above copyright notice and this permission notice shall be included
14+
in all copies or substantial portions of the Software.
15+
16+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17+
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23+
24+
25+
======================= end_copyright_notice ==================================*/
26+
27+
#include "../imf.h"
28+
#pragma OPENCL FP_CONTRACT OFF
29+
/*
 * Layout of the constant table for the FP64 acospi (LA) implementation.
 * Every entry is a raw 64-bit pattern stored as an unsigned long.
 *
 * The field order is significant: it must stay in step with the initializer
 * of __internal_dacospi_la_data.
 *
 * NOTE(review): the scalar routine visible in this file works from the
 * stand-alone __dacospi_la_* constants and does not reference this table;
 * the per-field notes below are inferred from the field names and the
 * initializer values - confirm against the vector implementation.
 */
typedef struct
{
    /* Masks and scalar constants. */
    unsigned long SgnBit;           /* sign-bit mask */
    unsigned long OneHalf;          /* 0.5 */
    unsigned long SmallNorm;        /* small normalized value */
    unsigned long dRsqrtMsk;        /* mask, presumably applied to an rsqrt estimate */
    unsigned long MOne;             /* -1.0 */
    unsigned long HalfMask;         /* mask, presumably for a high/low split */
    unsigned long Two;              /* 2.0 */

    /* Polynomial coefficient sets. */
    unsigned long sqrt_coeff[4];    /* presumably square-root refinement terms */
    unsigned long poly_coeff[12];   /* main polynomial */

    /* 1/pi pieces and further scalars. */
    unsigned long InvPiH;           /* presumably high part of 1/pi */
    unsigned long InvPiL;           /* presumably low part of 1/pi */
    unsigned long One;              /* 1.0 */
    unsigned long InvPi;            /* 1/pi */
    unsigned long SgnMask;          /* sign-bit mask (same value as SgnBit) */
    unsigned long ep_coeff[6];      /* shorter polynomial, presumably a reduced-accuracy path */

    /* Signed special values, stored as {positive, negative}. */
    unsigned long dInfs[2];
    unsigned long dOnes[2];
    unsigned long dZeros[2];
} __internal_dacospi_la_data_t;
51+
/*
 * Constant table for the FP64 acospi (LA) implementation, stored as raw
 * IEEE-754 binary64 bit patterns.  Designated initializers tie each value to
 * its field by name rather than by position.
 *
 * NOTE(review): the scalar routine visible in this file does not read this
 * table; it uses the stand-alone __dacospi_la_* constants instead.
 */
static __constant __internal_dacospi_la_data_t __internal_dacospi_la_data = {
    .SgnBit    = 0x8000000000000000uL,  /* sign bit only */
    .OneHalf   = 0x3fe0000000000000uL,  /* 0.5 */
    .SmallNorm = 0x3000000000000000uL,
    .dRsqrtMsk = 0xffffff0000000000uL,
    .MOne      = 0xbff0000000000000uL,  /* -1.0 */
    .HalfMask  = 0xfffffffffc000000uL,
    .Two       = 0x4000000000000000uL,  /* 2.0 */

    .sqrt_coeff = {
        0xbf918000993B24C3uL,
        0x3fa400006F70D42DuL,
        0xbfb7FFFFFFFFFE97uL,
        0x3fcFFFFFFFFFFF9DuL
    },

    .poly_coeff = {
        0x3f84F4523BC020D8uL,
        0xbf759EE439EEE799uL,
        0x3f79C78AE09A5457uL,
        0x3f60C57DF579794AuL,
        0x3f6FCBA466EA069BuL,
        0x3f7219262ADC70A8uL,
        0x3f76A256C108AAEduL,
        0x3f7D2B0EA1978F74uL,
        0x3f83CE53573AD4F4uL,
        0x3f8D1A452B1C8F4EuL,
        0x3f98723A1D5E7F21uL,
        0x3faB2995E7B7B28BuL
    },

    .InvPiH  = 0x3fd45F3070000000uL,
    .InvPiL  = 0xbe21B1BBEA2AAEE4uL,
    .One     = 0x3ff0000000000000uL,    /* 1.0 */
    .InvPi   = 0x3fd45F306DC9C883uL,    /* 1/pi */
    .SgnMask = 0x8000000000000000uL,    /* sign bit only */

    .ep_coeff = {
        0x3f88BAFFDA4549F0uL,
        0x3f7262B57524FB3BuL,
        0x3f84CD955BDDED9fuL,
        0x3f8D02B66C2AD236uL,
        0x3f9872BCE76EFA44uL,
        0x3faB2994D916CB05uL
    },

    .dInfs  = { 0x7ff0000000000000uL, 0xfff0000000000000uL },   /* +inf, -inf */
    .dOnes  = { 0x3ff0000000000000uL, 0xbff0000000000000uL },   /* +1.0, -1.0 */
    .dZeros = { 0x0000000000000000uL, 0x8000000000000000uL }    /* +0.0, -0.0 */
};
76+
static __constant int_double __dacospi_la_c12 = { 0x3f84f45239939e37UL };
77+
static __constant int_double __dacospi_la_c11 = { 0xbf759ee4331f54b9UL };
78+
static __constant int_double __dacospi_la_c10 = { 0x3f79c78adbe84367UL };
79+
static __constant int_double __dacospi_la_c9 = { 0x3f60c57df93896daUL };
80+
static __constant int_double __dacospi_la_c8 = { 0x3f6fcba465f52565UL };
81+
static __constant int_double __dacospi_la_c7 = { 0x3f7219262af154f1UL };
82+
static __constant int_double __dacospi_la_c6 = { 0x3f76a256c10640a7UL };
83+
static __constant int_double __dacospi_la_c5 = { 0x3f7d2b0ea197bf84UL };
84+
static __constant int_double __dacospi_la_c4 = { 0x3f83ce53573ad3bcUL };
85+
static __constant int_double __dacospi_la_c3 = { 0x3f8d1a452b1c8f59UL };
86+
static __constant int_double __dacospi_la_c2 = { 0x3f98723a1d5e7f21UL };
87+
static __constant int_double __dacospi_la_c1 = { 0x3fab2995e7b7b28cUL };
88+
static __constant int_double __dacospi_la_c0 = { 0x3fd45f306dc9c883UL };
89+
static __constant int_double __dacospi_la_two = { 0x4000000000000000UL };
90+
91+
static __constant int_double __dacospi_la_pi2h = { 0x3ff921fb54442d18UL };
92+
static __constant int_double __dacospi_la_pi2l = { 0x3c91a62633145c07UL };
93+
94+
static __constant int_float __dacospi_la_small_float = { 0x01800000u };
95+
96+
__attribute__((always_inline))
97+
inline int __internal_dacospi_la_cout (double *pxin, double *pres)
98+
{
99+
int nRet = 0;
100+
double xin = *pxin;
101+
int_double y, res;
102+
{
103+
int_double x, y, xa, RS, Shh2, High, R0;
104+
double R, E, poly, Sh, p910, p78, p56, p34, p12, R2;
105+
unsigned long sgn_x;
106+
float yf;
107+
int_float fcorr;
108+
109+
x.f = xin;
110+
111+
xa.f = __builtin_spirv_OpenCL_fabs_f64 (x.f);
112+
113+
sgn_x = x.w ^ xa.w;
114+
115+
y.f = __builtin_spirv_OpenCL_fma_f64_f64_f64 (-0.5, xa.f, 0.5);
116+
117+
R = xin * xin;
118+
R = __builtin_spirv_OpenCL_fmin_f64_f64 (R, y.f);
119+
120+
High.f = sgn_x ? 1.0 : 0;
121+
High.f = (xa.f <= 0.5) ? 0.5 : High.f;
122+
123+
yf = (float) y.f;
124+
125+
yf += __dacospi_la_small_float.f;
126+
yf = 1.0f / __builtin_spirv_OpenCL_sqrt_f32 (yf);
127+
RS.f = (double) (yf);
128+
129+
RS.w |= sgn_x;
130+
131+
Sh = y.f * RS.f;
132+
133+
Shh2.f = -2.0 * Sh;
134+
135+
E = __builtin_spirv_OpenCL_fma_f64_f64_f64 (-RS.f, Sh, 1.0);
136+
137+
R0.f = __builtin_spirv_OpenCL_fma_f64_f64_f64 (0.375, E, 0.5);
138+
R0.f *= E;
139+
140+
R0.f = __builtin_spirv_OpenCL_fma_f64_f64_f64 (R0.f, Shh2.f, Shh2.f);
141+
142+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (__dacospi_la_c12.f, R, __dacospi_la_c11.f);
143+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacospi_la_c10.f);
144+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacospi_la_c9.f);
145+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacospi_la_c8.f);
146+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacospi_la_c7.f);
147+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacospi_la_c6.f);
148+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacospi_la_c5.f);
149+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacospi_la_c4.f);
150+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacospi_la_c3.f);
151+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacospi_la_c2.f);
152+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacospi_la_c1.f);
153+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacospi_la_c0.f);
154+
R0.f = (xa.f <= 0.5) ? x.f : R0.f;
155+
156+
res.f = __builtin_spirv_OpenCL_fma_f64_f64_f64 (-poly, R0.f, High.f);
157+
158+
}
159+
160+
*pres = res.f;
161+
nRet = (y.f >= 0) ? 0 : 1;
162+
163+
return nRet;
164+
}
165+
166+
/*
 * FP64 acospi entry point: passes `a` to __internal_dacospi_la_cout and
 * returns the value that routine writes.  The routine's integer status code
 * is not used.
 */
double __ocl_svml_acospi (double a)
{
    double arg = a;
    double result;

    __internal_dacospi_la_cout (&arg, &result);

    return result;
}

IGC/BiFModule/Implementation/Math/acospi.cl

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2828
#include "../../Headers/spirv.h"
2929

3030
#if defined(cl_khr_fp64)
31-
32-
#include "../ExternalLibraries/libclc/doubles.cl"
33-
31+
#include "../IMF/FP64/acospi_d_la.cl"
3432
#endif // defined(cl_khr_fp64)
3533

3634
INLINE float __builtin_spirv_OpenCL_acospi_f32( float x )
@@ -44,7 +42,7 @@ GENERATE_VECTOR_FUNCTIONS_1ARG( __builtin_spirv_OpenCL_acospi, float, float, f32
4442

4543
INLINE double __builtin_spirv_OpenCL_acospi_f64( double x )
4644
{
47-
return libclc_acospi_f64(x);
45+
return __ocl_svml_acospi(x);
4846
}
4947

5048
GENERATE_VECTOR_FUNCTIONS_1ARG( __builtin_spirv_OpenCL_acospi, double, double, f64 )

0 commit comments

Comments
 (0)