Skip to content

Commit 1c3ebeb

Browse files
grey-eminencesys_zuul
authored and committed
IMF LA open-sourcing. FP64 acos.
Change-Id: Ied8a9b6eef4bad4566f0766347fd24b3d9e809ab
1 parent 0a94c3f commit 1c3ebeb

File tree

2 files changed

+184
-4
lines changed

2 files changed

+184
-4
lines changed
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
/*===================== begin_copyright_notice ==================================
2+
3+
Copyright (c) 2017 Intel Corporation
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a
6+
copy of this software and associated documentation files (the
7+
"Software"), to deal in the Software without restriction, including
8+
without limitation the rights to use, copy, modify, merge, publish,
9+
distribute, sublicense, and/or sell copies of the Software, and to
10+
permit persons to whom the Software is furnished to do so, subject to
11+
the following conditions:
12+
13+
The above copyright notice and this permission notice shall be included
14+
in all copies or substantial portions of the Software.
15+
16+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17+
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23+
24+
25+
======================= end_copyright_notice ==================================*/
26+
27+
#include "../imf.h"
28+
#pragma OPENCL FP_CONTRACT OFF
29+
/*
 * Layout of the constant table for the double-precision acos "LA" code.
 *
 * Every member is an unsigned long that holds a raw bit pattern rather than
 * a numeric value. The table instance (__internal_dacos_la_data) is filled
 * by a positional initializer, so the member order here must stay in sync
 * with that initializer.
 *
 * NOTE(review): the per-member remarks below decode the initializer values
 * found in this file as IEEE-754 binary64 patterns; the intended use of each
 * member is inferred from its name and should be confirmed, because no code
 * visible in this section reads the table.
 */
typedef struct
30+
{
31+
/* initialized to 0x8000000000000000: only bit 63 (the binary64 sign bit) set */
unsigned long SgnBit;
32+
/* initialized to 0x3fe0000000000000: bit pattern of 0.5 */
unsigned long OneHalf;
33+
/* initialized to 0x3000000000000000: bit pattern of 2^-255 */
unsigned long SmallNorm;
34+
/* initialized to 0xffffff0000000000: mask that keeps the top 24 bits */
unsigned long dRsqrtMsk;
35+
/* initialized to 0xbff0000000000000: bit pattern of -1.0 */
unsigned long MOne;
36+
/* initialized to 0xfffffffffc000000: mask that clears the low 26 bits */
unsigned long HalfMask;
37+
/* initialized to 0x4000000000000000: bit pattern of 2.0 */
unsigned long Two;
38+
/* four coefficients; presumably for a square-root refinement polynomial -- confirm */
unsigned long sqrt_coeff[4];
39+
/* twelve coefficients; presumably the main approximation polynomial -- confirm */
unsigned long poly_coeff[12];
40+
/* initialized to 0x3ca1a62633145c07; presumably the low-order part of pi -- confirm */
unsigned long PiL;
41+
/* initialized to 0x400921fb54442d18: the double nearest pi */
unsigned long PiH;
42+
/* initialized to 0x3c91a62633145c07; presumably the low-order part of pi/2 -- confirm */
unsigned long Pi2L;
43+
/* initialized to 0x3ff921fb54442d18: the double nearest pi/2 */
unsigned long Pi2H;
44+
/* initialized to 0: bit pattern of +0.0 */
unsigned long Zero;
45+
/* initialized to 0x8000000000000000: same pattern as SgnBit */
unsigned long SgnMask;
46+
/* initialized to 0xfffc000000000000; purpose inferred from the name only -- confirm */
unsigned long NanMask;
47+
/* six coefficients; presumably a shorter polynomial for a reduced-accuracy variant -- confirm */
unsigned long ep_coeff[6];
48+
49+
/* { +infinity, -infinity } bit patterns */
unsigned long dInfs[2];
50+
/* { +1.0, -1.0 } bit patterns */
unsigned long dOnes[2];
51+
/* { +0.0, -0.0 } bit patterns */
unsigned long dZeros[2];
52+
} __internal_dacos_la_data_t;
53+
/*
 * Constant table instance for the double-precision acos LA code.
 *
 * The initializer is positional: values appear in the declaration order of
 * __internal_dacos_la_data_t, and the comments below name the member(s)
 * each line populates. All values are raw 64-bit patterns.
 *
 * NOTE(review): nothing visible in this section reads this table; the acos
 * helper in this file uses the separate __dacos_la_* scalar constants.
 */
static __constant __internal_dacos_la_data_t __internal_dacos_la_data = {
54+
55+
/* SgnBit, OneHalf, SmallNorm, dRsqrtMsk, MOne, HalfMask */
0x8000000000000000uL, 0x3fe0000000000000uL, 0x3000000000000000uL, 0xffffff0000000000uL, 0xbff0000000000000uL, 0xfffffffffc000000uL,
56+
/* Two, followed by the opening brace of sqrt_coeff[4] */
0x4000000000000000uL, {
57+
/* sqrt_coeff[0..3] */
0xbf918000993B24C3uL, 0x3fa400006F70D42DuL, 0xbfb7FFFFFFFFFE97uL, 0x3fcFFFFFFFFFFF9DuL}
58+
59+
/* poly_coeff[12] */
, {
60+
/* poly_coeff[0..5] */
0x3fa07520C70EB909uL, 0xbf90FB17F7DBB0EDuL, 0x3f943F44BFBC3BAEuL, 0x3f7A583395D45ED5uL, 0x3f88F8DC2AFCCAD6uL, 0x3f8C6DBBCB88BD57uL,
61+
/* poly_coeff[6..11] */
0x3f91C6DCF538AD2EuL, 0x3f96E89CEBDEFadduL, 0x3f9F1C72E13AD8BEuL, 0x3fa6DB6DB3B445F8uL, 0x3fb333333337E0DEuL, 0x3fc555555555529CuL}
62+
63+
/* PiL, PiH, Pi2L, Pi2H, Zero, SgnMask */
, 0x3ca1a62633145c07uL, 0x400921fb54442d18uL, 0x3c91a62633145c07uL, 0x3ff921fb54442d18uL, 0x0000000000000000uL, 0x8000000000000000uL,
64+
/* NanMask, followed by the opening brace of ep_coeff[6] */
0xfffc000000000000uL, {
65+
/* ep_coeff[0..4] */
0x3fa36C5AF645A11EuL, 0x3f8CE147EA9E9282uL, 0x3fa056B4151FA155uL, 0x3fa6C8ED2A4CCE54uL, 0x3fb33399EBF85B6AuL,
66+
/* ep_coeff[5] */
0x3fc5555480C83A45uL}
67+
68+
/* dInfs: +infinity, -infinity */
, {0x7ff0000000000000uL, 0xfff0000000000000uL}
69+
70+
/* dOnes: +1.0, -1.0 */
, {0x3ff0000000000000uL, 0xbff0000000000000uL}
71+
72+
/* dZeros: +0.0, -0.0 */
, {0x0000000000000000uL, 0x8000000000000000uL}
73+
74+
};
75+
/*
 * Scalar constants consumed by __internal_dacos_la_cout.
 *
 * int_double / int_float are declared outside this file section (presumably
 * in ../imf.h -- confirm). The code in this file accesses them through a
 * floating-point member `.f` and, for int_double, an integer member `.w`.
 * Each initializer supplies a hexadecimal bit pattern; this assumes the
 * integer member is the first member of the type -- TODO confirm.
 *
 * __dacos_la_c12 .. __dacos_la_c0 are the polynomial coefficients: the
 * helper evaluates them by Horner's scheme starting from c12 and ending
 * with c0.
 */
static __constant int_double __dacos_la_c12 = { 0x3fa07520c559a401UL };
76+
static __constant int_double __dacos_la_c11 = { 0xbf90fb17f2824aa0UL };
77+
static __constant int_double __dacos_la_c10 = { 0x3f943f44bc0c21f0UL };
78+
static __constant int_double __dacos_la_c9 = { 0x3f7a58339bb6f20cUL };
79+
static __constant int_double __dacos_la_c8 = { 0x3f88f8dc2a3c76d7UL };
80+
static __constant int_double __dacos_la_c7 = { 0x3f8c6dbbcba98e67UL };
81+
static __constant int_double __dacos_la_c6 = { 0x3f91c6dcf536c796UL };
82+
static __constant int_double __dacos_la_c5 = { 0x3f96e89cebdf209cUL };
83+
static __constant int_double __dacos_la_c4 = { 0x3f9f1c72e13ad6d3UL };
84+
static __constant int_double __dacos_la_c3 = { 0x3fa6db6db3b44600UL };
85+
static __constant int_double __dacos_la_c2 = { 0x3fb333333337e0deUL };
86+
static __constant int_double __dacos_la_c1 = { 0x3fc555555555529cUL };
87+
/* 0x3ff0000000000000 is the bit pattern of 1.0: the constant term of the polynomial */
static __constant int_double __dacos_la_c0 = { 0x3ff0000000000000UL };
88+
/* 0x3ff921fb54442d18 is the double nearest pi/2; used as the additive term when |x| <= 0.5 */
static __constant int_double __dacos_la_pi2h = { 0x3ff921fb54442d18UL };
89+
/* presumably the low-order part of pi/2 -- confirm; not referenced by the code visible here */
static __constant int_double __dacos_la_pi2l = { 0x3c91a62633145c07UL };
90+
91+
/* 0x400921fb54442d18 is the double nearest pi; used as the additive term for negative x with |x| > 0.5 */
static __constant int_double __dacos_la_pih = { 0x400921fb54442d18UL };
92+
/* presumably the low-order part of pi -- confirm; not referenced by the code visible here */
static __constant int_double __dacos_la_pil = { 0x3ca1a62633145c07UL };
93+
94+
/*
 * 0x01800000 decodes as the single-precision value 2^-124. The helper adds
 * it to its float argument before taking the reciprocal square root, so an
 * argument of exactly zero does not reach the division as zero.
 */
static __constant int_float __dacos_la_small_float = { 0x01800000u };
95+
96+
/*
 * Core double-precision acos evaluation.
 *
 * Parameters:
 *   pxin - pointer to the input value; it is only read.
 *   pres - pointer that receives the computed result.
 *
 * Returns 0 when y = 0.5 - 0.5*|x| compares >= 0, i.e. |x| <= 1, and 1
 * otherwise (|x| > 1, or an input for which the comparison is false, such
 * as NaN).
 *
 * Method, as implemented below:
 *   |x| <= 0.5 : result = pi/2 - x * P(x*x)
 *   x  >  0.5  : result = 2*sqrt(y) * P(y)        with y = (1 - |x|)/2
 *   x  < -0.5  : result = pi - 2*sqrt(y) * P(y)
 * P is the degree-12 polynomial with coefficients __dacos_la_c12..c0.
 * sqrt(y) is built from a single-precision reciprocal-square-root estimate
 * that is then corrected in double precision.
 *
 * The file sets "#pragma OPENCL FP_CONTRACT OFF" and requests every fused
 * operation explicitly through the fma builtin, so statement order and fma
 * placement are part of the numerical behaviour.
 */
__attribute__((always_inline))
97+
inline int __internal_dacos_la_cout (double *pxin, double *pres)
98+
{
99+
int nRet = 0;
100+
double xin = *pxin;
101+
int_double y, res;
102+
{
103+
int_double x, xa, RS, Shh2, High, R0;
104+
double R, E, poly, Sh;
105+
unsigned long sgn_x;
106+
float yf;
107+
/* NOTE(review): fcorr is declared but never used in this function. */
int_float fcorr;
108+
109+
x.f = xin;
110+
111+
/* xa = |x| */
xa.f = __builtin_spirv_OpenCL_fabs_f64 (x.f);
112+
113+
/* x and |x| differ only in the sign bit, so the XOR isolates the sign bit of x. */
sgn_x = x.w ^ xa.w;
114+
115+
/* y = 0.5 - 0.5*|x| = (1 - |x|)/2, computed with one fused operation. */
y.f = __builtin_spirv_OpenCL_fma_f64_f64_f64 (-(0.5), xa.f, 0.5);
116+
117+
/*
 * Polynomial argument R = min(x*x, y).
 * For |x| <= 0.5: x*x <= 0.25 <= y, so R = x*x.
 * For |x| >  0.5: y < 0.25 < x*x, so R = y.
 */
R = xin * xin;
118+
R = __builtin_spirv_OpenCL_fmin_f64_f64 (R, y.f);
119+
120+
/* Additive term: pi for negative x, 0 otherwise; replaced by pi/2 when |x| <= 0.5. */
High.f = sgn_x ? __dacos_la_pih.f : 0;
121+
High.f = (xa.f <= 0.5) ? __dacos_la_pi2h.f : High.f;
122+
123+
/* Narrow y to float for the reciprocal-square-root estimate. */
yf = (float) y.f;
124+
125+
/* Add a tiny positive constant before the division below. */
yf += __dacos_la_small_float.f;
126+
/* yf ~= 1/sqrt(y), single precision. */
yf = 1.0f / __builtin_spirv_OpenCL_sqrt_f32 (yf);
127+
RS.f = (double) (yf);
128+
129+
/* Copy the sign of x into the estimate so the sqrt term below carries it. */
RS.w |= sgn_x;
130+
131+
/* Sh ~= sign(x) * sqrt(y) */
Sh = (y.f * RS.f);
132+
133+
/* Shh2 ~= -2 * sign(x) * sqrt(y) */
Shh2.f = -2.0 * Sh;
134+
135+
/* E = 1 - RS*Sh = 1 - y*RS^2: the relative error left by the estimate. */
E = (__builtin_spirv_OpenCL_fma_f64_f64_f64 (-(RS.f), Sh, 1.0));
136+
137+
/*
 * Correction: sqrt(y) = |Sh| / sqrt(1 - E) ~= |Sh| * (1 + E/2 + 3*E^2/8).
 * The next two statements form (0.5 + 0.375*E) * E.
 */
R0.f = __builtin_spirv_OpenCL_fma_f64_f64_f64 (0.375, E, 0.5);
138+
R0.f *= E;
139+
140+
/* R0 = Shh2 * (1 + correction) ~= -2 * sign(x) * sqrt(y) */
R0.f = __builtin_spirv_OpenCL_fma_f64_f64_f64 (R0.f, Shh2.f, Shh2.f);
141+
142+
/* Horner evaluation of P(R), from c12 down to c0. */
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (__dacos_la_c12.f, R, __dacos_la_c11.f);
143+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacos_la_c10.f);
144+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacos_la_c9.f);
145+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacos_la_c8.f);
146+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacos_la_c7.f);
147+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacos_la_c6.f);
148+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacos_la_c5.f);
149+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacos_la_c4.f);
150+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacos_la_c3.f);
151+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacos_la_c2.f);
152+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacos_la_c1.f);
153+
poly = __builtin_spirv_OpenCL_fma_f64_f64_f64 (poly, R, __dacos_la_c0.f);
154+
/* For |x| <= 0.5 the polynomial multiplies x itself instead of the sqrt term. */
R0.f = (xa.f <= 0.5) ? x.f : R0.f;
155+
156+
/* result = High - P(R) * R0 */
res.f = (__builtin_spirv_OpenCL_fma_f64_f64_f64 (-(poly), R0.f, High.f));
157+
158+
}
159+
160+
*pres = res.f;
161+
/* y < 0 means |x| > 1; the comparison is also false when y is NaN. */
nRet = (y.f >= 0) ? 0 : 1;
162+
163+
return nRet;
164+
}
165+
166+
/*
 * Double-precision acos entry point.
 *
 * Passes `a` to __internal_dacos_la_cout through a local copy and returns
 * the value that helper stores. The helper's integer return status is not
 * consumed.
 */
double __ocl_svml_acos (double a)
{
    double arg = a;
    double result;

    (void) __internal_dacos_la_cout (&arg, &result);

    return result;
}

IGC/BiFModule/Implementation/Math/acos.cl

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,17 +30,15 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
3030
GENERATE_VECTOR_FUNCTIONS_1ARG( __builtin_spirv_OpenCL_acos, float, float, f32 )
3131

3232
#if defined(cl_khr_fp64)
33-
34-
#include "../ExternalLibraries/libclc/doubles.cl"
35-
33+
#include "../IMF/FP64/acos_d_la.cl"
3634
#endif // defined(cl_khr_fp64)
3735

3836

3937
#if defined(cl_khr_fp64)
4038

4139
INLINE double __builtin_spirv_OpenCL_acos_f64( double x )
4240
{
43-
return libclc_acos_f64(x);
41+
return __ocl_svml_acos(x);
4442
}
4543

4644
GENERATE_VECTOR_FUNCTIONS_1ARG( __builtin_spirv_OpenCL_acos, double, double, f64 )

0 commit comments

Comments
 (0)