Skip to content

Commit 882fe2b

Browse files
akolesov-nvigcbot
authored and committed
New multiple accuracy IMF math functions
New multiple accuracy IMF math functions & updates in existing implementations: sin cos tan sinh cosh tanh asin acos atan atan2 asinh acosh atanh exp exp2 exp10 expm1 log log2 log10 log1p hypot pow erf erfc sincos
1 parent 83d1a04 commit 882fe2b

File tree

112 files changed

+76955
-1008
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

112 files changed

+76955
-1008
lines changed

IGC/BiFModule/Implementation/IMF/FP32/acos_s_ep.cl

Lines changed: 465 additions & 0 deletions
Large diffs are not rendered by default.

IGC/BiFModule/Implementation/IMF/FP32/acos_s_ha.cl

Lines changed: 501 additions & 0 deletions
Large diffs are not rendered by default.

IGC/BiFModule/Implementation/IMF/FP32/acosh_s_ep.cl

Lines changed: 486 additions & 0 deletions
Large diffs are not rendered by default.

IGC/BiFModule/Implementation/IMF/FP32/acosh_s_ha.cl

Lines changed: 598 additions & 0 deletions
Large diffs are not rendered by default.

IGC/BiFModule/Implementation/IMF/FP32/asin_s_ep.cl

Lines changed: 464 additions & 0 deletions
Large diffs are not rendered by default.

IGC/BiFModule/Implementation/IMF/FP32/asin_s_ha.cl

Lines changed: 496 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 120 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,120 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2024 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
#include "../imf.h"
9+
#pragma OPENCL FP_CONTRACT OFF
10+
// Constants for the "ep" single-precision asinh kernel. Each one is a union so
// the same 32 bits can be read as an unsigned word (w), a float (f) or a signed
// int (i); the initializer sets the first member, i.e. the raw bit pattern.
//
// Large-|x| threshold: bit pattern 0x49800000 is 2^20 as a float.
static __constant union {
11+
unsigned int w;
12+
float f;
13+
int i;
14+
} __sasinh_ep_large_x = {0x49800000u};
15+
// Bit pattern 0x39800000 is 2^(-12) as a float.
// NOTE(review): not referenced by the ep code visible here (the small-input
// test uses __sasinh_ep_small2_x instead).
static __constant union {
16+
unsigned int w;
17+
float f;
18+
int i;
19+
} __sasinh_ep_small_x = {0x39800000u};
20+
// Largest finite float (0x7f7fffff); any |x| bit pattern above it is Inf or NaN.
21+
static __constant union {
22+
unsigned int w;
23+
float f;
24+
int i;
25+
} __sasinh_ep_largest_norm = {0x7f7fffffu};
26+
// log(2) rounded to float
27+
static __constant union {
28+
unsigned int w;
29+
float f;
30+
int i;
31+
} __sasinh_ep_ln2 = {0x3f317218u};
32+
// c4..c1: coefficients of the polynomial in the reduced argument R, consumed
// highest-degree first by the Horner chain in __ocl_svml_internal_sasinh_ep.
static __constant union {
33+
unsigned int w;
34+
float f;
35+
int i;
36+
} __sasinh_ep_c4 = {0x3e1103e9u};
37+
static __constant union {
38+
unsigned int w;
39+
float f;
40+
int i;
41+
} __sasinh_ep_c3 = {0xbe84f69cu};
42+
static __constant union {
43+
unsigned int w;
44+
float f;
45+
int i;
46+
} __sasinh_ep_c2 = {0x3ead39b3u};
47+
static __constant union {
48+
unsigned int w;
49+
float f;
50+
int i;
51+
} __sasinh_ep_c1 = {0xbefff0d2u};
52+
// NOTE(review): c0 is not referenced by the ep code visible here; the final
// polynomial step adds R itself (an implicit coefficient of 1).
static __constant union {
53+
unsigned int w;
54+
float f;
55+
int i;
56+
} __sasinh_ep_c0 = {0x3f7ffcc1u};
57+
// 2^(-6): inputs with |x| below this are returned unchanged by the final fixup
58+
static __constant union {
59+
unsigned int w;
60+
float f;
61+
int i;
62+
} __sasinh_ep_small2_x = {0x3c800000u};
63+
// Reduced-accuracy ("ep") single-precision asinh kernel.
//
// Reads the argument from *a and stores the result in *r. Always returns 0
// (nRet is never modified).
//
// Method, as implemented below: form Y = |x| + sqrt(x*x + 1), split Y into
// 2^expon * (1 + R), evaluate a short polynomial in R, add expon * log(2), and
// restore the sign of x. Inputs with |x| < 2^(-6), and Inf/NaN, bypass the
// computation and are returned as x.
//
// NOTE(review): SPIRV_OCL_BUILTIN comes from imf.h (not visible here);
// presumably it expands to the corresponding OpenCL builtin - confirm.
// NOTE(review): locals E and two_expon are declared but never used in this
// function.
__attribute__((always_inline)) inline int
64+
__ocl_svml_internal_sasinh_ep(float *a, float *r) {
65+
int nRet = 0;
66+
float x = *a;
67+
float z2h, Sh, RS, E;
68+
float poly, R;
69+
union {
70+
unsigned int w;
71+
float f;
72+
int i;
73+
} Yh, res, xin, sgn, xa, two_expon;
74+
int expon, e23, iexpon_corr;
75+
// z2h = x*x + 1
z2h = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(x, x, 1.0f);
76+
// RS = 1/sqrt(z2h); Sh = z2h * RS, i.e. an approximation of sqrt(1 + x^2)
RS = 1.0f / SPIRV_OCL_BUILTIN(sqrt, _f32, )(z2h);
77+
Sh = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(z2h, RS, 0.0f);
78+
xa.f = SPIRV_OCL_BUILTIN(fabs, _f32, )(x);
79+
// Yh = |x| + Sh (single float only; this variant keeps no low part)
80+
Yh.f = xa.f + Sh;
81+
// for |x| >= 2^20 replace Yh by |x|/2;
82+
// the missing factor of 4 is restored below by adding 2 to the exponent
83+
Yh.f = (xa.f < __sasinh_ep_large_x.f) ? Yh.f : xa.f * 0.5f;
84+
// exponent correction: 2 on the large-|x| path, 0 otherwise
85+
iexpon_corr = (xa.f < __sasinh_ep_large_x.f) ? 0 : 2;
86+
// unbiased exponent of Yh, bumped by one when the top mantissa bit is set
// (adding 0x00400000 carries into the exponent field), so that
// Yh * 2^(-expon) lies in [0.75, 1.5)
87+
expon = ((Yh.w + 0x00400000) >> 23) - 0x7f;
88+
// expon positioned in the float exponent field
89+
e23 = expon << 23;
90+
// Yh * 2^(-expon), done by subtracting from the exponent bits
91+
Yh.w -= e23;
92+
// reduced argument
93+
R = Yh.f - 1.0f;
94+
// add exponent correction
95+
expon += iexpon_corr;
96+
// polynomial: Horner chain over c4..c1, then poly = R + R^2 * (c1 + c2*R + ...)
97+
poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(__sasinh_ep_c4.f, R,
98+
__sasinh_ep_c3.f);
99+
poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(poly, R, __sasinh_ep_c2.f);
100+
poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(poly, R, __sasinh_ep_c1.f);
101+
// sgn keeps only the bits in which x and |x| differ, i.e. the sign bit of x
xin.f = x;
102+
sgn.w = xin.w ^ xa.w;
103+
poly *= R;
104+
poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(poly, R, R);
105+
// result magnitude = expon * log(2) + poly
res.f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(((float)expon),
106+
__sasinh_ep_ln2.f, poly);
107+
// give the result the sign of x
res.w ^= sgn.w;
108+
// fixup for small or Inf/NaN: return x (sgn.f is a signed zero, so x + sgn.f
// keeps x's value; xa.w above the largest-normal pattern means Inf or NaN)
109+
res.f =
110+
((xa.f < __sasinh_ep_small2_x.f) | (xa.w > __sasinh_ep_largest_norm.w))
111+
? (x + sgn.f)
112+
: res.f;
113+
*r = res.f;
114+
return nRet;
115+
}
116+
// Scalar entry point for the "ep" asinh variant: passes x to
// __ocl_svml_internal_sasinh_ep and returns the value that routine stores.
// The routine's int return value is discarded.
float __ocl_svml_asinhf_ep(float x) {
117+
float r;
118+
__ocl_svml_internal_sasinh_ep(&x, &r);
119+
return r;
120+
}
Lines changed: 231 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,231 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2024 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
#include "../imf.h"
9+
#pragma OPENCL FP_CONTRACT OFF
10+
// Constants for the "ha" single-precision asinh kernel. Each union lets the
// same 32 bits be read as an unsigned word (w), a float (f) or a signed int
// (i); the initializer sets the first member, i.e. the raw bit pattern.
//
// Large-|x| threshold: bit pattern 0x49800000 is 2^20 as a float.
static __constant union {
11+
unsigned int w;
12+
float f;
13+
int i;
14+
} __sasinh_ha_large_x = {0x49800000u};
15+
// Small-|x| threshold: bit pattern 0x39800000 is 2^(-12) as a float; inputs
// below it are returned unchanged by the final fixup.
static __constant union {
16+
unsigned int w;
17+
float f;
18+
int i;
19+
} __sasinh_ha_small_x = {0x39800000u};
20+
// Largest finite float (0x7f7fffff); any |x| bit pattern above it is Inf or NaN.
21+
static __constant union {
22+
unsigned int w;
23+
float f;
24+
int i;
25+
} __sasinh_ha_largest_norm = {0x7f7fffffu};
26+
// log(2) as a low/high pair of floats.
// NOTE(review): neither ln2l nor ln2h is referenced by the ha code visible
// here; the kernel uses __sasinh_ha_fln2 instead.
27+
static __constant union {
28+
unsigned int w;
29+
float f;
30+
int i;
31+
} __sasinh_ha_ln2l = {0xb102e308u};
32+
static __constant union {
33+
unsigned int w;
34+
float f;
35+
int i;
36+
} __sasinh_ha_ln2h = {0x3f317218u};
37+
// c9..c1: coefficients of the log() polynomial in the reduced argument R,
// consumed highest-degree first by the Horner chain in
// __ocl_svml_internal_sasinh_ha.
static __constant union {
38+
unsigned int w;
39+
float f;
40+
int i;
41+
} __sasinh_ha_c9 = {0xbd3bc2cau};
42+
static __constant union {
43+
unsigned int w;
44+
float f;
45+
int i;
46+
} __sasinh_ha_c8 = {0x3dd8bd42u};
47+
static __constant union {
48+
unsigned int w;
49+
float f;
50+
int i;
51+
} __sasinh_ha_c7 = {0xbe075e7fu};
52+
static __constant union {
53+
unsigned int w;
54+
float f;
55+
int i;
56+
} __sasinh_ha_c6 = {0x3e1445e9u};
57+
static __constant union {
58+
unsigned int w;
59+
float f;
60+
int i;
61+
} __sasinh_ha_c5 = {0xbe2a6712u};
62+
static __constant union {
63+
unsigned int w;
64+
float f;
65+
int i;
66+
} __sasinh_ha_c4 = {0x3e4cb1a3u};
67+
static __constant union {
68+
unsigned int w;
69+
float f;
70+
int i;
71+
} __sasinh_ha_c3 = {0xbe800059u};
72+
static __constant union {
73+
unsigned int w;
74+
float f;
75+
int i;
76+
} __sasinh_ha_c2 = {0x3eaaaae2u};
77+
// c1 is exactly -0.5 (0xbf000000)
static __constant union {
78+
unsigned int w;
79+
float f;
80+
int i;
81+
} __sasinh_ha_c1 = {0xbf000000u};
82+
// c0 is exactly 1.0 (0x3f800000).
// NOTE(review): not referenced by the ha code visible here; the kernel adds
// the hi/lo pair __sasinh_ha_fc0 instead.
static __constant union {
83+
unsigned int w;
84+
float f;
85+
int i;
86+
} __sasinh_ha_c0 = {0x3f800000u};
87+
// Degree-0 term of the log() polynomial as a {high, low} float pair; added to
// the running hi/lo accumulator in the kernel.
static __constant float __sasinh_ha_fc0[] = {
88+
0x1.fffffep-1, 0x1.dcd7b4p-25, // HI + LO: 0.99999994 + 5.5511784e-08
89+
// [0x3f7fffff + 0x336e6bda]
90+
};
91+
// log(2) as a {high, low} float pair; multiplied by the integer exponent in
// the kernel.
static __constant float __sasinh_ha_fln2[] = {
92+
0x1.62e42ep-1, 0x1.efa39ep-25, // HI + LO: 0.69314712 + 5.7699989e-08
93+
// [0x3f317217 + 0x3377d1cf]
94+
};
95+
// High-accuracy ("ha") single-precision asinh kernel.
//
// Reads the argument from *a and stores the result in *r. Always returns 0
// (nRet is never modified).
//
// Method, as implemented below: compute 1 + x^2 and its square root as
// high/low float pairs, form Y = |x| + sqrt(1 + x^2) as a pair (Yh, Yl), split
// Y into 2^expon * (1 + R), evaluate the log() polynomial in R with the last
// steps carried in hi/lo pairs, add expon * log(2) (also as a pair), and
// restore the sign of x. Inputs with |x| < 2^(-12), and Inf/NaN, bypass the
// computation and are returned as x.
//
// NOTE(review): SPIRV_OCL_BUILTIN comes from imf.h (not visible here);
// presumably it expands to the corresponding OpenCL builtin - confirm.
// NOTE(review): locals Rl and exponf are declared but never used in this
// function.
__attribute__((always_inline)) inline int
96+
__ocl_svml_internal_sasinh_ha(float *a, float *r) {
97+
int nRet = 0;
98+
float x = *a;
99+
float x2h, z2h, x2l, z2l, A, B, Bh, Sh, S0h, Sl, RS, E, Yhh;
100+
float Bl, poly, R, Rl, R0, exponf;
101+
union {
102+
unsigned int w;
103+
float f;
104+
int i;
105+
} Yh, Yl, res, xin, sgn, xa, two_expon;
106+
int expon, e23, iexpon_corr;
107+
// x2h = rounded x*x; z2h = x2h + 1
x2h = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(x, x, 0.0f);
108+
z2h = x2h + 1.0f;
109+
// A/B = larger/smaller of (x2h, 1), used to recover the rounding error of
// the addition x2h + 1 below
A = SPIRV_OCL_BUILTIN(fmax, _f32_f32, )(x2h, 1.0f);
110+
B = SPIRV_OCL_BUILTIN(fmin, _f32_f32, )(x2h, 1.0f);
111+
// x2l = x*x - x2h, the rounding error of the product
x2l = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(x, x, (-x2h));
112+
// Bl = B - (z2h - A): the part of the smaller addend lost in z2h
Bh = z2h - A;
113+
Bl = B - Bh;
114+
// z2l = low part of 1 + x^2
z2l = x2l + Bl;
115+
// RS = 1/sqrt(z2h); S0h = z2h * RS, a first approximation of sqrt(z2h)
RS = 1.0f / SPIRV_OCL_BUILTIN(sqrt, _f32, )(z2h);
116+
S0h = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(z2h, RS, 0.0f);
117+
// rsqrt(z2h)*0.5
118+
RS *= 0.5f;
119+
// (1+x^2) - S0h^2, including the low part z2l
120+
E = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )((-S0h), S0h, z2h);
121+
E = E + z2l;
122+
// sqrt(1+x^2)_low: correction term E * 0.5 / sqrt(z2h)
123+
Sl = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(E, RS, 0.0f);
124+
// re-split S0h + Sl into a high part Sh and a residual Sl
Sh = S0h + Sl;
125+
Yhh = Sh - S0h;
126+
Sl = Sl - Yhh;
127+
xa.f = SPIRV_OCL_BUILTIN(fabs, _f32, )(x);
128+
// |x| + Sh + Sl, kept as the pair (Yh, Yl)
129+
Yh.f = xa.f + Sh;
130+
Yhh = Yh.f - Sh;
131+
Yl.f = xa.f - Yhh;
132+
Yl.f = Yl.f + Sl;
133+
// for |x| >= 2^20 replace (Yh, Yl) by (|x|/2, 0);
134+
// the missing factor of 4 is restored below by adding 2 to the exponent
135+
Yh.f = (xa.f < __sasinh_ha_large_x.f) ? Yh.f : xa.f * 0.5f;
136+
Yl.f = (xa.f < __sasinh_ha_large_x.f) ? Yl.f : 0;
137+
// exponent correction: 2 on the large-|x| path, 0 otherwise
138+
iexpon_corr = (xa.f < __sasinh_ha_large_x.f) ? 0 : 2;
139+
// unbiased exponent of Yh, bumped by one when the top mantissa bit is set
// (adding 0x00400000 carries into the exponent field), so that
// Yh * 2^(-expon) lies in [0.75, 1.5)
140+
expon = ((Yh.w + 0x00400000) >> 23) - 0x7f;
141+
// expon positioned in the float exponent field
142+
e23 = expon << 23;
143+
// 2^(-expon)
144+
two_expon.w = 0x3f800000 - e23;
145+
// Yl * 2^(-expon)
146+
Yl.f *= two_expon.f;
147+
// Yh * 2^(-expon), done by subtracting from the exponent bits
148+
Yh.w -= e23;
149+
// reduced argument: R0 is the high part, Yl the low part, R their float sum
150+
R0 = Yh.f - 1.0f;
151+
R = Yl.f + R0;
152+
// add exponent correction
153+
expon += iexpon_corr;
154+
// log() polynomial: Horner chain over c9..c1 in single floats
155+
poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(__sasinh_ha_c9.f, R,
156+
__sasinh_ha_c8.f);
157+
poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(poly, R, __sasinh_ha_c7.f);
158+
poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(poly, R, __sasinh_ha_c6.f);
159+
poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(poly, R, __sasinh_ha_c5.f);
160+
poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(poly, R, __sasinh_ha_c4.f);
161+
poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(poly, R, __sasinh_ha_c3.f);
162+
poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(poly, R, __sasinh_ha_c2.f);
163+
poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(poly, R, __sasinh_ha_c1.f);
164+
// From here on values are carried as {high, low} pairs:
// fR = reduced argument, fPoly = running polynomial value, fExpon = log(2).
float fR[2], fPoly[2], fExpon[2];
165+
fR[0] = R0;
166+
fR[1] = Yl.f;
167+
fPoly[0] = poly;
168+
fPoly[1] = 0.0f;
169+
fExpon[0] = __sasinh_ha_fln2[0];
170+
fExpon[1] = __sasinh_ha_fln2[1];
171+
// fPoly *= fR: the fma with -__ph recovers the rounding error of the high
// product; the cross terms are accumulated into the low part
{
172+
float __ph, __phl;
173+
__ph = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(fPoly[0], fR[0], 0.0f);
174+
__phl = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(fPoly[0], fR[0], -__ph);
175+
fPoly[1] = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(fPoly[1], fR[0], __phl);
176+
fPoly[1] =
177+
SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(fPoly[0], fR[1], fPoly[1]);
178+
fPoly[0] = __ph;
179+
};
180+
// fPoly += __sasinh_ha_fc0: __ahl is the part of fPoly[0] lost when forming
// the high sum; it is folded into the low part together with fc0's low word
{
181+
float __ph, __ahl, __ahh;
182+
__ph = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(fPoly[0], 1.0f,
183+
__sasinh_ha_fc0[0]);
184+
__ahh =
185+
SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(__ph, 1.0f, -__sasinh_ha_fc0[0]);
186+
__ahl = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(fPoly[0], 1.0f, -__ahh);
187+
fPoly[1] = (fPoly[1] + __sasinh_ha_fc0[1]) + __ahl;
188+
fPoly[0] = __ph;
189+
};
190+
;
191+
// fExpon = log(2) * expon as a pair (the int expon is converted to float
// when passed as an fma argument)
{
192+
float __ph, __phl;
193+
__ph = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(fExpon[0], expon, 0.0f);
194+
__phl = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(fExpon[0], expon, -__ph);
195+
fExpon[1] = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(fExpon[1], expon, __phl);
196+
fExpon[0] = __ph;
197+
};
198+
// fPoly *= fR once more: fPoly now holds R * (fc0 + R * poly)
{
199+
float __ph, __phl;
200+
__ph = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(fPoly[0], fR[0], 0.0f);
201+
__phl = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(fPoly[0], fR[0], -__ph);
202+
fPoly[1] = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(fPoly[1], fR[0], __phl);
203+
fPoly[1] =
204+
SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(fPoly[0], fR[1], fPoly[1]);
205+
fPoly[0] = __ph;
206+
};
207+
// fPoly += fExpon, same compensated-sum pattern as the fc0 addition above
{
208+
float __ph, __ahl, __ahh;
209+
__ph = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(fPoly[0], 1.0f, fExpon[0]);
210+
__ahh = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(__ph, 1.0f, -fExpon[0]);
211+
__ahl = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, )(fPoly[0], 1.0f, -__ahh);
212+
fPoly[1] = (fPoly[1] + fExpon[1]) + __ahl;
213+
fPoly[0] = __ph;
214+
};
215+
;
216+
// collapse the pair into the final magnitude
res.f = fPoly[0] + fPoly[1];
217+
// sgn keeps only the bits in which x and |x| differ, i.e. the sign bit of x
xin.f = x;
218+
sgn.w = xin.w ^ xa.w;
219+
// give the result the sign of x
res.w ^= sgn.w;
220+
// fixup for small or Inf/NaN: return x (sgn.f is a signed zero, so x + sgn.f
// keeps x's value; xa.w above the largest-normal pattern means Inf or NaN)
221+
res.f = ((xa.f < __sasinh_ha_small_x.f) | (xa.w > __sasinh_ha_largest_norm.w))
222+
? (x + sgn.f)
223+
: res.f;
224+
*r = res.f;
225+
return nRet;
226+
}
227+
// Scalar entry point for the "ha" asinh variant: passes x to
// __ocl_svml_internal_sasinh_ha and returns the value that routine stores.
// The routine's int return value is discarded.
float __ocl_svml_asinhf_ha(float x) {
228+
float r;
229+
__ocl_svml_internal_sasinh_ha(&x, &r);
230+
return r;
231+
}

0 commit comments

Comments
 (0)