Skip to content

Commit 9a4719b

Browse files
authored
[SYCL] Add rcp for fp32 and fp64 with rounding mode supported (#11768)
This PR adds frcp_rd/n/u/z and drcp_rd/n/u/z to sycl::ext::intel::math which corresponds to CUDA math's __frcp_r* and __drcp_r*
1 parent 69f4e16 commit 9a4719b

File tree

6 files changed

+115
-0
lines changed

6 files changed

+115
-0
lines changed

libdevice/imf_wrapper.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1931,21 +1931,33 @@ float __devicelib_imf_fdiv_rd(float, float);
19311931
DEVICE_EXTERN_C_INLINE
19321932
float __imf_fdiv_rd(float x, float y) { return __devicelib_imf_fdiv_rd(x, y); }
19331933

1934+
DEVICE_EXTERN_C_INLINE
1935+
float __imf_frcp_rd(float x) { return __devicelib_imf_fdiv_rd(1.0f, x); }
1936+
19341937
DEVICE_EXTERN_C_INLINE
19351938
float __devicelib_imf_fdiv_rn(float, float);
19361939

19371940
DEVICE_EXTERN_C_INLINE
19381941
float __imf_fdiv_rn(float x, float y) { return __devicelib_imf_fdiv_rn(x, y); }
19391942

1943+
DEVICE_EXTERN_C_INLINE
1944+
float __imf_frcp_rn(float x) { return __devicelib_imf_fdiv_rn(1.0f, x); }
1945+
19401946
DEVICE_EXTERN_C_INLINE
19411947
float __devicelib_imf_fdiv_ru(float, float);
19421948

19431949
DEVICE_EXTERN_C_INLINE
19441950
float __imf_fdiv_ru(float x, float y) { return __devicelib_imf_fdiv_ru(x, y); }
19451951

1952+
DEVICE_EXTERN_C_INLINE
1953+
float __imf_frcp_ru(float x) { return __devicelib_imf_fdiv_ru(1.0f, x); }
1954+
19461955
DEVICE_EXTERN_C_INLINE
19471956
float __devicelib_imf_fdiv_rz(float, float);
19481957

19491958
DEVICE_EXTERN_C_INLINE
19501959
float __imf_fdiv_rz(float x, float y) { return __devicelib_imf_fdiv_rz(x, y); }
1960+
1961+
DEVICE_EXTERN_C_INLINE
1962+
float __imf_frcp_rz(float x) { return __devicelib_imf_fdiv_rz(1.0f, x); }
19511963
#endif // __LIBDEVICE_IMF_ENABLED__

libdevice/imf_wrapper_fp64.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,9 @@ double __imf_ddiv_rd(double x, double y) {
482482
return __devicelib_imf_ddiv_rd(x, y);
483483
}
484484

485+
DEVICE_EXTERN_C_INLINE
486+
double __imf_drcp_rd(double x) { return __devicelib_imf_ddiv_rd(1.0, x); }
487+
485488
DEVICE_EXTERN_C_INLINE
486489
double __devicelib_imf_ddiv_rn(double, double);
487490

@@ -490,6 +493,9 @@ double __imf_ddiv_rn(double x, double y) {
490493
return __devicelib_imf_ddiv_rn(x, y);
491494
}
492495

496+
DEVICE_EXTERN_C_INLINE
497+
double __imf_drcp_rn(double x) { return __devicelib_imf_ddiv_rn(1.0, x); }
498+
493499
DEVICE_EXTERN_C_INLINE
494500
double __devicelib_imf_ddiv_ru(double, double);
495501

@@ -498,11 +504,17 @@ double __imf_ddiv_ru(double x, double y) {
498504
return __devicelib_imf_ddiv_ru(x, y);
499505
}
500506

507+
DEVICE_EXTERN_C_INLINE
508+
double __imf_drcp_ru(double x) { return __devicelib_imf_ddiv_ru(1.0, x); }
509+
501510
DEVICE_EXTERN_C_INLINE
502511
double __devicelib_imf_ddiv_rz(double, double);
503512

504513
DEVICE_EXTERN_C_INLINE
505514
double __imf_ddiv_rz(double x, double y) {
506515
return __devicelib_imf_ddiv_rz(x, y);
507516
}
517+
518+
DEVICE_EXTERN_C_INLINE
519+
double __imf_drcp_rz(double x) { return __devicelib_imf_ddiv_rz(1.0, x); }
508520
#endif // __LIBDEVICE_IMF_ENABLED__

sycl/include/sycl/builtins.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,10 @@ extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_rd(float x, float y);
107107
extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_rn(float x, float y);
108108
extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_ru(float x, float y);
109109
extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_rz(float x, float y);
110+
extern __DPCPP_SYCL_EXTERNAL float __imf_frcp_rd(float x);
111+
extern __DPCPP_SYCL_EXTERNAL float __imf_frcp_rn(float x);
112+
extern __DPCPP_SYCL_EXTERNAL float __imf_frcp_ru(float x);
113+
extern __DPCPP_SYCL_EXTERNAL float __imf_frcp_rz(float x);
110114
extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_rd(float x);
111115
extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_rn(float x);
112116
extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_ru(float x);
@@ -336,6 +340,10 @@ extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_rd(double x, double y);
336340
extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_rn(double x, double y);
337341
extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_ru(double x, double y);
338342
extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_rz(double x, double y);
343+
extern __DPCPP_SYCL_EXTERNAL double __imf_drcp_rd(double x);
344+
extern __DPCPP_SYCL_EXTERNAL double __imf_drcp_rn(double x);
345+
extern __DPCPP_SYCL_EXTERNAL double __imf_drcp_ru(double x);
346+
extern __DPCPP_SYCL_EXTERNAL double __imf_drcp_rz(double x);
339347
extern __DPCPP_SYCL_EXTERNAL float __imf_double2float_rd(double x);
340348
extern __DPCPP_SYCL_EXTERNAL float __imf_double2float_rn(double x);
341349
extern __DPCPP_SYCL_EXTERNAL float __imf_double2float_ru(double x);

sycl/include/sycl/ext/intel/math/imf_rounding_math.hpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ float __imf_fdiv_rz(float, float);
2727
float __imf_fdiv_rn(float, float);
2828
float __imf_fdiv_ru(float, float);
2929
float __imf_fdiv_rd(float, float);
30+
float __imf_frcp_rz(float);
31+
float __imf_frcp_rn(float);
32+
float __imf_frcp_ru(float);
33+
float __imf_frcp_rd(float);
3034

3135
double __imf_dadd_rz(double, double);
3236
double __imf_dadd_rn(double, double);
@@ -44,6 +48,10 @@ double __imf_ddiv_rz(double, double);
4448
double __imf_ddiv_rn(double, double);
4549
double __imf_ddiv_ru(double, double);
4650
double __imf_ddiv_rd(double, double);
51+
double __imf_drcp_rz(double);
52+
double __imf_drcp_rn(double);
53+
double __imf_drcp_ru(double);
54+
double __imf_drcp_rd(double);
4755
};
4856

4957
namespace sycl {
@@ -114,6 +122,14 @@ template <typename Tp = float> Tp fdiv_rz(Tp x, Tp y) {
114122
return __imf_fdiv_rz(x, y);
115123
}
116124

125+
template <typename Tp = float> Tp frcp_rd(Tp x) { return __imf_frcp_rd(x); }
126+
127+
template <typename Tp = float> Tp frcp_rn(Tp x) { return __imf_frcp_rn(x); }
128+
129+
template <typename Tp = float> Tp frcp_ru(Tp x) { return __imf_frcp_ru(x); }
130+
131+
template <typename Tp = float> Tp frcp_rz(Tp x) { return __imf_frcp_rz(x); }
132+
117133
template <typename Tp = double> Tp dadd_rd(Tp x, Tp y) {
118134
return __imf_dadd_rd(x, y);
119135
}
@@ -177,6 +193,14 @@ template <typename Tp = double> Tp ddiv_ru(Tp x, Tp y) {
177193
template <typename Tp = double> Tp ddiv_rz(Tp x, Tp y) {
178194
return __imf_ddiv_rz(x, y);
179195
}
196+
197+
template <typename Tp = double> Tp drcp_rd(Tp x) { return __imf_drcp_rd(x); }
198+
199+
template <typename Tp = double> Tp drcp_rn(Tp x) { return __imf_drcp_rn(x); }
200+
201+
template <typename Tp = double> Tp drcp_ru(Tp x) { return __imf_drcp_ru(x); }
202+
203+
template <typename Tp = double> Tp drcp_rz(Tp x) { return __imf_drcp_rz(x); }
180204
} // namespace ext::intel::math
181205
} // namespace _V1
182206
} // namespace sycl

sycl/test-e2e/DeviceLib/imf_fp32_rounding_test.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,5 +121,35 @@ int main(int, char **) {
121121
std::cout << "sycl::ext::intel::math::fdiv_rz passes." << std::endl;
122122
}
123123

124+
{
125+
std::initializer_list<float> input_vals = {
126+
0x1.ba90e6p+1, 0x1.4p+1, 0x1.ea77e6p-2, 0x1.e8330ap+19,
127+
-0x1.4ffd68p+5, -0x1.443084p-15, 0x1.605fb2p+6, -0x1.2eb718p-7};
128+
std::initializer_list<unsigned> ref_vals_rd = {
129+
0x3e9414f5, 0x3ecccccc, 0x40059e85, 0x35863d80,
130+
0xbcc30db3, 0xc6ca2743, 0x3c39fbfb, 0xc2d87e72};
131+
std::initializer_list<unsigned> ref_vals_rn = {
132+
0x3e9414f5, 0x3ecccccd, 0x40059e85, 0x35863d80,
133+
0xbcc30db2, 0xc6ca2743, 0x3c39fbfc, 0xc2d87e71};
134+
std::initializer_list<unsigned> ref_vals_ru = {
135+
0x3e9414f6, 0x3ecccccd, 0x40059e86, 0x35863d81,
136+
0xbcc30db2, 0xc6ca2742, 0x3c39fbfc, 0xc2d87e71};
137+
std::initializer_list<unsigned> ref_vals_rz = {
138+
0x3e9414f5, 0x3ecccccc, 0x40059e85, 0x35863d80,
139+
0xbcc30db2, 0xc6ca2742, 0x3c39fbfb, 0xc2d87e71};
140+
test(device_queue, input_vals, ref_vals_rd,
141+
FT(unsigned, sycl::ext::intel::math::frcp_rd));
142+
std::cout << "sycl::ext::intel::math::frcp_rd passes." << std::endl;
143+
test(device_queue, input_vals, ref_vals_rn,
144+
FT(unsigned, sycl::ext::intel::math::frcp_rn));
145+
std::cout << "sycl::ext::intel::math::frcp_rn passes." << std::endl;
146+
test(device_queue, input_vals, ref_vals_ru,
147+
FT(unsigned, sycl::ext::intel::math::frcp_ru));
148+
std::cout << "sycl::ext::intel::math::frcp_ru passes." << std::endl;
149+
test(device_queue, input_vals, ref_vals_rz,
150+
FT(unsigned, sycl::ext::intel::math::frcp_rz));
151+
std::cout << "sycl::ext::intel::math::frcp_rz passes." << std::endl;
152+
}
153+
124154
return 0;
125155
}

sycl/test-e2e/DeviceLib/imf_fp64_rounding_test.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,5 +146,34 @@ int main(int, char **) {
146146
std::cout << "sycl::ext::intel::math::ddiv_rz passes." << std::endl;
147147
}
148148

149+
{
150+
std::initializer_list<double> input_vals1 = {
151+
0x1p+2, 0x1.fbd37afb0f8edp-1, 0x1.9238e38e38e35p+6, 0x1.7p+3};
152+
std::initializer_list<unsigned long long> ref_vals_rd = {
153+
0x3fd0000000000000, 0x3ff021aa6a60809b, 0x3f845de9ef97e71e,
154+
0x3fb642c8590b2164};
155+
std::initializer_list<unsigned long long> ref_vals_rn = {
156+
0x3fd0000000000000, 0x3ff021aa6a60809c, 0x3f845de9ef97e71f,
157+
0x3fb642c8590b2164};
158+
std::initializer_list<unsigned long long> ref_vals_ru = {
159+
0x3fd0000000000000, 0x3ff021aa6a60809c, 0x3f845de9ef97e71f,
160+
0x3fb642c8590b2165};
161+
std::initializer_list<unsigned long long> ref_vals_rz = {
162+
0x3fd0000000000000, 0x3ff021aa6a60809b, 0x3f845de9ef97e71e,
163+
0x3fb642c8590b2164};
164+
test(device_queue, input_vals1, ref_vals_rd,
165+
FT(unsigned long long, sycl::ext::intel::math::drcp_rd));
166+
std::cout << "sycl::ext::intel::math::drcp_rd passes." << std::endl;
167+
test(device_queue, input_vals1, ref_vals_rn,
168+
FT(unsigned long long, sycl::ext::intel::math::drcp_rn));
169+
std::cout << "sycl::ext::intel::math::drcp_rn passes." << std::endl;
170+
test(device_queue, input_vals1, ref_vals_ru,
171+
FT(unsigned long long, sycl::ext::intel::math::drcp_ru));
172+
std::cout << "sycl::ext::intel::math::drcp_ru passes." << std::endl;
173+
test(device_queue, input_vals1, ref_vals_rz,
174+
FT(unsigned long long, sycl::ext::intel::math::drcp_rz));
175+
std::cout << "sycl::ext::intel::math::drcp_rz passes." << std::endl;
176+
}
177+
149178
return 0;
150179
}

0 commit comments

Comments
 (0)