Skip to content

[SYCL] Add rcp for fp32 and fp64 with rounding mode supported #11768

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions libdevice/imf_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1931,21 +1931,33 @@ float __devicelib_imf_fdiv_rd(float, float);
DEVICE_EXTERN_C_INLINE
float __imf_fdiv_rd(float x, float y) { return __devicelib_imf_fdiv_rd(x, y); }

DEVICE_EXTERN_C_INLINE
float __imf_frcp_rd(float x) { return __devicelib_imf_fdiv_rd(1.0f, x); }

DEVICE_EXTERN_C_INLINE
float __devicelib_imf_fdiv_rn(float, float);

DEVICE_EXTERN_C_INLINE
float __imf_fdiv_rn(float x, float y) { return __devicelib_imf_fdiv_rn(x, y); }

DEVICE_EXTERN_C_INLINE
float __imf_frcp_rn(float x) { return __devicelib_imf_fdiv_rn(1.0f, x); }

DEVICE_EXTERN_C_INLINE
float __devicelib_imf_fdiv_ru(float, float);

DEVICE_EXTERN_C_INLINE
float __imf_fdiv_ru(float x, float y) { return __devicelib_imf_fdiv_ru(x, y); }

DEVICE_EXTERN_C_INLINE
float __imf_frcp_ru(float x) { return __devicelib_imf_fdiv_ru(1.0f, x); }

DEVICE_EXTERN_C_INLINE
float __devicelib_imf_fdiv_rz(float, float);

DEVICE_EXTERN_C_INLINE
float __imf_fdiv_rz(float x, float y) { return __devicelib_imf_fdiv_rz(x, y); }

DEVICE_EXTERN_C_INLINE
float __imf_frcp_rz(float x) { return __devicelib_imf_fdiv_rz(1.0f, x); }
#endif // __LIBDEVICE_IMF_ENABLED__
12 changes: 12 additions & 0 deletions libdevice/imf_wrapper_fp64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,9 @@ double __imf_ddiv_rd(double x, double y) {
return __devicelib_imf_ddiv_rd(x, y);
}

DEVICE_EXTERN_C_INLINE
double __imf_drcp_rd(double x) { return __devicelib_imf_ddiv_rd(1.0, x); }

DEVICE_EXTERN_C_INLINE
double __devicelib_imf_ddiv_rn(double, double);

Expand All @@ -490,6 +493,9 @@ double __imf_ddiv_rn(double x, double y) {
return __devicelib_imf_ddiv_rn(x, y);
}

DEVICE_EXTERN_C_INLINE
double __imf_drcp_rn(double x) { return __devicelib_imf_ddiv_rn(1.0, x); }

DEVICE_EXTERN_C_INLINE
double __devicelib_imf_ddiv_ru(double, double);

Expand All @@ -498,11 +504,17 @@ double __imf_ddiv_ru(double x, double y) {
return __devicelib_imf_ddiv_ru(x, y);
}

DEVICE_EXTERN_C_INLINE
double __imf_drcp_ru(double x) { return __devicelib_imf_ddiv_ru(1.0, x); }

DEVICE_EXTERN_C_INLINE
double __devicelib_imf_ddiv_rz(double, double);

DEVICE_EXTERN_C_INLINE
double __imf_ddiv_rz(double x, double y) {
return __devicelib_imf_ddiv_rz(x, y);
}

DEVICE_EXTERN_C_INLINE
double __imf_drcp_rz(double x) { return __devicelib_imf_ddiv_rz(1.0, x); }
#endif // __LIBDEVICE_IMF_ENABLED__
8 changes: 8 additions & 0 deletions sycl/include/sycl/builtins.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_rd(float x, float y);
extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_rn(float x, float y);
extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_ru(float x, float y);
extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_rz(float x, float y);
extern __DPCPP_SYCL_EXTERNAL float __imf_frcp_rd(float x);
extern __DPCPP_SYCL_EXTERNAL float __imf_frcp_rn(float x);
extern __DPCPP_SYCL_EXTERNAL float __imf_frcp_ru(float x);
extern __DPCPP_SYCL_EXTERNAL float __imf_frcp_rz(float x);
extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_rd(float x);
extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_rn(float x);
extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_ru(float x);
Expand Down Expand Up @@ -336,6 +340,10 @@ extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_rd(double x, double y);
extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_rn(double x, double y);
extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_ru(double x, double y);
extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_rz(double x, double y);
extern __DPCPP_SYCL_EXTERNAL double __imf_drcp_rd(double x);
extern __DPCPP_SYCL_EXTERNAL double __imf_drcp_rn(double x);
extern __DPCPP_SYCL_EXTERNAL double __imf_drcp_ru(double x);
extern __DPCPP_SYCL_EXTERNAL double __imf_drcp_rz(double x);
extern __DPCPP_SYCL_EXTERNAL float __imf_double2float_rd(double x);
extern __DPCPP_SYCL_EXTERNAL float __imf_double2float_rn(double x);
extern __DPCPP_SYCL_EXTERNAL float __imf_double2float_ru(double x);
Expand Down
24 changes: 24 additions & 0 deletions sycl/include/sycl/ext/intel/math/imf_rounding_math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ float __imf_fdiv_rz(float, float);
float __imf_fdiv_rn(float, float);
float __imf_fdiv_ru(float, float);
float __imf_fdiv_rd(float, float);
float __imf_frcp_rz(float);
float __imf_frcp_rn(float);
float __imf_frcp_ru(float);
float __imf_frcp_rd(float);

double __imf_dadd_rz(double, double);
double __imf_dadd_rn(double, double);
Expand All @@ -44,6 +48,10 @@ double __imf_ddiv_rz(double, double);
double __imf_ddiv_rn(double, double);
double __imf_ddiv_ru(double, double);
double __imf_ddiv_rd(double, double);
double __imf_drcp_rz(double);
double __imf_drcp_rn(double);
double __imf_drcp_ru(double);
double __imf_drcp_rd(double);
};

namespace sycl {
Expand Down Expand Up @@ -114,6 +122,14 @@ template <typename Tp = float> Tp fdiv_rz(Tp x, Tp y) {
return __imf_fdiv_rz(x, y);
}

template <typename Tp = float> Tp frcp_rd(Tp x) { return __imf_frcp_rd(x); }

template <typename Tp = float> Tp frcp_rn(Tp x) { return __imf_frcp_rn(x); }

template <typename Tp = float> Tp frcp_ru(Tp x) { return __imf_frcp_ru(x); }

template <typename Tp = float> Tp frcp_rz(Tp x) { return __imf_frcp_rz(x); }

template <typename Tp = double> Tp dadd_rd(Tp x, Tp y) {
return __imf_dadd_rd(x, y);
}
Expand Down Expand Up @@ -177,6 +193,14 @@ template <typename Tp = double> Tp ddiv_ru(Tp x, Tp y) {
template <typename Tp = double> Tp ddiv_rz(Tp x, Tp y) {
return __imf_ddiv_rz(x, y);
}

template <typename Tp = double> Tp drcp_rd(Tp x) { return __imf_drcp_rd(x); }

template <typename Tp = double> Tp drcp_rn(Tp x) { return __imf_drcp_rn(x); }

template <typename Tp = double> Tp drcp_ru(Tp x) { return __imf_drcp_ru(x); }

template <typename Tp = double> Tp drcp_rz(Tp x) { return __imf_drcp_rz(x); }
} // namespace ext::intel::math
} // namespace _V1
} // namespace sycl
30 changes: 30 additions & 0 deletions sycl/test-e2e/DeviceLib/imf_fp32_rounding_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,5 +121,35 @@ int main(int, char **) {
std::cout << "sycl::ext::intel::math::fdiv_rz passes." << std::endl;
}

{
std::initializer_list<float> input_vals = {
0x1.ba90e6p+1, 0x1.4p+1, 0x1.ea77e6p-2, 0x1.e8330ap+19,
-0x1.4ffd68p+5, -0x1.443084p-15, 0x1.605fb2p+6, -0x1.2eb718p-7};
std::initializer_list<unsigned> ref_vals_rd = {
0x3e9414f5, 0x3ecccccc, 0x40059e85, 0x35863d80,
0xbcc30db3, 0xc6ca2743, 0x3c39fbfb, 0xc2d87e72};
std::initializer_list<unsigned> ref_vals_rn = {
0x3e9414f5, 0x3ecccccd, 0x40059e85, 0x35863d80,
0xbcc30db2, 0xc6ca2743, 0x3c39fbfc, 0xc2d87e71};
std::initializer_list<unsigned> ref_vals_ru = {
0x3e9414f6, 0x3ecccccd, 0x40059e86, 0x35863d81,
0xbcc30db2, 0xc6ca2742, 0x3c39fbfc, 0xc2d87e71};
std::initializer_list<unsigned> ref_vals_rz = {
0x3e9414f5, 0x3ecccccc, 0x40059e85, 0x35863d80,
0xbcc30db2, 0xc6ca2742, 0x3c39fbfb, 0xc2d87e71};
test(device_queue, input_vals, ref_vals_rd,
FT(unsigned, sycl::ext::intel::math::frcp_rd));
std::cout << "sycl::ext::intel::math::frcp_rd passes." << std::endl;
test(device_queue, input_vals, ref_vals_rn,
FT(unsigned, sycl::ext::intel::math::frcp_rn));
std::cout << "sycl::ext::intel::math::frcp_rn passes." << std::endl;
test(device_queue, input_vals, ref_vals_ru,
FT(unsigned, sycl::ext::intel::math::frcp_ru));
std::cout << "sycl::ext::intel::math::frcp_ru passes." << std::endl;
test(device_queue, input_vals, ref_vals_rz,
FT(unsigned, sycl::ext::intel::math::frcp_rz));
std::cout << "sycl::ext::intel::math::frcp_rz passes." << std::endl;
}

return 0;
}
29 changes: 29 additions & 0 deletions sycl/test-e2e/DeviceLib/imf_fp64_rounding_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,5 +146,34 @@ int main(int, char **) {
std::cout << "sycl::ext::intel::math::ddiv_rz passes." << std::endl;
}

{
std::initializer_list<double> input_vals1 = {
0x1p+2, 0x1.fbd37afb0f8edp-1, 0x1.9238e38e38e35p+6, 0x1.7p+3};
std::initializer_list<unsigned long long> ref_vals_rd = {
0x3fd0000000000000, 0x3ff021aa6a60809b, 0x3f845de9ef97e71e,
0x3fb642c8590b2164};
std::initializer_list<unsigned long long> ref_vals_rn = {
0x3fd0000000000000, 0x3ff021aa6a60809c, 0x3f845de9ef97e71f,
0x3fb642c8590b2164};
std::initializer_list<unsigned long long> ref_vals_ru = {
0x3fd0000000000000, 0x3ff021aa6a60809c, 0x3f845de9ef97e71f,
0x3fb642c8590b2165};
std::initializer_list<unsigned long long> ref_vals_rz = {
0x3fd0000000000000, 0x3ff021aa6a60809b, 0x3f845de9ef97e71e,
0x3fb642c8590b2164};
test(device_queue, input_vals1, ref_vals_rd,
FT(unsigned long long, sycl::ext::intel::math::drcp_rd));
std::cout << "sycl::ext::intel::math::drcp_rd passes." << std::endl;
test(device_queue, input_vals1, ref_vals_rn,
FT(unsigned long long, sycl::ext::intel::math::drcp_rn));
std::cout << "sycl::ext::intel::math::drcp_rn passes." << std::endl;
test(device_queue, input_vals1, ref_vals_ru,
FT(unsigned long long, sycl::ext::intel::math::drcp_ru));
std::cout << "sycl::ext::intel::math::drcp_ru passes." << std::endl;
test(device_queue, input_vals1, ref_vals_rz,
FT(unsigned long long, sycl::ext::intel::math::drcp_rz));
std::cout << "sycl::ext::intel::math::drcp_rz passes." << std::endl;
}

return 0;
}