Add ceil,floor,rint,sqrt,rsqrt,trunc to sycl_ext_intel_math hpp (#7429)

jinge90 · web-flow · commit 1b7582bb9139 · 2022-11-18T08:54:24.000-08:00
Add following imf functions to sycl/ext/intel/math.hpp:
ceil, floor, trunc, rint, sqrt, rsqrt
Those functions are in sycl::ext::intel::math:: namespace and supports
float, double, half, half2.
Those C++ functions are just wrappers of __imf_* functions implemented
in SYCL libdevice.

Signed-off-by: jinge90 &lt;ge.jin@intel.com&gt;
diff --git a/sycl/include/sycl/ext/intel/math.hpp b/sycl/include/sycl/ext/intel/math.hpp
@@ -27,6 +27,24 @@ float __imf_saturatef(float);
 float __imf_copysignf(float, float);
 double __imf_copysign(double, double);
 _iml_half_internal __imf_copysignf16(_iml_half_internal, _iml_half_internal);
+float __imf_ceilf(float);
+double __imf_ceil(double);
+_iml_half_internal __imf_ceilf16(_iml_half_internal);
+float __imf_floorf(float);
+double __imf_floor(double);
+_iml_half_internal __imf_floorf16(_iml_half_internal);
+float __imf_rintf(float);
+double __imf_rint(double);
+_iml_half_internal __imf_rintf16(_iml_half_internal);
+float __imf_sqrtf(float);
+double __imf_sqrt(double);
+_iml_half_internal __imf_sqrtf16(_iml_half_internal);
+float __imf_rsqrtf(float);
+double __imf_rsqrt(double);
+_iml_half_internal __imf_rsqrtf16(_iml_half_internal);
+float __imf_truncf(float);
+double __imf_trunc(double);
+_iml_half_internal __imf_truncf16(_iml_half_internal);
 };
 
 namespace sycl {
@@ -36,6 +54,10 @@ namespace intel {
 namespace math {
 
 #if __cplusplus >= 201703L
+
+static_assert(sizeof(sycl::half) == sizeof(_iml_half_internal),
+              "sycl::half is not compatible with _iml_half_internal.");
+
 template <typename Tp>
 std::enable_if_t<std::is_same_v<Tp, float>, float> saturate(Tp x) {
   return __imf_saturatef(x);
@@ -54,13 +76,131 @@ std::enable_if_t<std::is_same_v<Tp, double>, double> copysign(Tp x, Tp y) {
 template <typename Tp>
 std::enable_if_t<std::is_same_v<Tp, sycl::half>, sycl::half> copysign(Tp x,
                                                                       Tp y) {
-  static_assert(sizeof(sycl::half) == sizeof(_iml_half_internal),
-                "sycl::half is not compatible with _iml_half_internal.");
   _iml_half_internal xi = __builtin_bit_cast(_iml_half_internal, x);
   _iml_half_internal yi = __builtin_bit_cast(_iml_half_internal, y);
   return __builtin_bit_cast(sycl::half, __imf_copysignf16(xi, yi));
 }
 
+template <typename Tp>
+std::enable_if_t<std::is_same_v<Tp, float>, float> ceil(Tp x) {
+  return __imf_ceilf(x);
+}
+
+template <typename Tp>
+std::enable_if_t<std::is_same_v<Tp, double>, double> ceil(Tp x) {
+  return __imf_ceil(x);
+}
+
+template <typename Tp>
+std::enable_if_t<std::is_same_v<Tp, sycl::half>, sycl::half> ceil(Tp x) {
+  _iml_half_internal xi = __builtin_bit_cast(_iml_half_internal, x);
+  return __builtin_bit_cast(sycl::half, __imf_ceilf16(xi));
+}
+
+sycl::half2 ceil(sycl::half2 x) {
+  return sycl::half2{ceil(x.s0()), ceil(x.s1())};
+}
+
+template <typename Tp>
+std::enable_if_t<std::is_same_v<Tp, float>, float> floor(Tp x) {
+  return __imf_floorf(x);
+}
+
+template <typename Tp>
+std::enable_if_t<std::is_same_v<Tp, double>, double> floor(Tp x) {
+  return __imf_floor(x);
+}
+
+template <typename Tp>
+std::enable_if_t<std::is_same_v<Tp, sycl::half>, sycl::half> floor(Tp x) {
+  _iml_half_internal xi = __builtin_bit_cast(_iml_half_internal, x);
+  return __builtin_bit_cast(sycl::half, __imf_floorf16(xi));
+}
+
+sycl::half2 floor(sycl::half2 x) {
+  return sycl::half2{floor(x.s0()), floor(x.s1())};
+}
+
+template <typename Tp>
+std::enable_if_t<std::is_same_v<Tp, float>, float> rint(Tp x) {
+  return __imf_rintf(x);
+}
+
+template <typename Tp>
+std::enable_if_t<std::is_same_v<Tp, double>, double> rint(Tp x) {
+  return __imf_rint(x);
+}
+
+template <typename Tp>
+std::enable_if_t<std::is_same_v<Tp, sycl::half>, sycl::half> rint(Tp x) {
+  _iml_half_internal xi = __builtin_bit_cast(_iml_half_internal, x);
+  return __builtin_bit_cast(sycl::half, __imf_rintf16(xi));
+}
+
+sycl::half2 rint(sycl::half2 x) {
+  return sycl::half2{rint(x.s0()), rint(x.s1())};
+}
+
+template <typename Tp>
+std::enable_if_t<std::is_same_v<Tp, float>, float> sqrt(Tp x) {
+  return __imf_sqrtf(x);
+}
+
+template <typename Tp>
+std::enable_if_t<std::is_same_v<Tp, double>, double> sqrt(Tp x) {
+  return __imf_sqrt(x);
+}
+
+template <typename Tp>
+std::enable_if_t<std::is_same_v<Tp, sycl::half>, sycl::half> sqrt(Tp x) {
+  _iml_half_internal xi = __builtin_bit_cast(_iml_half_internal, x);
+  return __builtin_bit_cast(sycl::half, __imf_sqrtf16(xi));
+}
+
+sycl::half2 sqrt(sycl::half2 x) {
+  return sycl::half2{sqrt(x.s0()), sqrt(x.s1())};
+}
+
+template <typename Tp>
+std::enable_if_t<std::is_same_v<Tp, float>, float> rsqrt(Tp x) {
+  return __imf_rsqrtf(x);
+}
+
+template <typename Tp>
+std::enable_if_t<std::is_same_v<Tp, double>, double> rsqrt(Tp x) {
+  return __imf_rsqrt(x);
+}
+
+template <typename Tp>
+std::enable_if_t<std::is_same_v<Tp, sycl::half>, sycl::half> rsqrt(Tp x) {
+  _iml_half_internal xi = __builtin_bit_cast(_iml_half_internal, x);
+  return __builtin_bit_cast(sycl::half, __imf_rsqrtf16(xi));
+}
+
+sycl::half2 rsqrt(sycl::half2 x) {
+  return sycl::half2{rsqrt(x.s0()), rsqrt(x.s1())};
+}
+
+template <typename Tp>
+std::enable_if_t<std::is_same_v<Tp, float>, float> trunc(Tp x) {
+  return __imf_truncf(x);
+}
+
+template <typename Tp>
+std::enable_if_t<std::is_same_v<Tp, double>, double> trunc(Tp x) {
+  return __imf_trunc(x);
+}
+
+template <typename Tp>
+std::enable_if_t<std::is_same_v<Tp, sycl::half>, sycl::half> trunc(Tp x) {
+  _iml_half_internal xi = __builtin_bit_cast(_iml_half_internal, x);
+  return __builtin_bit_cast(sycl::half, __imf_truncf16(xi));
+}
+
+sycl::half2 trunc(sycl::half2 x) {
+  return sycl::half2{trunc(x.s0()), trunc(x.s1())};
+}
+
 #endif
 } // namespace math
 } // namespace intel