Skip to content

Commit b0bfdfa

Browse files
authored
[SYCL] Add imf simd emulation APIs to sycl_ext_intel_math (#8262)
CUDA math provides a series of simd intrinsic: https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__SIMD.html#group__CUDA__MATH__INTRINSIC__SIMD We provided corresponding APIs in SYCL libdevice which emulates the behaviors of these CUDA simd intrinsic. The PR adds these APIs to sycl_ext_intel_math header file, so users can invoke them when porting CUDA code to SYCL. --------- Signed-off-by: jinge90 <[email protected]>
1 parent e60c549 commit b0bfdfa

File tree

2 files changed

+605
-7
lines changed

2 files changed

+605
-7
lines changed

sycl/include/sycl/ext/intel/math.hpp

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#pragma once
1212
#include <sycl/builtins.hpp>
1313
#include <sycl/ext/intel/math/imf_half_trivial.hpp>
14+
#include <sycl/ext/intel/math/imf_simd.hpp>
1415
#include <sycl/half_type.hpp>
1516
#include <type_traits>
1617

@@ -97,7 +98,8 @@ std::enable_if_t<std::is_same_v<Tp, sycl::half>, sycl::half> ceil(Tp x) {
9798
return __builtin_bit_cast(sycl::half, __imf_ceilf16(xi));
9899
}
99100

100-
sycl::half2 ceil(sycl::half2 x) {
101+
template <typename Tp>
102+
std::enable_if_t<std::is_same_v<Tp, sycl::half2>, sycl::half2> ceil(Tp x) {
101103
return sycl::half2{ceil(x.s0()), ceil(x.s1())};
102104
}
103105

@@ -117,7 +119,8 @@ std::enable_if_t<std::is_same_v<Tp, sycl::half>, sycl::half> floor(Tp x) {
117119
return __builtin_bit_cast(sycl::half, __imf_floorf16(xi));
118120
}
119121

120-
sycl::half2 floor(sycl::half2 x) {
122+
template <typename Tp>
123+
std::enable_if_t<std::is_same_v<Tp, sycl::half2>, sycl::half2> floor(Tp x) {
121124
return sycl::half2{floor(x.s0()), floor(x.s1())};
122125
}
123126

@@ -137,7 +140,10 @@ std::enable_if_t<std::is_same_v<Tp, sycl::half>, sycl::half> inv(Tp x) {
137140
return __builtin_bit_cast(sycl::half, __imf_invf16(xi));
138141
}
139142

140-
sycl::half2 inv(sycl::half2 x) { return sycl::half2{inv(x.s0()), inv(x.s1())}; }
143+
template <typename Tp>
144+
std::enable_if_t<std::is_same_v<Tp, sycl::half2>, sycl::half2> inv(Tp x) {
145+
return sycl::half2{inv(x.s0()), inv(x.s1())};
146+
}
141147

142148
template <typename Tp>
143149
std::enable_if_t<std::is_same_v<Tp, float>, float> rint(Tp x) {
@@ -155,7 +161,8 @@ std::enable_if_t<std::is_same_v<Tp, sycl::half>, sycl::half> rint(Tp x) {
155161
return __builtin_bit_cast(sycl::half, __imf_rintf16(xi));
156162
}
157163

158-
sycl::half2 rint(sycl::half2 x) {
164+
template <typename Tp>
165+
std::enable_if_t<std::is_same_v<Tp, sycl::half2>, sycl::half2> rint(Tp x) {
159166
return sycl::half2{rint(x.s0()), rint(x.s1())};
160167
}
161168

@@ -175,7 +182,8 @@ std::enable_if_t<std::is_same_v<Tp, sycl::half>, sycl::half> sqrt(Tp x) {
175182
return __builtin_bit_cast(sycl::half, __imf_sqrtf16(xi));
176183
}
177184

178-
sycl::half2 sqrt(sycl::half2 x) {
185+
template <typename Tp>
186+
std::enable_if_t<std::is_same_v<Tp, sycl::half2>, sycl::half2> sqrt(Tp x) {
179187
return sycl::half2{sqrt(x.s0()), sqrt(x.s1())};
180188
}
181189

@@ -195,7 +203,8 @@ std::enable_if_t<std::is_same_v<Tp, sycl::half>, sycl::half> rsqrt(Tp x) {
195203
return __builtin_bit_cast(sycl::half, __imf_rsqrtf16(xi));
196204
}
197205

198-
sycl::half2 rsqrt(sycl::half2 x) {
206+
template <typename Tp>
207+
std::enable_if_t<std::is_same_v<Tp, sycl::half2>, sycl::half2> rsqrt(Tp x) {
199208
return sycl::half2{rsqrt(x.s0()), rsqrt(x.s1())};
200209
}
201210

@@ -215,9 +224,11 @@ std::enable_if_t<std::is_same_v<Tp, sycl::half>, sycl::half> trunc(Tp x) {
215224
return __builtin_bit_cast(sycl::half, __imf_truncf16(xi));
216225
}
217226

218-
sycl::half2 trunc(sycl::half2 x) {
227+
template <typename Tp>
228+
std::enable_if_t<std::is_same_v<Tp, sycl::half2>, sycl::half2> trunc(Tp x) {
219229
return sycl::half2{trunc(x.s0()), trunc(x.s1())};
220230
}
231+
221232
} // namespace ext::intel::math
222233
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
223234
} // namespace sycl

0 commit comments

Comments
 (0)