Skip to content

Commit bd81fc4

Browse files
[SYCL] Use pair of native::sin/cos for sincos under __FAST_MATH__ (#10481)
1 parent 5eef8c7 commit bd81fc4

File tree

1 file changed

+19
-2
lines changed

1 file changed

+19
-2
lines changed

sycl/include/sycl/builtins.hpp

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -734,8 +734,8 @@ std::enable_if_t<__FAST_MATH_GENFLOAT(T), T> sin(T x) __NOEXC {
734734

735735
// svgenfloat sincos (svgenfloat x, genfloatptr cosval)
736736
template <typename T, typename T2>
737-
std::enable_if_t<
738-
detail::is_svgenfloat<T>::value && detail::is_genfloatptr<T2>::value, T>
737+
std::enable_if_t<__FAST_MATH_GENFLOAT(T) && detail::is_genfloatptr<T2>::value,
738+
T>
739739
sincos(T x, T2 cosval) __NOEXC {
740740
detail::check_vector_size<T, T2>();
741741
return __sycl_std::__invoke_sincos<T>(x, cosval);
@@ -2500,6 +2500,23 @@ std::enable_if_t<detail::is_svgenfloatf<T>::value, T> cos(T x) __NOEXC {
25002500
return native::cos(x);
25012501
}
25022502

2503+
// svgenfloatf sincos (svgenfloatf x, genfloatptr cosval)
2504+
// This is a performance optimization to ensure that sincos isn't slower than a
2505+
// pair of sin/cos executed separately. Theoretically, calling non-native sincos
2506+
// might be faster than calling native::sin plus native::cos separately and we'd
2507+
// need some kind of cost model to make the right decision (and move this
2508+
// entirely to the JIT/AOT compilers). However, in practice, this simpler
2509+
// solution seems to work just fine and matches how sin/cos above are optimized
2510+
// for the fast math path.
2511+
template <typename T, typename T2>
2512+
std::enable_if_t<
2513+
detail::is_svgenfloatf<T>::value && detail::is_genfloatptr<T2>::value, T>
2514+
sincos(T x, T2 cosval) __NOEXC {
2515+
detail::check_vector_size<T, T2>();
2516+
*cosval = native::cos(x);
2517+
return native::sin(x);
2518+
}
2519+
25032520
// svgenfloatf exp (svgenfloatf x)
25042521
template <typename T>
25052522
std::enable_if_t<detail::is_svgenfloatf<T>::value, T> exp(T x) __NOEXC {

0 commit comments

Comments
 (0)