@@ -39,6 +39,135 @@ detail::enable_if_t<detail::is_genfloat<T>::value, T> acos(T x) __NOEXC {
39
39
return __sycl_std::__invoke_acos<T>(x);
40
40
}
41
41
42
+ #define __SYCL_MATH_FUNCTION_OVERLOAD (NAME ) \
43
+ template <typename T, size_t N> \
44
+ inline __SYCL_ALWAYS_INLINE std::enable_if_t < \
45
+ std::is_same<T, half>::value || std::is_same<T, float >::value || \
46
+ std::is_same<T, double >::value, \
47
+ sycl::marray<T, N>> \
48
+ NAME (sycl::marray<T, N> x) __NOEXC { \
49
+ sycl::marray<T, N> res; \
50
+ auto x_vec2 = reinterpret_cast <sycl::vec<T, 2 > const *>(&x); \
51
+ auto res_vec2 = reinterpret_cast <sycl::vec<T, 2 > *>(&res); \
52
+ for (size_t i = 0 ; i < N / 2 ; i++) { \
53
+ res_vec2[i] = __sycl_std::__invoke_##NAME<sycl::vec<T, 2 >>(x_vec2[i]); \
54
+ } \
55
+ if (N % 2 ) { \
56
+ res[N - 1 ] = __sycl_std::__invoke_##NAME<T>(x[N - 1 ]); \
57
+ } \
58
+ return res; \
59
+ }
60
+
61
+ __SYCL_MATH_FUNCTION_OVERLOAD (sin)
62
+ __SYCL_MATH_FUNCTION_OVERLOAD (cos)
63
+ __SYCL_MATH_FUNCTION_OVERLOAD (tan)
64
+ __SYCL_MATH_FUNCTION_OVERLOAD (cospi)
65
+ __SYCL_MATH_FUNCTION_OVERLOAD (sinpi)
66
+ __SYCL_MATH_FUNCTION_OVERLOAD (tanpi)
67
+ __SYCL_MATH_FUNCTION_OVERLOAD (sinh)
68
+ __SYCL_MATH_FUNCTION_OVERLOAD (cosh)
69
+ __SYCL_MATH_FUNCTION_OVERLOAD (tanh)
70
+ __SYCL_MATH_FUNCTION_OVERLOAD (asin)
71
+ __SYCL_MATH_FUNCTION_OVERLOAD (acos)
72
+ __SYCL_MATH_FUNCTION_OVERLOAD (atan)
73
+ __SYCL_MATH_FUNCTION_OVERLOAD (asinpi)
74
+ __SYCL_MATH_FUNCTION_OVERLOAD (acospi)
75
+ __SYCL_MATH_FUNCTION_OVERLOAD (atanpi)
76
+ __SYCL_MATH_FUNCTION_OVERLOAD (asinh)
77
+ __SYCL_MATH_FUNCTION_OVERLOAD (acosh)
78
+ __SYCL_MATH_FUNCTION_OVERLOAD (atanh)
79
+ __SYCL_MATH_FUNCTION_OVERLOAD (cbrt)
80
+ __SYCL_MATH_FUNCTION_OVERLOAD (ceil)
81
+ __SYCL_MATH_FUNCTION_OVERLOAD (floor)
82
+ __SYCL_MATH_FUNCTION_OVERLOAD (erfc)
83
+ __SYCL_MATH_FUNCTION_OVERLOAD (erf)
84
+ __SYCL_MATH_FUNCTION_OVERLOAD (exp)
85
+ __SYCL_MATH_FUNCTION_OVERLOAD (exp2)
86
+ __SYCL_MATH_FUNCTION_OVERLOAD (exp10)
87
+ __SYCL_MATH_FUNCTION_OVERLOAD (expm1)
88
+ __SYCL_MATH_FUNCTION_OVERLOAD (tgamma)
89
+ __SYCL_MATH_FUNCTION_OVERLOAD (lgamma)
90
+ __SYCL_MATH_FUNCTION_OVERLOAD (log)
91
+ __SYCL_MATH_FUNCTION_OVERLOAD (log2)
92
+ __SYCL_MATH_FUNCTION_OVERLOAD (log10)
93
+ __SYCL_MATH_FUNCTION_OVERLOAD (log1p)
94
+ __SYCL_MATH_FUNCTION_OVERLOAD (logb)
95
+ __SYCL_MATH_FUNCTION_OVERLOAD (rint)
96
+ __SYCL_MATH_FUNCTION_OVERLOAD (round)
97
+ __SYCL_MATH_FUNCTION_OVERLOAD (sqrt)
98
+ __SYCL_MATH_FUNCTION_OVERLOAD (rsqrt)
99
+ __SYCL_MATH_FUNCTION_OVERLOAD (trunc)
100
+
101
+ #undef __SYCL_MATH_FUNCTION_OVERLOAD
102
+
103
+ #define __SYCL_MATH_FUNCTION_2_OVERLOAD (NAME ) \
104
+ template <typename T, size_t N> \
105
+ inline __SYCL_ALWAYS_INLINE std::enable_if_t < \
106
+ std::is_same<T, half>::value || std::is_same<T, float >::value || \
107
+ std::is_same<T, double >::value, \
108
+ sycl::marray<T, N>> \
109
+ NAME (sycl::marray<T, N> x, sycl::marray<T, N> y) __NOEXC { \
110
+ sycl::marray<T, N> res; \
111
+ auto x_vec2 = reinterpret_cast <sycl::vec<T, 2 > const *>(&x); \
112
+ auto y_vec2 = reinterpret_cast <sycl::vec<T, 2 > const *>(&y); \
113
+ auto res_vec2 = reinterpret_cast <sycl::vec<T, 2 > *>(&res); \
114
+ for (size_t i = 0 ; i < N / 2 ; i++) { \
115
+ res_vec2[i] = \
116
+ __sycl_std::__invoke_##NAME<sycl::vec<T, 2 >>(x_vec2[i], y_vec2[i]); \
117
+ } \
118
+ if (N % 2 ) { \
119
+ res[N - 1 ] = __sycl_std::__invoke_##NAME<T>(x[N - 1 ], y[N - 1 ]); \
120
+ } \
121
+ return res; \
122
+ }
123
+
124
+ __SYCL_MATH_FUNCTION_2_OVERLOAD (atan2)
125
+ __SYCL_MATH_FUNCTION_2_OVERLOAD (atan2pi)
126
+ __SYCL_MATH_FUNCTION_2_OVERLOAD (copysign)
127
+ __SYCL_MATH_FUNCTION_2_OVERLOAD (fdim)
128
+ __SYCL_MATH_FUNCTION_2_OVERLOAD (fmin)
129
+ __SYCL_MATH_FUNCTION_2_OVERLOAD (fmax)
130
+ __SYCL_MATH_FUNCTION_2_OVERLOAD (fmod)
131
+ __SYCL_MATH_FUNCTION_2_OVERLOAD (hypot)
132
+ __SYCL_MATH_FUNCTION_2_OVERLOAD (maxmag)
133
+ __SYCL_MATH_FUNCTION_2_OVERLOAD (minmag)
134
+ __SYCL_MATH_FUNCTION_2_OVERLOAD (nextafter)
135
+ __SYCL_MATH_FUNCTION_2_OVERLOAD (pow)
136
+ __SYCL_MATH_FUNCTION_2_OVERLOAD (powr)
137
+ __SYCL_MATH_FUNCTION_2_OVERLOAD (remainder)
138
+
139
+ #undef __SYCL_MATH_FUNCTION_2_OVERLOAD
140
+
141
+ #define __SYCL_MATH_FUNCTION_3_OVERLOAD (NAME ) \
142
+ template <typename T, size_t N> \
143
+ inline __SYCL_ALWAYS_INLINE std::enable_if_t < \
144
+ std::is_same<T, half>::value || std::is_same<T, float >::value || \
145
+ std::is_same<T, double >::value, \
146
+ sycl::marray<T, N>> \
147
+ NAME (sycl::marray<T, N> x, sycl::marray<T, N> y, sycl::marray<T, N> z) \
148
+ __NOEXC { \
149
+ sycl::marray<T, N> res; \
150
+ auto x_vec2 = reinterpret_cast <sycl::vec<T, 2 > const *>(&x); \
151
+ auto y_vec2 = reinterpret_cast <sycl::vec<T, 2 > const *>(&y); \
152
+ auto z_vec2 = reinterpret_cast <sycl::vec<T, 2 > const *>(&z); \
153
+ auto res_vec2 = reinterpret_cast <sycl::vec<T, 2 > *>(&res); \
154
+ for (size_t i = 0 ; i < N / 2 ; i++) { \
155
+ res_vec2[i] = __sycl_std::__invoke_##NAME<sycl::vec<T, 2 >>( \
156
+ x_vec2[i], y_vec2[i], z_vec2[i]); \
157
+ } \
158
+ if (N % 2 ) { \
159
+ res[N - 1 ] = \
160
+ __sycl_std::__invoke_##NAME<T>(x[N - 1 ], y[N - 1 ], z[N - 1 ]); \
161
+ } \
162
+ return res; \
163
+ }
164
+
165
+ __SYCL_MATH_FUNCTION_3_OVERLOAD (mad)
166
+ __SYCL_MATH_FUNCTION_3_OVERLOAD (mix)
167
+ __SYCL_MATH_FUNCTION_3_OVERLOAD (fma)
168
+
169
+ #undef __SYCL_MATH_FUNCTION_3_OVERLOAD
170
+
42
171
// genfloat acosh (genfloat x)
43
172
template <typename T>
44
173
detail::enable_if_t <detail::is_genfloat<T>::value, T> acosh (T x) __NOEXC {
@@ -1395,6 +1524,63 @@ select(T a, T b, T2 c) __NOEXC {
1395
1524
namespace native {
1396
1525
/* ----------------- 4.13.3 Math functions. ---------------------------------*/
1397
1526
// genfloatf cos (genfloatf x)
1527
+
1528
+ #define __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD (NAME ) \
1529
+ template <size_t N> \
1530
+ inline __SYCL_ALWAYS_INLINE sycl::marray<float , N> NAME ( \
1531
+ sycl::marray<float , N> x) __NOEXC { \
1532
+ sycl::marray<float , N> res; \
1533
+ auto x_vec2 = reinterpret_cast <sycl::vec<float , 2 > const *>(&x); \
1534
+ auto res_vec2 = reinterpret_cast <sycl::vec<float , 2 > *>(&res); \
1535
+ for (size_t i = 0 ; i < N / 2 ; i++) { \
1536
+ res_vec2[i] = \
1537
+ __sycl_std::__invoke_native_##NAME<sycl::vec<float , 2 >>(x_vec2[i]); \
1538
+ } \
1539
+ if (N % 2 ) { \
1540
+ res[N - 1 ] = __sycl_std::__invoke_native_##NAME<float >(x[N - 1 ]); \
1541
+ } \
1542
+ return res; \
1543
+ }
1544
+
1545
+ __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD (sin)
1546
+ __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD (cos)
1547
+ __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD (tan)
1548
+ __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD (exp)
1549
+ __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD (exp2)
1550
+ __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD (exp10)
1551
+ __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD (log)
1552
+ __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD (log2)
1553
+ __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD (log10)
1554
+ __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD (sqrt)
1555
+ __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD (rsqrt)
1556
+ __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD (recip)
1557
+
1558
+ #undef __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD
1559
+
1560
+ #define __SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD (NAME ) \
1561
+ template <size_t N> \
1562
+ inline __SYCL_ALWAYS_INLINE sycl::marray<float , N> NAME ( \
1563
+ sycl::marray<float , N> x, sycl::marray<float , N> y) __NOEXC { \
1564
+ sycl::marray<float , N> res; \
1565
+ auto x_vec2 = reinterpret_cast <sycl::vec<float , 2 > const *>(&x); \
1566
+ auto y_vec2 = reinterpret_cast <sycl::vec<float , 2 > const *>(&y); \
1567
+ auto res_vec2 = reinterpret_cast <sycl::vec<float , 2 > *>(&res); \
1568
+ for (size_t i = 0 ; i < N / 2 ; i++) { \
1569
+ res_vec2[i] = __sycl_std::__invoke_native_##NAME<sycl::vec<float , 2 >>( \
1570
+ x_vec2[i], y_vec2[i]); \
1571
+ } \
1572
+ if (N % 2 ) { \
1573
+ res[N - 1 ] = \
1574
+ __sycl_std::__invoke_native_##NAME<float >(x[N - 1 ], y[N - 1 ]); \
1575
+ } \
1576
+ return res; \
1577
+ }
1578
+
1579
+ __SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD (divide)
1580
+ __SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD (powr)
1581
+
1582
+ #undef __SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD
1583
+
1398
1584
template <typename T>
1399
1585
detail::enable_if_t <detail::is_genfloatf<T>::value, T> cos (T x) __NOEXC {
1400
1586
return __sycl_std::__invoke_native_cos<T>(x);
@@ -1482,6 +1668,62 @@ detail::enable_if_t<detail::is_genfloatf<T>::value, T> tan(T x) __NOEXC {
1482
1668
} // namespace native
1483
1669
namespace half_precision {
1484
1670
/* ----------------- 4.13.3 Math functions. ---------------------------------*/
1671
+ #define __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD (NAME ) \
1672
+ template <size_t N> \
1673
+ inline __SYCL_ALWAYS_INLINE sycl::marray<float , N> NAME ( \
1674
+ sycl::marray<float , N> x) __NOEXC { \
1675
+ sycl::marray<float , N> res; \
1676
+ auto x_vec2 = reinterpret_cast <sycl::vec<float , 2 > const *>(&x); \
1677
+ auto res_vec2 = reinterpret_cast <sycl::vec<float , 2 > *>(&res); \
1678
+ for (size_t i = 0 ; i < N / 2 ; i++) { \
1679
+ res_vec2[i] = \
1680
+ __sycl_std::__invoke_half_##NAME<sycl::vec<float , 2 >>(x_vec2[i]); \
1681
+ } \
1682
+ if (N % 2 ) { \
1683
+ res[N - 1 ] = __sycl_std::__invoke_half_##NAME<float >(x[N - 1 ]); \
1684
+ } \
1685
+ return res; \
1686
+ }
1687
+
1688
+ __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD (sin)
1689
+ __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD (cos)
1690
+ __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD (tan)
1691
+ __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD (exp)
1692
+ __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD (exp2)
1693
+ __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD (exp10)
1694
+ __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD (log)
1695
+ __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD (log2)
1696
+ __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD (log10)
1697
+ __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD (sqrt)
1698
+ __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD (rsqrt)
1699
+ __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD (recip)
1700
+
1701
+ #undef __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD
1702
+
1703
+ #define __SYCL_HALF_PRECISION_MATH_FUNCTION_2_OVERLOAD (NAME ) \
1704
+ template <size_t N> \
1705
+ inline __SYCL_ALWAYS_INLINE sycl::marray<float , N> NAME ( \
1706
+ sycl::marray<float , N> x, sycl::marray<float , N> y) __NOEXC { \
1707
+ sycl::marray<float , N> res; \
1708
+ auto x_vec2 = reinterpret_cast <sycl::vec<float , 2 > const *>(&x); \
1709
+ auto y_vec2 = reinterpret_cast <sycl::vec<float , 2 > const *>(&y); \
1710
+ auto res_vec2 = reinterpret_cast <sycl::vec<float , 2 > *>(&res); \
1711
+ for (size_t i = 0 ; i < N / 2 ; i++) { \
1712
+ res_vec2[i] = __sycl_std::__invoke_half_##NAME<sycl::vec<float , 2 >>( \
1713
+ x_vec2[i], y_vec2[i]); \
1714
+ } \
1715
+ if (N % 2 ) { \
1716
+ res[N - 1 ] = \
1717
+ __sycl_std::__invoke_half_##NAME<float >(x[N - 1 ], y[N - 1 ]); \
1718
+ } \
1719
+ return res; \
1720
+ }
1721
+
1722
+ __SYCL_HALF_PRECISION_MATH_FUNCTION_2_OVERLOAD (divide)
1723
+ __SYCL_HALF_PRECISION_MATH_FUNCTION_2_OVERLOAD (powr)
1724
+
1725
+ #undef __SYCL_HALF_PRECISION_MATH_FUNCTION_2_OVERLOAD
1726
+
1485
1727
// genfloatf cos (genfloatf x)
1486
1728
template <typename T>
1487
1729
detail::enable_if_t <detail::is_genfloatf<T>::value, T> cos (T x) __NOEXC {
0 commit comments