@@ -25,41 +25,43 @@ void assert_out_of_bound<sycl::half>(sycl::half val, sycl::half lower,
25
25
assert (lower < val && val < upper);
26
26
}
27
27
28
- template <typename T> void native_tanh_tester (sycl::queue q) {
29
- T r{0 };
28
+ template <typename T>
29
+ void native_tanh_tester (sycl::queue q, T val, T up, T lo) {
30
+ T r = val;
30
31
31
32
#ifdef SYCL_EXT_ONEAPI_NATIVE_MATH
32
33
{
33
34
sycl::buffer<T, 1 > BufR (&r, sycl::range<1 >(1 ));
34
35
q.submit ([&](sycl::handler &cgh) {
35
- auto AccR = BufR.template get_access <sycl::access::mode::write >(cgh);
36
+ auto AccR = BufR.template get_access <sycl::access::mode::read_write >(cgh);
36
37
cgh.single_task ([=]() {
37
- AccR[0 ] = sycl::ext::oneapi::experimental::native::tanh (T ( 1 . 0f ) );
38
+ AccR[0 ] = sycl::ext::oneapi::experimental::native::tanh (AccR[ 0 ] );
38
39
});
39
40
});
40
41
}
41
42
42
- assert_out_of_bound (r, T ( 0 . 75f ), T ( 0 . 77f )); // 0.76159415595576488812
43
+ assert_out_of_bound (r, up, lo);
43
44
#else
44
45
assert (!" SYCL_EXT_ONEAPI_NATIVE_MATH not supported" );
45
46
#endif
46
47
}
47
48
48
- template <typename T> void native_exp2_tester (sycl::queue q) {
49
- T r{0 };
49
+ template <typename T>
50
+ void native_exp2_tester (sycl::queue q, T val, T up, T lo) {
51
+ T r = val;
50
52
51
53
#ifdef SYCL_EXT_ONEAPI_NATIVE_MATH
52
54
{
53
55
sycl::buffer<T, 1 > BufR (&r, sycl::range<1 >(1 ));
54
56
q.submit ([&](sycl::handler &cgh) {
55
- auto AccR = BufR.template get_access <sycl::access::mode::write >(cgh);
57
+ auto AccR = BufR.template get_access <sycl::access::mode::read_write >(cgh);
56
58
cgh.single_task ([=]() {
57
- AccR[0 ] = sycl::ext::oneapi::experimental::native::exp2 (T ( 0 . 5f ) );
59
+ AccR[0 ] = sycl::ext::oneapi::experimental::native::exp2 (AccR[ 0 ] );
58
60
});
59
61
});
60
62
}
61
63
62
- assert_out_of_bound (r, T ( 1 . 30f ), T ( 1 . 50f )); // 1.4142135623730950488
64
+ assert_out_of_bound (r, up, lo);
63
65
#else
64
66
assert (!" SYCL_EXT_ONEAPI_NATIVE_MATH not supported" );
65
67
#endif
@@ -69,27 +71,84 @@ int main() {
69
71
70
72
sycl::queue q;
71
73
72
- native_tanh_tester<float >(q);
73
- native_tanh_tester<sycl::float2>(q);
74
- native_tanh_tester<sycl::float3>(q);
75
- native_tanh_tester<sycl::float4>(q);
76
- native_tanh_tester<sycl::float8>(q);
77
- native_tanh_tester<sycl::float16>(q);
74
+ const double tv[16 ] = {-2.0 , -1.5 , -1.0 , 0.0 , 2.0 , 1.5 , 1.0 , 0.0 ,
75
+ -1.7 , 1.7 , -1.2 , 1.2 , -3.0 , 3.0 , -10.0 , 10.0 };
76
+ const double tl[16 ] = {-0.97 , -0.91 , -0.77 , -0.1 , 0.95 , 0.89 , 0.75 , -0.1 ,
77
+ -0.94 , 0.92 , -0.84 , 0.82 , -1.0 , 0.98 , -1.10 , 0.98 };
78
+ const double tu[16 ] = {-0.95 , -0.89 , -0.75 , 0.1 , 0.97 , 0.91 , 0.77 , 0.1 ,
79
+ -0.92 , 0.94 , -0.82 , 0.84 , -0.98 , 1.00 , -0.98 , 1.10 };
80
+
81
+ native_tanh_tester<float >(q, tv[0 ], tl[0 ], tu[0 ]);
82
+ native_tanh_tester<sycl::float2>(q, {tv[0 ], tv[1 ]}, {tl[0 ], tl[1 ]},
83
+ {tu[0 ], tu[1 ]});
84
+ native_tanh_tester<sycl::float3>(
85
+ q, {tv[0 ], tv[1 ], tv[2 ]}, {tl[0 ], tl[1 ], tl[2 ]}, {tu[0 ], tu[1 ], tu[2 ]});
86
+ native_tanh_tester<sycl::float4>(q, {tv[0 ], tv[1 ], tv[2 ], tv[3 ]},
87
+ {tl[0 ], tl[1 ], tl[2 ], tl[3 ]},
88
+ {tu[0 ], tu[1 ], tu[2 ], tu[3 ]});
89
+ native_tanh_tester<sycl::float8>(
90
+ q, {tv[0 ], tv[1 ], tv[2 ], tv[3 ], tv[4 ], tv[5 ], tv[6 ], tv[7 ]},
91
+ {tl[0 ], tl[1 ], tl[2 ], tl[3 ], tl[4 ], tl[5 ], tl[6 ], tl[7 ]},
92
+ {tu[0 ], tu[1 ], tu[2 ], tu[3 ], tu[4 ], tu[5 ], tu[6 ], tu[7 ]});
93
+ native_tanh_tester<sycl::float16>(
94
+ q,
95
+ {tv[0 ], tv[1 ], tv[2 ], tv[3 ], tv[4 ], tv[5 ], tv[6 ], tv[7 ], tv[8 ], tv[9 ],
96
+ tv[10 ], tv[11 ], tv[12 ], tv[13 ], tv[14 ], tv[15 ]},
97
+ {tl[0 ], tl[1 ], tl[2 ], tl[3 ], tl[4 ], tl[5 ], tl[6 ], tl[7 ], tl[8 ], tl[9 ],
98
+ tl[10 ], tl[11 ], tl[12 ], tl[13 ], tl[14 ], tl[15 ]},
99
+ {tu[0 ], tu[1 ], tu[2 ], tu[3 ], tu[4 ], tu[5 ], tu[6 ], tu[7 ], tu[8 ], tu[9 ],
100
+ tu[10 ], tu[11 ], tu[12 ], tu[13 ], tu[14 ], tu[15 ]});
78
101
79
102
if (q.get_device ().has (sycl::aspect::fp16)) {
80
- native_tanh_tester<sycl::half>(q);
81
- native_tanh_tester<sycl::half2>(q);
82
- native_tanh_tester<sycl::half3>(q);
83
- native_tanh_tester<sycl::half4>(q);
84
- native_tanh_tester<sycl::half8>(q);
85
- native_tanh_tester<sycl::half16>(q);
86
-
87
- native_exp2_tester<sycl::half>(q);
88
- native_exp2_tester<sycl::half2>(q);
89
- native_exp2_tester<sycl::half3>(q);
90
- native_exp2_tester<sycl::half4>(q);
91
- native_exp2_tester<sycl::half8>(q);
92
- native_exp2_tester<sycl::half16>(q);
103
+
104
+ native_tanh_tester<sycl::half>(q, tv[0 ], tl[0 ], tu[0 ]);
105
+ native_tanh_tester<sycl::half2>(q, {tv[0 ], tv[1 ]}, {tl[0 ], tl[1 ]},
106
+ {tu[0 ], tu[1 ]});
107
+ native_tanh_tester<sycl::half3>(
108
+ q, {tv[0 ], tv[1 ], tv[2 ]}, {tl[0 ], tl[1 ], tl[2 ]}, {tu[0 ], tu[1 ], tu[2 ]});
109
+ native_tanh_tester<sycl::half4>(q, {tv[0 ], tv[1 ], tv[2 ], tv[3 ]},
110
+ {tl[0 ], tl[1 ], tl[2 ], tl[3 ]},
111
+ {tu[0 ], tu[1 ], tu[2 ], tu[3 ]});
112
+ native_tanh_tester<sycl::half8>(
113
+ q, {tv[0 ], tv[1 ], tv[2 ], tv[3 ], tv[4 ], tv[5 ], tv[6 ], tv[7 ]},
114
+ {tl[0 ], tl[1 ], tl[2 ], tl[3 ], tl[4 ], tl[5 ], tl[6 ], tl[7 ]},
115
+ {tu[0 ], tu[1 ], tu[2 ], tu[3 ], tu[4 ], tu[5 ], tu[6 ], tu[7 ]});
116
+ native_tanh_tester<sycl::half16>(
117
+ q,
118
+ {tv[0 ], tv[1 ], tv[2 ], tv[3 ], tv[4 ], tv[5 ], tv[6 ], tv[7 ], tv[8 ], tv[9 ],
119
+ tv[10 ], tv[11 ], tv[12 ], tv[13 ], tv[14 ], tv[15 ]},
120
+ {tl[0 ], tl[1 ], tl[2 ], tl[3 ], tl[4 ], tl[5 ], tl[6 ], tl[7 ], tl[8 ], tl[9 ],
121
+ tl[10 ], tl[11 ], tl[12 ], tl[13 ], tl[14 ], tl[15 ]},
122
+ {tu[0 ], tu[1 ], tu[2 ], tu[3 ], tu[4 ], tu[5 ], tu[6 ], tu[7 ], tu[8 ], tu[9 ],
123
+ tu[10 ], tu[11 ], tu[12 ], tu[13 ], tu[14 ], tu[15 ]});
124
+
125
+ const double ev[16 ] = {-2.0 , -1.5 , -1.0 , 0.0 , 2.0 , 1.5 , 1.0 , 0.0 ,
126
+ -2.0 , -1.5 , -1.0 , 0.0 , 2.0 , 1.5 , 1.0 , 0.0 };
127
+ const double el[16 ] = {0.1 , 0.34 , 0.4 , -0.9 , 3.9 , 2.7 , 1.9 , -0.9 ,
128
+ 0.1 , 0.34 , 0.4 , -0.9 , 3.9 , 2.7 , 1.9 , -0.9 };
129
+ const double eu[16 ] = {0.3 , 0.36 , 0.6 , 1.1 , 4.1 , 2.9 , 2.1 , 1.1 ,
130
+ 0.3 , 0.36 , 0.6 , 1.1 , 4.1 , 2.9 , 2.1 , 1.1 };
131
+
132
+ native_exp2_tester<sycl::half>(q, ev[0 ], el[0 ], eu[0 ]);
133
+ native_exp2_tester<sycl::half2>(q, {ev[0 ], ev[1 ]}, {el[0 ], el[1 ]},
134
+ {eu[0 ], eu[1 ]});
135
+ native_exp2_tester<sycl::half3>(
136
+ q, {ev[0 ], ev[1 ], ev[2 ]}, {el[0 ], el[1 ], el[2 ]}, {eu[0 ], eu[1 ], eu[2 ]});
137
+ native_exp2_tester<sycl::half4>(q, {ev[0 ], ev[1 ], ev[2 ], ev[3 ]},
138
+ {el[0 ], el[1 ], el[2 ], el[3 ]},
139
+ {eu[0 ], eu[1 ], eu[2 ], eu[3 ]});
140
+ native_exp2_tester<sycl::half8>(
141
+ q, {ev[0 ], ev[1 ], ev[2 ], ev[3 ], ev[4 ], ev[5 ], ev[6 ], ev[7 ]},
142
+ {el[0 ], el[1 ], el[2 ], el[3 ], el[4 ], el[5 ], el[6 ], el[7 ]},
143
+ {eu[0 ], eu[1 ], eu[2 ], eu[3 ], eu[4 ], eu[5 ], eu[6 ], eu[7 ]});
144
+ native_exp2_tester<sycl::half16>(
145
+ q,
146
+ {ev[0 ], ev[1 ], ev[2 ], ev[3 ], ev[4 ], ev[5 ], ev[6 ], ev[7 ], ev[8 ], ev[9 ],
147
+ ev[10 ], ev[11 ], ev[12 ], ev[13 ], ev[14 ], ev[15 ]},
148
+ {el[0 ], el[1 ], el[2 ], el[3 ], el[4 ], el[5 ], el[6 ], el[7 ], el[8 ], el[9 ],
149
+ el[10 ], el[11 ], el[12 ], el[13 ], el[14 ], el[15 ]},
150
+ {eu[0 ], eu[1 ], eu[2 ], eu[3 ], eu[4 ], eu[5 ], eu[6 ], eu[7 ], eu[8 ], eu[9 ],
151
+ eu[10 ], eu[11 ], eu[12 ], eu[13 ], eu[14 ], eu[15 ]});
93
152
}
94
153
95
154
return 0 ;
0 commit comments