10
10
#include < CL/sycl.hpp>
11
11
12
12
#include < cmath>
13
- #include < unordered_set >
13
+ #include < limits >
14
14
15
- using namespace cl :: sycl;
15
+ using namespace sycl ;
16
16
17
- constexpr int N = 16 * 3 ; // divisible by all vector sizes
17
+ constexpr int SZ_max = 16 ;
18
18
19
- bool check (half a, half b) {
20
- return fabs (2 * (a - b) / (a + b)) <
21
- std::numeric_limits<cl::sycl::half>::epsilon () ||
22
- a < std::numeric_limits<cl::sycl::half>::min ();
19
+ bool check (float a, float b) {
20
+ return fabs (2 * (a - b) / (a + b)) < std::numeric_limits<half>::epsilon () ||
21
+ a < std::numeric_limits<half>::min ();
22
+ }
23
+
24
+ template <int N> bool check (vec<float , N> a, vec<float , N> b) {
25
+ for (int i = 0 ; i < N; i++) {
26
+ if (!check (a[i], b[i])) {
27
+ return false ;
28
+ }
29
+ }
30
+ return true ;
23
31
}
24
32
25
33
#define TEST_BUILTIN_1_VEC_IMPL (NAME, SZ ) \
26
34
{ \
27
- buffer<half##SZ> a_buf ((half##SZ *)&a[0 ], N / SZ); \
28
- buffer<half##SZ> d_buf ((half##SZ *)&d[0 ], N / SZ); \
29
- q.submit ([&](handler &cgh) { \
30
- auto A = a_buf.get_access <access::mode::read>(cgh); \
31
- auto D = d_buf.get_access <access::mode::write>(cgh); \
32
- cgh.parallel_for (N / SZ, \
33
- [=](id<1 > index) { D[index] = NAME (A[index]); }); \
34
- }); \
35
- } \
36
- for (int i = 0 ; i < N; i++) { \
37
- assert (check (d[i], NAME (a[i]))); \
35
+ float ##SZ *a = (float ##SZ *)&A[0 ]; \
36
+ float ##SZ *b = (float ##SZ *)&B[0 ]; \
37
+ if (i < SZ_max / SZ) { \
38
+ if (!check (NAME (a[i]), NAME (a[i].convert <half>()).convert <float >())) { \
39
+ err[0 ] = 1 ; \
40
+ } \
41
+ } \
38
42
}
39
43
40
44
// vectors of size 3 need separate test, as they actually have the size of 4
41
- // halfs
45
+ // elements
42
46
#define TEST_BUILTIN_1_VEC3_IMPL (NAME ) \
43
47
{ \
44
- buffer<half3> a_buf ((half3 *)&a[0 ], N / 4 ); \
45
- buffer<half3> d_buf ((half3 *)&d[0 ], N / 4 ); \
46
- q.submit ([&](handler &cgh) { \
47
- auto A = a_buf.get_access <access::mode::read>(cgh); \
48
- auto D = d_buf.get_access <access::mode::write>(cgh); \
49
- cgh.parallel_for (N / 4 , \
50
- [=](id<1 > index) { D[index] = NAME (A[index]); }); \
51
- }); \
52
- } \
53
- for (int i = 0 ; i < N; i++) { \
54
- if (i % 4 != 3 ) { \
55
- assert (check (d[i], NAME (a[i]))); \
48
+ float3 *a = (float3 *)&A[0 ]; \
49
+ float3 *b = (float3 *)&B[0 ]; \
50
+ if (i < SZ_max / 4 ) { \
51
+ if (!check (NAME (a[i]), NAME (a[i].convert <half>()).convert <float >())) { \
52
+ err[0 ] = 1 ; \
53
+ } \
56
54
} \
57
55
}
58
56
59
57
#define TEST_BUILTIN_1_SCAL_IMPL (NAME ) \
60
58
{ \
61
- buffer<half> a_buf (&a[0 ], N); \
62
- buffer<half> d_buf (&d[0 ], N); \
63
- q.submit ([&](handler &cgh) { \
64
- auto A = a_buf.get_access <access::mode::read>(cgh); \
65
- auto D = d_buf.get_access <access::mode::write>(cgh); \
66
- cgh.parallel_for (N, [=](id<1 > index) { D[index] = NAME (A[index]); }); \
67
- }); \
68
- } \
69
- for (int i = 0 ; i < N; i++) { \
70
- assert (check (d[i], NAME (a[i]))); \
59
+ float *a = (float *)&A[0 ]; \
60
+ float *b = (float *)&B[0 ]; \
61
+ if (!check (NAME (a[i]), (float )NAME ((half)a[i]))) { \
62
+ err[0 ] = 1 ; \
63
+ } \
71
64
}
72
65
73
66
#define TEST_BUILTIN_1 (NAME ) \
@@ -80,55 +73,37 @@ bool check(half a, half b) {
80
73
81
74
#define TEST_BUILTIN_2_VEC_IMPL (NAME, SZ ) \
82
75
{ \
83
- buffer<half##SZ> a_buf ((half##SZ *)&a[0 ], N / SZ); \
84
- buffer<half##SZ> b_buf ((half##SZ *)&b[0 ], N / SZ); \
85
- buffer<half##SZ> d_buf ((half##SZ *)&d[0 ], N / SZ); \
86
- q.submit ([&](handler &cgh) { \
87
- auto A = a_buf.get_access <access::mode::read>(cgh); \
88
- auto B = b_buf.get_access <access::mode::read>(cgh); \
89
- auto D = d_buf.get_access <access::mode::write>(cgh); \
90
- cgh.parallel_for ( \
91
- N / SZ, [=](id<1 > index) { D[index] = NAME (A[index], B[index]); }); \
92
- }); \
93
- } \
94
- for (int i = 0 ; i < N; i++) { \
95
- assert (check (d[i], NAME (a[i], b[i]))); \
76
+ float ##SZ *a = (float ##SZ *)&A[0 ]; \
77
+ float ##SZ *b = (float ##SZ *)&B[0 ]; \
78
+ if (i < SZ_max / SZ) { \
79
+ if (!check (NAME (a[i], b[i]), \
80
+ NAME (a[i].convert <half>(), b[i].convert <half>()) \
81
+ .convert <float >())) { \
82
+ err[0 ] = 1 ; \
83
+ } \
84
+ } \
96
85
}
97
86
98
87
#define TEST_BUILTIN_2_VEC3_IMPL (NAME ) \
99
88
{ \
100
- buffer<half3> a_buf ((half3 *)&a[0 ], N / 4 ); \
101
- buffer<half3> b_buf ((half3 *)&b[0 ], N / 4 ); \
102
- buffer<half3> d_buf ((half3 *)&d[0 ], N / 4 ); \
103
- q.submit ([&](handler &cgh) { \
104
- auto A = a_buf.get_access <access::mode::read>(cgh); \
105
- auto B = b_buf.get_access <access::mode::read>(cgh); \
106
- auto D = d_buf.get_access <access::mode::write>(cgh); \
107
- cgh.parallel_for ( \
108
- N / 4 , [=](id<1 > index) { D[index] = NAME (A[index], B[index]); }); \
109
- }); \
110
- } \
111
- for (int i = 0 ; i < N; i++) { \
112
- if (i % 4 != 3 ) { \
113
- assert (check (d[i], NAME (a[i], b[i]))); \
89
+ float3 *a = (float3 *)&A[0 ]; \
90
+ float3 *b = (float3 *)&B[0 ]; \
91
+ if (i < SZ_max / 4 ) { \
92
+ if (!check (NAME (a[i], b[i]), \
93
+ NAME (a[i].convert <half>(), b[i].convert <half>()) \
94
+ .convert <float >())) { \
95
+ err[0 ] = 1 ; \
96
+ } \
114
97
} \
115
98
}
116
99
117
100
#define TEST_BUILTIN_2_SCAL_IMPL (NAME ) \
118
101
{ \
119
- buffer<half> a_buf (&a[0 ], N); \
120
- buffer<half> b_buf (&b[0 ], N); \
121
- buffer<half> d_buf (&d[0 ], N); \
122
- q.submit ([&](handler &cgh) { \
123
- auto A = a_buf.get_access <access::mode::read>(cgh); \
124
- auto B = b_buf.get_access <access::mode::read>(cgh); \
125
- auto D = d_buf.get_access <access::mode::write>(cgh); \
126
- cgh.parallel_for ( \
127
- N, [=](id<1 > index) { D[index] = NAME (A[index], B[index]); }); \
128
- }); \
129
- } \
130
- for (int i = 0 ; i < N; i++) { \
131
- assert (check (d[i], NAME (a[i], b[i]))); \
102
+ float *a = (float *)&A[0 ]; \
103
+ float *b = (float *)&B[0 ]; \
104
+ if (!check (NAME (a[i], b[i]), (float )NAME ((half)a[i], (half)b[i]))) { \
105
+ err[0 ] = 1 ; \
106
+ } \
132
107
}
133
108
134
109
#define TEST_BUILTIN_2 (NAME ) \
@@ -141,64 +116,43 @@ bool check(half a, half b) {
141
116
142
117
#define TEST_BUILTIN_3_VEC_IMPL (NAME, SZ ) \
143
118
{ \
144
- buffer<half##SZ> a_buf ((half##SZ *)&a[0 ], N / SZ); \
145
- buffer<half##SZ> b_buf ((half##SZ *)&b[0 ], N / SZ); \
146
- buffer<half##SZ> c_buf ((half##SZ *)&c[0 ], N / SZ); \
147
- buffer<half##SZ> d_buf ((half##SZ *)&d[0 ], N / SZ); \
148
- q.submit ([&](handler &cgh) { \
149
- auto A = a_buf.get_access <access::mode::read>(cgh); \
150
- auto B = b_buf.get_access <access::mode::read>(cgh); \
151
- auto C = c_buf.get_access <access::mode::read>(cgh); \
152
- auto D = d_buf.get_access <access::mode::write>(cgh); \
153
- cgh.parallel_for (N / SZ, [=](id<1 > index) { \
154
- D[index] = NAME (A[index], B[index], C[index]); \
155
- }); \
156
- }); \
157
- } \
158
- for (int i = 0 ; i < N; i++) { \
159
- assert (check (d[i], NAME (a[i], b[i], c[i]))); \
119
+ float ##SZ *a = (float ##SZ *)&A[0 ]; \
120
+ float ##SZ *b = (float ##SZ *)&B[0 ]; \
121
+ float ##SZ *c = (float ##SZ *)&C[0 ]; \
122
+ if (i < SZ_max / SZ) { \
123
+ if (!check (NAME (a[i], b[i], c[i]), \
124
+ NAME (a[i].convert <half>(), b[i].convert <half>(), \
125
+ c[i].convert <half>()) \
126
+ .convert <float >())) { \
127
+ err[0 ] = 1 ; \
128
+ } \
129
+ } \
160
130
}
161
131
162
132
#define TEST_BUILTIN_3_VEC3_IMPL (NAME ) \
163
133
{ \
164
- buffer<half3> a_buf ((half3 *)&a[0 ], N / 4 ); \
165
- buffer<half3> b_buf ((half3 *)&b[0 ], N / 4 ); \
166
- buffer<half3> c_buf ((half3 *)&c[0 ], N / 4 ); \
167
- buffer<half3> d_buf ((half3 *)&d[0 ], N / 4 ); \
168
- q.submit ([&](handler &cgh) { \
169
- auto A = a_buf.get_access <access::mode::read>(cgh); \
170
- auto B = b_buf.get_access <access::mode::read>(cgh); \
171
- auto C = c_buf.get_access <access::mode::read>(cgh); \
172
- auto D = d_buf.get_access <access::mode::write>(cgh); \
173
- cgh.parallel_for (N / 4 , [=](id<1 > index) { \
174
- D[index] = NAME (A[index], B[index], C[index]); \
175
- }); \
176
- }); \
177
- } \
178
- for (int i = 0 ; i < N; i++) { \
179
- if (i % 4 != 3 ) { \
180
- assert (check (d[i], NAME (a[i], b[i], c[i]))); \
134
+ float3 *a = (float3 *)&A[0 ]; \
135
+ float3 *b = (float3 *)&B[0 ]; \
136
+ float3 *c = (float3 *)&C[0 ]; \
137
+ if (i < SZ_max / 4 ) { \
138
+ if (!check (NAME (a[i], b[i], c[i]), \
139
+ NAME (a[i].convert <half>(), b[i].convert <half>(), \
140
+ c[i].convert <half>()) \
141
+ .convert <float >())) { \
142
+ err[0 ] = 1 ; \
143
+ } \
181
144
} \
182
145
}
183
146
184
147
#define TEST_BUILTIN_3_SCAL_IMPL (NAME ) \
185
148
{ \
186
- buffer<half> a_buf (&a[0 ], N); \
187
- buffer<half> b_buf (&b[0 ], N); \
188
- buffer<half> c_buf (&c[0 ], N); \
189
- buffer<half> d_buf (&d[0 ], N); \
190
- q.submit ([&](handler &cgh) { \
191
- auto A = a_buf.get_access <access::mode::read>(cgh); \
192
- auto B = b_buf.get_access <access::mode::read>(cgh); \
193
- auto C = c_buf.get_access <access::mode::read>(cgh); \
194
- auto D = d_buf.get_access <access::mode::write>(cgh); \
195
- cgh.parallel_for (N, [=](id<1 > index) { \
196
- D[index] = NAME (A[index], B[index], C[index]); \
197
- }); \
198
- }); \
199
- } \
200
- for (int i = 0 ; i < N; i++) { \
201
- assert (check (d[i], NAME (a[i], b[i], c[i]))); \
149
+ float *a = (float *)&A[0 ]; \
150
+ float *b = (float *)&B[0 ]; \
151
+ float *c = (float *)&C[0 ]; \
152
+ if (!check (NAME (a[i], b[i], c[i]), \
153
+ (float )NAME ((half)a[i], (half)b[i], (half)c[i]))) { \
154
+ err[0 ] = 1 ; \
155
+ } \
202
156
}
203
157
204
158
#define TEST_BUILTIN_3 (NAME ) \
@@ -211,17 +165,33 @@ bool check(half a, half b) {
211
165
212
166
int main() {
213
167
queue q;
214
- std::vector<half> a (N) , b (N) , c (N) , d (N) ;
215
- for (int i = 0 ; i < N ; i++) {
216
- a[i] = i / (half)N ;
217
- b[i] = (N - i) / (half)N ;
218
- c[i] = (half )(3 * i);
168
+ float16 a , b, c, d;
169
+ for (int i = 0 ; i < SZ_max ; i++) {
170
+ a[i] = i / (float )SZ_max ;
171
+ b[i] = (SZ_max - i) / (float )SZ_max ;
172
+ c[i] = (float )(3 * i);
219
173
}
220
-
221
- TEST_BUILTIN_1 (fabs);
222
- TEST_BUILTIN_2 (fmin);
223
- TEST_BUILTIN_2 (fmax);
224
- TEST_BUILTIN_3 (fma);
174
+ int err = 0 ;
175
+ {
176
+ buffer<float16> a_buf (&a, 1 );
177
+ buffer<float16> b_buf (&b, 1 );
178
+ buffer<float16> c_buf (&c, 1 );
179
+ buffer<int > err_buf (&err, 1 );
180
+ q.submit ([&](handler &cgh) {
181
+ auto A = a_buf.get_access <access::mode::read>(cgh);
182
+ auto B = b_buf.get_access <access::mode::read>(cgh);
183
+ auto C = c_buf.get_access <access::mode::read>(cgh);
184
+ auto err = err_buf.get_access <access::mode::write>(cgh);
185
+ cgh.parallel_for (SZ_max, [=](item<1 > index) {
186
+ size_t i = index.get_id (0 );
187
+ TEST_BUILTIN_1 (fabs);
188
+ TEST_BUILTIN_2 (fmin);
189
+ TEST_BUILTIN_2 (fmax);
190
+ TEST_BUILTIN_3 (fma);
191
+ });
192
+ });
193
+ }
194
+ assert (err == 0 );
225
195
226
196
return 0 ;
227
197
}
0 commit comments