Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 20dd111

Browse files
committed
addressed review comments and refactored into a single kernel
1 parent 2f7fc03 commit 20dd111

File tree

1 file changed

+110
-140
lines changed

1 file changed

+110
-140
lines changed

SYCL/Basic/half_builtins.cpp

Lines changed: 110 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -10,64 +10,57 @@
1010
#include <CL/sycl.hpp>
1111

1212
#include <cmath>
13-
#include <unordered_set>
13+
#include <limits>
1414

15-
using namespace cl::sycl;
15+
using namespace sycl;
1616

17-
constexpr int N = 16 * 3; // divisible by all vector sizes
17+
constexpr int SZ_max = 16;
1818

19-
bool check(half a, half b) {
20-
return fabs(2 * (a - b) / (a + b)) <
21-
std::numeric_limits<cl::sycl::half>::epsilon() ||
22-
a < std::numeric_limits<cl::sycl::half>::min();
19+
bool check(float a, float b) {
20+
return fabs(2 * (a - b) / (a + b)) < std::numeric_limits<half>::epsilon() ||
21+
a < std::numeric_limits<half>::min();
22+
}
23+
24+
template <int N> bool check(vec<float, N> a, vec<float, N> b) {
25+
for (int i = 0; i < N; i++) {
26+
if (!check(a[i], b[i])) {
27+
return false;
28+
}
29+
}
30+
return true;
2331
}
2432

2533
#define TEST_BUILTIN_1_VEC_IMPL(NAME, SZ) \
2634
{ \
27-
buffer<half##SZ> a_buf((half##SZ *)&a[0], N / SZ); \
28-
buffer<half##SZ> d_buf((half##SZ *)&d[0], N / SZ); \
29-
q.submit([&](handler &cgh) { \
30-
auto A = a_buf.get_access<access::mode::read>(cgh); \
31-
auto D = d_buf.get_access<access::mode::write>(cgh); \
32-
cgh.parallel_for(N / SZ, \
33-
[=](id<1> index) { D[index] = NAME(A[index]); }); \
34-
}); \
35-
} \
36-
for (int i = 0; i < N; i++) { \
37-
assert(check(d[i], NAME(a[i]))); \
35+
float##SZ *a = (float##SZ *)&A[0]; \
36+
float##SZ *b = (float##SZ *)&B[0]; \
37+
if (i < SZ_max / SZ) { \
38+
if (!check(NAME(a[i]), NAME(a[i].convert<half>()).convert<float>())) { \
39+
err[0] = 1; \
40+
} \
41+
} \
3842
}
3943

4044
// vectors of size 3 need separate test, as they actually have the size of 4
41-
// halfs
45+
// elements
4246
#define TEST_BUILTIN_1_VEC3_IMPL(NAME) \
4347
{ \
44-
buffer<half3> a_buf((half3 *)&a[0], N / 4); \
45-
buffer<half3> d_buf((half3 *)&d[0], N / 4); \
46-
q.submit([&](handler &cgh) { \
47-
auto A = a_buf.get_access<access::mode::read>(cgh); \
48-
auto D = d_buf.get_access<access::mode::write>(cgh); \
49-
cgh.parallel_for(N / 4, \
50-
[=](id<1> index) { D[index] = NAME(A[index]); }); \
51-
}); \
52-
} \
53-
for (int i = 0; i < N; i++) { \
54-
if (i % 4 != 3) { \
55-
assert(check(d[i], NAME(a[i]))); \
48+
float3 *a = (float3 *)&A[0]; \
49+
float3 *b = (float3 *)&B[0]; \
50+
if (i < SZ_max / 4) { \
51+
if (!check(NAME(a[i]), NAME(a[i].convert<half>()).convert<float>())) { \
52+
err[0] = 1; \
53+
} \
5654
} \
5755
}
5856

5957
#define TEST_BUILTIN_1_SCAL_IMPL(NAME) \
6058
{ \
61-
buffer<half> a_buf(&a[0], N); \
62-
buffer<half> d_buf(&d[0], N); \
63-
q.submit([&](handler &cgh) { \
64-
auto A = a_buf.get_access<access::mode::read>(cgh); \
65-
auto D = d_buf.get_access<access::mode::write>(cgh); \
66-
cgh.parallel_for(N, [=](id<1> index) { D[index] = NAME(A[index]); }); \
67-
}); \
68-
} \
69-
for (int i = 0; i < N; i++) { \
70-
assert(check(d[i], NAME(a[i]))); \
59+
float *a = (float *)&A[0]; \
60+
float *b = (float *)&B[0]; \
61+
if (!check(NAME(a[i]), (float)NAME((half)a[i]))) { \
62+
err[0] = 1; \
63+
} \
7164
}
7265

7366
#define TEST_BUILTIN_1(NAME) \
@@ -80,55 +73,37 @@ bool check(half a, half b) {
8073

8174
#define TEST_BUILTIN_2_VEC_IMPL(NAME, SZ) \
8275
{ \
83-
buffer<half##SZ> a_buf((half##SZ *)&a[0], N / SZ); \
84-
buffer<half##SZ> b_buf((half##SZ *)&b[0], N / SZ); \
85-
buffer<half##SZ> d_buf((half##SZ *)&d[0], N / SZ); \
86-
q.submit([&](handler &cgh) { \
87-
auto A = a_buf.get_access<access::mode::read>(cgh); \
88-
auto B = b_buf.get_access<access::mode::read>(cgh); \
89-
auto D = d_buf.get_access<access::mode::write>(cgh); \
90-
cgh.parallel_for( \
91-
N / SZ, [=](id<1> index) { D[index] = NAME(A[index], B[index]); }); \
92-
}); \
93-
} \
94-
for (int i = 0; i < N; i++) { \
95-
assert(check(d[i], NAME(a[i], b[i]))); \
76+
float##SZ *a = (float##SZ *)&A[0]; \
77+
float##SZ *b = (float##SZ *)&B[0]; \
78+
if (i < SZ_max / SZ) { \
79+
if (!check(NAME(a[i], b[i]), \
80+
NAME(a[i].convert<half>(), b[i].convert<half>()) \
81+
.convert<float>())) { \
82+
err[0] = 1; \
83+
} \
84+
} \
9685
}
9786

9887
#define TEST_BUILTIN_2_VEC3_IMPL(NAME) \
9988
{ \
100-
buffer<half3> a_buf((half3 *)&a[0], N / 4); \
101-
buffer<half3> b_buf((half3 *)&b[0], N / 4); \
102-
buffer<half3> d_buf((half3 *)&d[0], N / 4); \
103-
q.submit([&](handler &cgh) { \
104-
auto A = a_buf.get_access<access::mode::read>(cgh); \
105-
auto B = b_buf.get_access<access::mode::read>(cgh); \
106-
auto D = d_buf.get_access<access::mode::write>(cgh); \
107-
cgh.parallel_for( \
108-
N / 4, [=](id<1> index) { D[index] = NAME(A[index], B[index]); }); \
109-
}); \
110-
} \
111-
for (int i = 0; i < N; i++) { \
112-
if (i % 4 != 3) { \
113-
assert(check(d[i], NAME(a[i], b[i]))); \
89+
float3 *a = (float3 *)&A[0]; \
90+
float3 *b = (float3 *)&B[0]; \
91+
if (i < SZ_max / 4) { \
92+
if (!check(NAME(a[i], b[i]), \
93+
NAME(a[i].convert<half>(), b[i].convert<half>()) \
94+
.convert<float>())) { \
95+
err[0] = 1; \
96+
} \
11497
} \
11598
}
11699

117100
#define TEST_BUILTIN_2_SCAL_IMPL(NAME) \
118101
{ \
119-
buffer<half> a_buf(&a[0], N); \
120-
buffer<half> b_buf(&b[0], N); \
121-
buffer<half> d_buf(&d[0], N); \
122-
q.submit([&](handler &cgh) { \
123-
auto A = a_buf.get_access<access::mode::read>(cgh); \
124-
auto B = b_buf.get_access<access::mode::read>(cgh); \
125-
auto D = d_buf.get_access<access::mode::write>(cgh); \
126-
cgh.parallel_for( \
127-
N, [=](id<1> index) { D[index] = NAME(A[index], B[index]); }); \
128-
}); \
129-
} \
130-
for (int i = 0; i < N; i++) { \
131-
assert(check(d[i], NAME(a[i], b[i]))); \
102+
float *a = (float *)&A[0]; \
103+
float *b = (float *)&B[0]; \
104+
if (!check(NAME(a[i], b[i]), (float)NAME((half)a[i], (half)b[i]))) { \
105+
err[0] = 1; \
106+
} \
132107
}
133108

134109
#define TEST_BUILTIN_2(NAME) \
@@ -141,64 +116,43 @@ bool check(half a, half b) {
141116

142117
#define TEST_BUILTIN_3_VEC_IMPL(NAME, SZ) \
143118
{ \
144-
buffer<half##SZ> a_buf((half##SZ *)&a[0], N / SZ); \
145-
buffer<half##SZ> b_buf((half##SZ *)&b[0], N / SZ); \
146-
buffer<half##SZ> c_buf((half##SZ *)&c[0], N / SZ); \
147-
buffer<half##SZ> d_buf((half##SZ *)&d[0], N / SZ); \
148-
q.submit([&](handler &cgh) { \
149-
auto A = a_buf.get_access<access::mode::read>(cgh); \
150-
auto B = b_buf.get_access<access::mode::read>(cgh); \
151-
auto C = c_buf.get_access<access::mode::read>(cgh); \
152-
auto D = d_buf.get_access<access::mode::write>(cgh); \
153-
cgh.parallel_for(N / SZ, [=](id<1> index) { \
154-
D[index] = NAME(A[index], B[index], C[index]); \
155-
}); \
156-
}); \
157-
} \
158-
for (int i = 0; i < N; i++) { \
159-
assert(check(d[i], NAME(a[i], b[i], c[i]))); \
119+
float##SZ *a = (float##SZ *)&A[0]; \
120+
float##SZ *b = (float##SZ *)&B[0]; \
121+
float##SZ *c = (float##SZ *)&C[0]; \
122+
if (i < SZ_max / SZ) { \
123+
if (!check(NAME(a[i], b[i], c[i]), \
124+
NAME(a[i].convert<half>(), b[i].convert<half>(), \
125+
c[i].convert<half>()) \
126+
.convert<float>())) { \
127+
err[0] = 1; \
128+
} \
129+
} \
160130
}
161131

162132
#define TEST_BUILTIN_3_VEC3_IMPL(NAME) \
163133
{ \
164-
buffer<half3> a_buf((half3 *)&a[0], N / 4); \
165-
buffer<half3> b_buf((half3 *)&b[0], N / 4); \
166-
buffer<half3> c_buf((half3 *)&c[0], N / 4); \
167-
buffer<half3> d_buf((half3 *)&d[0], N / 4); \
168-
q.submit([&](handler &cgh) { \
169-
auto A = a_buf.get_access<access::mode::read>(cgh); \
170-
auto B = b_buf.get_access<access::mode::read>(cgh); \
171-
auto C = c_buf.get_access<access::mode::read>(cgh); \
172-
auto D = d_buf.get_access<access::mode::write>(cgh); \
173-
cgh.parallel_for(N / 4, [=](id<1> index) { \
174-
D[index] = NAME(A[index], B[index], C[index]); \
175-
}); \
176-
}); \
177-
} \
178-
for (int i = 0; i < N; i++) { \
179-
if (i % 4 != 3) { \
180-
assert(check(d[i], NAME(a[i], b[i], c[i]))); \
134+
float3 *a = (float3 *)&A[0]; \
135+
float3 *b = (float3 *)&B[0]; \
136+
float3 *c = (float3 *)&C[0]; \
137+
if (i < SZ_max / 4) { \
138+
if (!check(NAME(a[i], b[i], c[i]), \
139+
NAME(a[i].convert<half>(), b[i].convert<half>(), \
140+
c[i].convert<half>()) \
141+
.convert<float>())) { \
142+
err[0] = 1; \
143+
} \
181144
} \
182145
}
183146

184147
#define TEST_BUILTIN_3_SCAL_IMPL(NAME) \
185148
{ \
186-
buffer<half> a_buf(&a[0], N); \
187-
buffer<half> b_buf(&b[0], N); \
188-
buffer<half> c_buf(&c[0], N); \
189-
buffer<half> d_buf(&d[0], N); \
190-
q.submit([&](handler &cgh) { \
191-
auto A = a_buf.get_access<access::mode::read>(cgh); \
192-
auto B = b_buf.get_access<access::mode::read>(cgh); \
193-
auto C = c_buf.get_access<access::mode::read>(cgh); \
194-
auto D = d_buf.get_access<access::mode::write>(cgh); \
195-
cgh.parallel_for(N, [=](id<1> index) { \
196-
D[index] = NAME(A[index], B[index], C[index]); \
197-
}); \
198-
}); \
199-
} \
200-
for (int i = 0; i < N; i++) { \
201-
assert(check(d[i], NAME(a[i], b[i], c[i]))); \
149+
float *a = (float *)&A[0]; \
150+
float *b = (float *)&B[0]; \
151+
float *c = (float *)&C[0]; \
152+
if (!check(NAME(a[i], b[i], c[i]), \
153+
(float)NAME((half)a[i], (half)b[i], (half)c[i]))) { \
154+
err[0] = 1; \
155+
} \
202156
}
203157

204158
#define TEST_BUILTIN_3(NAME) \
@@ -211,17 +165,33 @@ bool check(half a, half b) {
211165

212166
int main() {
213167
queue q;
214-
std::vector<half> a(N), b(N), c(N), d(N);
215-
for (int i = 0; i < N; i++) {
216-
a[i] = i / (half)N;
217-
b[i] = (N - i) / (half)N;
218-
c[i] = (half)(3 * i);
168+
float16 a, b, c, d;
169+
for (int i = 0; i < SZ_max; i++) {
170+
a[i] = i / (float)SZ_max;
171+
b[i] = (SZ_max - i) / (float)SZ_max;
172+
c[i] = (float)(3 * i);
219173
}
220-
221-
TEST_BUILTIN_1(fabs);
222-
TEST_BUILTIN_2(fmin);
223-
TEST_BUILTIN_2(fmax);
224-
TEST_BUILTIN_3(fma);
174+
int err = 0;
175+
{
176+
buffer<float16> a_buf(&a, 1);
177+
buffer<float16> b_buf(&b, 1);
178+
buffer<float16> c_buf(&c, 1);
179+
buffer<int> err_buf(&err, 1);
180+
q.submit([&](handler &cgh) {
181+
auto A = a_buf.get_access<access::mode::read>(cgh);
182+
auto B = b_buf.get_access<access::mode::read>(cgh);
183+
auto C = c_buf.get_access<access::mode::read>(cgh);
184+
auto err = err_buf.get_access<access::mode::write>(cgh);
185+
cgh.parallel_for(SZ_max, [=](item<1> index) {
186+
size_t i = index.get_id(0);
187+
TEST_BUILTIN_1(fabs);
188+
TEST_BUILTIN_2(fmin);
189+
TEST_BUILTIN_2(fmax);
190+
TEST_BUILTIN_3(fma);
191+
});
192+
});
193+
}
194+
assert(err == 0);
225195

226196
return 0;
227197
}

0 commit comments

Comments
 (0)