Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 636443f

Browse files
committed
[SYCL] Add BFloat16 feature end-to-end test
Spec: intel/llvm#4237
Implementation: intel/llvm#4213

Signed-off-by: Dmitry Sidorov <[email protected]>
1 parent f1a22ff commit 636443f

File tree

1 file changed

+150
-0
lines changed

1 file changed

+150
-0
lines changed

SYCL/BFloat16/bfloat16_type.cpp

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

//==----------- bfloat16_type.cpp - SYCL bfloat16 type test ----------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
15+
#include <CL/sycl.hpp>
#include <sycl/ext/intel/experimental/bfloat16.hpp>

#include <cmath>
20+
using namespace cl::sycl;

// Number of elements in every buffer used by the checks below.
constexpr size_t N = 100;
23+
24+
// Assert that all N elements of C equal `ref` to within float machine
// epsilon. T is any indexable container of float-convertible values
// (here: a SYCL host accessor).
template <typename T> void assert_close(const T &C, const float ref) {
  for (size_t idx = 0; idx < N; ++idx) {
    const float delta = static_cast<float>(C[idx] - ref);
    assert(std::fabs(delta) < std::numeric_limits<float>::epsilon());
  }
}
31+
32+
// Round-trip buffer `a` through bfloat16 on the device (float -> bfloat16
// -> float, in place) and check the values still equal `ref` on the host.
// `ref` must be exactly representable in bfloat16 for the epsilon
// comparison in assert_close to hold.
void verify_conv(queue &q, buffer<float, 1> &a, range<1> &r, const float ref) {
  q.submit([&](handler &cgh) {
    auto A = a.get_access<access::mode::read_write>(cgh);
    cgh.parallel_for<class calc_conv>(r, [=](id<1> index) {
      namespace exp_ext = cl::sycl::ext::intel::experimental;
      const exp_ext::bfloat16 narrowed{A[index]};
      A[index] = narrowed; // implicit bfloat16 -> float widening
    });
  });

  // Host accessor waits for the kernel, then validates every element.
  assert_close(a.get_access<access::mode::read>(), ref);
}
43+
44+
// Compute a[i] + b[i] with the operands narrowed to bfloat16 first, store
// the bfloat16 sum into a fresh float buffer, and check it equals `ref`.
void verify_add(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
                const float ref) {
  buffer<float, 1> result{r};

  q.submit([&](handler &cgh) {
    auto A = a.get_access<access::mode::read>(cgh);
    auto B = b.get_access<access::mode::read>(cgh);
    auto C = result.get_access<access::mode::write>(cgh);
    cgh.parallel_for<class calc_add>(r, [=](id<1> index) {
      namespace exp_ext = cl::sycl::ext::intel::experimental;
      const exp_ext::bfloat16 lhs{A[index]};
      const exp_ext::bfloat16 rhs{B[index]};
      // Arithmetic is done in float; only storage/conversion is bfloat16.
      const exp_ext::bfloat16 sum =
          static_cast<float>(lhs) + static_cast<float>(rhs);
      C[index] = sum;
    });
  });

  assert_close(result.get_access<access::mode::read>(), ref);
}
63+
64+
// NOTE(review): despite the name, "min" here means "minus" — this computes
// a[i] - b[i] (not the minimum) through bfloat16 operands and checks the
// difference against `ref`.
void verify_min(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
                const float ref) {
  buffer<float, 1> result{r};

  q.submit([&](handler &cgh) {
    auto A = a.get_access<access::mode::read>(cgh);
    auto B = b.get_access<access::mode::read>(cgh);
    auto C = result.get_access<access::mode::write>(cgh);
    cgh.parallel_for<class calc_min>(r, [=](id<1> index) {
      namespace exp_ext = cl::sycl::ext::intel::experimental;
      const exp_ext::bfloat16 lhs{A[index]};
      const exp_ext::bfloat16 rhs{B[index]};
      // Subtraction performed in float; operands round-trip via bfloat16.
      const exp_ext::bfloat16 diff =
          static_cast<float>(lhs) - static_cast<float>(rhs);
      C[index] = diff;
    });
  });

  assert_close(result.get_access<access::mode::read>(), ref);
}
83+
84+
// Compute a[i] * b[i] with the operands narrowed to bfloat16 first and
// check the product equals `ref`.
void verify_mul(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
                const float ref) {
  buffer<float, 1> result{r};

  q.submit([&](handler &cgh) {
    auto A = a.get_access<access::mode::read>(cgh);
    auto B = b.get_access<access::mode::read>(cgh);
    auto C = result.get_access<access::mode::write>(cgh);
    cgh.parallel_for<class calc_mul>(r, [=](id<1> index) {
      namespace exp_ext = cl::sycl::ext::intel::experimental;
      const exp_ext::bfloat16 lhs{A[index]};
      const exp_ext::bfloat16 rhs{B[index]};
      // Multiplication performed in float; operands round-trip via bfloat16.
      const exp_ext::bfloat16 prod =
          static_cast<float>(lhs) * static_cast<float>(rhs);
      C[index] = prod;
    });
  });

  assert_close(result.get_access<access::mode::read>(), ref);
}
103+
104+
// Compute a[i] / b[i] with the operands narrowed to bfloat16 first and
// check the quotient equals `ref`.
void verify_div(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
                const float ref) {
  buffer<float, 1> result{r};

  q.submit([&](handler &cgh) {
    auto A = a.get_access<access::mode::read>(cgh);
    auto B = b.get_access<access::mode::read>(cgh);
    auto C = result.get_access<access::mode::write>(cgh);
    cgh.parallel_for<class calc_div>(r, [=](id<1> index) {
      namespace exp_ext = cl::sycl::ext::intel::experimental;
      const exp_ext::bfloat16 lhs{A[index]};
      const exp_ext::bfloat16 rhs{B[index]};
      // Division performed in float; operands round-trip via bfloat16.
      const exp_ext::bfloat16 quot =
          static_cast<float>(lhs) / static_cast<float>(rhs);
      C[index] = quot;
    });
  });

  assert_close(result.get_access<access::mode::read>(), ref);
}
123+
124+
int main() {
125+
device dev{default_selector()};
126+
127+
// TODO: replace is_gpu check with extension check when the appropriate part
128+
// of implementation ready
129+
if (!dev.is_gpu()) {
130+
std::cout << "This device doesn't support bfloat16 type" << std::endl;
131+
return 0;
132+
}
133+
134+
std::vector<float> vec_a(N, 5.0);
135+
std::vector<float> vec_b(N, 2.0);
136+
137+
range<1> r(N);
138+
buffer<float, 1> a{vec_a.data(), r};
139+
buffer<float, 1> b{vec_b.data(), r};
140+
141+
queue q{dev};
142+
143+
verify_conv(q, a, r, 5.0);
144+
verify_add(q, a, b, r, 7.0);
145+
verify_min(q, a, b, r, 3.0);
146+
verify_mul(q, a, b, r, 10.0);
147+
verify_div(q, a, b, r, 2.5);
148+
149+
return 0;
150+
}

0 commit comments

Comments
 (0)