
Commit 480287d

Authored by Fznamznon, rdeodhar, JackAKirk, and steffenlarsen
Cherry-pick test changes related to move of bfloat16 (intel#1459)
* [SYCL] Test corrections after moving bfloat16 support out of experimental status. (intel#1129)

  Test changes for intel/llvm#6524.

  Signed-off-by: Rajiv Deodhar <[email protected]>
  Co-authored-by: JackAKirk <[email protected]>

* [SYCL] Correct bfloat16 namespace in ESIMD and matrix tests (intel#1422)

  intel/llvm#6524 moved bfloat16 out of the experimental namespace. This commit removes the last remaining uses of the experimental namespace for bfloat16 in ESIMD and matrix tests.

  Signed-off-by: Larsen, Steffen <[email protected]>

Signed-off-by: Rajiv Deodhar <[email protected]>
Signed-off-by: Larsen, Steffen <[email protected]>
Co-authored-by: rdeodhar <[email protected]>
Co-authored-by: JackAKirk <[email protected]>
Co-authored-by: Steffen Larsen <[email protected]>
1 parent ede4924 · commit 480287d

29 files changed: +244, -87 lines
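Most of the churn below is the same one-line migration. As a before/after sketch (mine, not lifted from the diff):

// Before intel/llvm#6524: the type lived in the experimental namespace.
#include <sycl/ext/oneapi/experimental/bfloat16.hpp>
using bf16 = sycl::ext::oneapi::experimental::bfloat16;

// After: the header and namespace moved up one level.
#include <sycl/ext/oneapi/bfloat16.hpp>
using bf16 = sycl::ext::oneapi::bfloat16;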

SYCL/BFloat16/bfloat16_builtins.cpp

Lines changed: 2 additions & 2 deletions

@@ -12,7 +12,7 @@
 #include <vector>
 
 using namespace sycl;
-using namespace sycl::ext::oneapi::experimental;
+using namespace sycl::ext::oneapi;
 
 constexpr int N = 60; // divisible by all tested array sizes
 constexpr float bf16_eps = 0.00390625;

@@ -222,7 +222,7 @@ bool check(float a, float b) {
 int main() {
   queue q;
 
-  if (q.get_device().has(aspect::ext_oneapi_bfloat16)) {
+  if (q.get_device().has(aspect::ext_oneapi_bfloat16_math_functions)) {
     std::vector<float> a(N), b(N), c(N);
     int err = 0;
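The aspect change in the second hunk is the one behavioral update in this file: the math builtins are now gated on a dedicated aspect rather than the general bfloat16 aspect. A minimal sketch of that guard in isolation (the aspect name comes from the diff; the scaffolding is mine):

#include <iostream>
#include <sycl/sycl.hpp>

int main() {
  sycl::queue q;
  // Run the bfloat16 math builtins only where the device reports the
  // dedicated aspect; a device may support the bfloat16 type itself
  // without providing the math functions.
  if (!q.get_device().has(sycl::aspect::ext_oneapi_bfloat16_math_functions)) {
    std::cout << "bfloat16 math functions not supported, skipping\n";
    return 0;
  }
  // ... exercise the bfloat16 math builtins here ...
  return 0;
}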
SYCL/BFloat16/bfloat16_conversions.cpp

Lines changed: 70 additions & 0 deletions

@@ -0,0 +1,70 @@
+// UNSUPPORTED: hip
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out
+// Currently the feature is supported only on CPU and GPU, natively or by
+// software emulation.
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUNx: %ACC_RUN_PLACEHOLDER %t.out
+
+//==---------- bfloat16_conversions.cpp - SYCL bfloat16 type test ---------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+
+#include <iostream>
+#include <sycl/sycl.hpp>
+
+using namespace sycl;
+
+template <typename T> T calculate(T a, T b) {
+  sycl::ext::oneapi::bfloat16 x = -a;
+  sycl::ext::oneapi::bfloat16 y = b;
+  sycl::ext::oneapi::bfloat16 z = x + y;
+  T result = z;
+  return result;
+}
+
+template <typename T> int test_device(queue Q) {
+  T data[3] = {-7.0f, 8.1f, 0.0f};
+
+  buffer<T, 1> buf{data, 3};
+  Q.submit([&](handler &cgh) {
+    accessor numbers{buf, cgh, read_write};
+    cgh.single_task([=]() { numbers[2] = calculate(numbers[0], numbers[1]); });
+  });
+
+  host_accessor hostOutAcc{buf, read_only};
+  std::cout << "Device Result = " << hostOutAcc[2] << std::endl;
+  if (hostOutAcc[2] == 15.125f)
+    return 0;
+  return 1;
+}
+
+template <typename T> int test_host() {
+  T a{-5.6f};
+  T b{-1.1f};
+  T result = calculate(a, b);
+  std::cout << "Host Result = " << result << std::endl;
+  if (result == 4.5f)
+    return 0;
+  return 1;
+}
+
+int main() {
+  queue Q;
+  int result;
+  result = test_host<sycl::half>();
+  result |= test_host<float>();
+  if (Q.get_device().has(aspect::fp16))
+    result |= test_device<sycl::half>(Q);
+  result |= test_device<float>(Q);
+  if (result)
+    std::cout << "FAIL\n";
+  else
+    std::cout << "PASS\n";
+
+  return result;
+}
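Why the device test checks for exactly 15.125 rather than -(-7.0) + 8.1 = 15.1: bfloat16 keeps only 8 significant bits (an implicit leading 1 plus 7 stored mantissa bits), so 8.1f rounds to 8.125, and 7.0 + 8.125 = 15.125 is exactly representable. A standalone sketch of that rounding using the common keep-the-top-16-bits, round-to-nearest-even conversion (illustrative only, ignoring NaN handling; not necessarily how the SYCL runtime implements it):

#include <cstdint>
#include <cstring>
#include <iostream>

// Round a float to the nearest bfloat16-representable value by keeping the
// top 16 bits (sign, 8 exponent bits, 7 mantissa bits) of its IEEE-754
// encoding, rounding to nearest with ties to even.
float round_to_bf16(float f) {
  uint32_t u;
  std::memcpy(&u, &f, sizeof u);
  u += 0x7FFFu + ((u >> 16) & 1u); // round to nearest, ties to even
  u &= 0xFFFF0000u;                // drop the low 16 mantissa bits
  float r;
  std::memcpy(&r, &u, sizeof r);
  return r;
}

int main() {
  std::cout << round_to_bf16(8.1f) << "\n";        // prints 8.125
  std::cout << 7.0f + round_to_bf16(8.1f) << "\n"; // prints 15.125
}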

SYCL/BFloat16/bfloat16_example.cpp

Lines changed: 83 additions & 0 deletions

@@ -0,0 +1,83 @@
+///
+/// Check if bfloat16 example works using fallback libraries
+///
+
+// REQUIRES: opencl-aot, ocloc, cpu, gpu-intel-gen9
+// UNSUPPORTED: cuda
+// CUDA is not compatible with SPIR.
+
+// RUN: %clangxx -fsycl %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64 %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device gen9" %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device *" %s -o %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+
+#include <sycl/sycl.hpp>
+
+using namespace sycl;
+using sycl::ext::oneapi::bfloat16;
+
+float foo(float a, float b) {
+  // Convert from float to bfloat16.
+  bfloat16 A{a};
+  bfloat16 B{b};
+
+  // Convert A and B from bfloat16 to float, do addition on floating-point
+  // numbers, then convert the result to bfloat16 and store it in C.
+  bfloat16 C = A + B;
+
+  // Return the result converted from bfloat16 to float.
+  return C;
+}
+
+int main(int argc, char *argv[]) {
+  float data[3] = {7.0f, 8.1f, 0.0f};
+
+  float result_host = foo(7.0f, 8.1f);
+  std::cout << "CPU Result = " << result_host << std::endl;
+  if (std::abs(15.1f - result_host) > 0.1f) {
+    std::cout << "Test failed. Expected CPU Result ~= 15.1" << std::endl;
+    return 1;
+  }
+
+  queue deviceQueue;
+  buffer<float, 1> buf{data, 3};
+
+  deviceQueue.submit([&](handler &cgh) {
+    accessor numbers{buf, cgh, read_write};
+    cgh.single_task([=]() { numbers[2] = foo(numbers[0], numbers[1]); });
+  });
+
+  host_accessor hostOutAcc{buf, read_only};
+  float result_device = hostOutAcc[2];
+  std::cout << "GPU Result = " << result_device << std::endl;
+  if (std::abs(result_host - result_device) > 0.1f) {
+    std::cout << "Test failed. CPU Result !~= GPU result" << std::endl;
+    return 1;
+  }
+
+  return 0;
+}

SYCL/BFloat16/bfloat16_type.cpp

Lines changed: 8 additions & 7 deletions

@@ -1,10 +1,11 @@
-// UNSUPPORTED: cuda || hip
-// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
-// TODO currently the feature isn't supported on most of the devices
-// need to enable the test when the aspect and device_if feature are
-// introduced
-// RUNx: %CPU_RUN_PLACEHOLDER %t.out
-// RUNx: %GPU_RUN_PLACEHOLDER %t.out
+// UNSUPPORTED: hip
+// RUN: %if cuda %{%clangxx -fsycl -fsycl-targets=%sycl_triple -Xsycl-target-backend --cuda-gpu-arch=sm_80 %s -o %t.out %}
+// TODO enable the below when CI supports >=sm_80
+// RUNx: %if cuda %{%GPU_RUN_PLACEHOLDER %t.out %}
+// RUN: %clangxx -fsycl %s -o %t.out
+// TODO currently the feature isn't supported on FPGA.
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUNx: %ACC_RUN_PLACEHOLDER %t.out
 
 //==----------- bfloat16_type.cpp - SYCL bfloat16 type test ----------------==//
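The reworked RUN lines use lit's conditional substitution: the body of %if <feature> %{ ... %} expands only when that feature is present in the lit configuration, so non-CUDA runs simply drop the CUDA-specific line instead of failing. The architecture is pinned because sm_80 is, to my knowledge, the first CUDA architecture with native bfloat16 arithmetic. The idiom in its simplest form (illustrative, not an extra line from the test):

// RUN: %if cuda %{ %GPU_RUN_PLACEHOLDER %t.out %}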

SYCL/BFloat16/bfloat16_type.hpp

Lines changed: 53 additions & 43 deletions

@@ -1,5 +1,5 @@
 #include <iostream>
-#include <sycl/ext/oneapi/experimental/bfloat16.hpp>
+#include <sycl/ext/oneapi/bfloat16.hpp>
 #include <sycl/sycl.hpp>
 
 #include <cmath>

@@ -11,8 +11,7 @@ constexpr size_t N = 100;
 template <typename T> void assert_close(const T &C, const float ref) {
   for (size_t i = 0; i < N; i++) {
     auto diff = C[i] - ref;
-    assert(std::fabs(static_cast<float>(diff)) <
-           std::numeric_limits<float>::epsilon());
+    assert(std::fabs(static_cast<float>(diff)) < 0.1);
   }
 }
 

@@ -21,7 +20,7 @@ void verify_conv_implicit(queue &q, buffer<float, 1> &a, range<1> &r,
   q.submit([&](handler &cgh) {
     auto A = a.get_access<access::mode::read_write>(cgh);
     cgh.parallel_for<class calc_conv>(r, [=](id<1> index) {
-      sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
+      sycl::ext::oneapi::bfloat16 AVal{A[index]};
       A[index] = AVal;
     });
   });

@@ -34,9 +33,8 @@ void verify_conv_explicit(queue &q, buffer<float, 1> &a, range<1> &r,
   q.submit([&](handler &cgh) {
     auto A = a.get_access<access::mode::read_write>(cgh);
     cgh.parallel_for<class calc_conv_impl>(r, [=](id<1> index) {
-      uint16_t AVal =
-          sycl::ext::oneapi::experimental::bfloat16::from_float(A[index]);
-      A[index] = sycl::ext::oneapi::experimental::bfloat16::to_float(AVal);
+      sycl::ext::oneapi::bfloat16 AVal = A[index];
+      A[index] = float(AVal);
     });
   });
 

@@ -52,9 +50,9 @@ void verify_add(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
     auto B = b.get_access<access::mode::read>(cgh);
     auto C = c.get_access<access::mode::write>(cgh);
     cgh.parallel_for<class calc_add_expl>(r, [=](id<1> index) {
-      sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
-      sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
-      sycl::ext::oneapi::experimental::bfloat16 CVal = AVal + BVal;
+      sycl::ext::oneapi::bfloat16 AVal{A[index]};
+      sycl::ext::oneapi::bfloat16 BVal{B[index]};
+      sycl::ext::oneapi::bfloat16 CVal = AVal + BVal;
       C[index] = CVal;
     });
   });

@@ -71,9 +69,9 @@ void verify_sub(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
     auto B = b.get_access<access::mode::read>(cgh);
     auto C = c.get_access<access::mode::write>(cgh);
     cgh.parallel_for<class calc_sub>(r, [=](id<1> index) {
-      sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
-      sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
-      sycl::ext::oneapi::experimental::bfloat16 CVal = AVal - BVal;
+      sycl::ext::oneapi::bfloat16 AVal{A[index]};
+      sycl::ext::oneapi::bfloat16 BVal{B[index]};
+      sycl::ext::oneapi::bfloat16 CVal = AVal - BVal;
       C[index] = CVal;
     });
   });

@@ -88,8 +86,8 @@ void verify_minus(queue &q, buffer<float, 1> &a, range<1> &r, const float ref) {
     auto A = a.get_access<access::mode::read>(cgh);
     auto C = c.get_access<access::mode::write>(cgh);
     cgh.parallel_for<class calc_minus>(r, [=](id<1> index) {
-      sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
-      sycl::ext::oneapi::experimental::bfloat16 CVal = -AVal;
+      sycl::ext::oneapi::bfloat16 AVal{A[index]};
+      sycl::ext::oneapi::bfloat16 CVal = -AVal;
       C[index] = CVal;
     });
   });

@@ -106,9 +104,9 @@ void verify_mul(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
     auto B = b.get_access<access::mode::read>(cgh);
     auto C = c.get_access<access::mode::write>(cgh);
     cgh.parallel_for<class calc_mul>(r, [=](id<1> index) {
-      sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
-      sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
-      sycl::ext::oneapi::experimental::bfloat16 CVal = AVal * BVal;
+      sycl::ext::oneapi::bfloat16 AVal{A[index]};
+      sycl::ext::oneapi::bfloat16 BVal{B[index]};
+      sycl::ext::oneapi::bfloat16 CVal = AVal * BVal;
       C[index] = CVal;
     });
   });

@@ -125,9 +123,9 @@ void verify_div(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
     auto B = b.get_access<access::mode::read>(cgh);
     auto C = c.get_access<access::mode::write>(cgh);
     cgh.parallel_for<class calc_div>(r, [=](id<1> index) {
-      sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
-      sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
-      sycl::ext::oneapi::experimental::bfloat16 CVal = AVal / BVal;
+      sycl::ext::oneapi::bfloat16 AVal{A[index]};
+      sycl::ext::oneapi::bfloat16 BVal{B[index]};
+      sycl::ext::oneapi::bfloat16 CVal = AVal / BVal;
       C[index] = CVal;
     });
   });

@@ -144,19 +142,18 @@ void verify_logic(queue &q, buffer<float, 1> &a, buffer<float, 1> &b,
     auto B = b.get_access<access::mode::read>(cgh);
     auto C = c.get_access<access::mode::write>(cgh);
     cgh.parallel_for<class logic>(r, [=](id<1> index) {
-      sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
-      sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
+      sycl::ext::oneapi::bfloat16 AVal{A[index]};
+      sycl::ext::oneapi::bfloat16 BVal{B[index]};
       if (AVal) {
         if (AVal > BVal || AVal >= BVal || AVal < BVal || AVal <= BVal ||
             !BVal) {
-          sycl::ext::oneapi::experimental::bfloat16 CVal =
-              AVal != BVal ? AVal : BVal;
+          sycl::ext::oneapi::bfloat16 CVal = AVal != BVal ? AVal : BVal;
           CVal--;
           CVal++;
           if (AVal == BVal) {
             CVal -= AVal;
-            CVal *= 3.0;
-            CVal /= 2.0;
+            CVal *= 3.0f;
+            CVal /= 2.0f;
           } else
             CVal += BVal;
           C[index] = CVal;

@@ -179,9 +176,9 @@ int run_tests() {
     return 0;
   }
 
-  std::vector<float> vec_a(N, 5.0);
-  std::vector<float> vec_b(N, 2.0);
-  std::vector<float> vec_b_neg(N, -2.0);
+  std::vector<float> vec_a(N, 5.0f);
+  std::vector<float> vec_b(N, 2.0f);
+  std::vector<float> vec_b_neg(N, -2.0f);
 
   range<1> r(N);
   buffer<float, 1> a{vec_a.data(), r};

@@ -190,19 +187,32 @@
 
   queue q{dev};
 
-  verify_conv_implicit(q, a, r, 5.0);
-  verify_conv_explicit(q, a, r, 5.0);
-  verify_add(q, a, b, r, 7.0);
-  verify_sub(q, a, b, r, 3.0);
-  verify_mul(q, a, b, r, 10.0);
-  verify_div(q, a, b, r, 2.5);
-  verify_logic(q, a, b, r, 7.0);
-  verify_add(q, a, b_neg, r, 3.0);
-  verify_sub(q, a, b_neg, r, 7.0);
-  verify_minus(q, a, r, -5.0);
-  verify_mul(q, a, b_neg, r, -10.0);
-  verify_div(q, a, b_neg, r, -2.5);
-  verify_logic(q, a, b_neg, r, 3.0);
+  verify_conv_implicit(q, a, r, 5.0f);
+  std::cout << "PASS verify_conv_implicit\n";
+  verify_conv_explicit(q, a, r, 5.0f);
+  std::cout << "PASS verify_conv_explicit\n";
+  verify_add(q, a, b, r, 7.0f);
+  std::cout << "PASS verify_add\n";
+  verify_sub(q, a, b, r, 3.0f);
+  std::cout << "PASS verify_sub\n";
+  verify_mul(q, a, b, r, 10.0f);
+  std::cout << "PASS verify_mul\n";
+  verify_div(q, a, b, r, 2.5f);
+  std::cout << "PASS verify_div\n";
+  verify_logic(q, a, b, r, 7.0f);
+  std::cout << "PASS verify_logic\n";
+  verify_add(q, a, b_neg, r, 3.0f);
+  std::cout << "PASS verify_add\n";
+  verify_sub(q, a, b_neg, r, 7.0f);
+  std::cout << "PASS verify_sub\n";
+  verify_minus(q, a, r, -5.0f);
+  std::cout << "PASS verify_minus\n";
+  verify_mul(q, a, b_neg, r, -10.0f);
+  std::cout << "PASS verify_mul\n";
+  verify_div(q, a, b_neg, r, -2.5f);
+  std::cout << "PASS verify_div\n";
+  verify_logic(q, a, b_neg, r, 3.0f);
+  std::cout << "PASS verify_logic\n";
 
   return 0;
 }
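One incidental cleanup: the double literals became float literals (5.0 to 5.0f, 3.0 to 3.0f, and so on). My reading, an assumption rather than anything stated in the commit, is that a double literal in kernel code drags fp64 arithmetic into the kernel, which many GPUs only expose behind aspect::fp64, while a float literal keeps the bfloat16 round trip in single precision:

#include <sycl/sycl.hpp>

// Hypothetical kernel fragment illustrating the distinction:
void scale(sycl::ext::oneapi::bfloat16 &v) {
  // v *= 3.0;  // promotes through double; requires fp64 on the device
  v *= 3.0f;    // bfloat16 -> float multiply -> bfloat16; no fp64 involved
}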

SYCL/BFloat16/bfloat16_type_cuda.cpp

Lines changed: 1 addition & 10 deletions

@@ -12,13 +12,4 @@
 
 #include "bfloat16_type.hpp"
 
-int main() {
-  bool has_bfloat16_aspect = false;
-  for (const auto &plt : sycl::platform::get_platforms()) {
-    if (plt.has(aspect::ext_oneapi_bfloat16))
-      has_bfloat16_aspect = true;
-  }
-
-  if (has_bfloat16_aspect)
-    return run_tests();
-}
+int main() { return run_tests(); }
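The removed guard scanned every platform on the system for the ext_oneapi_bfloat16 aspect that this series moves away from; with bfloat16 no longer experimental, the CUDA variant simply runs unconditionally. For contrast, a hypothetical device-local guard, not part of the commit, would ask the queue's own device instead:

int main() {
  sycl::queue q;
  // Check the device the tests will actually run on, using the
  // math-functions aspect the builtins test switched to (assumption: that
  // aspect is the relevant gate here as well).
  if (q.get_device().has(sycl::aspect::ext_oneapi_bfloat16_math_functions))
    return run_tests();
  return 0;
}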
