Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL] Test corrections after moving bfloat16 support out of experimental status. #1129

Merged
merged 92 commits into from
Nov 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
92 commits
Select commit Hold shift + click to select a range
f6bb8a3
[SYCL] Localize variables declared in inline asms.
rdeodhar Jan 20, 2021
bb523e8
[SYCL] Test for disabling range rounding.
rdeodhar Jan 25, 2021
d2d1da9
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Jan 26, 2021
cda63e1
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Jan 26, 2021
69b6a46
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Feb 4, 2021
d49e0f7
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Mar 23, 2021
9af3a0d
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Mar 30, 2021
1e7b325
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Apr 1, 2021
acd2770
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Apr 6, 2021
a9b4c23
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Apr 22, 2021
49d9fca
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar May 8, 2021
8d3654d
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Jun 7, 2021
47f7ef9
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Aug 18, 2021
03a870a
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Oct 8, 2021
23b318e
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Nov 5, 2021
7dcf812
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Feb 9, 2022
37b6097
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Feb 24, 2022
84b320a
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Mar 4, 2022
2659c72
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Mar 23, 2022
de6173b
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Apr 7, 2022
fb1b230
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Apr 26, 2022
45b3951
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Jul 13, 2022
a4f01f3
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Aug 4, 2022
d69e8a1
[SYCL] Adjustments to tests to account for moving bfloat16 support ou…
rdeodhar Aug 4, 2022
8da975e
Formatting change.
rdeodhar Aug 4, 2022
9e34353
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Aug 24, 2022
d7b1b15
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Aug 25, 2022
aa5f69c
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Aug 31, 2022
0717d52
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Sep 8, 2022
29452c0
Corrections for bfloat16 moved out of experimental.
rdeodhar Sep 9, 2022
b2ade7a
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Sep 12, 2022
bef81ca
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Sep 14, 2022
63b05d1
Correct test to run on multiple root devices.
rdeodhar Sep 14, 2022
17ad74e
Merge branch 'intel' of https://github.com/rdeodhar/llvm-test-suite i…
rdeodhar Sep 16, 2022
1ba36eb
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Sep 16, 2022
cb629ff
Removed test correction since test is now disabled.
rdeodhar Sep 16, 2022
6b0e62b
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Sep 16, 2022
30b81b1
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Sep 20, 2022
df36f2d
Modifications and additions to bfloat16 tests.
rdeodhar Sep 20, 2022
5bf2384
Formatting change.
rdeodhar Sep 20, 2022
e9683a1
Formatting change.
rdeodhar Sep 20, 2022
e7a9191
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Sep 21, 2022
603d4b6
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Sep 22, 2022
23077db
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Sep 22, 2022
c066b3a
Enable bfloat16 test on GPU only.
rdeodhar Sep 23, 2022
44e492b
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Sep 26, 2022
3028e7e
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Sep 27, 2022
b0c0848
Reenable bfloat16 test on CPU.
rdeodhar Sep 27, 2022
6cba127
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Sep 27, 2022
64e4bbd
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Sep 29, 2022
7b0c9a2
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Oct 1, 2022
8f51bc2
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Oct 4, 2022
49a522a
Corrected test to run on host and device.
rdeodhar Oct 4, 2022
fcd6c3e
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Oct 4, 2022
7e84648
Update SYCL/BFloat16/bfloat16_type.cpp
rdeodhar Oct 4, 2022
c60ab99
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Oct 4, 2022
314719e
Merge branch 'l30' of https://github.com/rdeodhar/llvm-test-suite int…
rdeodhar Oct 4, 2022
3bb080c
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Oct 6, 2022
2b77ff2
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Oct 7, 2022
dfc9a67
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Oct 8, 2022
e021ad7
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Oct 12, 2022
af254b3
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Oct 24, 2022
8f492d6
Added test for use of fallback bfloat16 library.
rdeodhar Oct 25, 2022
4805061
Formatting change
rdeodhar Oct 25, 2022
33c7d2b
Enhanced the bfloat16 fallback test.
rdeodhar Oct 25, 2022
21dcd20
Formatting change.
rdeodhar Oct 25, 2022
cbfe491
Adjusted bfloat16 aspect.
rdeodhar Oct 25, 2022
3721b97
Changes to test to set up required environment.
rdeodhar Oct 26, 2022
44c6ccb
Adjustment to test.
rdeodhar Oct 26, 2022
c7d9386
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Oct 26, 2022
c3155bf
Test adjustment.
rdeodhar Oct 26, 2022
f2edee8
Reenabled some tests with specific GPU requirement.
rdeodhar Oct 26, 2022
9634b60
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Oct 27, 2022
6ee676a
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Nov 7, 2022
2c6f4ce
Changes for bfloat16 moved out of experimental.
rdeodhar Nov 7, 2022
43768d8
Aspect bfloat16 has been removed.
rdeodhar Nov 8, 2022
5ba6d74
Aspect bfloat16 has been replaced by bfloat16_math_functions.
rdeodhar Nov 8, 2022
ccc85df
Check aspect fp16 before using sycl::half.
rdeodhar Nov 8, 2022
3b60cb1
Account for lack of fp16 support on some devices.
rdeodhar Nov 8, 2022
1b0d6f2
Reduce expected precision of bfloat16 calculations.
rdeodhar Nov 9, 2022
0bc6438
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Nov 9, 2022
8d9f34f
Adjustments for bfloat16 header.
rdeodhar Nov 9, 2022
8792d0a
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Nov 15, 2022
f1975c5
Test adjustment.
rdeodhar Nov 15, 2022
80c3468
Replace double constants with float.
rdeodhar Nov 18, 2022
db0a1bc
Replace double constants with float.
rdeodhar Nov 18, 2022
f6c14e0
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Nov 18, 2022
33a36ce
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Nov 21, 2022
8511e52
Fix test to return 0 from main.
rdeodhar Nov 22, 2022
6564c68
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Nov 22, 2022
8d44444
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Nov 23, 2022
6928b2a
Merge branch 'intel' of https://github.com/intel/llvm-test-suite into…
rdeodhar Nov 25, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions SYCL/BFloat16/bfloat16_builtins.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include <vector>

using namespace sycl;
using namespace sycl::ext::oneapi::experimental;
using namespace sycl::ext::oneapi;

constexpr int N = 60; // divisible by all tested array sizes
constexpr float bf16_eps = 0.00390625;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In Line 225, should it be ext_oneapi_bfloat16_math_functions?

Thanks

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it should be ext_oneapi_bfloat16_math_functions. Made the change.

Expand Down Expand Up @@ -222,7 +222,7 @@ bool check(float a, float b) {
int main() {
queue q;

if (q.get_device().has(aspect::ext_oneapi_bfloat16)) {
if (q.get_device().has(aspect::ext_oneapi_bfloat16_math_functions)) {
std::vector<float> a(N), b(N), c(N);
int err = 0;

Expand Down
70 changes: 70 additions & 0 deletions SYCL/BFloat16/bfloat16_conversions.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// UNSUPPORTED: hip
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out
// Currently the feature is supported only on CPU and GPU, natively or by
// software emulation.
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUNx: %ACC_RUN_PLACEHOLDER %t.out

//==---------- bfloat16_conversions.cpp - SYCL bfloat16 type test ---------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//

#include <iostream>
#include <sycl/sycl.hpp>

using namespace sycl;

template <typename T> T calculate(T a, T b) {
sycl::ext::oneapi::bfloat16 x = -a;
sycl::ext::oneapi::bfloat16 y = b;
sycl::ext::oneapi::bfloat16 z = x + y;
T result = z;
return result;
}

// Run calculate() inside a single-task kernel on the device behind Q and
// check the bfloat16-rounded sum. Returns 0 on success, 1 on mismatch.
template <typename T> int test_device(queue Q) {
  T elems[3] = {-7.0f, 8.1f, 0.0f};

  buffer<T, 1> dataBuf{elems, 3};
  Q.submit([&](handler &cgh) {
    accessor acc{dataBuf, cgh, read_write};
    cgh.single_task([=]() { acc[2] = calculate(acc[0], acc[1]); });
  });

  // Creating a host_accessor blocks until the kernel has finished writing.
  host_accessor out{dataBuf, read_only};
  std::cout << "Device Result = " << out[2] << std::endl;
  // 15.125 is (-(-7.0)) + 8.1 after rounding through bfloat16.
  return out[2] == 15.125f ? 0 : 1;
}

// Evaluate calculate() entirely on the host: (-(-5.6)) + (-1.1) rounded
// through bfloat16 should equal 4.5. Returns 0 on success, 1 on failure.
template <typename T> int test_host() {
  T first{-5.6f};
  T second{-1.1f};
  T sum = calculate(first, second);
  std::cout << "Host Result = " << sum << std::endl;
  return sum == 4.5f ? 0 : 1;
}

// Entry point: run the host and device conversion checks for both
// sycl::half and float, OR-ing the per-test results together.
// Returns nonzero if any check failed.
int main() {
  queue Q;

  int failures = test_host<sycl::half>();
  failures |= test_host<float>();
  // sycl::half kernels are only valid on devices reporting the fp16 aspect.
  if (Q.get_device().has(aspect::fp16))
    failures |= test_device<sycl::half>(Q);
  failures |= test_device<float>(Q);

  if (failures)
    std::cout << "FAIL\n";
  else
    std::cout << "PASS\n";

  return failures;
}
83 changes: 83 additions & 0 deletions SYCL/BFloat16/bfloat16_example.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
///
/// Check if bfloat16 example works using fallback libraries
///

// REQUIRES: opencl-aot, ocloc, cpu, gpu-intel-gen9
// UNSUPPORTED: cuda
// CUDA is not compatible with SPIR.

// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-targets=spir64 %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device gen9" %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend "-device *" %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device gen9" %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can add the following test cases:

// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out

These are interesting because they compile for PVC (native bfloat support) and run on CPU (fallback support).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen "-device pvc" %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out

#include <sycl/sycl.hpp>

using namespace sycl;
using sycl::ext::oneapi::bfloat16;

float foo(float a, float b) {
// Convert from float to bfloat16.
bfloat16 A{a};
bfloat16 B{b};

// Convert A and B from bfloat16 to float, do addition on floating-point
// numbers, then convert the result to bfloat16 and store it in C.
bfloat16 C = A + B;

// Return the result converted from bfloat16 to float.
return C;
}

int main(int argc, char *argv[]) {
float data[3] = {7.0f, 8.1f, 0.0f};

float result_host = foo(7.0f, 8.1f);
std::cout << "CPU Result = " << result_host << std::endl;
if (std::abs(15.1f - result_host) > 0.1f) {
std::cout << "Test failed. Expected CPU Result ~= 15.1" << std::endl;
return 1;
}

queue deviceQueue;
buffer<float, 1> buf{data, 3};

deviceQueue.submit([&](handler &cgh) {
accessor numbers{buf, cgh, read_write};
cgh.single_task([=]() { numbers[2] = foo(numbers[0], numbers[1]); });
});

host_accessor hostOutAcc{buf, read_only};
float result_device = hostOutAcc[2];
std::cout << "GPU Result = " << result_device << std::endl;
if (std::abs(result_host - result_device) > 0.1f) {
std::cout << "Test failed. CPU Result !~= GPU result" << std::endl;
return 1;
}

return 0;
}
15 changes: 8 additions & 7 deletions SYCL/BFloat16/bfloat16_type.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
// UNSUPPORTED: cuda || hip
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// TODO currently the feature isn't supported on most of the devices
// need to enable the test when the aspect and device_if feature are
// introduced
// RUNx: %CPU_RUN_PLACEHOLDER %t.out
// RUNx: %GPU_RUN_PLACEHOLDER %t.out
// UNSUPPORTED: hip
// RUN: %if cuda %{%clangxx -fsycl -fsycl-targets=%sycl_triple -Xsycl-target-backend --cuda-gpu-arch=sm_80 %s -o %t.out %}
// TODO enable the below when CI supports >=sm_80
// RUNx: %if cuda %{%GPU_RUN_PLACEHOLDER %t.out %}
// RUN: %clangxx -fsycl %s -o %t.out
// TODO currently the feature isn't supported on FPGA.
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUNx: %ACC_RUN_PLACEHOLDER %t.out

//==----------- bfloat16_type.cpp - SYCL bfloat16 type test ----------------==//
Expand Down
96 changes: 53 additions & 43 deletions SYCL/BFloat16/bfloat16_type.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include <iostream>
#include <sycl/ext/oneapi/experimental/bfloat16.hpp>
#include <sycl/ext/oneapi/bfloat16.hpp>
#include <sycl/sycl.hpp>

#include <cmath>
Expand All @@ -11,8 +11,7 @@ constexpr size_t N = 100;
template <typename T> void assert_close(const T &C, const float ref) {
for (size_t i = 0; i < N; i++) {
auto diff = C[i] - ref;
assert(std::fabs(static_cast<float>(diff)) <
std::numeric_limits<float>::epsilon());
assert(std::fabs(static_cast<float>(diff)) < 0.1);
}
}

Expand All @@ -21,7 +20,7 @@ void verify_conv_implicit(queue &q, buffer<float, 1> &a, range<1> &r,
q.submit([&](handler &cgh) {
auto A = a.get_access<access::mode::read_write>(cgh);
cgh.parallel_for<class calc_conv>(r, [=](id<1> index) {
sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
sycl::ext::oneapi::bfloat16 AVal{A[index]};
A[index] = AVal;
});
});
Expand All @@ -34,9 +33,8 @@ void verify_conv_explicit(queue &q, buffer<float, 1> &a, range<1> &r,
q.submit([&](handler &cgh) {
auto A = a.get_access<access::mode::read_write>(cgh);
cgh.parallel_for<class calc_conv_impl>(r, [=](id<1> index) {
uint16_t AVal =
sycl::ext::oneapi::experimental::bfloat16::from_float(A[index]);
A[index] = sycl::ext::oneapi::experimental::bfloat16::to_float(AVal);
sycl::ext::oneapi::bfloat16 AVal = A[index];
A[index] = float(AVal);
});
});

Expand All @@ -52,9 +50,9 @@ void verify_add(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
auto B = b.get_access<access::mode::read>(cgh);
auto C = c.get_access<access::mode::write>(cgh);
cgh.parallel_for<class calc_add_expl>(r, [=](id<1> index) {
sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
sycl::ext::oneapi::experimental::bfloat16 CVal = AVal + BVal;
sycl::ext::oneapi::bfloat16 AVal{A[index]};
sycl::ext::oneapi::bfloat16 BVal{B[index]};
sycl::ext::oneapi::bfloat16 CVal = AVal + BVal;
C[index] = CVal;
});
});
Expand All @@ -71,9 +69,9 @@ void verify_sub(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
auto B = b.get_access<access::mode::read>(cgh);
auto C = c.get_access<access::mode::write>(cgh);
cgh.parallel_for<class calc_sub>(r, [=](id<1> index) {
sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
sycl::ext::oneapi::experimental::bfloat16 CVal = AVal - BVal;
sycl::ext::oneapi::bfloat16 AVal{A[index]};
sycl::ext::oneapi::bfloat16 BVal{B[index]};
sycl::ext::oneapi::bfloat16 CVal = AVal - BVal;
C[index] = CVal;
});
});
Expand All @@ -88,8 +86,8 @@ void verify_minus(queue &q, buffer<float, 1> &a, range<1> &r, const float ref) {
auto A = a.get_access<access::mode::read>(cgh);
auto C = c.get_access<access::mode::write>(cgh);
cgh.parallel_for<class calc_minus>(r, [=](id<1> index) {
sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
sycl::ext::oneapi::experimental::bfloat16 CVal = -AVal;
sycl::ext::oneapi::bfloat16 AVal{A[index]};
sycl::ext::oneapi::bfloat16 CVal = -AVal;
C[index] = CVal;
});
});
Expand All @@ -106,9 +104,9 @@ void verify_mul(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
auto B = b.get_access<access::mode::read>(cgh);
auto C = c.get_access<access::mode::write>(cgh);
cgh.parallel_for<class calc_mul>(r, [=](id<1> index) {
sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
sycl::ext::oneapi::experimental::bfloat16 CVal = AVal * BVal;
sycl::ext::oneapi::bfloat16 AVal{A[index]};
sycl::ext::oneapi::bfloat16 BVal{B[index]};
sycl::ext::oneapi::bfloat16 CVal = AVal * BVal;
C[index] = CVal;
});
});
Expand All @@ -125,9 +123,9 @@ void verify_div(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
auto B = b.get_access<access::mode::read>(cgh);
auto C = c.get_access<access::mode::write>(cgh);
cgh.parallel_for<class calc_div>(r, [=](id<1> index) {
sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
sycl::ext::oneapi::experimental::bfloat16 CVal = AVal / BVal;
sycl::ext::oneapi::bfloat16 AVal{A[index]};
sycl::ext::oneapi::bfloat16 BVal{B[index]};
sycl::ext::oneapi::bfloat16 CVal = AVal / BVal;
C[index] = CVal;
});
});
Expand All @@ -144,19 +142,18 @@ void verify_logic(queue &q, buffer<float, 1> &a, buffer<float, 1> &b,
auto B = b.get_access<access::mode::read>(cgh);
auto C = c.get_access<access::mode::write>(cgh);
cgh.parallel_for<class logic>(r, [=](id<1> index) {
sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
sycl::ext::oneapi::bfloat16 AVal{A[index]};
sycl::ext::oneapi::bfloat16 BVal{B[index]};
if (AVal) {
if (AVal > BVal || AVal >= BVal || AVal < BVal || AVal <= BVal ||
!BVal) {
sycl::ext::oneapi::experimental::bfloat16 CVal =
AVal != BVal ? AVal : BVal;
sycl::ext::oneapi::bfloat16 CVal = AVal != BVal ? AVal : BVal;
CVal--;
CVal++;
if (AVal == BVal) {
CVal -= AVal;
CVal *= 3.0;
CVal /= 2.0;
CVal *= 3.0f;
CVal /= 2.0f;
} else
CVal += BVal;
C[index] = CVal;
Expand All @@ -179,9 +176,9 @@ int run_tests() {
return 0;
}

std::vector<float> vec_a(N, 5.0);
std::vector<float> vec_b(N, 2.0);
std::vector<float> vec_b_neg(N, -2.0);
std::vector<float> vec_a(N, 5.0f);
std::vector<float> vec_b(N, 2.0f);
std::vector<float> vec_b_neg(N, -2.0f);

range<1> r(N);
buffer<float, 1> a{vec_a.data(), r};
Expand All @@ -190,19 +187,32 @@ int run_tests() {

queue q{dev};

verify_conv_implicit(q, a, r, 5.0);
verify_conv_explicit(q, a, r, 5.0);
verify_add(q, a, b, r, 7.0);
verify_sub(q, a, b, r, 3.0);
verify_mul(q, a, b, r, 10.0);
verify_div(q, a, b, r, 2.5);
verify_logic(q, a, b, r, 7.0);
verify_add(q, a, b_neg, r, 3.0);
verify_sub(q, a, b_neg, r, 7.0);
verify_minus(q, a, r, -5.0);
verify_mul(q, a, b_neg, r, -10.0);
verify_div(q, a, b_neg, r, -2.5);
verify_logic(q, a, b_neg, r, 3.0);
verify_conv_implicit(q, a, r, 5.0f);
std::cout << "PASS verify_conv_implicit\n";
verify_conv_explicit(q, a, r, 5.0f);
std::cout << "PASS verify_conv_explicit\n";
verify_add(q, a, b, r, 7.0f);
std::cout << "PASS verify_add\n";
verify_sub(q, a, b, r, 3.0f);
std::cout << "PASS verify_sub\n";
verify_mul(q, a, b, r, 10.0f);
std::cout << "PASS verify_mul\n";
verify_div(q, a, b, r, 2.5f);
std::cout << "PASS verify_div\n";
verify_logic(q, a, b, r, 7.0f);
std::cout << "PASS verify_logic\n";
verify_add(q, a, b_neg, r, 3.0f);
std::cout << "PASS verify_add\n";
verify_sub(q, a, b_neg, r, 7.0f);
std::cout << "PASS verify_sub\n";
verify_minus(q, a, r, -5.0f);
std::cout << "PASS verify_minus\n";
verify_mul(q, a, b_neg, r, -10.0f);
std::cout << "PASS verify_mul\n";
verify_div(q, a, b_neg, r, -2.5f);
std::cout << "PASS verify_div\n";
verify_logic(q, a, b_neg, r, 3.0f);
std::cout << "PASS verify_logic\n";

return 0;
}
11 changes: 1 addition & 10 deletions SYCL/BFloat16/bfloat16_type_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,4 @@

#include "bfloat16_type.hpp"

int main() {
bool has_bfloat16_aspect = false;
for (const auto &plt : sycl::platform::get_platforms()) {
if (plt.has(aspect::ext_oneapi_bfloat16))
has_bfloat16_aspect = true;
}

if (has_bfloat16_aspect)
return run_tests();
}
// Entry point: run the shared bfloat16 type tests unconditionally
// (the bfloat16 aspect check was removed — presumably the aspect no
// longer exists now that bfloat16 left experimental; confirm upstream).
int main() { return run_tests(); }
Loading