[SYCL] Add BFloat16 feature end-to-end test #390
Merged

Changes from all commits — 13 commits:
636443f  [SYCL] Add BFloat16 feature end-to-end test (MrSidims)
1330c1a  Extend the test (MrSidims)
6f65975  Address comments and resolve several issues (MrSidims)
dc13248  Fix clang-format (MrSidims)
e2210cc  Aspect -> is_gpu (MrSidims)
48b9fcb  Add __SYCL_ENABLE_BF16_CONVERSION__ (MrSidims)
72909e4  Fix (MrSidims)
dc15733  Revert "Fix" (MrSidims)
962daa9  Revert "Add __SYCL_ENABLE_BF16_CONVERSION__" (MrSidims)
871d517  Fix logic test (MrSidims)
7d3aa9a  Disable test (MrSidims)
9092c64  Add codeowners (MrSidims)
4e5d40e  Disable on CUDA (MrSidims)
bfloat16_type.cpp (new file, +206 lines):
// UNSUPPORTED: cuda
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// TODO: the feature currently isn't supported on most devices; enable the
// test once the aspect and device_if features are introduced.
// RUNx: %CPU_RUN_PLACEHOLDER %t.out
// RUNx: %GPU_RUN_PLACEHOLDER %t.out
// RUNx: %ACC_RUN_PLACEHOLDER %t.out
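// Note: lit only executes lines spelled exactly "RUN:", so the "RUNx:" lines
// above are inert placeholders kept for when the test is re-enabled.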

//==----------- bfloat16_type.cpp - SYCL bfloat16 type test ----------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <CL/sycl.hpp>
#include <sycl/ext/intel/experimental/bfloat16.hpp>

#include <cassert>
#include <cmath>
#include <iostream>
#include <limits>
#include <vector>

using namespace cl::sycl;

constexpr size_t N = 100;

template <typename T> void assert_close(const T &C, const float ref) {
  for (size_t i = 0; i < N; i++) {
    auto diff = C[i] - ref;
    assert(std::fabs(static_cast<float>(diff)) <
           std::numeric_limits<float>::epsilon());
  }
}
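// Note: all inputs (5.0, 2.0, -2.0) and expected results used below are
// exactly representable in bfloat16, so the epsilon comparison above is
// effectively an exact-equality check rather than a rounding tolerance.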

void verify_conv_implicit(queue &q, buffer<float, 1> &a, range<1> &r,
                          const float ref) {
  q.submit([&](handler &cgh) {
    auto A = a.get_access<access::mode::read_write>(cgh);
    cgh.parallel_for<class calc_conv>(r, [=](id<1> index) {
      cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]};
      A[index] = AVal;
    });
  });

  assert_close(a.get_access<access::mode::read>(), ref);
}

void verify_conv_explicit(queue &q, buffer<float, 1> &a, range<1> &r,
                          const float ref) {
  q.submit([&](handler &cgh) {
    auto A = a.get_access<access::mode::read_write>(cgh);
    cgh.parallel_for<class calc_conv_impl>(r, [=](id<1> index) {
      uint16_t AVal =
          cl::sycl::ext::intel::experimental::bfloat16::from_float(A[index]);
      A[index] = cl::sycl::ext::intel::experimental::bfloat16::to_float(AVal);
    });
  });

  assert_close(a.get_access<access::mode::read>(), ref);
}

void verify_add(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
                const float ref) {
  buffer<float, 1> c{r};

  q.submit([&](handler &cgh) {
    auto A = a.get_access<access::mode::read>(cgh);
    auto B = b.get_access<access::mode::read>(cgh);
    auto C = c.get_access<access::mode::write>(cgh);
    cgh.parallel_for<class calc_add_expl>(r, [=](id<1> index) {
      cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]};
      cl::sycl::ext::intel::experimental::bfloat16 BVal{B[index]};
      cl::sycl::ext::intel::experimental::bfloat16 CVal = AVal + BVal;
      C[index] = CVal;
    });
  });

  assert_close(c.get_access<access::mode::read>(), ref);
}

void verify_sub(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
                const float ref) {
  buffer<float, 1> c{r};

  q.submit([&](handler &cgh) {
    auto A = a.get_access<access::mode::read>(cgh);
    auto B = b.get_access<access::mode::read>(cgh);
    auto C = c.get_access<access::mode::write>(cgh);
    cgh.parallel_for<class calc_sub>(r, [=](id<1> index) {
      cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]};
      cl::sycl::ext::intel::experimental::bfloat16 BVal{B[index]};
      cl::sycl::ext::intel::experimental::bfloat16 CVal = AVal - BVal;
      C[index] = CVal;
    });
  });

  assert_close(c.get_access<access::mode::read>(), ref);
}

void verify_mul(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
                const float ref) {
  buffer<float, 1> c{r};

  q.submit([&](handler &cgh) {
    auto A = a.get_access<access::mode::read>(cgh);
    auto B = b.get_access<access::mode::read>(cgh);
    auto C = c.get_access<access::mode::write>(cgh);
    cgh.parallel_for<class calc_mul>(r, [=](id<1> index) {
      cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]};
      cl::sycl::ext::intel::experimental::bfloat16 BVal{B[index]};
      cl::sycl::ext::intel::experimental::bfloat16 CVal = AVal * BVal;
      C[index] = CVal;
    });
  });

  assert_close(c.get_access<access::mode::read>(), ref);
}

void verify_div(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
                const float ref) {
  buffer<float, 1> c{r};

  q.submit([&](handler &cgh) {
    auto A = a.get_access<access::mode::read>(cgh);
    auto B = b.get_access<access::mode::read>(cgh);
    auto C = c.get_access<access::mode::write>(cgh);
    cgh.parallel_for<class calc_div>(r, [=](id<1> index) {
      cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]};
      cl::sycl::ext::intel::experimental::bfloat16 BVal{B[index]};
      cl::sycl::ext::intel::experimental::bfloat16 CVal = AVal / BVal;
      C[index] = CVal;
    });
  });

  assert_close(c.get_access<access::mode::read>(), ref);
}
void verify_logic(queue &q, buffer<float, 1> &a, buffer<float, 1> &b,
                  range<1> &r, const float ref) {
  buffer<float, 1> c{r};

  q.submit([&](handler &cgh) {
    auto A = a.get_access<access::mode::read>(cgh);
    auto B = b.get_access<access::mode::read>(cgh);
    auto C = c.get_access<access::mode::write>(cgh);
    cgh.parallel_for<class logic>(r, [=](id<1> index) {
      cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]};
      cl::sycl::ext::intel::experimental::bfloat16 BVal{B[index]};
      if (AVal) {
        if (AVal > BVal || AVal >= BVal || AVal < BVal || AVal <= BVal ||
            !BVal) {
          cl::sycl::ext::intel::experimental::bfloat16 CVal =
              AVal != BVal ? AVal : BVal;
          CVal--;
          CVal++;
          if (AVal == BVal) {
            CVal -= AVal;
            CVal *= 3.0;
            CVal /= 2.0;
          } else
            CVal += BVal;
          // Store the result; without this write the buffer checked below
          // would be left uninitialized.
          C[index] = CVal;
        }
      }
    });
  });

  assert_close(c.get_access<access::mode::read>(), ref);
}

int main() {
  device dev{default_selector()};

  // TODO: replace the is_gpu check with an extension/aspect check once the
  // corresponding part of the implementation is ready.
  if (!dev.is_gpu()) {
    std::cout << "This device doesn't support the bfloat16 conversion feature"
              << std::endl;
    return 0;
  }

  std::vector<float> vec_a(N, 5.0);
  std::vector<float> vec_b(N, 2.0);
  std::vector<float> vec_b_neg(N, -2.0);

  range<1> r(N);
  buffer<float, 1> a{vec_a.data(), r};
  buffer<float, 1> b{vec_b.data(), r};
  buffer<float, 1> b_neg{vec_b_neg.data(), r};

  queue q{dev};

  verify_conv_implicit(q, a, r, 5.0);
  verify_conv_explicit(q, a, r, 5.0);
  verify_add(q, a, b, r, 7.0);
  verify_sub(q, a, b, r, 3.0);
  verify_mul(q, a, b, r, 10.0);
  verify_div(q, a, b, r, 2.5);
  verify_logic(q, a, b, r, 7.0);
  verify_add(q, a, b_neg, r, 3.0);
  verify_sub(q, a, b_neg, r, 7.0);
  verify_mul(q, a, b_neg, r, -10.0);
  verify_div(q, a, b_neg, r, -2.5);
  verify_logic(q, a, b_neg, r, 3.0);
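  // Worked example for verify_logic with A = 5 and B = 2: AVal != BVal, so
  // CVal starts at 5; the CVal--/CVal++ pair cancels; AVal != BVal takes the
  // else branch, so CVal += BVal gives 7. With B = -2 the same path gives 3.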

  return 0;
}
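For intuition about what from_float/to_float model, here is a minimal
standalone sketch, assuming the common bfloat16 storage scheme in which a
bfloat16 is the upper 16 bits of an IEEE-754 binary32 value. The
bf16_from_float/bf16_to_float helpers are hypothetical illustrations
(truncating rather than rounding), not the SYCL extension's actual
implementation:

#include <cassert>
#include <cstdint>
#include <cstring>

// Hypothetical helpers: bfloat16 stored as the high 16 bits of a binary32.
uint16_t bf16_from_float(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));     // type-pun without UB
  return static_cast<uint16_t>(bits >> 16); // keep sign, exponent, top 7
                                            // mantissa bits
}

float bf16_to_float(uint16_t h) {
  uint32_t bits = static_cast<uint32_t>(h) << 16; // low mantissa bits are zero
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}

int main() {
  // 5.0f needs only a few significant mantissa bits, so it survives the
  // truncation exactly -- which is why the test above can expect exact
  // round-trip results for its chosen inputs.
  assert(bf16_to_float(bf16_from_float(5.0f)) == 5.0f);
}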