Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[ESIMD] Add bfloat16 test cases. #1193

Merged
merged 6 commits into from
Sep 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 60 additions & 18 deletions SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
//==--------------- bin_un_cmp_ops_heavy.cpp - DPC++ ESIMD on-device test -==//
//==-------------- bin_and_cmp_ops_heavy.cpp - DPC++ ESIMD on-device test -==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: gpu
// Exclude PVC not to run same test cases twice (via the *_pvc.cpp variant).
// REQUIRES: gpu && !gpu-intel-pvc
// UNSUPPORTED: cuda || hip
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
Expand All @@ -29,6 +30,7 @@

using namespace sycl;
using namespace sycl::ext::intel::esimd;
using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;

template <class T1, class T2, int VL, class OpClass, class Ops> class TestID;

Expand Down Expand Up @@ -68,9 +70,11 @@ template <class T1, class T2, int VL, class OpClass,
template <class, class, class> class VerifyF,
template <class, class, class> class InitF, class Ops>
bool test(Ops ops, queue &q, comp_t<T1, T2, OpClass> epsilon = 0) {
using T = comp_t<T1, T2, OpClass>;
// Log test case info
std::cout << "Testing T1=" << typeid(T1).name() << " T2=" << typeid(T2).name()
<< ", VL=" << VL << " ...\n";
std::cout << "Testing T1=" << esimd_test::type_name<T1>()
<< " T2=" << esimd_test::type_name<T2>() << ", VL=" << VL
<< " comp type: " << esimd_test::type_name<T>() << "...\n";
std::cout << "Operations:";
esimd_test::iterate_ops(ops, [=](OpClass op) {
std::cout << " '" << esimd_test::Op2Str(op) << "'";
Expand All @@ -83,7 +87,6 @@ bool test(Ops ops, queue &q, comp_t<T1, T2, OpClass> epsilon = 0) {
T2 *B = sycl::malloc_shared<T2>(Size, q);
constexpr int NumOps = (int)Ops::size;
int CSize = NumOps * Size;
using T = comp_t<T1, T2, OpClass>;
// Result array. For each pair of A[i] and B[i] elements it reserves NumOps
// elements to store result of all operations under test applied to the A[i]
// and B[i]
Expand Down Expand Up @@ -181,19 +184,19 @@ template <class T1, class T2, class OpClass> struct verify_strict {
bool operator()(T res, T gold, OpClass op) { return res == gold; }
};

#define EQ(x, y, epsilon) \
((x) > (y) ? (x) - (y) <= epsilon : (y) - (x) <= epsilon)
#define EQ(x, gold, epsilon) \
((x == gold) || (std::abs((double)(x - gold) / (double)gold) <= epsilon))

template <class T1, class T2, class OpClass> struct verify_epsilon {
template <class T1, class T2, class OpClass, bool AllOps = false>
struct verify_epsilon {
using T = comp_t<T1, T2, OpClass>;
T epsilon;
verify_epsilon(T epsilon) : epsilon(epsilon) {}
double epsilon;
verify_epsilon(double epsilon) : epsilon(epsilon) {}

bool operator()(T res, T gold, OpClass op) {
if constexpr (std::is_same_v<OpClass, esimd_test::BinaryOp>) {
if (op == esimd_test::BinaryOp::div) {
return EQ(res, gold, epsilon);
}
if (AllOps || ((std::is_same_v<OpClass, esimd_test::BinaryOp>)&&(
op == esimd_test::BinaryOp::div))) {
return EQ(res, gold, epsilon);
}
return res == gold;
}
Expand Down Expand Up @@ -245,6 +248,8 @@ template <class T1, class T2, class OpClass> struct init_for_shift {
// shortcuts for less clutter
template <class T1, class T2, class C> using VSf = verify_strict<T1, T2, C>;
template <class T1, class T2, class C> using VEf = verify_epsilon<T1, T2, C>;
template <class T1, class T2, class C>
using VEfa = verify_epsilon<T1, T2, C, true>;
template <class T1, class T2, class C> using VNf = verify_n<T1, T2, C>;
template <class T1, class T2, class C> using IDf = init_default<T1, T2, C>;
template <class T1, class T2, class C> using ISf = init_for_shift<T1, T2, C>;
Expand All @@ -257,7 +262,7 @@ int main(void) {
bool passed = true;
using BinOp = esimd_test::BinaryOp;

auto arith_ops = esimd_test::ArithBinaryOps;
auto arith_ops = esimd_test::ArithBinaryOpsNoDiv;
passed &= test<unsigned char, int, 1, BinOp, VSf, IDf>(arith_ops, q);
passed &= test<char, float, 7, BinOp, VEf, IDf>(arith_ops, q, 0.000001f);
passed &= test<short, double, 7, BinOp, VEf, IDf>(arith_ops, q, 1e-15);
Expand All @@ -266,16 +271,49 @@ int main(void) {
passed &= test<half, unsigned int, 32, BinOp, VSf, IDf>(arith_ops, q, 1);
passed &= test<double, half, 7, BinOp, VSf, IDf>(arith_ops, q);
passed &= test<short, uint64_t, 7, BinOp, VSf, IDf>(arith_ops, q);

auto int_ops =
esimd_test::IntBinaryOpsNoShift; // different data needed for shift
#ifdef USE_BF16
passed &= test<bfloat16, int, 8, BinOp, VSf, IDf>(arith_ops, q);
passed &= test<half, bfloat16, 7, BinOp, VEfa, IDf>(arith_ops, q, 0.03);
#endif // USE_BF16

// Test division separately, as error probability is higher.
auto div_op = esimd_test::BinaryOpSeq<BinOp::div>{};
passed &= test<unsigned char, int, 1, BinOp, VSf, IDf>(div_op, q);
passed &= test<char, float, 7, BinOp, VEf, IDf>(div_op, q, 0.000001f);
#ifndef WA_BUG
passed &= test<short, double, 7, BinOp, VSf, IDf>(div_op, q);
#endif // WA_BUG
passed &= test<float, float, 32, BinOp, VEf, IDf>(div_op, q, 0.000001f);
passed &= test<half, char, 1, BinOp, verify_n, IDf>(div_op, q, 1);
passed &= test<half, unsigned int, 32, BinOp, VSf, IDf>(div_op, q, 1);
#ifndef WA_BUG
passed &= test<double, half, 7, BinOp, VSf, IDf>(div_op, q);
#endif // WA_BUG
passed &= test<short, uint64_t, 7, BinOp, VSf, IDf>(div_op, q);
#ifdef USE_BF16
passed &= test<bfloat16, short, 8, BinOp, VSf, IDf>(div_op, q);
passed &= test<half, bfloat16, 7, BinOp, VEfa, IDf>(div_op, q, 0.03);
#endif // USE_BF16

auto int_ops = esimd_test::IntBinaryOpsNoShiftNoDivRem;
passed &= test<unsigned char, unsigned int, 1, BinOp, VSf, IDf>(int_ops, q);
passed &= test<char, uint64_t, 1, BinOp, VSf, IDf>(int_ops, q);
passed &= test<uint64_t, char, 32, BinOp, VSf, IDf>(int_ops, q);
passed &= test<int, short, 1, BinOp, VSf, IDf>(int_ops, q);
passed &= test<short, int, 8, BinOp, VSf, IDf>(int_ops, q);
passed &= test<int, int, 7, BinOp, VSf, IDf>(int_ops, q);

auto int_div_ops = esimd_test::IntBinaryOpsDivRem;
passed &=
test<unsigned char, unsigned int, 1, BinOp, VSf, IDf>(int_div_ops, q);
#ifndef WA_BUG
passed &= test<char, uint64_t, 1, BinOp, VSf, IDf>(int_div_ops, q);
#endif // WA_BUG
passed &= test<uint64_t, char, 32, BinOp, VSf, IDf>(int_div_ops, q);
passed &= test<int, short, 1, BinOp, VSf, IDf>(int_div_ops, q);
passed &= test<short, int, 8, BinOp, VSf, IDf>(int_div_ops, q);
passed &= test<int, int, 7, BinOp, VSf, IDf>(int_div_ops, q);

auto sh_ops = esimd_test::BinaryOpSeq<BinOp::shl, BinOp::shr>{};
passed &= test<unsigned char, unsigned int, 1, BinOp, VSf, ISf>(sh_ops, q);
passed &= test<char, int64_t, 1, BinOp, VSf, ISf>(sh_ops, q);
Expand All @@ -294,6 +332,10 @@ int main(void) {
passed &= test<half, unsigned int, 32, CmpOp, VSf, IDf>(cmp_ops, q, 1);
passed &= test<double, half, 7, CmpOp, VSf, IDf>(cmp_ops, q);
passed &= test<short, uint64_t, 7, CmpOp, VSf, IDf>(cmp_ops, q);
#ifdef USE_BF16
passed &= test<bfloat16, int, 32, CmpOp, VSf, IDf>(cmp_ops, q);
passed &= test<half, bfloat16, 7, CmpOp, VSf, IDf>(cmp_ops, q);
#endif // USE_BF16

std::cout << (passed ? "Test PASSED\n" : "Test FAILED\n");
return passed ? 0 : 1;
Expand Down
20 changes: 20 additions & 0 deletions SYCL/ESIMD/api/bin_and_cmp_ops_heavy_pvc.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
//==---------- bin_and_cmp_ops_heavy_pvc.cpp - DPC++ ESIMD on-device test -==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: gpu-intel-pvc
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// Tests various binary operations applied to simd objects.
// PVC variant of the test - adds bfloat16.

// TODO Re-enable cases disabled via WA_BUG.

#define USE_BF16
#define WA_BUG

#include "bin_and_cmp_ops_heavy.cpp"
2 changes: 2 additions & 0 deletions SYCL/ESIMD/api/replicate_smoke.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

using namespace sycl;
using namespace sycl::ext::intel::esimd;
using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;

template <class T> struct char_to_int {
using type = typename std::conditional<
Expand Down Expand Up @@ -178,6 +179,7 @@ int main(int argc, char **argv) {
bool passed = true;

passed &= test<half>(q);
passed &= test<bfloat16>(q);
passed &= test<unsigned char>(q);
passed &= test<short>(q);
passed &= test<unsigned short>(q);
Expand Down
3 changes: 3 additions & 0 deletions SYCL/ESIMD/api/simd_copy_to_from.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
using namespace sycl;
using namespace sycl::ext::intel;
using namespace sycl::ext::intel::esimd;
using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;

template <typename T, int N, typename Flags>
bool testUSM(queue &Q, T *Src, T *Dst, unsigned Off, Flags) {
Expand Down Expand Up @@ -250,9 +251,11 @@ int main(void) {
#else
Pass &= testUSM<uint16_t>(Q);
Pass &= testUSM<float>(Q);
Pass &= testUSM<bfloat16>(Q);

Pass &= testAcc<int16_t>(Q);
Pass &= testAcc<float>(Q);
Pass &= testAcc<bfloat16>(Q);
#endif

std::cout << (Pass ? "Test Passed\n" : "Test FAILED\n");
Expand Down
2 changes: 2 additions & 0 deletions SYCL/ESIMD/api/simd_subscript_operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

using namespace sycl;
using namespace sycl::ext::intel::esimd;
using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;

template <class T> bool test(queue &q) {
std::cout << "Testing " << typeid(T).name() << "...\n";
Expand Down Expand Up @@ -102,6 +103,7 @@ int main(int argc, char **argv) {
passed &= test<unsigned int>(q);
passed &= test<float>(q);
passed &= test<half>(q);
passed &= test<bfloat16>(q);

return passed ? 0 : 1;
}
2 changes: 2 additions & 0 deletions SYCL/ESIMD/api/simd_view_subscript_operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

using namespace sycl;
using namespace sycl::ext::intel::esimd;
using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;

template <class T> class TestID;

Expand Down Expand Up @@ -102,6 +103,7 @@ int main(int argc, char **argv) {
bool passed = true;
passed &= test<int>(q);
passed &= test<half>(q);
passed &= test<bfloat16>(q);

std::cout << (passed ? "Test Passed\n" : "Test FAILED\n");

Expand Down
9 changes: 9 additions & 0 deletions SYCL/ESIMD/api/svm_gather_scatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
using namespace sycl;
using namespace sycl::ext::intel;
using namespace sycl::ext::intel::esimd;
using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;

template <typename T, int N> bool test(queue &Q) {
std::cout << " Running " << typeid(T).name() << " test, N=" << N << "...\n";
Expand Down Expand Up @@ -106,6 +107,14 @@ int main(void) {
Pass &= test<half, 8>(Q);
Pass &= test<half, 16>(Q);
Pass &= test<half, 32>(Q);

Pass &= test<bfloat16, 1>(Q);
Pass &= test<bfloat16, 2>(Q);
Pass &= test<bfloat16, 4>(Q);
Pass &= test<bfloat16, 8>(Q);
Pass &= test<bfloat16, 16>(Q);
Pass &= test<bfloat16, 32>(Q);

std::cout << (Pass ? "Test Passed\n" : "Test FAILED\n");
return Pass ? 0 : 1;
}
29 changes: 19 additions & 10 deletions SYCL/ESIMD/api/unary_ops_heavy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: gpu
// Exclude PVC not to run same test cases twice (via the *_pvc.cpp variant).
// REQUIRES: gpu && !gpu-intel-pvc
// UNSUPPORTED: cuda || hip
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
Expand All @@ -29,6 +30,7 @@

using namespace sycl;
using namespace sycl::ext::intel::esimd;
using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;

template <class T, int VL, class Ops> class TestID;

Expand All @@ -51,7 +53,8 @@ template <class T, int VL, class Ops, template <class, int> class SimdT = simd>
bool test(Ops ops, queue &q) {
using OpClass = esimd_test::UnaryOp;
// Log test case info
std::cout << "Testing T=" << typeid(T).name() << ", VL=" << VL << " ...\n";
std::cout << "Testing T=" << esimd_test::type_name<T>() << ", VL=" << VL
<< " ...\n";
std::cout << "Operations:";
esimd_test::iterate_ops(ops, [=](OpClass op) {
std::cout << " '" << esimd_test::Op2Str(op) << "'";
Expand Down Expand Up @@ -172,14 +175,20 @@ int main(void) {
passed &= test<float, 32>(mod_ops, q);
passed &= test<double, 7>(mod_ops, q);

auto singed_ops = esimd_test::OpSeq<UnOp, UnOp::minus, UnOp::plus>{};
passed &= test<char, 7>(singed_ops, q);
passed &= test<short, 7>(singed_ops, q);
passed &= test<int, 16>(singed_ops, q);
passed &= test<int64_t, 16>(singed_ops, q);
passed &= test<half, 16>(singed_ops, q);
passed &= test<float, 16>(singed_ops, q);
passed &= test<double, 16>(singed_ops, q);
auto signed_ops = esimd_test::OpSeq<UnOp, UnOp::minus, UnOp::plus>{};
passed &= test<char, 7>(signed_ops, q);
passed &= test<short, 7>(signed_ops, q);
passed &= test<int, 16>(signed_ops, q);
passed &= test<int64_t, 16>(signed_ops, q);
passed &= test<half, 16>(signed_ops, q);
passed &= test<float, 16>(signed_ops, q);
passed &= test<double, 16>(signed_ops, q);

#ifdef USE_BF16

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The obvious downside of this approach is the duplication of checks/tests.
I.e. on PVC the lines 178-184 started twice: 1st time as part of unary_ops_heavy.cpp test, and then (the same code + line 189) as part of unary_ops_heavy_pvc.cpp

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, good catch! I will add proper test selection.

// TODO: the rest unary operations are not yet supported for bfloat16 on host.
auto unary_plus_op = esimd_test::OpSeq<UnOp, UnOp::plus>{};
passed &= test<bfloat16, 16>(unary_plus_op, q);
#endif // USE_BF16

auto bit_ops = esimd_test::OpSeq<UnOp, UnOp::bit_not>{};
passed &= test<char, 7>(bit_ops, q);
Expand Down
17 changes: 17 additions & 0 deletions SYCL/ESIMD/api/unary_ops_heavy_pvc.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
//==--------------- unary_ops_heavy_pvc.cpp - DPC++ ESIMD on-device test --==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: gpu-intel-pvc
// RUN: %clangxx -fsycl %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// Tests various unary operations applied to simd objects.
// PVC variant of the test - adds bfloat16.

#define USE_BF16

#include "unary_ops_heavy.cpp"
29 changes: 26 additions & 3 deletions SYCL/ESIMD/esimd_test_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -438,16 +438,21 @@ static constexpr BinaryOpSeq<BinaryOp::add, BinaryOp::sub, BinaryOp::mul,
BinaryOp::div>
ArithBinaryOps{};

static constexpr BinaryOpSeq<BinaryOp::add, BinaryOp::sub, BinaryOp::mul>
ArithBinaryOpsNoDiv{};

static constexpr BinaryOpSeq<BinaryOp::add, BinaryOp::sub, BinaryOp::mul,
BinaryOp::div, BinaryOp::rem, BinaryOp::shl,
BinaryOp::shr, BinaryOp::bit_or, BinaryOp::bit_and,
BinaryOp::bit_xor>
IntBinaryOps{};

static constexpr BinaryOpSeq<BinaryOp::add, BinaryOp::sub, BinaryOp::mul,
BinaryOp::div, BinaryOp::rem, BinaryOp::bit_or,
BinaryOp::bit_and, BinaryOp::bit_xor>
IntBinaryOpsNoShift{};
BinaryOp::bit_or, BinaryOp::bit_and,
BinaryOp::bit_xor>
IntBinaryOpsNoShiftNoDivRem{};

static constexpr BinaryOpSeq<BinaryOp::div, BinaryOp::rem> IntBinaryOpsDivRem{};

static constexpr OpSeq<CmpOp, CmpOp::lt, CmpOp::lte, CmpOp::eq, CmpOp::ne,
CmpOp::gte, CmpOp::gt>
Expand Down Expand Up @@ -538,4 +543,22 @@ std::unique_ptr<T, USMDeleter> usm_malloc_shared(queue q, int n) {
return std::move(res);
}

template <class T> static const char *type_name();
#define TID(T) \
template <> const char *type_name<T>() { return #T; }
TID(char) // for some reason, 'char' does not match 'int8_t' during
// 'type_name' specialization
TID(int8_t)
TID(uint8_t)
TID(int16_t)
TID(uint16_t)
TID(int32_t)
TID(uint32_t)
TID(int64_t)
TID(uint64_t)
TID(half)
TID(sycl::ext::oneapi::experimental::bfloat16)
TID(float)
TID(double)

} // namespace esimd_test