intel · kbobrovs · Sep 4, 2022 · Aug 26, 2022 · Sep 1, 2022 · Sep 1, 2022
@@ -1,11 +1,12 @@
-//==--------------- bin_un_cmp_ops_heavy.cpp  - DPC++ ESIMD on-device test -==//
+//==-------------- bin_and_cmp_ops_heavy.cpp  - DPC++ ESIMD on-device test -==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-// REQUIRES: gpu
+// Exclude PVC not to run same test cases twice (via the *_pvc.cpp variant).
+// REQUIRES: gpu && !gpu-intel-pvc
 // UNSUPPORTED: cuda || hip
 // RUN: %clangxx -fsycl %s -o %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
@@ -29,6 +30,7 @@
 
 using namespace sycl;
 using namespace sycl::ext::intel::esimd;
+using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
 
 template <class T1, class T2, int VL, class OpClass, class Ops> class TestID;
 
@@ -68,9 +70,11 @@ template <class T1, class T2, int VL, class OpClass,
           template <class, class, class> class VerifyF,
           template <class, class, class> class InitF, class Ops>
 bool test(Ops ops, queue &q, comp_t<T1, T2, OpClass> epsilon = 0) {
+  using T = comp_t<T1, T2, OpClass>;
   // Log test case info
-  std::cout << "Testing T1=" << typeid(T1).name() << " T2=" << typeid(T2).name()
-            << ", VL=" << VL << " ...\n";
+  std::cout << "Testing T1=" << esimd_test::type_name<T1>()
+            << " T2=" << esimd_test::type_name<T2>() << ", VL=" << VL
+            << " comp type: " << esimd_test::type_name<T>() << "...\n";
   std::cout << "Operations:";
   esimd_test::iterate_ops(ops, [=](OpClass op) {
     std::cout << " '" << esimd_test::Op2Str(op) << "'";
@@ -83,7 +87,6 @@ bool test(Ops ops, queue &q, comp_t<T1, T2, OpClass> epsilon = 0) {
   T2 *B = sycl::malloc_shared<T2>(Size, q);
   constexpr int NumOps = (int)Ops::size;
   int CSize = NumOps * Size;
-  using T = comp_t<T1, T2, OpClass>;
   // Result array. For each pair of A[i] and B[i] elements it reserves NumOps
   // elements to store result of all operations under test applied to the A[i]
   // and B[i]
@@ -181,19 +184,19 @@ template <class T1, class T2, class OpClass> struct verify_strict {
   bool operator()(T res, T gold, OpClass op) { return res == gold; }
 };
 
-#define EQ(x, y, epsilon)                                                      \
-  ((x) > (y) ? (x) - (y) <= epsilon : (y) - (x) <= epsilon)
+#define EQ(x, gold, epsilon)                                                   \
+  ((x == gold) || (std::abs((double)(x - gold) / (double)gold) <= epsilon))
 
-template <class T1, class T2, class OpClass> struct verify_epsilon {
+template <class T1, class T2, class OpClass, bool AllOps = false>
+struct verify_epsilon {
   using T = comp_t<T1, T2, OpClass>;
-  T epsilon;
-  verify_epsilon(T epsilon) : epsilon(epsilon) {}
+  double epsilon;
+  verify_epsilon(double epsilon) : epsilon(epsilon) {}
 
   bool operator()(T res, T gold, OpClass op) {
-    if constexpr (std::is_same_v<OpClass, esimd_test::BinaryOp>) {
-      if (op == esimd_test::BinaryOp::div) {
-        return EQ(res, gold, epsilon);
-      }
+    if (AllOps || ((std::is_same_v<OpClass, esimd_test::BinaryOp>)&&(
+                      op == esimd_test::BinaryOp::div))) {
+      return EQ(res, gold, epsilon);
     }
     return res == gold;
   }
@@ -245,6 +248,8 @@ template <class T1, class T2, class OpClass> struct init_for_shift {
 // shortcuts for less clutter
 template <class T1, class T2, class C> using VSf = verify_strict<T1, T2, C>;
 template <class T1, class T2, class C> using VEf = verify_epsilon<T1, T2, C>;
+template <class T1, class T2, class C>
+using VEfa = verify_epsilon<T1, T2, C, true>;
 template <class T1, class T2, class C> using VNf = verify_n<T1, T2, C>;
 template <class T1, class T2, class C> using IDf = init_default<T1, T2, C>;
 template <class T1, class T2, class C> using ISf = init_for_shift<T1, T2, C>;
@@ -257,7 +262,7 @@ int main(void) {
   bool passed = true;
   using BinOp = esimd_test::BinaryOp;
 
-  auto arith_ops = esimd_test::ArithBinaryOps;
+  auto arith_ops = esimd_test::ArithBinaryOpsNoDiv;
   passed &= test<unsigned char, int, 1, BinOp, VSf, IDf>(arith_ops, q);
   passed &= test<char, float, 7, BinOp, VEf, IDf>(arith_ops, q, 0.000001f);
   passed &= test<short, double, 7, BinOp, VEf, IDf>(arith_ops, q, 1e-15);
@@ -266,16 +271,49 @@ int main(void) {
   passed &= test<half, unsigned int, 32, BinOp, VSf, IDf>(arith_ops, q, 1);
   passed &= test<double, half, 7, BinOp, VSf, IDf>(arith_ops, q);
   passed &= test<short, uint64_t, 7, BinOp, VSf, IDf>(arith_ops, q);
-
-  auto int_ops =
-      esimd_test::IntBinaryOpsNoShift; // different data needed for shift
+#ifdef USE_BF16
+  passed &= test<bfloat16, int, 8, BinOp, VSf, IDf>(arith_ops, q);
+  passed &= test<half, bfloat16, 7, BinOp, VEfa, IDf>(arith_ops, q, 0.03);
+#endif // USE_BF16
+
+  // Test division separately, as error probability is higher.
+  auto div_op = esimd_test::BinaryOpSeq<BinOp::div>{};
+  passed &= test<unsigned char, int, 1, BinOp, VSf, IDf>(div_op, q);
+  passed &= test<char, float, 7, BinOp, VEf, IDf>(div_op, q, 0.000001f);
+#ifndef WA_BUG
+  passed &= test<short, double, 7, BinOp, VSf, IDf>(div_op, q);
+#endif // WA_BUG
+  passed &= test<float, float, 32, BinOp, VEf, IDf>(div_op, q, 0.000001f);
+  passed &= test<half, char, 1, BinOp, verify_n, IDf>(div_op, q, 1);
+  passed &= test<half, unsigned int, 32, BinOp, VSf, IDf>(div_op, q, 1);
+#ifndef WA_BUG
+  passed &= test<double, half, 7, BinOp, VSf, IDf>(div_op, q);
+#endif // WA_BUG
+  passed &= test<short, uint64_t, 7, BinOp, VSf, IDf>(div_op, q);
+#ifdef USE_BF16
+  passed &= test<bfloat16, short, 8, BinOp, VSf, IDf>(div_op, q);
+  passed &= test<half, bfloat16, 7, BinOp, VEfa, IDf>(div_op, q, 0.03);
+#endif // USE_BF16
+
+  auto int_ops = esimd_test::IntBinaryOpsNoShiftNoDivRem;
   passed &= test<unsigned char, unsigned int, 1, BinOp, VSf, IDf>(int_ops, q);
   passed &= test<char, uint64_t, 1, BinOp, VSf, IDf>(int_ops, q);
   passed &= test<uint64_t, char, 32, BinOp, VSf, IDf>(int_ops, q);
   passed &= test<int, short, 1, BinOp, VSf, IDf>(int_ops, q);
   passed &= test<short, int, 8, BinOp, VSf, IDf>(int_ops, q);
   passed &= test<int, int, 7, BinOp, VSf, IDf>(int_ops, q);
 
+  auto int_div_ops = esimd_test::IntBinaryOpsDivRem;
+  passed &=
+      test<unsigned char, unsigned int, 1, BinOp, VSf, IDf>(int_div_ops, q);
+#ifndef WA_BUG
+  passed &= test<char, uint64_t, 1, BinOp, VSf, IDf>(int_div_ops, q);
+#endif // WA_BUG
+  passed &= test<uint64_t, char, 32, BinOp, VSf, IDf>(int_div_ops, q);
+  passed &= test<int, short, 1, BinOp, VSf, IDf>(int_div_ops, q);
+  passed &= test<short, int, 8, BinOp, VSf, IDf>(int_div_ops, q);
+  passed &= test<int, int, 7, BinOp, VSf, IDf>(int_div_ops, q);
+
   auto sh_ops = esimd_test::BinaryOpSeq<BinOp::shl, BinOp::shr>{};
   passed &= test<unsigned char, unsigned int, 1, BinOp, VSf, ISf>(sh_ops, q);
   passed &= test<char, int64_t, 1, BinOp, VSf, ISf>(sh_ops, q);
@@ -294,6 +332,10 @@ int main(void) {
   passed &= test<half, unsigned int, 32, CmpOp, VSf, IDf>(cmp_ops, q, 1);
   passed &= test<double, half, 7, CmpOp, VSf, IDf>(cmp_ops, q);
   passed &= test<short, uint64_t, 7, CmpOp, VSf, IDf>(cmp_ops, q);
+#ifdef USE_BF16
+  passed &= test<bfloat16, int, 32, CmpOp, VSf, IDf>(cmp_ops, q);
+  passed &= test<half, bfloat16, 7, CmpOp, VSf, IDf>(cmp_ops, q);
+#endif // USE_BF16
 
   std::cout << (passed ? "Test PASSED\n" : "Test FAILED\n");
   return passed ? 0 : 1;

@@ -0,0 +1,20 @@
+//==---------- bin_and_cmp_ops_heavy_pvc.cpp  - DPC++ ESIMD on-device test -==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// REQUIRES: gpu-intel-pvc
+// RUN: %clangxx -fsycl %s -o %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+// Tests various binary operations applied to simd objects.
+// PVC variant of the test - adds bfloat16.
+
+// TODO Re-enable cases disabled via WA_BUG.
+
+#define USE_BF16
+#define WA_BUG
+
+#include "bin_and_cmp_ops_heavy.cpp"
@@ -20,6 +20,7 @@
 
 using namespace sycl;
 using namespace sycl::ext::intel::esimd;
+using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
 
 template <class T> struct char_to_int {
   using type = typename std::conditional<
@@ -178,6 +179,7 @@ int main(int argc, char **argv) {
   bool passed = true;
 
   passed &= test<half>(q);
+  passed &= test<bfloat16>(q);
   passed &= test<unsigned char>(q);
   passed &= test<short>(q);
   passed &= test<unsigned short>(q);

@@ -38,6 +38,7 @@
 using namespace sycl;
 using namespace sycl::ext::intel;
 using namespace sycl::ext::intel::esimd;
+using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
 
 template <typename T, int N, typename Flags>
 bool testUSM(queue &Q, T *Src, T *Dst, unsigned Off, Flags) {
@@ -250,9 +251,11 @@ int main(void) {
 #else
   Pass &= testUSM<uint16_t>(Q);
   Pass &= testUSM<float>(Q);
+  Pass &= testUSM<bfloat16>(Q);
 
   Pass &= testAcc<int16_t>(Q);
   Pass &= testAcc<float>(Q);
+  Pass &= testAcc<bfloat16>(Q);
 #endif
 
   std::cout << (Pass ? "Test Passed\n" : "Test FAILED\n");

@@ -24,6 +24,7 @@
 
 using namespace sycl;
 using namespace sycl::ext::intel::esimd;
+using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
 
 template <class T> bool test(queue &q) {
   std::cout << "Testing " << typeid(T).name() << "...\n";
@@ -102,6 +103,7 @@ int main(int argc, char **argv) {
   passed &= test<unsigned int>(q);
   passed &= test<float>(q);
   passed &= test<half>(q);
+  passed &= test<bfloat16>(q);
 
   return passed ? 0 : 1;
 }
@@ -25,6 +25,7 @@
 
 using namespace sycl;
 using namespace sycl::ext::intel::esimd;
+using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
 
 template <class T> class TestID;
 
@@ -102,6 +103,7 @@ int main(int argc, char **argv) {
   bool passed = true;
   passed &= test<int>(q);
   passed &= test<half>(q);
+  passed &= test<bfloat16>(q);
 
   std::cout << (passed ? "Test Passed\n" : "Test FAILED\n");
 

@@ -25,6 +25,7 @@
 using namespace sycl;
 using namespace sycl::ext::intel;
 using namespace sycl::ext::intel::esimd;
+using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
 
 template <typename T, int N> bool test(queue &Q) {
   std::cout << "  Running " << typeid(T).name() << " test, N=" << N << "...\n";
@@ -106,6 +107,14 @@ int main(void) {
   Pass &= test<half, 8>(Q);
   Pass &= test<half, 16>(Q);
   Pass &= test<half, 32>(Q);
+
+  Pass &= test<bfloat16, 1>(Q);
+  Pass &= test<bfloat16, 2>(Q);
+  Pass &= test<bfloat16, 4>(Q);
+  Pass &= test<bfloat16, 8>(Q);
+  Pass &= test<bfloat16, 16>(Q);
+  Pass &= test<bfloat16, 32>(Q);
+
   std::cout << (Pass ? "Test Passed\n" : "Test FAILED\n");
   return Pass ? 0 : 1;
 }
@@ -5,7 +5,8 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-// REQUIRES: gpu
+// Exclude PVC not to run same test cases twice (via the *_pvc.cpp variant).
+// REQUIRES: gpu && !gpu-intel-pvc
 // UNSUPPORTED: cuda || hip
 // RUN: %clangxx -fsycl %s -o %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
@@ -29,6 +30,7 @@
 
 using namespace sycl;
 using namespace sycl::ext::intel::esimd;
+using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
 
 template <class T, int VL, class Ops> class TestID;
 
@@ -51,7 +53,8 @@ template <class T, int VL, class Ops, template <class, int> class SimdT = simd>
 bool test(Ops ops, queue &q) {
   using OpClass = esimd_test::UnaryOp;
   // Log test case info
-  std::cout << "Testing T=" << typeid(T).name() << ", VL=" << VL << " ...\n";
+  std::cout << "Testing T=" << esimd_test::type_name<T>() << ", VL=" << VL
+            << " ...\n";
   std::cout << "Operations:";
   esimd_test::iterate_ops(ops, [=](OpClass op) {
     std::cout << " '" << esimd_test::Op2Str(op) << "'";
@@ -172,14 +175,20 @@ int main(void) {
   passed &= test<float, 32>(mod_ops, q);
   passed &= test<double, 7>(mod_ops, q);
 
-  auto singed_ops = esimd_test::OpSeq<UnOp, UnOp::minus, UnOp::plus>{};
-  passed &= test<char, 7>(singed_ops, q);
-  passed &= test<short, 7>(singed_ops, q);
-  passed &= test<int, 16>(singed_ops, q);
-  passed &= test<int64_t, 16>(singed_ops, q);
-  passed &= test<half, 16>(singed_ops, q);
-  passed &= test<float, 16>(singed_ops, q);
-  passed &= test<double, 16>(singed_ops, q);
+  auto signed_ops = esimd_test::OpSeq<UnOp, UnOp::minus, UnOp::plus>{};
+  passed &= test<char, 7>(signed_ops, q);
+  passed &= test<short, 7>(signed_ops, q);
+  passed &= test<int, 16>(signed_ops, q);
+  passed &= test<int64_t, 16>(signed_ops, q);
+  passed &= test<half, 16>(signed_ops, q);
+  passed &= test<float, 16>(signed_ops, q);
+  passed &= test<double, 16>(signed_ops, q);
+
+#ifdef USE_BF16
+  // TODO: the rest unary operations are not yet supported for bfloat16 on host.
+  auto unary_plus_op = esimd_test::OpSeq<UnOp, UnOp::plus>{};
+  passed &= test<bfloat16, 16>(unary_plus_op, q);
+#endif // USE_BF16
 
   auto bit_ops = esimd_test::OpSeq<UnOp, UnOp::bit_not>{};
   passed &= test<char, 7>(bit_ops, q);

@@ -0,0 +1,17 @@
+//==--------------- unary_ops_heavy_pvc.cpp  - DPC++ ESIMD on-device test --==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// REQUIRES: gpu-intel-pvc
+// RUN: %clangxx -fsycl %s -o %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+// Tests various unary operations applied to simd objects.
+// PVC variant of the test - adds bfloat16.
+
+#define USE_BF16
+
+#include "unary_ops_heavy.cpp"
@@ -438,16 +438,21 @@ static constexpr BinaryOpSeq<BinaryOp::add, BinaryOp::sub, BinaryOp::mul,
                              BinaryOp::div>
     ArithBinaryOps{};
 
+static constexpr BinaryOpSeq<BinaryOp::add, BinaryOp::sub, BinaryOp::mul>
+    ArithBinaryOpsNoDiv{};
+
 static constexpr BinaryOpSeq<BinaryOp::add, BinaryOp::sub, BinaryOp::mul,
                              BinaryOp::div, BinaryOp::rem, BinaryOp::shl,
                              BinaryOp::shr, BinaryOp::bit_or, BinaryOp::bit_and,
                              BinaryOp::bit_xor>
     IntBinaryOps{};
 
 static constexpr BinaryOpSeq<BinaryOp::add, BinaryOp::sub, BinaryOp::mul,
-                             BinaryOp::div, BinaryOp::rem, BinaryOp::bit_or,
-                             BinaryOp::bit_and, BinaryOp::bit_xor>
-    IntBinaryOpsNoShift{};
+                             BinaryOp::bit_or, BinaryOp::bit_and,
+                             BinaryOp::bit_xor>
+    IntBinaryOpsNoShiftNoDivRem{};
+
+static constexpr BinaryOpSeq<BinaryOp::div, BinaryOp::rem> IntBinaryOpsDivRem{};
 
 static constexpr OpSeq<CmpOp, CmpOp::lt, CmpOp::lte, CmpOp::eq, CmpOp::ne,
                        CmpOp::gte, CmpOp::gt>
@@ -538,4 +543,22 @@ std::unique_ptr<T, USMDeleter> usm_malloc_shared(queue q, int n) {
   return std::move(res);
 }
 
+template <class T> static const char *type_name();
+#define TID(T)                                                                 \
+  template <> const char *type_name<T>() { return #T; }
+TID(char) // for some reason, 'char' does not match 'int8_t' during
+          // 'type_name' specialization
+TID(int8_t)
+TID(uint8_t)
+TID(int16_t)
+TID(uint16_t)
+TID(int32_t)
+TID(uint32_t)
+TID(int64_t)
+TID(uint64_t)
+TID(half)
+TID(sycl::ext::oneapi::experimental::bfloat16)
+TID(float)
+TID(double)
+
 } // namespace esimd_test