[SYCL] Fix CUDA tests using bfloat16 (#1421)

steffenlarsen · web-flow · commit 4c2dc3361837 · 2022-11-29T22:07:59.000-08:00
* [SYCL] Fix CUDA tests using bfloat16
* Add missing using in element_wise_wi_marray_legacy

Signed-off-by: Larsen, Steffen <steffen.larsen@intel.com>
diff --git a/SYCL/BFloat16/bfloat16_builtins.cpp b/SYCL/BFloat16/bfloat16_builtins.cpp
@@ -13,6 +13,7 @@
 
 using namespace sycl;
 using namespace sycl::ext::oneapi;
+using namespace sycl::ext::oneapi::experimental;
 
 constexpr int N = 60; // divisible by all tested array sizes
 constexpr float bf16_eps = 0.00390625;
diff --git a/SYCL/Matrix/element_wise_all_ops_bf16.cpp b/SYCL/Matrix/element_wise_all_ops_bf16.cpp
@@ -17,6 +17,7 @@
 
 using namespace sycl;
 using namespace sycl::ext::intel;
+using namespace sycl::ext::oneapi;
 using namespace sycl::ext::oneapi::experimental::matrix;
 
 #define SG_SZ 16
diff --git a/SYCL/Matrix/element_wise_wi_marray_legacy.cpp b/SYCL/Matrix/element_wise_wi_marray_legacy.cpp
@@ -13,6 +13,7 @@
 #include <sycl/sycl.hpp>
 
 using namespace sycl;
+using namespace sycl::ext::oneapi;
 using namespace sycl::ext::oneapi::experimental;
 using namespace sycl::ext::oneapi::experimental::matrix;
 
diff --git a/SYCL/Matrix/joint_matrix_tensorcores_legacy.cpp b/SYCL/Matrix/joint_matrix_tensorcores_legacy.cpp
@@ -66,13 +66,8 @@ T2 matrix_ref_mn(const int &m, const int &n, T1 *A, T1 *B, T2 *C) {
   if constexpr (std::is_same<T1, uint16_t>::value) {
     for (int k = 0; k < Big_K; k++)
       res += make_fp32(A[m * Big_K + k]) * make_fp32(B[k * Big_N + n]);
-  } else if constexpr (std::is_same<T1, bfloat16>::value) {
-    for (int k = 0; k < Big_K; k++)
-      res +=
-          make_fp32(A[m * Big_K + k].raw()) * make_fp32(B[k * Big_N + n].raw());
   } else {
     for (int k = 0; k < Big_K; k++)
-
       res +=
           static_cast<T2>(A[m * Big_K + k]) * static_cast<T2>(B[k * Big_N + n]);
   }