intel
diff --git a/‎SYCL/Matrix/joint_matrix_bfloat16_col_major.cpp
Lines changed: 0 additions & 170 deletions b/‎SYCL/Matrix/joint_matrix_bfloat16_col_major.cpp
Lines changed: 0 additions & 170 deletions
diff --git a/‎SYCL/Matrix/joint_matrix_bfloat16_colmajorA_colmajorB.cpp
Lines changed: 29 additions & 0 deletions b/‎SYCL/Matrix/joint_matrix_bfloat16_colmajorA_colmajorB.cpp
Lines changed: 29 additions & 0 deletions
diff --git a/‎SYCL/Matrix/joint_matrix_bfloat16_col_majorA.cpp renamed to ‎SYCL/Matrix/joint_matrix_bfloat16_colmajorA_colmajorB_impl.hpp
Lines changed: 4 additions & 31 deletions b/‎SYCL/Matrix/joint_matrix_bfloat16_col_majorA.cpp renamed to ‎SYCL/Matrix/joint_matrix_bfloat16_colmajorA_colmajorB_impl.hpp
Lines changed: 4 additions & 31 deletions
diff --git a/‎SYCL/Matrix/joint_matrix_bfloat16_rowmajorA_rowmajorB.cpp
Lines changed: 29 additions & 0 deletions b/‎SYCL/Matrix/joint_matrix_bfloat16_rowmajorA_rowmajorB.cpp
Lines changed: 29 additions & 0 deletions
diff --git a/‎SYCL/Matrix/joint_matrix_bfloat16_row_major.cpp renamed to ‎SYCL/Matrix/joint_matrix_bfloat16_rowmajorA_rowmajorB_impl.hpp
Lines changed: 3 additions & 30 deletions b/‎SYCL/Matrix/joint_matrix_bfloat16_row_major.cpp renamed to ‎SYCL/Matrix/joint_matrix_bfloat16_rowmajorA_rowmajorB_impl.hpp
Lines changed: 3 additions & 30 deletions
@@ -0,0 +1,29 @@
+//==-- joint_matrix_bfloat16_colmajorA_colmajorB.cpp  - DPC++ joint_matrix--==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// REQUIRES: matrix
+
+// RUN: %clangxx -fsycl %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// CHECK: passed
+
+// This tests support for the col-major layout of matrix B, which is transposed
+// and then VNNI-transformed. This is currently only available on AMX.
+
+// XFAIL: gpu
+
+#include <iostream>
+#include <sycl/sycl.hpp>
+
+using namespace sycl;
+using namespace sycl::ext::oneapi::experimental::matrix;
+using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
+
+#define SG_SZ 16
+
+#include "joint_matrix_bfloat16_colmajorA_colmajorB_impl.hpp"
@@ -1,33 +1,7 @@
-//==----- joint_matrix_bfloat16_col_major.cpp  - DPC++ joint_matrix---------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// REQUIRES: matrix
-
-// RUN: %clangxx -fsycl %s -o %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-
-// This tests support of col major layout for matrix B which does transpose and
-// then VNNI transform. This is currently only available on AMX
-
-// XFAIL: gpu
-
-#include <iostream>
-#include <sycl/sycl.hpp>
-
-using namespace sycl;
-using namespace sycl::ext::oneapi::experimental::matrix;
-using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
-
-#define SG_SZ 8
-
 #define TM 8
-#define TN 8
+#define TN SG_SZ
 #define TK 16
+#define BF16_EPSILON 0.00781250
 
 template <typename T, size_t NUM_ROWS, size_t NUM_COLS> struct big_matrix {
 private:
@@ -78,7 +52,7 @@ void matrix_multiply(big_matrix<T1, M, N> &C, big_matrix<T2, M, K> &A,
                              N, matrix_layout::row_major);
            for (int k = 0; k < K / TK; k += 1) { //
              joint_matrix_load(
-                 sg, sub_a, accA.get_pointer() + ( k* TK) * M + sg_startx * TM,
+                 sg, sub_a, accA.get_pointer() + (k * TK) * M + sg_startx * TM,
                  M, matrix_layout::col_major);
              joint_matrix_load(sg, sub_b,
                                accB.get_pointer() +
@@ -158,9 +132,8 @@ int main() {
   bool res = true;
   for (int i = 0; i < MATRIX_M; i++) {
     for (int j = 0; j < MATRIX_N; j++) {
-      if (C[i][j] != D[i][j]) {
+      if (fabs(C[i][j] - D[i][j]) > BF16_EPSILON) // |C - D|, not |C| - |D|
         res = false;
-      }
     }
   }
   if (res)
 
@@ -0,0 +1,29 @@
+//==--joint_matrix_bfloat16_rowmajorA_rowmajorB.cpp  - DPC++ joint_matrix---==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// REQUIRES: matrix
+
+// RUN: %clangxx -fsycl %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// CHECK: passed
+
+// This tests support for the row-major layout of matrix B, which is
+// automatically VNNI-transformed. This is currently only available on AMX.
+
+// XFAIL: gpu
+
+#include <iostream>
+#include <sycl/sycl.hpp>
+
+using namespace sycl;
+using namespace sycl::ext::oneapi::experimental::matrix;
+using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
+
+#define SG_SZ 16
+
+#include "joint_matrix_bfloat16_rowmajorA_rowmajorB_impl.hpp"
@@ -1,33 +1,7 @@
-//==-------joint_matrix_bfloat16_row_major.cpp  - DPC++ joint_matrix--------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// REQUIRES: matrix
-
-// RUN: %clangxx -fsycl %s -o %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-
-// This tests support of row major layout for matrix B which does automatic VNNI
-// transform. This is currently only available on AMX
-
-// XFAIL: gpu
-
-#include <iostream>
-#include <sycl/sycl.hpp>
-
-using namespace sycl;
-using namespace sycl::ext::oneapi::experimental::matrix;
-using bfloat16 = sycl::ext::oneapi::experimental::bfloat16;
-
-#define SG_SZ 8
-
 #define TM 8
-#define TN 8
+#define TN SG_SZ
 #define TK 16
+#define BF16_EPSILON 0.00781250
 
 template <typename T, size_t NUM_ROWS, size_t NUM_COLS> struct big_matrix {
 private:
@@ -158,9 +132,8 @@ int main() {
   bool res = true;
   for (int i = 0; i < MATRIX_M; i++) {
     for (int j = 0; j < MATRIX_N; j++) {
-      if (C[i][j] != D[i][j]) {
+      if (fabs(C[i][j] - D[i][j]) > BF16_EPSILON) // |C - D|, not |C| - |D|
         res = false;
-      }
     }
   }
   if (res)