
[SYCL][Matrix] Fix __spirv_JointMatrixINTEL signature #6957


Merged
merged 8 commits on Oct 11, 2022
71 changes: 42 additions & 29 deletions sycl/include/CL/__spirv/spirv_ops.hpp
@@ -22,11 +22,20 @@
#endif

#ifdef __SYCL_DEVICE_ONLY__

#if (SYCL_EXT_ONEAPI_MATRIX_VERSION > 1)
#define JOINT_MATRIX_INTEL(T, R, C, L, S, U) \
__spv::__spirv_JointMatrixINTEL<T, R, C, L, S, U>
#else
#define JOINT_MATRIX_INTEL(T, R, C, L, S, U) \
__spv::__spirv_JointMatrixINTEL<T, R, C, L, S>
#endif // SYCL_EXT_ONEAPI_MATRIX_VERSION

template <typename T, std::size_t R, std::size_t C,
__spv::MatrixUse U = __spv::MatrixUse::Unnecessary,
__spv::MatrixLayout L = __spv::MatrixLayout::RowMajor,
__spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
extern SYCL_EXTERNAL __spv::__spirv_JointMatrixINTEL<T, R, C, L, S, U> *
extern SYCL_EXTERNAL JOINT_MATRIX_INTEL(T, R, C, L, S, U) *
__spirv_JointMatrixLoadINTEL(T *Ptr, std::size_t Stride,
__spv::MatrixLayout Layout = L,
__spv::Scope::Flag Sc = S, int MemOperand = 0);
@@ -36,7 +45,7 @@ template <typename T, std::size_t R, std::size_t C,
__spv::MatrixLayout L = __spv::MatrixLayout::RowMajor,
__spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
extern SYCL_EXTERNAL void __spirv_JointMatrixStoreINTEL(
T *Ptr, __spv::__spirv_JointMatrixINTEL<T, R, C, L, S, U> *Object,
T *Ptr, JOINT_MATRIX_INTEL(T, R, C, L, S, U) *Object,
std::size_t Stride, __spv::MatrixLayout Layout = L,
__spv::Scope::Flag Sc = S, int MemOperand = 0);

@@ -48,11 +57,11 @@ template <typename T1, typename T2, std::size_t M, std::size_t K, std::size_t N,
__spv::MatrixLayout LB = __spv::MatrixLayout::RowMajor,
__spv::MatrixLayout LC = __spv::MatrixLayout::RowMajor,
__spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
extern SYCL_EXTERNAL __spv::__spirv_JointMatrixINTEL<T2, M, N, LC, S, UC> *
extern SYCL_EXTERNAL JOINT_MATRIX_INTEL(T2, M, N, LC, S, UC) *
__spirv_JointMatrixMadINTEL(
__spv::__spirv_JointMatrixINTEL<T1, M, K, LA, S, UA> *A,
__spv::__spirv_JointMatrixINTEL<T1, K, N, LB, S, UB> *B,
__spv::__spirv_JointMatrixINTEL<T2, M, N, LC, S, UC> *C,
JOINT_MATRIX_INTEL(T1, M, K, LA, S, UA) *A,
JOINT_MATRIX_INTEL(T1, K, N, LB, S, UB) *B,
JOINT_MATRIX_INTEL(T2, M, N, LC, S, UC) *C,
__spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);

template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
@@ -63,11 +72,11 @@ template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
__spv::MatrixLayout LB = __spv::MatrixLayout::RowMajor,
__spv::MatrixLayout LC = __spv::MatrixLayout::RowMajor,
__spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
extern SYCL_EXTERNAL __spv::__spirv_JointMatrixINTEL<T3, M, N, LC, S, UC> *
extern SYCL_EXTERNAL JOINT_MATRIX_INTEL(T3, M, N, LC, S, UC) *
__spirv_JointMatrixUUMadINTEL(
__spv::__spirv_JointMatrixINTEL<T1, M, K, LA, S, UA> *A,
__spv::__spirv_JointMatrixINTEL<T2, K, N, LB, S, UB> *B,
__spv::__spirv_JointMatrixINTEL<T3, M, N, LC, S, UC> *C,
JOINT_MATRIX_INTEL(T1, M, K, LA, S, UA) *A,
JOINT_MATRIX_INTEL(T2, K, N, LB, S, UB) *B,
JOINT_MATRIX_INTEL(T3, M, N, LC, S, UC) *C,
__spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);

template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
@@ -78,11 +87,11 @@ template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
__spv::MatrixLayout LB = __spv::MatrixLayout::RowMajor,
__spv::MatrixLayout LC = __spv::MatrixLayout::RowMajor,
__spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
extern SYCL_EXTERNAL __spv::__spirv_JointMatrixINTEL<T3, M, N, LC, S, UC> *
extern SYCL_EXTERNAL JOINT_MATRIX_INTEL(T3, M, N, LC, S, UC) *
__spirv_JointMatrixUSMadINTEL(
__spv::__spirv_JointMatrixINTEL<T1, M, K, LA, S, UA> *A,
__spv::__spirv_JointMatrixINTEL<T2, K, N, LB, S, UB> *B,
__spv::__spirv_JointMatrixINTEL<T3, M, N, LC, S, UC> *C,
JOINT_MATRIX_INTEL(T1, M, K, LA, S, UA) *A,
JOINT_MATRIX_INTEL(T2, K, N, LB, S, UB) *B,
JOINT_MATRIX_INTEL(T3, M, N, LC, S, UC) *C,
__spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);

template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
@@ -93,38 +102,42 @@ template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
__spv::MatrixLayout LB = __spv::MatrixLayout::RowMajor,
__spv::MatrixLayout LC = __spv::MatrixLayout::RowMajor,
__spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
extern SYCL_EXTERNAL __spv::__spirv_JointMatrixINTEL<T3, M, N, LC, S, UC> *
extern SYCL_EXTERNAL JOINT_MATRIX_INTEL(T3, M, N, LC, S, UC) *
__spirv_JointMatrixSUMadINTEL(
__spv::__spirv_JointMatrixINTEL<T1, M, K, LA, S, UA> *A,
__spv::__spirv_JointMatrixINTEL<T2, K, N, LB, S, UB> *B,
__spv::__spirv_JointMatrixINTEL<T3, M, N, LC, S, UC> *C,
JOINT_MATRIX_INTEL(T1, M, K, LA, S, UA) *A,
JOINT_MATRIX_INTEL(T2, K, N, LB, S, UB) *B,
JOINT_MATRIX_INTEL(T3, M, N, LC, S, UC) *C,
__spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);

template <typename T, std::size_t R, std::size_t C,
__spv::MatrixUse U = __spv::MatrixUse::Unnecessary,
__spv::MatrixLayout L = __spv::MatrixLayout::RowMajor,
__spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
extern SYCL_EXTERNAL __spv::__spirv_JointMatrixINTEL<T, R, C, L, S, U> *
extern SYCL_EXTERNAL JOINT_MATRIX_INTEL(T, R, C, L, S, U) *
__spirv_CompositeConstruct(const T v);

template <typename T, std::size_t R, std::size_t C, __spv::MatrixUse U,
__spv::MatrixLayout L,
template <typename T, std::size_t R, std::size_t C,
__spv::MatrixUse U = __spv::MatrixUse::Unnecessary,
__spv::MatrixLayout L = __spv::MatrixLayout::RowMajor,
__spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
extern SYCL_EXTERNAL size_t __spirv_JointMatrixWorkItemLengthINTEL(
__spv::__spirv_JointMatrixINTEL<T, R, C, L, S, U> *);
JOINT_MATRIX_INTEL(T, R, C, L, S, U) *);

template <typename T, std::size_t R, std::size_t C, __spv::MatrixUse U,
__spv::MatrixLayout L,
template <typename T, std::size_t R, std::size_t C,
__spv::MatrixUse U = __spv::MatrixUse::Unnecessary,
__spv::MatrixLayout L = __spv::MatrixLayout::RowMajor,
__spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
extern SYCL_EXTERNAL T __spirv_VectorExtractDynamic(
__spv::__spirv_JointMatrixINTEL<T, R, C, L, S, U> *, size_t i);
JOINT_MATRIX_INTEL(T, R, C, L, S, U) *, size_t i);

template <typename T, std::size_t R, std::size_t C, __spv::MatrixUse U,
__spv::MatrixLayout L,
template <typename T, std::size_t R, std::size_t C,
__spv::MatrixUse U = __spv::MatrixUse::Unnecessary,
__spv::MatrixLayout L = __spv::MatrixLayout::RowMajor,
__spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
extern SYCL_EXTERNAL __spv::__spirv_JointMatrixINTEL<T, R, C, L, S, U> *
__spirv_VectorInsertDynamic(__spv::__spirv_JointMatrixINTEL<T, R, C, L, S, U> *,
extern SYCL_EXTERNAL JOINT_MATRIX_INTEL(T, R, C, L, S, U) *
__spirv_VectorInsertDynamic(JOINT_MATRIX_INTEL(T, R, C, L, S, U) *,
T val, size_t i);
#undef JOINT_MATRIX_INTEL

#ifndef __SPIRV_BUILTIN_DECLARATIONS__
#error \
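For illustration only (the concrete arguments int8_t, 12, 48 below are made up, not taken from the patch), this is roughly what a single JOINT_MATRIX_INTEL(...) spelling resolves to under the two version settings:

// Sketch only: the same JOINT_MATRIX_INTEL(T, R, C, L, S, U) invocation picks
// one of two SPIR-V type spellings depending on the extension version.
#include <CL/__spirv/spirv_types.hpp>

#if (SYCL_EXT_ONEAPI_MATRIX_VERSION > 1)
// Versions 2+: MatrixUse is a real sixth template parameter of the type.
using ExampleMat =
    __spv::__spirv_JointMatrixINTEL<int8_t, 12, 48,
                                    __spv::MatrixLayout::RowMajor,
                                    __spv::Scope::Flag::Subgroup,
                                    __spv::MatrixUse::Unnecessary>;
#else
// Default (version 1, or macro undefined): the struct has only five template
// parameters, so the macro drops the MatrixUse argument and the original
// type mangling is preserved.
using ExampleMat =
    __spv::__spirv_JointMatrixINTEL<int8_t, 12, 48,
                                    __spv::MatrixLayout::RowMajor,
                                    __spv::Scope::Flag::Subgroup>;
#endif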
9 changes: 9 additions & 0 deletions sycl/include/CL/__spirv/spirv_types.hpp
@@ -137,6 +137,7 @@ enum class MatrixUse : uint32_t {
// information to SPIRV translator.
// The long term solution would be to introduce a matrix type in Clang and use
// it instead of this member.
#if (SYCL_EXT_ONEAPI_MATRIX_VERSION > 1)
template <typename T, std::size_t R, std::size_t C, MatrixLayout L,
Scope::Flag S = Scope::Flag::Subgroup,
MatrixUse U = MatrixUse::Unnecessary>
@@ -145,6 +146,14 @@ struct __spirv_JointMatrixINTEL {
[R][C][static_cast<size_t>(L) + 1][static_cast<size_t>(S) + 1]
[static_cast<size_t>(U) + 1];
};
#else
template <typename T, std::size_t R, std::size_t C, MatrixLayout L,
Scope::Flag S = Scope::Flag::Subgroup>
struct __spirv_JointMatrixINTEL {
T(*Value)
[R][C][static_cast<size_t>(L) + 1][static_cast<size_t>(S) + 1];
};
#endif // SYCL_EXT_ONEAPI_MATRIX_VERSION

} // namespace __spv

3 changes: 1 addition & 2 deletions sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp
@@ -73,8 +73,7 @@ struct joint_matrix {
public:
__spv::__spirv_JointMatrixINTEL<
T, NumRows, NumCols, spv_matrix_layout_traits<Layout>::value,
spv_scope_traits<Group>::value,
spv_matrix_use_traits<matrix_use::unnecessary>::value> *spvm;
spv_scope_traits<Group>::value> *spvm;
joint_matrix(Group sg) {
#ifndef __SYCL_DEVICE_ONLY__
(void)sg;
11 changes: 6 additions & 5 deletions sycl/include/sycl/ext/oneapi/matrix/matrix-tensorcore.hpp
@@ -712,7 +712,7 @@ void joint_matrix_load(
std::ignore = src;
std::ignore = stride;
throw runtime_error(
"When using SYCL_EXT_ONEAPI_MATRIX=3 joint_matrix_load is "
"When using SYCL_EXT_ONEAPI_MATRIX_VERSION=3 joint_matrix_load is "
"only supported by CUDA devices",
PI_ERROR_INVALID_DEVICE);
#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__)
@@ -734,7 +734,7 @@ void joint_matrix_store(Group sg,
std::ignore = dst;
std::ignore = stride;
throw runtime_error(
"When using SYCL_EXT_ONEAPI_MATRIX=3 joint_matrix_store is "
"When using SYCL_EXT_ONEAPI_MATRIX_VERSION=3 joint_matrix_store is "
"only supported by CUDA devices",
PI_ERROR_INVALID_DEVICE);
#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__)
@@ -757,9 +757,10 @@ joint_matrix_mad(
std::ignore = A;
std::ignore = B;
std::ignore = C;
throw runtime_error("When using SYCL_EXT_ONEAPI_MATRIX=3 joint_matrix_mad is "
"only supported by CUDA devices",
PI_ERROR_INVALID_DEVICE);
throw runtime_error(
"When using SYCL_EXT_ONEAPI_MATRIX_VERSION=3 joint_matrix_mad is "
"only supported by CUDA devices",
PI_ERROR_INVALID_DEVICE);
#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__)
}

18 changes: 9 additions & 9 deletions sycl/include/sycl/ext/oneapi/matrix/matrix.hpp
@@ -14,18 +14,18 @@

#pragma once

#include <sycl/feature_test.hpp>
#ifndef SYCL_EXT_ONEAPI_MATRIX_VERSION
#define SYCL_EXT_ONEAPI_MATRIX_VERSION 1
#endif // SYCL_EXT_ONEAPI_MATRIX_VERSION

// the default is matrix-jit-use but existing tests in llvm-test-suite won't
// fail because we have the "unnecessary" use value
#if (SYCL_EXT_ONEAPI_MATRIX == 1)
#if (SYCL_EXT_ONEAPI_MATRIX_VERSION == 1)
#include <sycl/ext/oneapi/matrix/matrix-jit.hpp>
#include <sycl/ext/oneapi/matrix/static-query.hpp>
#endif
#if (SYCL_EXT_ONEAPI_MATRIX == 2)
#endif // SYCL_EXT_ONEAPI_MATRIX_VERSION
#if (SYCL_EXT_ONEAPI_MATRIX_VERSION == 2)
#include <sycl/ext/oneapi/matrix/matrix-jit-use.hpp>
#include <sycl/ext/oneapi/matrix/static-query-use.hpp>
#endif
#if (SYCL_EXT_ONEAPI_MATRIX == 3)
#endif // SYCL_EXT_ONEAPI_MATRIX_VERSION
#if (SYCL_EXT_ONEAPI_MATRIX_VERSION == 3)
#include <sycl/ext/oneapi/matrix/matrix-tensorcore.hpp>
#endif
#endif // SYCL_EXT_ONEAPI_MATRIX_VERSION
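
As a minimal sketch of the new selection mechanism (assuming only what the hunk above shows; the translation unit below is hypothetical), the implementation is now chosen at compile time via SYCL_EXT_ONEAPI_MATRIX_VERSION, defaulting to 1:

// Sketch only: select the implementation with -DSYCL_EXT_ONEAPI_MATRIX_VERSION=N,
// e.g.  clang++ -fsycl -DSYCL_EXT_ONEAPI_MATRIX_VERSION=2 matrix_example.cpp
#include <sycl/ext/oneapi/matrix/matrix.hpp>
#include <iostream>

int main() {
#if (SYCL_EXT_ONEAPI_MATRIX_VERSION == 1)
  std::cout << "version 1: matrix-jit.hpp (no matrix use parameter)\n";
#elif (SYCL_EXT_ONEAPI_MATRIX_VERSION == 2)
  std::cout << "version 2: matrix-jit-use.hpp (with matrix use parameter)\n";
#elif (SYCL_EXT_ONEAPI_MATRIX_VERSION == 3)
  std::cout << "version 3: matrix-tensorcore.hpp (CUDA only)\n";
#endif
  return 0;
}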
9 changes: 1 addition & 8 deletions sycl/include/sycl/feature_test.hpp.in
@@ -32,14 +32,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) {
#define SYCL_EXT_INTEL_DEVICE_INFO 3
#define SYCL_EXT_ONEAPI_SUB_GROUP_MASK 1
#define SYCL_EXT_ONEAPI_LOCAL_MEMORY 1
// As for SYCL_EXT_ONEAPI_MATRIX:
// 1- provides AOT initial implementation for AMX for the experimental matrix
// extension
// 2- provides JIT implementation (target agnostic) for the
// experimental matrix extension
#ifndef SYCL_EXT_ONEAPI_MATRIX
#define SYCL_EXT_ONEAPI_MATRIX 2
#endif
#define SYCL_EXT_ONEAPI_MATRIX 1
#define SYCL_EXT_ONEAPI_ASSERT 1
#define SYCL_EXT_ONEAPI_COMPLEX_ALGORITHMS 1
#define SYCL_EXT_ONEAPI_DISCARD_QUEUE_EVENTS 1
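With the version selector removed from feature_test.hpp.in, SYCL_EXT_ONEAPI_MATRIX becomes a plain feature-test macro fixed at 1. A minimal sketch of querying it (assuming, as usual, that <sycl/sycl.hpp> pulls in the generated feature_test.hpp):

// Sketch only: SYCL_EXT_ONEAPI_MATRIX now only advertises the extension; the
// implementation is selected by SYCL_EXT_ONEAPI_MATRIX_VERSION (see matrix.hpp).
#include <sycl/sycl.hpp>
#include <iostream>

int main() {
#ifdef SYCL_EXT_ONEAPI_MATRIX
  std::cout << "SYCL_EXT_ONEAPI_MATRIX = " << SYCL_EXT_ONEAPI_MATRIX << "\n";
#else
  std::cout << "matrix extension not advertised\n";
#endif
  return 0;
}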
@@ -1,6 +1,6 @@
// REQUIRES: cuda

// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 -DSYCL_EXT_ONEAPI_MATRIX=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s
// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 -DSYCL_EXT_ONEAPI_MATRIX_VERSION=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s

#include <sycl/sycl.hpp>

@@ -1,6 +1,6 @@
// REQUIRES: cuda

// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 -DSYCL_EXT_ONEAPI_MATRIX=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s
// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 -DSYCL_EXT_ONEAPI_MATRIX_VERSION=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s

#include <sycl/sycl.hpp>

@@ -1,6 +1,6 @@
// REQUIRES: cuda

// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 -DSYCL_EXT_ONEAPI_MATRIX=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s
// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 -DSYCL_EXT_ONEAPI_MATRIX_VERSION=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s

#include <sycl/sycl.hpp>

@@ -1,6 +1,6 @@
// REQUIRES: cuda

// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_70 -DSYCL_EXT_ONEAPI_MATRIX=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s
// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_70 -DSYCL_EXT_ONEAPI_MATRIX_VERSION=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s

#include <sycl/sycl.hpp>

@@ -1,6 +1,6 @@
// REQUIRES: cuda

// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_70 -DSYCL_EXT_ONEAPI_MATRIX=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s
// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_70 -DSYCL_EXT_ONEAPI_MATRIX_VERSION=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s

#include <sycl/sycl.hpp>

@@ -1,6 +1,6 @@
// REQUIRES: cuda

// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_72 -DSYCL_EXT_ONEAPI_MATRIX=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s
// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_72 -DSYCL_EXT_ONEAPI_MATRIX_VERSION=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s

#include <sycl/sycl.hpp>

@@ -1,6 +1,6 @@
// REQUIRES: cuda

// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 -DSYCL_EXT_ONEAPI_MATRIX=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s
// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 -DSYCL_EXT_ONEAPI_MATRIX_VERSION=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s

// IMPORTANT: before updating sm version support beyond sm_86 read the following
// NOTE!
@@ -1,6 +1,6 @@
// REQUIRES: cuda

// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_72 -DSYCL_EXT_ONEAPI_MATRIX=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s
// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_72 -DSYCL_EXT_ONEAPI_MATRIX_VERSION=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s

#include <sycl/sycl.hpp>

2 changes: 1 addition & 1 deletion sycl/test/matrix/matrix-bf16-test-SG-16.cpp
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -O2 %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX=1
// RUN: %clangxx -fsycl -O2 %s -o %t.out
#include <iostream>
#include <sycl/sycl.hpp>

2 changes: 1 addition & 1 deletion sycl/test/matrix/matrix-bf16-test.cpp
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -O2 %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX=1
// RUN: %clangxx -fsycl -O2 %s -o %t.out
#include <iostream>
#include <sycl/sycl.hpp>

2 changes: 1 addition & 1 deletion sycl/test/matrix/matrix-bfloat16-test-use.cpp
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -O2 %s -o %t.out
// RUN: %clangxx -fsycl -O2 -DSYCL_EXT_ONEAPI_MATRIX_VERSION=2 %s -o %t.out
#include <iostream>
#include <sycl/sycl.hpp>

2 changes: 1 addition & 1 deletion sycl/test/matrix/matrix-bfloat16-test.cpp
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -O2 %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX=1
// RUN: %clangxx -fsycl -O2 %s -o %t.out
#include <iostream>
#include <sycl/sycl.hpp>

2 changes: 1 addition & 1 deletion sycl/test/matrix/matrix-elemwise-ops.cpp
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -O2 %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX=1
// RUN: %clangxx -fsycl -O2 %s -o %t.out

#include <iostream>
#include <sycl/sycl.hpp>
2 changes: 1 addition & 1 deletion sycl/test/matrix/matrix-int8-test-SG-16.cpp
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -O2 %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX=1
// RUN: %clangxx -fsycl -O2 %s -o %t.out
#include <iostream>
#include <sycl/sycl.hpp>

2 changes: 1 addition & 1 deletion sycl/test/matrix/matrix-int8-test-use.cpp
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -fsycl-device-only -O2 -S -emit-llvm -o - %s | FileCheck %s
// RUN: %clangxx -fsycl -fsycl-device-only -DSYCL_EXT_ONEAPI_MATRIX_VERSION=2 -O2 -S -emit-llvm -o - %s | FileCheck %s

// CHECK-DAG: %spirv.JointMatrixINTEL._char_12_48_4_3_0 = type { [12 x [48 x [5 x [4 x [1 x i8]]]]] addrspace(4)* }
// CHECK-DAG: %spirv.JointMatrixINTEL._int_12_12_4_3_2 = type { [12 x [12 x [5 x [4 x [3 x i32]]]]] addrspace(4)* }
8 changes: 4 additions & 4 deletions sycl/test/matrix/matrix-int8-test.cpp
@@ -1,8 +1,8 @@
// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX=1 -fsycl -fsycl-device-only -O2 -S -emit-llvm -o - %s | FileCheck %s
// RUN: %clangxx -fsycl -fsycl-device-only -O2 -S -emit-llvm -o - %s | FileCheck %s

// CHECK-DAG: %spirv.JointMatrixINTEL._char_12_48_0_3_3 = type { [12 x [48 x [1 x [4 x [4 x i8]]]]] addrspace(4)* }
// CHECK-DAG: %spirv.JointMatrixINTEL._int_12_12_0_3_3 = type { [12 x [12 x [1 x [4 x [4 x i32]]]]] addrspace(4)* }
// CHECK-DAG: %spirv.JointMatrixINTEL._char_48_12_3_3_3 = type { [48 x [12 x [4 x [4 x [4 x i8]]]]] addrspace(4)* }
// CHECK-DAG: %spirv.JointMatrixINTEL._char_12_48_0_3 = type { [12 x [48 x [1 x [4 x i8]]]] addrspace(4)* }
// CHECK-DAG: %spirv.JointMatrixINTEL._int_12_12_0_3 = type { [12 x [12 x [1 x [4 x i32]]]] addrspace(4)* }
// CHECK-DAG: %spirv.JointMatrixINTEL._char_48_12_3_3 = type { [48 x [12 x [4 x [4 x i8]]]] addrspace(4)* }

#include <iostream>
#include <sycl/sycl.hpp>
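For context, an illustrative sketch of the kind of joint_matrix declarations that produce the five-dimension mangled types checked above under the default version-1 path (the kernel, names, and layout mapping below are assumptions based on the matrix-jit.hpp API, not copied from the test):

// Sketch only: hypothetical declarations matching the CHECK-DAG patterns above,
// assuming joint_matrix<T, Rows, Cols, Layout, Group> from matrix-jit.hpp and
// that layout value 3 corresponds to matrix_layout::packed_b.
#include <sycl/sycl.hpp>

using namespace sycl;
using namespace sycl::ext::oneapi::experimental::matrix;

int main() {
  queue q;
  q.submit([&](handler &cgh) {
    cgh.parallel_for(nd_range<2>({1, 8}, {1, 8}), [=](nd_item<2> item) {
      sub_group sg = item.get_sub_group();
      joint_matrix<int8_t, 12, 48> tA(sg);  // _char_12_48_0_3 (row_major, subgroup)
      joint_matrix<int8_t, 48, 12, matrix_layout::packed_b> tB(sg); // _char_48_12_3_3
      joint_matrix<int32_t, 12, 12> tC(sg); // _int_12_12_0_3
    });
  });
  q.wait();
  return 0;
}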
2 changes: 1 addition & 1 deletion sycl/test/matrix/query.cpp
@@ -1,4 +1,4 @@
// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX=1 -fsycl -o query %s
// RUN: %clangxx -fsycl -o query %s
#include <iostream>
#include <sycl/sycl.hpp>
