
[SYCL][Matrix] Fix __spirv_JointMatrixINTEL signature #6957


Merged
merged 8 commits on Oct 11, 2022
71 changes: 42 additions & 29 deletions sycl/include/CL/__spirv/spirv_ops.hpp
@@ -22,11 +22,20 @@
#endif

#ifdef __SYCL_DEVICE_ONLY__

#if (SYCL_EXT_ONEAPI_MATRIX_VERSION > 1)
#define JOINT_MATRIX_INTEL(T, R, C, L, S, U) \
__spv::__spirv_JointMatrixINTEL<T, R, C, L, S, U>
#else
#define JOINT_MATRIX_INTEL(T, R, C, L, S, U) \
__spv::__spirv_JointMatrixINTEL<T, R, C, L, S>
#endif // SYCL_EXT_ONEAPI_MATRIX_VERSION

template <typename T, std::size_t R, std::size_t C,
__spv::MatrixUse U = __spv::MatrixUse::Unnecessary,
__spv::MatrixLayout L = __spv::MatrixLayout::RowMajor,
__spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
extern SYCL_EXTERNAL __spv::__spirv_JointMatrixINTEL<T, R, C, L, S, U> *
extern SYCL_EXTERNAL JOINT_MATRIX_INTEL(T, R, C, L, S, U) *
__spirv_JointMatrixLoadINTEL(T *Ptr, std::size_t Stride,
__spv::MatrixLayout Layout = L,
__spv::Scope::Flag Sc = S, int MemOperand = 0);
@@ -36,7 +45,7 @@ template <typename T, std::size_t R, std::size_t C,
__spv::MatrixLayout L = __spv::MatrixLayout::RowMajor,
__spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
extern SYCL_EXTERNAL void __spirv_JointMatrixStoreINTEL(
T *Ptr, __spv::__spirv_JointMatrixINTEL<T, R, C, L, S, U> *Object,
T *Ptr, JOINT_MATRIX_INTEL(T, R, C, L, S, U) *Object,
std::size_t Stride, __spv::MatrixLayout Layout = L,
__spv::Scope::Flag Sc = S, int MemOperand = 0);

@@ -48,11 +57,11 @@ template <typename T1, typename T2, std::size_t M, std::size_t K, std::size_t N,
__spv::MatrixLayout LB = __spv::MatrixLayout::RowMajor,
__spv::MatrixLayout LC = __spv::MatrixLayout::RowMajor,
__spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
extern SYCL_EXTERNAL __spv::__spirv_JointMatrixINTEL<T2, M, N, LC, S, UC> *
extern SYCL_EXTERNAL JOINT_MATRIX_INTEL(T2, M, N, LC, S, UC) *
__spirv_JointMatrixMadINTEL(
__spv::__spirv_JointMatrixINTEL<T1, M, K, LA, S, UA> *A,
__spv::__spirv_JointMatrixINTEL<T1, K, N, LB, S, UB> *B,
__spv::__spirv_JointMatrixINTEL<T2, M, N, LC, S, UC> *C,
JOINT_MATRIX_INTEL(T1, M, K, LA, S, UA) *A,
JOINT_MATRIX_INTEL(T1, K, N, LB, S, UB) *B,
JOINT_MATRIX_INTEL(T2, M, N, LC, S, UC) *C,
__spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);

template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
@@ -63,11 +72,11 @@ template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
__spv::MatrixLayout LB = __spv::MatrixLayout::RowMajor,
__spv::MatrixLayout LC = __spv::MatrixLayout::RowMajor,
__spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
extern SYCL_EXTERNAL __spv::__spirv_JointMatrixINTEL<T3, M, N, LC, S, UC> *
extern SYCL_EXTERNAL JOINT_MATRIX_INTEL(T3, M, N, LC, S, UC) *
__spirv_JointMatrixUUMadINTEL(
__spv::__spirv_JointMatrixINTEL<T1, M, K, LA, S, UA> *A,
__spv::__spirv_JointMatrixINTEL<T2, K, N, LB, S, UB> *B,
__spv::__spirv_JointMatrixINTEL<T3, M, N, LC, S, UC> *C,
JOINT_MATRIX_INTEL(T1, M, K, LA, S, UA) *A,
JOINT_MATRIX_INTEL(T2, K, N, LB, S, UB) *B,
JOINT_MATRIX_INTEL(T3, M, N, LC, S, UC) *C,
__spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);

template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
@@ -78,11 +87,11 @@ template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
__spv::MatrixLayout LB = __spv::MatrixLayout::RowMajor,
__spv::MatrixLayout LC = __spv::MatrixLayout::RowMajor,
__spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
extern SYCL_EXTERNAL __spv::__spirv_JointMatrixINTEL<T3, M, N, LC, S, UC> *
extern SYCL_EXTERNAL JOINT_MATRIX_INTEL(T3, M, N, LC, S, UC) *
__spirv_JointMatrixUSMadINTEL(
__spv::__spirv_JointMatrixINTEL<T1, M, K, LA, S, UA> *A,
__spv::__spirv_JointMatrixINTEL<T2, K, N, LB, S, UB> *B,
__spv::__spirv_JointMatrixINTEL<T3, M, N, LC, S, UC> *C,
JOINT_MATRIX_INTEL(T1, M, K, LA, S, UA) *A,
JOINT_MATRIX_INTEL(T2, K, N, LB, S, UB) *B,
JOINT_MATRIX_INTEL(T3, M, N, LC, S, UC) *C,
__spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);

template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
@@ -93,38 +102,42 @@ template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
__spv::MatrixLayout LB = __spv::MatrixLayout::RowMajor,
__spv::MatrixLayout LC = __spv::MatrixLayout::RowMajor,
__spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
extern SYCL_EXTERNAL __spv::__spirv_JointMatrixINTEL<T3, M, N, LC, S, UC> *
extern SYCL_EXTERNAL JOINT_MATRIX_INTEL(T3, M, N, LC, S, UC) *
__spirv_JointMatrixSUMadINTEL(
__spv::__spirv_JointMatrixINTEL<T1, M, K, LA, S, UA> *A,
__spv::__spirv_JointMatrixINTEL<T2, K, N, LB, S, UB> *B,
__spv::__spirv_JointMatrixINTEL<T3, M, N, LC, S, UC> *C,
JOINT_MATRIX_INTEL(T1, M, K, LA, S, UA) *A,
JOINT_MATRIX_INTEL(T2, K, N, LB, S, UB) *B,
JOINT_MATRIX_INTEL(T3, M, N, LC, S, UC) *C,
__spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);

template <typename T, std::size_t R, std::size_t C,
__spv::MatrixUse U = __spv::MatrixUse::Unnecessary,
__spv::MatrixLayout L = __spv::MatrixLayout::RowMajor,
__spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
extern SYCL_EXTERNAL __spv::__spirv_JointMatrixINTEL<T, R, C, L, S, U> *
extern SYCL_EXTERNAL JOINT_MATRIX_INTEL(T, R, C, L, S, U) *
__spirv_CompositeConstruct(const T v);

template <typename T, std::size_t R, std::size_t C, __spv::MatrixUse U,
__spv::MatrixLayout L,
template <typename T, std::size_t R, std::size_t C,
__spv::MatrixUse U = __spv::MatrixUse::Unnecessary,
__spv::MatrixLayout L = __spv::MatrixLayout::RowMajor,
__spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
extern SYCL_EXTERNAL size_t __spirv_JointMatrixWorkItemLengthINTEL(
__spv::__spirv_JointMatrixINTEL<T, R, C, L, S, U> *);
JOINT_MATRIX_INTEL(T, R, C, L, S, U) *);

template <typename T, std::size_t R, std::size_t C, __spv::MatrixUse U,
__spv::MatrixLayout L,
template <typename T, std::size_t R, std::size_t C,
__spv::MatrixUse U = __spv::MatrixUse::Unnecessary,
__spv::MatrixLayout L = __spv::MatrixLayout::RowMajor,
__spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
extern SYCL_EXTERNAL T __spirv_VectorExtractDynamic(
__spv::__spirv_JointMatrixINTEL<T, R, C, L, S, U> *, size_t i);
JOINT_MATRIX_INTEL(T, R, C, L, S, U) *, size_t i);

template <typename T, std::size_t R, std::size_t C, __spv::MatrixUse U,
__spv::MatrixLayout L,
template <typename T, std::size_t R, std::size_t C,
__spv::MatrixUse U = __spv::MatrixUse::Unnecessary,
__spv::MatrixLayout L = __spv::MatrixLayout::RowMajor,
__spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
extern SYCL_EXTERNAL __spv::__spirv_JointMatrixINTEL<T, R, C, L, S, U> *
__spirv_VectorInsertDynamic(__spv::__spirv_JointMatrixINTEL<T, R, C, L, S, U> *,
extern SYCL_EXTERNAL JOINT_MATRIX_INTEL(T, R, C, L, S, U) *
__spirv_VectorInsertDynamic(JOINT_MATRIX_INTEL(T, R, C, L, S, U) *,
T val, size_t i);
#undef JOINT_MATRIX_INTEL

#ifndef __SPIRV_BUILTIN_DECLARATIONS__
#error \
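For illustration only (the concrete arguments int8_t, 12, 48 below are made up, not taken from the patch), this is roughly what a single JOINT_MATRIX_INTEL(...) spelling resolves to under the two version settings:

// Sketch only: the same JOINT_MATRIX_INTEL(T, R, C, L, S, U) invocation picks
// one of two SPIR-V type spellings depending on the extension version.
#include <CL/__spirv/spirv_types.hpp>

#if (SYCL_EXT_ONEAPI_MATRIX_VERSION > 1)
// Versions 2+: MatrixUse is a real sixth template parameter of the type.
using ExampleMat =
    __spv::__spirv_JointMatrixINTEL<int8_t, 12, 48,
                                    __spv::MatrixLayout::RowMajor,
                                    __spv::Scope::Flag::Subgroup,
                                    __spv::MatrixUse::Unnecessary>;
#else
// Default (version 1, or macro undefined): the struct has only five template
// parameters, so the macro drops the MatrixUse argument and the original
// type mangling is preserved.
using ExampleMat =
    __spv::__spirv_JointMatrixINTEL<int8_t, 12, 48,
                                    __spv::MatrixLayout::RowMajor,
                                    __spv::Scope::Flag::Subgroup>;
#endif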
9 changes: 9 additions & 0 deletions sycl/include/CL/__spirv/spirv_types.hpp
@@ -137,6 +137,7 @@ enum class MatrixUse : uint32_t {
// information to SPIRV translator.
// The long term solution would be to introduce a matrix type in Clang and use
// it instead of this member.
#if (SYCL_EXT_ONEAPI_MATRIX_VERSION > 1)
template <typename T, std::size_t R, std::size_t C, MatrixLayout L,
Scope::Flag S = Scope::Flag::Subgroup,
MatrixUse U = MatrixUse::Unnecessary>
@@ -145,6 +146,14 @@ struct __spirv_JointMatrixINTEL {
[R][C][static_cast<size_t>(L) + 1][static_cast<size_t>(S) + 1]
[static_cast<size_t>(U) + 1];
};
#else
template <typename T, std::size_t R, std::size_t C, MatrixLayout L,
Scope::Flag S = Scope::Flag::Subgroup>
struct __spirv_JointMatrixINTEL {
T(*Value)
[R][C][static_cast<size_t>(L) + 1][static_cast<size_t>(S) + 1];
};
#endif // SYCL_EXT_ONEAPI_MATRIX_VERSION

} // namespace __spv

3 changes: 1 addition & 2 deletions sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp
@@ -73,8 +73,7 @@ struct joint_matrix {
public:
__spv::__spirv_JointMatrixINTEL<
T, NumRows, NumCols, spv_matrix_layout_traits<Layout>::value,
spv_scope_traits<Group>::value,
spv_matrix_use_traits<matrix_use::unnecessary>::value> *spvm;
spv_scope_traits<Group>::value> *spvm;
joint_matrix(Group sg) {
#ifndef __SYCL_DEVICE_ONLY__
(void)sg;
11 changes: 6 additions & 5 deletions sycl/include/sycl/ext/oneapi/matrix/matrix-tensorcore.hpp
@@ -712,7 +712,7 @@ void joint_matrix_load(
std::ignore = src;
std::ignore = stride;
throw runtime_error(
"When using SYCL_EXT_ONEAPI_MATRIX=3 joint_matrix_load is "
"When using SYCL_EXT_ONEAPI_MATRIX_VERSION=3 joint_matrix_load is "
"only supported by CUDA devices",
PI_ERROR_INVALID_DEVICE);
#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__)
@@ -734,7 +734,7 @@ void joint_matrix_store(Group sg,
std::ignore = dst;
std::ignore = stride;
throw runtime_error(
"When using SYCL_EXT_ONEAPI_MATRIX=3 joint_matrix_store is "
"When using SYCL_EXT_ONEAPI_MATRIX_VERSION=3 joint_matrix_store is "
"only supported by CUDA devices",
PI_ERROR_INVALID_DEVICE);
#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__)
@@ -757,9 +757,10 @@ joint_matrix_mad(
std::ignore = A;
std::ignore = B;
std::ignore = C;
throw runtime_error("When using SYCL_EXT_ONEAPI_MATRIX=3 joint_matrix_mad is "
"only supported by CUDA devices",
PI_ERROR_INVALID_DEVICE);
throw runtime_error(
"When using SYCL_EXT_ONEAPI_MATRIX_VERSION=3 joint_matrix_mad is "
"only supported by CUDA devices",
PI_ERROR_INVALID_DEVICE);
#endif // defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__)
}

18 changes: 9 additions & 9 deletions sycl/include/sycl/ext/oneapi/matrix/matrix.hpp
@@ -14,18 +14,18 @@

#pragma once

#include <sycl/feature_test.hpp>
#ifndef SYCL_EXT_ONEAPI_MATRIX_VERSION
#define SYCL_EXT_ONEAPI_MATRIX_VERSION 1
#endif // SYCL_EXT_ONEAPI_MATRIX_VERSION

// the default is matrix-jit-use but existing tests in llvm-test-suite won't
// fail because we have the "unnecessary" use value
#if (SYCL_EXT_ONEAPI_MATRIX == 1)
#if (SYCL_EXT_ONEAPI_MATRIX_VERSION == 1)
#include <sycl/ext/oneapi/matrix/matrix-jit.hpp>
#include <sycl/ext/oneapi/matrix/static-query.hpp>
#endif
#if (SYCL_EXT_ONEAPI_MATRIX == 2)
#endif // SYCL_EXT_ONEAPI_MATRIX_VERSION
#if (SYCL_EXT_ONEAPI_MATRIX_VERSION == 2)
#include <sycl/ext/oneapi/matrix/matrix-jit-use.hpp>
#include <sycl/ext/oneapi/matrix/static-query-use.hpp>
#endif
#if (SYCL_EXT_ONEAPI_MATRIX == 3)
#endif // SYCL_EXT_ONEAPI_MATRIX_VERSION
#if (SYCL_EXT_ONEAPI_MATRIX_VERSION == 3)
#include <sycl/ext/oneapi/matrix/matrix-tensorcore.hpp>
#endif
#endif // SYCL_EXT_ONEAPI_MATRIX_VERSION
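
As a minimal sketch of the new selection mechanism (assuming only what the hunk above shows; the translation unit below is hypothetical), the implementation is now chosen at compile time via SYCL_EXT_ONEAPI_MATRIX_VERSION, defaulting to 1:

// Sketch only: select the implementation with -DSYCL_EXT_ONEAPI_MATRIX_VERSION=N,
// e.g.  clang++ -fsycl -DSYCL_EXT_ONEAPI_MATRIX_VERSION=2 matrix_example.cpp
#include <sycl/ext/oneapi/matrix/matrix.hpp>
#include <iostream>

int main() {
#if (SYCL_EXT_ONEAPI_MATRIX_VERSION == 1)
  std::cout << "version 1: matrix-jit.hpp (no matrix use parameter)\n";
#elif (SYCL_EXT_ONEAPI_MATRIX_VERSION == 2)
  std::cout << "version 2: matrix-jit-use.hpp (with matrix use parameter)\n";
#elif (SYCL_EXT_ONEAPI_MATRIX_VERSION == 3)
  std::cout << "version 3: matrix-tensorcore.hpp (CUDA only)\n";
#endif
  return 0;
}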
9 changes: 1 addition & 8 deletions sycl/include/sycl/feature_test.hpp.in
@@ -32,14 +32,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) {
#define SYCL_EXT_INTEL_DEVICE_INFO 3
#define SYCL_EXT_ONEAPI_SUB_GROUP_MASK 1
#define SYCL_EXT_ONEAPI_LOCAL_MEMORY 1
// As for SYCL_EXT_ONEAPI_MATRIX:
// 1- provides AOT initial implementation for AMX for the experimental matrix
// extension
// 2- provides JIT implementation (target agnostic) for the
// experimental matrix extension
#ifndef SYCL_EXT_ONEAPI_MATRIX
#define SYCL_EXT_ONEAPI_MATRIX 2
#endif
#define SYCL_EXT_ONEAPI_MATRIX 1
#define SYCL_EXT_ONEAPI_ASSERT 1
#define SYCL_EXT_ONEAPI_COMPLEX_ALGORITHMS 1
#define SYCL_EXT_ONEAPI_DISCARD_QUEUE_EVENTS 1
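With the version selector removed from feature_test.hpp.in, SYCL_EXT_ONEAPI_MATRIX becomes a plain feature-test macro fixed at 1. A minimal sketch of querying it (assuming, as usual, that <sycl/sycl.hpp> pulls in the generated feature_test.hpp):

// Sketch only: SYCL_EXT_ONEAPI_MATRIX now only advertises the extension; the
// implementation is selected by SYCL_EXT_ONEAPI_MATRIX_VERSION (see matrix.hpp).
#include <sycl/sycl.hpp>
#include <iostream>

int main() {
#ifdef SYCL_EXT_ONEAPI_MATRIX
  std::cout << "SYCL_EXT_ONEAPI_MATRIX = " << SYCL_EXT_ONEAPI_MATRIX << "\n";
#else
  std::cout << "matrix extension not advertised\n";
#endif
  return 0;
}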
@@ -1,6 +1,6 @@
// REQUIRES: cuda

// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 -DSYCL_EXT_ONEAPI_MATRIX=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s
// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 -DSYCL_EXT_ONEAPI_MATRIX_VERSION=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s

#include <sycl/sycl.hpp>

@@ -1,6 +1,6 @@
// REQUIRES: cuda

// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 -DSYCL_EXT_ONEAPI_MATRIX=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s
// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 -DSYCL_EXT_ONEAPI_MATRIX_VERSION=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s

#include <sycl/sycl.hpp>

@@ -1,6 +1,6 @@
// REQUIRES: cuda

// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 -DSYCL_EXT_ONEAPI_MATRIX=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s
// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 -DSYCL_EXT_ONEAPI_MATRIX_VERSION=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s

#include <sycl/sycl.hpp>

@@ -1,6 +1,6 @@
// REQUIRES: cuda

// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_70 -DSYCL_EXT_ONEAPI_MATRIX=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s
// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_70 -DSYCL_EXT_ONEAPI_MATRIX_VERSION=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s

#include <sycl/sycl.hpp>

@@ -1,6 +1,6 @@
// REQUIRES: cuda

// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_70 -DSYCL_EXT_ONEAPI_MATRIX=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s
// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_70 -DSYCL_EXT_ONEAPI_MATRIX_VERSION=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s

#include <sycl/sycl.hpp>

@@ -1,6 +1,6 @@
// REQUIRES: cuda

// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_72 -DSYCL_EXT_ONEAPI_MATRIX=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s
// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_72 -DSYCL_EXT_ONEAPI_MATRIX_VERSION=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s

#include <sycl/sycl.hpp>

@@ -1,6 +1,6 @@
// REQUIRES: cuda

// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 -DSYCL_EXT_ONEAPI_MATRIX=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s
// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 -DSYCL_EXT_ONEAPI_MATRIX_VERSION=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s

// IMPORTANT: before updating sm version support beyond sm_86 read the following
// NOTE!
@@ -1,6 +1,6 @@
// REQUIRES: cuda

// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_72 -DSYCL_EXT_ONEAPI_MATRIX=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s
// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_72 -DSYCL_EXT_ONEAPI_MATRIX_VERSION=3 -S -Xclang -emit-llvm %s -o -| FileCheck %s

#include <sycl/sycl.hpp>

2 changes: 1 addition & 1 deletion sycl/test/matrix/matrix-bf16-test-SG-16.cpp
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -O2 %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX=1
// RUN: %clangxx -fsycl -O2 %s -o %t.out
#include <iostream>
#include <sycl/sycl.hpp>

2 changes: 1 addition & 1 deletion sycl/test/matrix/matrix-bf16-test.cpp
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -O2 %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX=1
// RUN: %clangxx -fsycl -O2 %s -o %t.out
#include <iostream>
#include <sycl/sycl.hpp>

2 changes: 1 addition & 1 deletion sycl/test/matrix/matrix-bfloat16-test-use.cpp
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -O2 %s -o %t.out
// RUN: %clangxx -fsycl -O2 -DSYCL_EXT_ONEAPI_MATRIX_VERSION=2 %s -o %t.out
#include <iostream>
#include <sycl/sycl.hpp>

2 changes: 1 addition & 1 deletion sycl/test/matrix/matrix-bfloat16-test.cpp
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -O2 %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX=1
// RUN: %clangxx -fsycl -O2 %s -o %t.out
#include <iostream>
#include <sycl/sycl.hpp>

2 changes: 1 addition & 1 deletion sycl/test/matrix/matrix-elemwise-ops.cpp
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -O2 %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX=1
// RUN: %clangxx -fsycl -O2 %s -o %t.out

#include <iostream>
#include <sycl/sycl.hpp>
2 changes: 1 addition & 1 deletion sycl/test/matrix/matrix-int8-test-SG-16.cpp
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -O2 %s -o %t.out -DSYCL_EXT_ONEAPI_MATRIX=1
// RUN: %clangxx -fsycl -O2 %s -o %t.out
#include <iostream>
#include <sycl/sycl.hpp>

2 changes: 1 addition & 1 deletion sycl/test/matrix/matrix-int8-test-use.cpp
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -fsycl-device-only -O2 -S -emit-llvm -o - %s | FileCheck %s
// RUN: %clangxx -fsycl -fsycl-device-only -DSYCL_EXT_ONEAPI_MATRIX_VERSION=2 -O2 -S -emit-llvm -o - %s | FileCheck %s

// CHECK-DAG: %spirv.JointMatrixINTEL._char_12_48_4_3_0 = type { [12 x [48 x [5 x [4 x [1 x i8]]]]] addrspace(4)* }
// CHECK-DAG: %spirv.JointMatrixINTEL._int_12_12_4_3_2 = type { [12 x [12 x [5 x [4 x [3 x i32]]]]] addrspace(4)* }
8 changes: 4 additions & 4 deletions sycl/test/matrix/matrix-int8-test.cpp
@@ -1,8 +1,8 @@
// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX=1 -fsycl -fsycl-device-only -O2 -S -emit-llvm -o - %s | FileCheck %s
// RUN: %clangxx -fsycl -fsycl-device-only -O2 -S -emit-llvm -o - %s | FileCheck %s

// CHECK-DAG: %spirv.JointMatrixINTEL._char_12_48_0_3_3 = type { [12 x [48 x [1 x [4 x [4 x i8]]]]] addrspace(4)* }
// CHECK-DAG: %spirv.JointMatrixINTEL._int_12_12_0_3_3 = type { [12 x [12 x [1 x [4 x [4 x i32]]]]] addrspace(4)* }
// CHECK-DAG: %spirv.JointMatrixINTEL._char_48_12_3_3_3 = type { [48 x [12 x [4 x [4 x [4 x i8]]]]] addrspace(4)* }
// CHECK-DAG: %spirv.JointMatrixINTEL._char_12_48_0_3 = type { [12 x [48 x [1 x [4 x i8]]]] addrspace(4)* }
// CHECK-DAG: %spirv.JointMatrixINTEL._int_12_12_0_3 = type { [12 x [12 x [1 x [4 x i32]]]] addrspace(4)* }
// CHECK-DAG: %spirv.JointMatrixINTEL._char_48_12_3_3 = type { [48 x [12 x [4 x [4 x i8]]]] addrspace(4)* }

#include <iostream>
#include <sycl/sycl.hpp>
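For context, an illustrative sketch of the kind of joint_matrix declarations that produce the five-dimension mangled types checked above under the default version-1 path (the kernel, names, and layout mapping below are assumptions based on the matrix-jit.hpp API, not copied from the test):

// Sketch only: hypothetical declarations matching the CHECK-DAG patterns above,
// assuming joint_matrix<T, Rows, Cols, Layout, Group> from matrix-jit.hpp and
// that layout value 3 corresponds to matrix_layout::packed_b.
#include <sycl/sycl.hpp>

using namespace sycl;
using namespace sycl::ext::oneapi::experimental::matrix;

int main() {
  queue q;
  q.submit([&](handler &cgh) {
    cgh.parallel_for(nd_range<2>({1, 8}, {1, 8}), [=](nd_item<2> item) {
      sub_group sg = item.get_sub_group();
      joint_matrix<int8_t, 12, 48> tA(sg);  // _char_12_48_0_3 (row_major, subgroup)
      joint_matrix<int8_t, 48, 12, matrix_layout::packed_b> tB(sg); // _char_48_12_3_3
      joint_matrix<int32_t, 12, 12> tC(sg); // _int_12_12_0_3
    });
  });
  q.wait();
  return 0;
}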
2 changes: 1 addition & 1 deletion sycl/test/matrix/query.cpp
@@ -1,4 +1,4 @@
// RUN: %clangxx -DSYCL_EXT_ONEAPI_MATRIX=1 -fsycl -o query %s
// RUN: %clangxx -fsycl -o query %s
#include <iostream>
#include <sycl/sycl.hpp>
