Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 1ebf191

Browse files
authored
[SYCL][CUDA] Enabled unified matrix tests (#1334)
1 parent 417b60f commit 1ebf191

File tree

3 files changed: +16 -84 lines changed

3 files changed

+16
-84
lines changed

SYCL/Matrix/element_wise_all_ops_cuda.cpp

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,6 @@
66
//
77
//===----------------------------------------------------------------------===//
88
// REQUIRES: cuda
9-
// Temp xfail: test was merged early.
10-
// XFAIL: cuda
119
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -Xsycl-target-backend --cuda-gpu-arch=sm_80 -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 %s -o %t.out
1210
// RUN: %t.out
1311

@@ -65,15 +63,15 @@ void matrix_verify_op(queue q, big_matrix<T2, M * nWGperDim, N * nWGperDim> &C,
6563

6664
auto sg = spmd_item.get_sub_group();
6765

68-
joint_matrix<T, use::a, M, K, layout::row_major> sub_a;
69-
joint_matrix<T, use::b, K, N, layout::row_major> sub_b;
70-
joint_matrix<T2, use::accumulator, M, N> sub_c;
66+
joint_matrix<sub_group, T, use::a, M, K, layout::row_major> sub_a;
67+
joint_matrix<sub_group, T, use::b, K, N, layout::row_major> sub_b;
68+
joint_matrix<sub_group, T2, use::accumulator, M, N> sub_c;
7169

7270
joint_matrix_fill(sg, sub_a, 3);
7371
joint_matrix_fill(sg, sub_b, 1);
7472
joint_matrix_fill(sg, sub_c, -80);
7573

76-
auto wi_slice_a = sub_a.get_wi_data();
74+
auto wi_slice_a = get_wi_data(sg, sub_a);
7775
for (int i = 0; i < wi_slice_a.length(); i++) {
7876
if constexpr (std::is_same_v<Operation, Logical>) {
7977
if (wi_slice_a[i]) {

SYCL/Matrix/element_wise_wi_marray.cpp

Lines changed: 0 additions & 68 deletions
This file was deleted.

SYCL/Matrix/joint_matrix_tensorcores.cpp

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,5 @@
11

22
// REQUIRES: cuda
3-
// Temp xfail: test was merged early.
4-
// XFAIL: cuda
53
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -Xsycl-target-backend --cuda-gpu-arch=sm_80 -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 %s -o %t.out
64
// RUN: %t.out
75
//
@@ -14,7 +12,7 @@
1412
#include <sycl/sycl.hpp>
1513

1614
using namespace sycl;
17-
using namespace sycl::ext::oneapi::experimental;
15+
using namespace sycl::ext::oneapi;
1816
using namespace sycl::ext::oneapi::experimental::matrix;
1917
constexpr float bf16_eps = 0.00390625;
2018

@@ -146,9 +144,11 @@ void test(queue &q) {
146144
// column id of current submatrix of BIG C matrix
147145
const auto n = item.get_group().get_group_id()[1];
148146

149-
joint_matrix<T3, use::a, M, K, layout::row_major> sub_a;
150-
joint_matrix<T3, use::b, K, N, layout::row_major> sub_b;
151-
joint_matrix<std::remove_const_t<T2>, use::accumulator, M, N> sub_c;
147+
joint_matrix<sub_group, T3, use::a, M, K, layout::row_major> sub_a;
148+
joint_matrix<sub_group, T3, use::b, K, N, layout::row_major> sub_b;
149+
joint_matrix<sub_group, std::remove_const_t<T2>, use::accumulator,
150+
M, N>
151+
sub_c;
152152

153153
joint_matrix_load(sg, sub_c,
154154
accC.get_pointer() + (m * M) * Big_N + n * N,
@@ -165,11 +165,13 @@ void test(queue &q) {
165165

166166
// round values to correct precision if using tf32
167167
if constexpr (std::is_same<T3, precision::tf32>::value) {
168-
auto wi_size = sub_a.wi_marray.size();
169-
assert(wi_size == sub_b.wi_marray.size());
168+
auto wi_size = get_wi_data(sg, sub_a).length();
169+
assert(wi_size == get_wi_data(sg, sub_b).length());
170170
for (auto i = 0; i < wi_size; ++i) {
171-
sub_a.wi_marray[i] = round_to_tf32(sub_a.wi_marray[i]);
172-
sub_b.wi_marray[i] = round_to_tf32(sub_b.wi_marray[i]);
171+
get_wi_data(sg, sub_a)[i] =
172+
round_to_tf32(get_wi_data(sg, sub_a)[i]);
173+
get_wi_data(sg, sub_b)[i] =
174+
round_to_tf32(get_wi_data(sg, sub_b)[i]);
173175
}
174176
}
175177

0 commit comments

Comments (0)