Skip to content

Commit fd14f72

Browse files
[SYCL][Matrix] Move joint_matrix's implementation to experimental::matrix namespace
1 parent ac54a4a commit fd14f72

File tree

5 files changed

+80
-70
lines changed

5 files changed

+80
-70
lines changed

sycl/include/CL/sycl.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
#include <CL/sycl/ONEAPI/filter_selector.hpp>
1414
#include <CL/sycl/ONEAPI/function_pointer.hpp>
1515
#include <CL/sycl/ONEAPI/group_algorithm.hpp>
16-
#include <CL/sycl/ONEAPI/intel_matrix/matrix.hpp>
16+
#include <CL/sycl/ONEAPI/matrix/matrix.hpp>
1717
#include <CL/sycl/ONEAPI/reduction.hpp>
1818
#include <CL/sycl/ONEAPI/sub_group.hpp>
1919
#include <CL/sycl/accessor.hpp>

sycl/include/CL/sycl/ONEAPI/intel_matrix/matrix-amx.hpp renamed to sycl/include/CL/sycl/ONEAPI/matrix/matrix-amx.hpp

Lines changed: 74 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ template <typename T> class submatrix {
4646
short rows, cols;
4747
};
4848

49+
// TODO: we are adding it this way until sycl::dynamic_extent gets implemented.
4950
constexpr size_t dynamic_extent = std::numeric_limits<size_t>::max();
5051

5152
template <typename T> struct elems_per_dword {
@@ -62,7 +63,7 @@ ELEMS_PER_DWORD(unsigned short, 2)
6263

6364
} // namespace detail
6465

65-
namespace matrix {
66+
namespace experimental::matrix {
6667
using namespace cl::sycl;
6768
using namespace cl::sycl::ONEAPI;
6869

@@ -196,51 +197,55 @@ struct joint_matrix<
196197
joint_matrix(Group sg) {}
197198
};
198199

199-
} // namespace matrix
200+
} // namespace experimental::matrix
200201

201202
namespace detail {
202203

203204
template <typename Group, typename T, size_t NumRows, size_t NumCols,
204-
matrix::matrix_layout Layout>
205-
inline __SYCL_ALWAYS_INLINE static
206-
typename std::enable_if<(NumRows > matrix::tile_size) ||
207-
(NumCols * sizeof(T) / 4 > matrix::tile_size),
208-
void>::type
209-
submatrix_load(detail::submatrix<T> &sub_m,
210-
matrix::joint_matrix<Group, T, NumRows, NumCols, Layout> jm,
211-
uint32_t row, uint32_t col, size_t stride,
212-
matrix::matrix_layout layout, bool shouldreload) {
205+
experimental::matrix::matrix_layout Layout>
206+
inline __SYCL_ALWAYS_INLINE static typename std::enable_if<
207+
(NumRows > experimental::matrix::tile_size) ||
208+
(NumCols * sizeof(T) / 4 > experimental::matrix::tile_size),
209+
void>::type
210+
submatrix_load(
211+
detail::submatrix<T> &sub_m,
212+
experimental::matrix::joint_matrix<Group, T, NumRows, NumCols, Layout> jm,
213+
uint32_t row, uint32_t col, size_t stride,
214+
experimental::matrix::matrix_layout layout, bool shouldreload) {
213215
uint32_t offset = (row * stride + col);
214216
T *ptr = reinterpret_cast<T *>(jm.raw_storage);
215217
ptr += offset;
216218
stride *= sizeof(T);
217-
sub_m.rows = matrix::tile_size;
218-
sub_m.cols = matrix::tile_size * 4;
219-
sub_m.tile = matrix::tileloadd64_internal(
219+
sub_m.rows = experimental::matrix::tile_size;
220+
sub_m.cols = experimental::matrix::tile_size * 4;
221+
sub_m.tile = experimental::matrix::tileloadd64_internal(
220222
sub_m.rows, sub_m.cols, reinterpret_cast<char *>(ptr), stride);
221223
}
222224

223225
template <typename Group, typename T, size_t NumRows, size_t NumCols,
224-
matrix::matrix_layout Layout>
225-
inline __SYCL_ALWAYS_INLINE static
226-
typename std::enable_if<(NumRows <= matrix::tile_size) &&
227-
(NumCols * sizeof(T) / 4 <= matrix::tile_size),
228-
void>::type
229-
submatrix_load(detail::submatrix<T> &sub_m,
230-
matrix::joint_matrix<Group, T, NumRows, NumCols, Layout> &jm,
231-
uint32_t row, uint32_t col, size_t stride,
232-
matrix::matrix_layout layout, bool shouldreload) {
226+
experimental::matrix::matrix_layout Layout>
227+
inline __SYCL_ALWAYS_INLINE static typename std::enable_if<
228+
(NumRows <= experimental::matrix::tile_size) &&
229+
(NumCols * sizeof(T) / 4 <= experimental::matrix::tile_size),
230+
void>::type
231+
submatrix_load(
232+
detail::submatrix<T> &sub_m,
233+
experimental::matrix::joint_matrix<Group, T, NumRows, NumCols, Layout> &jm,
234+
uint32_t row, uint32_t col, size_t stride,
235+
experimental::matrix::matrix_layout layout, bool shouldreload) {
233236
if (shouldreload) {
234-
// Force sub_m.tile's shape to be matrix::tile_size * matrix::tile_size * 4
235-
int8_t NewjmC[matrix::tile_size * matrix::tile_size * 4];
236-
matrix::tilestored64_internal(NumRows, NumCols * sizeof(T),
237-
reinterpret_cast<char *>(NewjmC),
238-
matrix::tile_size * 4, jm.tile);
239-
sub_m.rows = matrix::tile_size;
240-
sub_m.cols = matrix::tile_size * 4;
241-
sub_m.tile = matrix::tileloadd64_internal(sub_m.rows, sub_m.cols,
242-
reinterpret_cast<char *>(NewjmC),
243-
matrix::tile_size * 4);
237+
// Force sub_m.tile's shape to be experimental::matrix::tile_size *
238+
// experimental::matrix::tile_size * 4
239+
int8_t NewjmC[experimental::matrix::tile_size *
240+
experimental::matrix::tile_size * 4];
241+
experimental::matrix::tilestored64_internal(
242+
NumRows, NumCols * sizeof(T), reinterpret_cast<char *>(NewjmC),
243+
experimental::matrix::tile_size * 4, jm.tile);
244+
sub_m.rows = experimental::matrix::tile_size;
245+
sub_m.cols = experimental::matrix::tile_size * 4;
246+
sub_m.tile = experimental::matrix::tileloadd64_internal(
247+
sub_m.rows, sub_m.cols, reinterpret_cast<char *>(NewjmC),
248+
experimental::matrix::tile_size * 4);
244249
return;
245250
}
246251
sub_m.rows = NumRows;
@@ -253,63 +258,70 @@ inline __SYCL_ALWAYS_INLINE static void
253258
submatrix_mad(detail::submatrix<int8_t> &sub_ma,
254259
detail::submatrix<int8_t> &sub_mb,
255260
detail::submatrix<int32_t> &sub_mc) {
256-
sub_mc.tile = matrix::tdpbssd_internal(sub_mc.rows, sub_mc.cols, sub_ma.cols,
257-
sub_mc.tile, sub_ma.tile, sub_mb.tile);
261+
sub_mc.tile = experimental::matrix::tdpbssd_internal(
262+
sub_mc.rows, sub_mc.cols, sub_ma.cols, sub_mc.tile, sub_ma.tile,
263+
sub_mb.tile);
258264
}
259265

260266
// This handles cases where T1 is int16(bfloat16), T2 is float.
261267
inline __SYCL_ALWAYS_INLINE static void
262268
submatrix_mad(detail::submatrix<unsigned short> &sub_ma,
263269
detail::submatrix<unsigned short> &sub_mb,
264270
detail::submatrix<float> &sub_mc) {
265-
sub_mc.tile =
266-
matrix::tdpbf16ps_internal(sub_mc.rows, sub_mc.cols, sub_ma.cols,
267-
sub_mc.tile, sub_ma.tile, sub_mb.tile);
271+
sub_mc.tile = experimental::matrix::tdpbf16ps_internal(
272+
sub_mc.rows, sub_mc.cols, sub_ma.cols, sub_mc.tile, sub_ma.tile,
273+
sub_mb.tile);
268274
}
269275

270276
template <typename Group, typename T, size_t NumRows, size_t NumCols>
271277
inline __SYCL_ALWAYS_INLINE static
272-
typename std::enable_if<(NumRows > matrix::tile_size) ||
273-
(NumCols * sizeof(T) / 4 > matrix::tile_size),
278+
typename std::enable_if<(NumRows > experimental::matrix::tile_size) ||
279+
(NumCols * sizeof(T) / 4 >
280+
experimental::matrix::tile_size),
274281
void>::type
275-
submatrix_store(detail::submatrix<T> &sub_m,
276-
matrix::joint_matrix<Group, T, NumRows, NumCols> &jm,
277-
uint32_t row, uint32_t col, size_t stride,
278-
matrix::matrix_layout layout, bool shouldreload) {
282+
submatrix_store(
283+
detail::submatrix<T> &sub_m,
284+
experimental::matrix::joint_matrix<Group, T, NumRows, NumCols> &jm,
285+
uint32_t row, uint32_t col, size_t stride,
286+
experimental::matrix::matrix_layout layout, bool shouldreload) {
279287
uint32_t offset = (row * stride + col);
280288
T *ptr = reinterpret_cast<T *>(jm.raw_storage);
281289
ptr += offset;
282290
stride *= sizeof(T);
283-
matrix::tilestored64_internal(sub_m.rows, sub_m.cols,
284-
reinterpret_cast<char *>(ptr), stride,
285-
sub_m.tile);
291+
experimental::matrix::tilestored64_internal(sub_m.rows, sub_m.cols,
292+
reinterpret_cast<char *>(ptr),
293+
stride, sub_m.tile);
286294
}
287295

288296
template <typename Group, typename T, size_t NumRows, size_t NumCols>
289297
inline __SYCL_ALWAYS_INLINE static
290-
typename std::enable_if<(NumRows <= matrix::tile_size) &&
291-
(NumCols * sizeof(T) / 4 <= matrix::tile_size),
298+
typename std::enable_if<(NumRows <= experimental::matrix::tile_size) &&
299+
(NumCols * sizeof(T) / 4 <=
300+
experimental::matrix::tile_size),
292301
void>::type
293-
submatrix_store(detail::submatrix<T> &sub_m,
294-
matrix::joint_matrix<Group, T, NumRows, NumCols> &jm,
295-
uint32_t row, uint32_t col, size_t stride,
296-
matrix::matrix_layout layout, bool shouldreload) {
302+
submatrix_store(
303+
detail::submatrix<T> &sub_m,
304+
experimental::matrix::joint_matrix<Group, T, NumRows, NumCols> &jm,
305+
uint32_t row, uint32_t col, size_t stride,
306+
experimental::matrix::matrix_layout layout, bool shouldreload) {
297307
if (shouldreload) {
298-
int8_t NewjmC[matrix::tile_size * matrix::tile_size * 4];
299-
matrix::tilestored64_internal(matrix::tile_size, matrix::tile_size * 4,
300-
reinterpret_cast<char *>(NewjmC),
301-
matrix::tile_size * 4, sub_m.tile);
302-
jm.tile = matrix::tileloadd64_internal(NumRows, NumCols * sizeof(T),
303-
reinterpret_cast<char *>(NewjmC),
304-
matrix::tile_size * 4);
308+
int8_t NewjmC[experimental::matrix::tile_size *
309+
experimental::matrix::tile_size * 4];
310+
experimental::matrix::tilestored64_internal(
311+
experimental::matrix::tile_size, experimental::matrix::tile_size * 4,
312+
reinterpret_cast<char *>(NewjmC), experimental::matrix::tile_size * 4,
313+
sub_m.tile);
314+
jm.tile = experimental::matrix::tileloadd64_internal(
315+
NumRows, NumCols * sizeof(T), reinterpret_cast<char *>(NewjmC),
316+
experimental::matrix::tile_size * 4);
305317
return;
306318
}
307319
jm.tile = sub_m.tile;
308320
}
309321

310322
} // namespace detail
311323

312-
namespace matrix {
324+
namespace experimental::matrix {
313325

314326
// This handles cases where matrix can't be accommodated by a tile
315327
template <typename Group, typename T, size_t NumRows, size_t NumCols,
@@ -439,7 +451,7 @@ joint_matrix_mad(Group sg,
439451
return;
440452
}
441453

442-
} // namespace matrix
454+
} // namespace experimental::matrix
443455
} // namespace intel
444456
} // namespace ext
445457
} // namespace sycl

sycl/include/CL/sycl/ONEAPI/intel_matrix/matrix.hpp renamed to sycl/include/CL/sycl/ONEAPI/matrix/matrix.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,6 @@
1818

1919
#if (SYCL_EXT_ONEAPI_MATRIX == 1)
2020
#if defined(__AMXTILE__) && defined(__AMXINT8__) && defined(__AMXBF16__)
21-
#include <CL/sycl/ONEAPI/intel_matrix/matrix-amx.hpp>
21+
#include <CL/sycl/ONEAPI/matrix/matrix-amx.hpp>
2222
#endif
2323
#endif

sycl/test/on-device/extensions/matrix-amx-bf16-test.cpp renamed to sycl/test/matrix/matrix-amx-bf16-test.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,8 @@
33
#if (SYCL_EXT_ONEAPI_MATRIX == 1)
44
#include <iostream>
55

6-
using namespace cl::sycl;
7-
using namespace cl::sycl::intel;
8-
using namespace cl::sycl::ext::intel::matrix;
6+
using namespace sycl::intel;
7+
using namespace sycl::ext::intel::experimental::matrix;
98

109
#define TILE_SZ 16
1110
#define TM (3 * TILE_SZ-1)

sycl/test/on-device/extensions/matrix-amx-int8-test.cpp renamed to sycl/test/matrix/matrix-amx-int8-test.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,8 @@
33
#if (SYCL_EXT_ONEAPI_MATRIX == 1)
44
#include <iostream>
55

6-
using namespace cl::sycl;
7-
using namespace cl::sycl::intel;
8-
using namespace cl::sycl::ext::intel::matrix;
6+
using namespace sycl::intel;
7+
using namespace sycl::ext::intel::experimental::matrix;
98

109
#define TILE_SZ 16
1110
#define TM (4 * TILE_SZ-4)

0 commit comments

Comments
 (0)