@@ -46,6 +46,7 @@ template <typename T> class submatrix {
46
46
short rows, cols;
47
47
};
48
48
49
+ // TODO: dynamic_extent is defined locally here until sycl::dynamic_extent is implemented.
49
50
constexpr size_t dynamic_extent = std::numeric_limits<size_t >::max();
50
51
51
52
template <typename T> struct elems_per_dword {
@@ -62,7 +63,7 @@ ELEMS_PER_DWORD(unsigned short, 2)
62
63
63
64
} // namespace detail
64
65
65
- namespace matrix {
66
+ namespace experimental :: matrix {
66
67
using namespace cl ::sycl;
67
68
using namespace cl ::sycl::ONEAPI;
68
69
@@ -196,51 +197,55 @@ struct joint_matrix<
196
197
joint_matrix (Group sg) {}
197
198
};
198
199
199
- } // namespace matrix
200
+ } // namespace experimental::matrix
200
201
201
202
namespace detail {
202
203
203
204
template <typename Group, typename T, size_t NumRows, size_t NumCols,
204
- matrix::matrix_layout Layout>
205
- inline __SYCL_ALWAYS_INLINE static
206
- typename std::enable_if<(NumRows > matrix::tile_size) ||
207
- (NumCols * sizeof (T) / 4 > matrix::tile_size),
208
- void >::type
209
- submatrix_load (detail::submatrix<T> &sub_m,
210
- matrix::joint_matrix<Group, T, NumRows, NumCols, Layout> jm,
211
- uint32_t row, uint32_t col, size_t stride,
212
- matrix::matrix_layout layout, bool shouldreload) {
205
+ experimental::matrix::matrix_layout Layout>
206
+ inline __SYCL_ALWAYS_INLINE static typename std::enable_if<
207
+ (NumRows > experimental::matrix::tile_size) ||
208
+ (NumCols * sizeof (T) / 4 > experimental::matrix::tile_size),
209
+ void >::type
210
+ submatrix_load (
211
+ detail::submatrix<T> &sub_m,
212
+ experimental::matrix::joint_matrix<Group, T, NumRows, NumCols, Layout> jm,
213
+ uint32_t row, uint32_t col, size_t stride,
214
+ experimental::matrix::matrix_layout layout, bool shouldreload) {
213
215
uint32_t offset = (row * stride + col);
214
216
T *ptr = reinterpret_cast <T *>(jm.raw_storage );
215
217
ptr += offset;
216
218
stride *= sizeof (T);
217
- sub_m.rows = matrix::tile_size;
218
- sub_m.cols = matrix::tile_size * 4 ;
219
- sub_m.tile = matrix::tileloadd64_internal (
219
+ sub_m.rows = experimental:: matrix::tile_size;
220
+ sub_m.cols = experimental:: matrix::tile_size * 4 ;
221
+ sub_m.tile = experimental:: matrix::tileloadd64_internal (
220
222
sub_m.rows , sub_m.cols , reinterpret_cast <char *>(ptr), stride);
221
223
}
222
224
223
225
template <typename Group, typename T, size_t NumRows, size_t NumCols,
224
- matrix::matrix_layout Layout>
225
- inline __SYCL_ALWAYS_INLINE static
226
- typename std::enable_if<(NumRows <= matrix::tile_size) &&
227
- (NumCols * sizeof (T) / 4 <= matrix::tile_size),
228
- void >::type
229
- submatrix_load (detail::submatrix<T> &sub_m,
230
- matrix::joint_matrix<Group, T, NumRows, NumCols, Layout> &jm,
231
- uint32_t row, uint32_t col, size_t stride,
232
- matrix::matrix_layout layout, bool shouldreload) {
226
+ experimental::matrix::matrix_layout Layout>
227
+ inline __SYCL_ALWAYS_INLINE static typename std::enable_if<
228
+ (NumRows <= experimental::matrix::tile_size) &&
229
+ (NumCols * sizeof (T) / 4 <= experimental::matrix::tile_size),
230
+ void >::type
231
+ submatrix_load (
232
+ detail::submatrix<T> &sub_m,
233
+ experimental::matrix::joint_matrix<Group, T, NumRows, NumCols, Layout> &jm,
234
+ uint32_t row, uint32_t col, size_t stride,
235
+ experimental::matrix::matrix_layout layout, bool shouldreload) {
233
236
if (shouldreload) {
234
- // Force sub_m.tile's shape to be matrix::tile_size * matrix::tile_size * 4
235
- int8_t NewjmC[matrix::tile_size * matrix::tile_size * 4 ];
236
- matrix::tilestored64_internal (NumRows, NumCols * sizeof (T),
237
- reinterpret_cast <char *>(NewjmC),
238
- matrix::tile_size * 4 , jm.tile );
239
- sub_m.rows = matrix::tile_size;
240
- sub_m.cols = matrix::tile_size * 4 ;
241
- sub_m.tile = matrix::tileloadd64_internal (sub_m.rows , sub_m.cols ,
242
- reinterpret_cast <char *>(NewjmC),
243
- matrix::tile_size * 4 );
237
+ // Force sub_m.tile's shape to be experimental::matrix::tile_size rows by
238
+ // experimental::matrix::tile_size * 4 cols.
239
+ int8_t NewjmC[experimental::matrix::tile_size *
240
+ experimental::matrix::tile_size * 4 ];
241
+ experimental::matrix::tilestored64_internal (
242
+ NumRows, NumCols * sizeof (T), reinterpret_cast <char *>(NewjmC),
243
+ experimental::matrix::tile_size * 4 , jm.tile );
244
+ sub_m.rows = experimental::matrix::tile_size;
245
+ sub_m.cols = experimental::matrix::tile_size * 4 ;
246
+ sub_m.tile = experimental::matrix::tileloadd64_internal (
247
+ sub_m.rows , sub_m.cols , reinterpret_cast <char *>(NewjmC),
248
+ experimental::matrix::tile_size * 4 );
244
249
return ;
245
250
}
246
251
sub_m.rows = NumRows;
@@ -253,63 +258,70 @@ inline __SYCL_ALWAYS_INLINE static void
253
258
submatrix_mad (detail::submatrix<int8_t > &sub_ma,
254
259
detail::submatrix<int8_t > &sub_mb,
255
260
detail::submatrix<int32_t > &sub_mc) {
256
- sub_mc.tile = matrix::tdpbssd_internal (sub_mc.rows , sub_mc.cols , sub_ma.cols ,
257
- sub_mc.tile , sub_ma.tile , sub_mb.tile );
261
+ sub_mc.tile = experimental::matrix::tdpbssd_internal (
262
+ sub_mc.rows , sub_mc.cols , sub_ma.cols , sub_mc.tile , sub_ma.tile ,
263
+ sub_mb.tile );
258
264
}
259
265
260
266
// This handles cases where T1 is unsigned short (bfloat16), T2 is float.
261
267
inline __SYCL_ALWAYS_INLINE static void
262
268
submatrix_mad (detail::submatrix<unsigned short > &sub_ma,
263
269
detail::submatrix<unsigned short > &sub_mb,
264
270
detail::submatrix<float > &sub_mc) {
265
- sub_mc.tile =
266
- matrix::tdpbf16ps_internal ( sub_mc.rows , sub_mc.cols , sub_ma.cols ,
267
- sub_mc. tile , sub_ma. tile , sub_mb.tile );
271
+ sub_mc.tile = experimental::matrix::tdpbf16ps_internal (
272
+ sub_mc.rows , sub_mc.cols , sub_ma.cols , sub_mc. tile , sub_ma. tile ,
273
+ sub_mb.tile );
268
274
}
269
275
270
276
template <typename Group, typename T, size_t NumRows, size_t NumCols>
271
277
inline __SYCL_ALWAYS_INLINE static
272
- typename std::enable_if<(NumRows > matrix::tile_size) ||
273
- (NumCols * sizeof (T) / 4 > matrix::tile_size),
278
+ typename std::enable_if<(NumRows > experimental::matrix::tile_size) ||
279
+ (NumCols * sizeof (T) / 4 >
280
+ experimental::matrix::tile_size),
274
281
void >::type
275
- submatrix_store (detail::submatrix<T> &sub_m,
276
- matrix::joint_matrix<Group, T, NumRows, NumCols> &jm,
277
- uint32_t row, uint32_t col, size_t stride,
278
- matrix::matrix_layout layout, bool shouldreload) {
282
+ submatrix_store (
283
+ detail::submatrix<T> &sub_m,
284
+ experimental::matrix::joint_matrix<Group, T, NumRows, NumCols> &jm,
285
+ uint32_t row, uint32_t col, size_t stride,
286
+ experimental::matrix::matrix_layout layout, bool shouldreload) {
279
287
uint32_t offset = (row * stride + col);
280
288
T *ptr = reinterpret_cast <T *>(jm.raw_storage );
281
289
ptr += offset;
282
290
stride *= sizeof (T);
283
- matrix::tilestored64_internal (sub_m.rows , sub_m.cols ,
284
- reinterpret_cast <char *>(ptr), stride ,
285
- sub_m.tile );
291
+ experimental:: matrix::tilestored64_internal (sub_m.rows , sub_m.cols ,
292
+ reinterpret_cast <char *>(ptr),
293
+ stride, sub_m.tile );
286
294
}
287
295
288
296
template <typename Group, typename T, size_t NumRows, size_t NumCols>
289
297
inline __SYCL_ALWAYS_INLINE static
290
- typename std::enable_if<(NumRows <= matrix::tile_size) &&
291
- (NumCols * sizeof (T) / 4 <= matrix::tile_size),
298
+ typename std::enable_if<(NumRows <= experimental::matrix::tile_size) &&
299
+ (NumCols * sizeof (T) / 4 <=
300
+ experimental::matrix::tile_size),
292
301
void >::type
293
- submatrix_store (detail::submatrix<T> &sub_m,
294
- matrix::joint_matrix<Group, T, NumRows, NumCols> &jm,
295
- uint32_t row, uint32_t col, size_t stride,
296
- matrix::matrix_layout layout, bool shouldreload) {
302
+ submatrix_store (
303
+ detail::submatrix<T> &sub_m,
304
+ experimental::matrix::joint_matrix<Group, T, NumRows, NumCols> &jm,
305
+ uint32_t row, uint32_t col, size_t stride,
306
+ experimental::matrix::matrix_layout layout, bool shouldreload) {
297
307
if (shouldreload) {
298
- int8_t NewjmC[matrix::tile_size * matrix::tile_size * 4 ];
299
- matrix::tilestored64_internal (matrix::tile_size, matrix::tile_size * 4 ,
300
- reinterpret_cast <char *>(NewjmC),
301
- matrix::tile_size * 4 , sub_m.tile );
302
- jm.tile = matrix::tileloadd64_internal (NumRows, NumCols * sizeof (T),
303
- reinterpret_cast <char *>(NewjmC),
304
- matrix::tile_size * 4 );
308
+ int8_t NewjmC[experimental::matrix::tile_size *
309
+ experimental::matrix::tile_size * 4 ];
310
+ experimental::matrix::tilestored64_internal (
311
+ experimental::matrix::tile_size, experimental::matrix::tile_size * 4 ,
312
+ reinterpret_cast <char *>(NewjmC), experimental::matrix::tile_size * 4 ,
313
+ sub_m.tile );
314
+ jm.tile = experimental::matrix::tileloadd64_internal (
315
+ NumRows, NumCols * sizeof (T), reinterpret_cast <char *>(NewjmC),
316
+ experimental::matrix::tile_size * 4 );
305
317
return ;
306
318
}
307
319
jm.tile = sub_m.tile ;
308
320
}
309
321
310
322
} // namespace detail
311
323
312
- namespace matrix {
324
+ namespace experimental :: matrix {
313
325
314
326
// This handles cases where matrix can't be accommodated by a tile
315
327
template <typename Group, typename T, size_t NumRows, size_t NumCols,
@@ -439,7 +451,7 @@ joint_matrix_mad(Group sg,
439
451
return ;
440
452
}
441
453
442
- } // namespace matrix
454
+ } // namespace experimental::matrix
443
455
} // namespace intel
444
456
} // namespace ext
445
457
} // namespace sycl
0 commit comments