@@ -192,7 +192,6 @@ joint_matrix_fill(Group,
192
192
#if defined(__NVPTX__)
193
193
res.cuda_impl .wi_marray = v;
194
194
#elif defined(__HIP_PLATFORM_AMD_MFMA__)
195
- std::ignore = sg;
196
195
sycl::ext::oneapi::detail::joint_matrix_apply (res.hip_impl ,
197
196
[=](T) { return v; });
198
197
#else
@@ -219,7 +218,7 @@ template <
219
218
std::enable_if_t <std::is_same<S, std::remove_const_t <T>>::value, bool > =
220
219
true >
221
220
inline __SYCL_ALWAYS_INLINE void joint_matrix_load (
222
- Group,
221
+ Group &sg ,
223
222
joint_matrix<Group, S, use::accumulator, NumRows, NumCols,
224
223
sycl::ext::oneapi::experimental::matrix::layout::dynamic> &res,
225
224
multi_ptr<T, Space, IsDecorated> src, size_t stride,
@@ -228,6 +227,7 @@ inline __SYCL_ALWAYS_INLINE void joint_matrix_load(
228
227
static_assert (Space != access::address_space::private_space,
229
228
" Joint Matrix doesn't support load from private memory!" );
230
229
#if defined(__NVPTX__)
230
+ std::ignore = sg;
231
231
sycl::ext::oneapi::detail::load_accumulator_cuda (res.cuda_impl , src, stride,
232
232
Layout);
233
233
#elif defined(__HIP_PLATFORM_AMD_MFMA__)
@@ -266,6 +266,7 @@ inline __SYCL_ALWAYS_INLINE void joint_matrix_load(
266
266
}
267
267
#endif // defined(__NVPTX__)
268
268
#else
269
+ std::ignore = sg;
269
270
std::ignore = res;
270
271
std::ignore = src;
271
272
std::ignore = stride;
@@ -284,13 +285,14 @@ template <
284
285
std::is_same<std::remove_const_t <T>, float >::value),
285
286
bool > = true >
286
287
inline __SYCL_ALWAYS_INLINE void
287
- joint_matrix_load (Group,
288
+ joint_matrix_load (Group &sg ,
288
289
joint_matrix<Group, S, Use, NumRows, NumCols, Layout> &res,
289
290
multi_ptr<T, Space, IsDecorated> src, size_t stride) {
290
291
#if defined(__SYCL_DEVICE_ONLY__)
291
292
static_assert (Space != access::address_space::private_space,
292
293
" Joint Matrix doesn't support load from private memory!" );
293
294
#if defined(__NVPTX__)
295
+ std::ignore = sg;
294
296
sycl::ext::oneapi::detail::load_multiplicand_cuda<S, T, NumRows, NumCols, Use,
295
297
Layout, Space>(
296
298
res.cuda_impl , src, stride);
@@ -320,7 +322,7 @@ joint_matrix_load(Group,
320
322
template <typename Group, typename T, size_t NumRows, size_t NumCols,
321
323
access::address_space Space, access::decorated IsDecorated>
322
324
inline __SYCL_ALWAYS_INLINE void joint_matrix_store (
323
- Group,
325
+ Group &sg ,
324
326
const joint_matrix<Group, T, use::accumulator, NumRows, NumCols,
325
327
sycl::ext::oneapi::experimental::matrix::layout::dynamic>
326
328
&src,
@@ -330,6 +332,7 @@ inline __SYCL_ALWAYS_INLINE void joint_matrix_store(
330
332
static_assert (Space != access::address_space::private_space,
331
333
" Joint Matrix doesn't support store to private memory!" );
332
334
#if defined(__NVPTX__)
335
+ std::ignore = sg;
333
336
sycl::ext::oneapi::detail::joint_matrix_store_cuda<T, NumRows, NumCols,
334
337
Space>(src.cuda_impl , dst,
335
338
stride, Layout);
@@ -403,13 +406,9 @@ inline __SYCL_ALWAYS_INLINE void joint_matrix_mad(
403
406
}
404
407
#elif defined(__HIP_PLATFORM_AMD_MFMA__)
405
408
if constexpr (std::is_same<Ta, Tb>::value) {
406
- joint_matrix<Group, Tc, use::accumulator, M, N,
407
- sycl::ext::oneapi::experimental::matrix::layout::dynamic>
408
- D;
409
409
sycl::ext::oneapi::detail::joint_matrix_mad_hip<Ta, Tc, M, K, N, LayoutA,
410
410
LayoutB>(
411
411
D.hip_impl , A.hip_impl , B.hip_impl , C.hip_impl );
412
- return D;
413
412
} else {
414
413
assert (false && " Ta != Tb : In the HIP backend joint_matrix_mad "
415
414
" requires that joint_matrix data types Ta and Tb match" );
0 commit comments