
Commit 178342c

Implement a workaround to gemm issue in OneMKL (#2082)
* Implement a workaround to gemm issue in OneMKL
* Fix codespell issue
* Enable w/a also for float dtype
* Add Battlemage G21 architecture to w/a
* Disable w/a for Arrow Lake
* Remove Lunar Lake architecture from the w/a
* Applied the pre-commit hooks
* Update dpnp/backend/extensions/blas/gemm.hpp
* Applied pre-commit black hook
* Add more clarification to the comment
* Remove excess semicolon
* Removed const keyword from review comment because ext_oneapi_architecture_is() isn't marked as const
* Applied review comment
* Updated the changelog

Co-authored-by: Oleksandr Pavlyk <[email protected]>
1 parent 9b37b02 commit 178342c
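The failure mode being worked around, per the commit message and the inline comment added in `dpnp_utils_linearalgebra.py` below, is an incorrect `dpnp.matmul` result on Lunar Lake and Battlemage G21 GPUs when an input is small and has a non-zero element offset into its USM buffer. A minimal sketch of that pattern (array names and shapes are illustrative, not taken from the failing eig/eigh tests):

```python
# Hypothetical repro pattern (shapes/values illustrative): a small,
# offset view used as a gemm input.
import dpnp

a = dpnp.arange(20, dtype=dpnp.float32).reshape(4, 5)
x = a[1:3, 1:3]  # 2x2 view: element offset 6, size 4 (< 16 elements)
y = dpnp.ones((2, 2), dtype=dpnp.float32)

# On the affected architectures this call could previously return wrong
# values; with this commit, x is copied to a fresh buffer first.
r = dpnp.matmul(x, y)
```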

File tree

5 files changed: +54 -0 lines


CHANGELOG.md

Lines changed: 1 addition & 0 deletions
@@ -128,6 +128,7 @@ In addition, this release completes implementation of `dpnp.fft` module and adds
 * Fixed a crash in `dpnp.choose` caused by missing control of releasing temporary allocated device memory [#2063](https://github.com/IntelPython/dpnp/pull/2063)
 * Resolved compilation warning and error while building in debug mode [#2066](https://github.com/IntelPython/dpnp/pull/2066)
 * Fixed an issue with asynchronous execution in `dpnp.fft` module [#2067](https://github.com/IntelPython/dpnp/pull/2067)
+* Added a workaround to fix the incorrect result from `dpnp.matmul` computing on Lunar Lake or Arrow Lake Battlemage graphics [#2082](https://github.com/IntelPython/dpnp/pull/2082)

 ## [0.15.0] - 05/25/2024

dpnp/backend/extensions/blas/blas_py.cpp

Lines changed: 7 additions & 0 deletions
@@ -127,6 +127,13 @@ PYBIND11_MODULE(_blas_impl, m)
              py::arg("resultC"), py::arg("depends") = py::list());
    }

+    {
+        m.def("_is_lnl_bm_architecture", &blas_ns::_is_lnl_bm_architecture,
+              "Return ``True`` if SYCL device belongs to either Lunar Lake or "
+              "Battlemage G21 Intel GPU architecture",
+              py::arg("device"));
+    }
+
    {
        m.def("_gemm_batch", &blas_ns::gemm_batch,
              "Call `gemm_batch` from OneMKL BLAS library to compute "

dpnp/backend/extensions/blas/gemm.cpp

Lines changed: 16 additions & 0 deletions
@@ -323,6 +323,22 @@ std::tuple<sycl::event, sycl::event, bool>
     return std::make_tuple(args_ev, gemm_ev, is_row_major);
 }

+bool _is_lnl_bm_architecture(const sycl::device &dev)
+{
+#if !defined(USE_ONEMKL_CUBLAS)
+    namespace syclex = sycl::ext::oneapi::experimental;
+    const auto arch = dev.get_info<syclex::info::device::architecture>();
+    switch (arch) {
+    case syclex::architecture::intel_gpu_lnl_m: /* Lunar Lake */
+    case syclex::architecture::intel_gpu_bmg_g21: /* Battlemage G21 */
+        return true;
+    default:
+        return false;
+    }
+#endif // !defined(USE_ONEMKL_CUBLAS)
+    return false;
+}
+
 template <typename fnT, typename Tab, typename Tc>
 struct GemmContigFactory
 {

dpnp/backend/extensions/blas/gemm.hpp

Lines changed: 2 additions & 0 deletions
@@ -39,6 +39,8 @@ extern std::tuple<sycl::event, sycl::event, bool>
          const dpctl::tensor::usm_ndarray &resultC,
          const std::vector<sycl::event> &depends);

+extern bool _is_lnl_bm_architecture(const sycl::device &dev);
+
 extern std::tuple<sycl::event, sycl::event, bool>
     gemm_batch(sycl::queue &exec_q,
                const dpctl::tensor::usm_ndarray &matrixA,

dpnp/dpnp_utils/dpnp_utils_linearalgebra.py

Lines changed: 28 additions & 0 deletions
@@ -894,6 +894,34 @@ def dpnp_matmul(
             )
             _manager.add_event_pair(ht_ev, gemv_ev)
         elif call_flag == "gemm":
+            # MKLD-17976: due to known issue in OneMKL on Lunar Lake and
+            # Battlemage G21 Intel GPU architectures, it forces
+            # to implement a temporary workaround with extra copying of
+            # an input array in case when it has a small size and
+            # non-zero offset
+            # The issue was detected by failing tests for eig/eigh
+            # TODO: remove the workaround once OneMKL issue is resolved
+            if bi._is_lnl_bm_architecture(exec_q.get_sycl_device()):
+
+                def _need_to_copy(a):
+                    a_usm = dpnp.get_usm_ndarray(a)
+                    if a_usm._element_offset > 0 and a_usm.size < 16:
+                        return True
+                    return False
+
+                x1 = _copy_array(
+                    x1,
+                    copy_flag=_need_to_copy(x1),
+                    dtype=compute_dtype,
+                    order=res_order,
+                )
+                x2 = _copy_array(
+                    x2,
+                    copy_flag=_need_to_copy(x2),
+                    dtype=compute_dtype,
+                    order=res_order,
+                )
+
             result = _gemm_matmul(
                 exec_q,
                 x1,
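The `_need_to_copy` predicate keys off the underlying `usm_ndarray`'s private `_element_offset` attribute (the same one the diff itself relies on) and its total element count. A small sketch of a view that trips both conditions:

```python
import dpnp

a = dpnp.ones((4, 4), dtype=dpnp.float32)
v = a[1:3, 1:3]  # 2x2 view into a's buffer
usm = dpnp.get_usm_ndarray(v)

print(usm._element_offset)  # 5 -> non-zero offset into the USM allocation
print(usm.size)             # 4 -> under the 16-element threshold
# On Lunar Lake / Battlemage G21, such a view is copied before gemm.
```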
