Skip to content

Commit a7e15b0

Browse files
committed
[SYCL] Move to Compile Time backend selection on oneMKL Interface for NVIDIA backend
Move to compile time selection to backend to avoid latency at run time. Add it to all mkl gemm calls and only for NVIDIA backend. Signed-off-by: nscipione <[email protected]>
1 parent 0f77aae commit a7e15b0

File tree

4 files changed

+32
-6
lines changed

4 files changed

+32
-6
lines changed

ggml/src/ggml-sycl/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@ else()
6868
target_link_libraries(ggml-sycl PRIVATE sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
6969
elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
7070
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
71-
target_link_libraries(ggml-sycl PRIVATE sycl pthread m dl onemkl)
71+
add_compile_definitions(GGML_SYCL_NVIDIA)
72+
target_link_libraries(ggml-sycl PRIVATE sycl pthread m dl onemkl_blas_cublas)
7273
elseif (GGML_SYCL_TARGET STREQUAL "AMD")
7374
if (NOT GGML_SYCL_DEVICE_ARCH)
7475
message(ERROR "Can't enable SYCL hip backend, GGML_SYCL_DEVICE_ARCH has not been set.")

ggml/src/ggml-sycl/dpct/helper.hpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1690,7 +1690,12 @@ namespace dpct
16901690
auto data_b = get_memory<const Tb>(b);
16911691
auto data_c = get_memory<Tc>(c);
16921692
oneapi::mkl::blas::column_major::gemm(
1693-
q, a_trans, b_trans, m, n, k, alpha_value, data_a, lda,
1693+
#ifdef GGML_SYCL_NVIDIA
1694+
oneapi::mkl::backend_selector<oneapi::mkl::backend::cublas>{q},
1695+
#else
1696+
q,
1697+
#endif
1698+
a_trans, b_trans, m, n, k, alpha_value, data_a, lda,
16941699
data_b, ldb, beta_value, data_c, ldc);
16951700
}
16961701

@@ -1755,7 +1760,12 @@ namespace dpct
17551760
matrix_info->groupsize_info = batch_size;
17561761

17571762
sycl::event e = oneapi::mkl::blas::column_major::gemm_batch(
1758-
q, matrix_info->transpose_info, matrix_info->transpose_info + 1,
1763+
#ifdef GGML_SYCL_NVIDIA
1764+
oneapi::mkl::backend_selector<oneapi::mkl::backend::cublas>{q},
1765+
#else
1766+
q,
1767+
#endif
1768+
matrix_info->transpose_info, matrix_info->transpose_info + 1,
17591769
matrix_info->size_info, matrix_info->size_info + 1,
17601770
matrix_info->size_info + 2, matrix_info->value_info,
17611771
reinterpret_cast<const Ta **>(a), matrix_info->ld_info,
@@ -1784,7 +1794,12 @@ namespace dpct
17841794
auto data_b = get_memory<const Tb>(b);
17851795
auto data_c = get_memory<Tc>(c);
17861796
oneapi::mkl::blas::column_major::gemm_batch(
1787-
q, a_trans, b_trans, m, n, k, alpha_value, data_a, lda,
1797+
#ifdef GGML_SYCL_NVIDIA
1798+
oneapi::mkl::backend_selector<oneapi::mkl::backend::cublas>{q},
1799+
#else
1800+
q,
1801+
#endif
1802+
a_trans, b_trans, m, n, k, alpha_value, data_a, lda,
17881803
stride_a, data_b, ldb, stride_b, beta_value,
17891804
data_c, ldc, stride_c, batch_size);
17901805
}

ggml/src/ggml-sycl/ggml-sycl.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2562,7 +2562,12 @@ inline void ggml_sycl_op_mul_mat_sycl(
25622562
const float beta = 0.0f;
25632563
#if !GGML_SYCL_DNNL
25642564
SYCL_CHECK(CHECK_TRY_ERROR(oneapi::mkl::blas::column_major::gemm(
2565-
*stream, oneapi::mkl::transpose::trans,
2565+
#ifdef GGML_SYCL_NVIDIA
2566+
oneapi::mkl::backend_selector<oneapi::mkl::backend::cublas>{*stream},
2567+
#else
2568+
*stream,
2569+
#endif
2570+
oneapi::mkl::transpose::trans,
25662571
oneapi::mkl::transpose::nontrans, row_diff, src1_ncols, ne10,
25672572
dpct::get_value(&alpha, *stream), src0_ddf_i, ne00,
25682573
src1_ddf1_i, ne10, dpct::get_value(&beta, *stream),

ggml/src/ggml-sycl/outprod.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,12 @@ void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, const ggml_tensor* sr
4040

4141
try {
4242
// Perform matrix multiplication using oneMKL GEMM
43-
oneapi::mkl::blas::column_major::gemm(*stream,
43+
oneapi::mkl::blas::column_major::gemm(
44+
#ifdef GGML_SYCL_NVIDIA
45+
oneapi::mkl::backend_selector<oneapi::mkl::backend::cublas>{*stream},
46+
#else
47+
*stream,
48+
#endif
4449
oneapi::mkl::transpose::nontrans, src1_op,
4550
ne0, ne1, ne01,
4651
alpha,

0 commit comments

Comments
 (0)