Skip to content

Commit 54da1a2

Browse files
try fewer event waiting
1 parent 54f041b · commit 54da1a2

File tree

1 file changed: +6 −3 lines changed

1 file changed: +6 −3 lines changed

ggml-cuda.cu

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5975,7 +5975,7 @@ static void ggml_cuda_op_mul_mat(
5975 5975
5976 5976       // if multiple devices are used they need to wait for the main device
5977 5977       // here an event is recorded that signals that the main device has finished calculating the input data
5978      -     if (split) {
     5978 +     if (split && g_device_count > 1) {
5979 5979           CUDA_CHECK(cudaSetDevice(g_main_device));
5980 5980           CUDA_CHECK(cudaEventRecord(src0_extra->events[g_main_device][0], g_cudaStreams[g_main_device][0]));
5981 5981       }
@@ -6114,10 +6114,13 @@ static void ggml_cuda_op_mul_mat(
6114 6114       }
6115 6115
6116 6116       // main device waits for all other devices to be finished
6117      -     if (split) {
     6117 +     if (split && g_device_count > 1) {
     6118 +         int64_t is_max = (ne11 + MUL_MAT_SRC1_COL_STRIDE - 1) / MUL_MAT_SRC1_COL_STRIDE;
     6119 +         is_max = is_max <= MAX_STREAMS ? is_max : MAX_STREAMS;
     6120 +
6118 6121           CUDA_CHECK(cudaSetDevice(g_main_device));
6119 6122           for (int64_t id = 0; id < g_device_count; ++id) {
6120      -             for (int64_t is = 0; is < MAX_STREAMS; ++is) {
     6123 +             for (int64_t is = 0; is < is_max; ++is) {
6121 6124                   CUDA_CHECK(cudaStreamWaitEvent(g_cudaStreams[g_main_device][0], src0_extra->events[id][is]));
6122 6125               }
6123 6126           }

0 commit comments

Comments (0)