Skip to content

Commit c42b303

Browse files
try fewer event waiting
1 parent: 54f041b; commit: c42b303

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

ggml-cuda.cu

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5975,7 +5975,7 @@ static void ggml_cuda_op_mul_mat(
5975 5975

5976 5976
// if multiple devices are used they need to wait for the main device
5977 5977
// here an event is recorded that signals that the main device has finished calculating the input data
5978-
if (split) {
5978+
if (split && g_device_count > 1) {
5979 5979
CUDA_CHECK(cudaSetDevice(g_main_device));
5980 5980
CUDA_CHECK(cudaEventRecord(src0_extra->events[g_main_device][0], g_cudaStreams[g_main_device][0]));
5981 5981
}
@@ -6114,10 +6114,12 @@ static void ggml_cuda_op_mul_mat(
6114 6114
}
6115 6115

6116 6116
// main device waits for all other devices to be finished
6117-
if (split) {
6117+
if (split && g_device_count > 1) {
6118+
const int64_t is_max = ne11/MUL_MAT_SRC1_COL_STRIDE <= MAX_STREAMS ? ne11/MUL_MAT_SRC1_COL_STRIDE : MAX_STREAMS;
6119+
6118 6120
CUDA_CHECK(cudaSetDevice(g_main_device));
6119 6121
for (int64_t id = 0; id < g_device_count; ++id) {
6120-
for (int64_t is = 0; is < MAX_STREAMS; ++is) {
6122+
for (int64_t is = 0; is < is_max; ++is) {
6121 6123
CUDA_CHECK(cudaStreamWaitEvent(g_cudaStreams[g_main_device][0], src0_extra->events[id][is]));
6122 6124
}
6123 6125
}

0 commit comments

Comments
 (0)