1 file changed: +6 -3 lines changed
Original file line number | Diff line number | Diff line change
@@ -5975,7 +5975,7 @@ static void ggml_cuda_op_mul_mat(
5975
5975
5976
5976
// if multiple devices are used they need to wait for the main device
5977
5977
// here an event is recorded that signals that the main device has finished calculating the input data
5978
- if (split) {
5978
+ if (split && g_device_count > 1 ) {
5979
5979
CUDA_CHECK (cudaSetDevice (g_main_device));
5980
5980
CUDA_CHECK (cudaEventRecord (src0_extra->events [g_main_device][0 ], g_cudaStreams[g_main_device][0 ]));
5981
5981
}
@@ -6114,10 +6114,13 @@ static void ggml_cuda_op_mul_mat(
6114
6114
}
6115
6115
6116
6116
// main device waits for all other devices to be finished
6117
- if (split) {
6117
+ if (split && g_device_count > 1 ) {
6118
+ int64_t is_max = (ne11 + MUL_MAT_SRC1_COL_STRIDE - 1 ) / MUL_MAT_SRC1_COL_STRIDE;
6119
+ is_max = is_max <= MAX_STREAMS ? is_max : MAX_STREAMS;
6120
+
6118
6121
CUDA_CHECK (cudaSetDevice (g_main_device));
6119
6122
for (int64_t id = 0 ; id < g_device_count; ++id) {
6120
- for (int64_t is = 0 ; is < MAX_STREAMS ; ++is) {
6123
+ for (int64_t is = 0 ; is < is_max ; ++is) {
6121
6124
CUDA_CHECK (cudaStreamWaitEvent (g_cudaStreams[g_main_device][0 ], src0_extra->events [id][is]));
6122
6125
}
6123
6126
}
You can’t perform that action at this time.
0 commit comments