1 file changed: +5 -3 lines changed
Columns: original file line number | diff line number | diff line change
@@ -5975,7 +5975,7 @@ static void ggml_cuda_op_mul_mat(
5975
5975
5976
5976
// if multiple devices are used they need to wait for the main device
5977
5977
// here an event is recorded that signals that the main device has finished calculating the input data
5978
- if (split) {
5978
+ if (split && g_device_count > 1 ) {
5979
5979
CUDA_CHECK (cudaSetDevice (g_main_device));
5980
5980
CUDA_CHECK (cudaEventRecord (src0_extra->events [g_main_device][0 ], g_cudaStreams[g_main_device][0 ]));
5981
5981
}
@@ -6114,10 +6114,12 @@ static void ggml_cuda_op_mul_mat(
6114
6114
}
6115
6115
6116
6116
// main device waits for all other devices to be finished
6117
- if (split) {
6117
+ if (split && g_device_count > 1 ) {
6118
+ const int64_t is_max = ne11/MUL_MAT_SRC1_COL_STRIDE <= MAX_STREAMS ? ne11/MUL_MAT_SRC1_COL_STRIDE : MAX_STREAMS;
6119
+
6118
6120
CUDA_CHECK (cudaSetDevice (g_main_device));
6119
6121
for (int64_t id = 0 ; id < g_device_count; ++id) {
6120
- for (int64_t is = 0 ; is < MAX_STREAMS ; ++is) {
6122
+ for (int64_t is = 0 ; is < is_max ; ++is) {
6121
6123
CUDA_CHECK (cudaStreamWaitEvent (g_cudaStreams[g_main_device][0 ], src0_extra->events [id][is]));
6122
6124
}
6123
6125
}
You can’t perform that action at this time.
0 commit comments