Skip to content

Commit 3d7ebf6

Browse files
authored
Vulkan Mixture of Experts (MoE) support (#7628)
* Finish Vulkan mul_mat_id implementation
* Add Vulkan sum_rows and div ops
* Fix MUL_MAT_ID matrix matrix shader
* Fix MUL_MAT_ID matrix vector shader dispatch size
* Fix MUL_MAT_ID matrix vector shader and dispatch code
* Update Vulkan CPU offload for MUL_MAT_ID
* Fix crash when using split mode none and setting a main GPU
1 parent a10cda5 commit 3d7ebf6

File tree

5 files changed

+99976
-40426
lines changed

5 files changed

+99976
-40426
lines changed

common/common.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,9 +1002,9 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
10021002
return true;
10031003
}
10041004
params.main_gpu = std::stoi(argv[i]);
1005-
#ifndef GGML_USE_CUDA_SYCL
1006-
fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL. Setting the main GPU has no effect.\n");
1007-
#endif // GGML_USE_CUDA_SYCL
1005+
#ifndef GGML_USE_CUDA_SYCL_VULKAN
1006+
fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL/Vulkan. Setting the main GPU has no effect.\n");
1007+
#endif // GGML_USE_CUDA_SYCL_VULKAN
10081008
return true;
10091009
}
10101010
if (arg == "--split-mode" || arg == "-sm") {
@@ -1030,9 +1030,9 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
10301030
invalid_param = true;
10311031
return true;
10321032
}
1033-
#ifndef GGML_USE_CUDA_SYCL
1034-
fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL. Setting the split mode has no effect.\n");
1035-
#endif // GGML_USE_CUDA_SYCL
1033+
#ifndef GGML_USE_CUDA_SYCL_VULKAN
1034+
fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL/Vulkan. Setting the split mode has no effect.\n");
1035+
#endif // GGML_USE_CUDA_SYCL_VULKAN
10361036
return true;
10371037
}
10381038
if (arg == "--tensor-split" || arg == "-ts") {

0 commit comments

Comments (0)