Skip to content

Commit 683396c

Browse files
committed
arg : clean up handling --mmproj with -hf
1 parent ecda2ec commit 683396c

File tree

2 files changed

+37
-15
lines changed

2 files changed

+37
-15
lines changed

common/arg.cpp

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -641,11 +641,16 @@ static struct common_hf_file_res common_get_hf_file(const std::string &, const s
641641
// utils
642642
//
643643

644-
static void common_params_handle_model(
644+
struct handle_model_result {
645+
bool found_mmproj = false;
646+
common_params_model mmproj;
647+
};
648+
649+
static handle_model_result common_params_handle_model(
645650
struct common_params_model & model,
646651
const std::string & bearer_token,
647-
const std::string & model_path_default,
648-
bool is_mmproj = false) { // TODO: move is_mmproj to an enum when we have more files?
652+
const std::string & model_path_default) {
653+
handle_model_result result;
649654
// handle pre-fill default model path and url based on hf_repo and hf_file
650655
{
651656
if (!model.hf_repo.empty()) {
@@ -657,7 +662,12 @@ static void common_params_handle_model(
657662
exit(1); // built without CURL, error message already printed
658663
}
659664
model.hf_repo = auto_detected.repo;
660-
model.hf_file = is_mmproj ? auto_detected.mmprojFile : auto_detected.ggufFile;
665+
model.hf_file = auto_detected.ggufFile;
666+
if (!auto_detected.mmprojFile.empty()) {
667+
result.found_mmproj = true;
668+
result.mmproj.hf_repo = model.hf_repo;
669+
result.mmproj.hf_file = auto_detected.mmprojFile;
670+
}
661671
} else {
662672
model.hf_file = model.path;
663673
}
@@ -694,6 +704,8 @@ static void common_params_handle_model(
694704
exit(1);
695705
}
696706
}
707+
708+
return result;
697709
}
698710

699711
const std::vector<ggml_type> kv_cache_types = {
@@ -827,16 +839,17 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
827839
throw std::invalid_argument("error: --prompt-cache-all not supported in interactive mode yet\n");
828840
}
829841

830-
common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH);
831-
common_params_handle_model(params.speculative.model, params.hf_token, "");
832-
common_params_handle_model(params.vocoder.model, params.hf_token, "");
833-
834-
// allow --mmproj to be set from -hf
835-
// assuming that mmproj is always in the same repo as text model
836-
if (!params.model.hf_repo.empty() && ctx_arg.ex == LLAMA_EXAMPLE_LLAVA) {
837-
params.mmproj.hf_repo = params.model.hf_repo;
842+
// handle model and download
843+
{
844+
auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH);
845+
// optionally, handle mmproj model when -hf is specified
846+
if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) {
847+
params.mmproj = res.mmproj;
848+
}
849+
common_params_handle_model(params.mmproj, params.hf_token, "");
850+
common_params_handle_model(params.speculative.model, params.hf_token, "");
851+
common_params_handle_model(params.vocoder.model, params.hf_token, "");
838852
}
839-
common_params_handle_model(params.mmproj, params.hf_token, "", true);
840853

841854
if (params.escape) {
842855
string_process_escapes(params.prompt);
@@ -2095,18 +2108,25 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
20952108
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONT_BATCHING"));
20962109
add_opt(common_arg(
20972110
{"--mmproj"}, "FILE",
2098-
"path to a multimodal projector file for LLaVA. see examples/llava/README.md",
2111+
"path to a multimodal projector file. see examples/llava/README.md",
20992112
[](common_params & params, const std::string & value) {
21002113
params.mmproj.path = value;
21012114
}
21022115
).set_examples({LLAMA_EXAMPLE_LLAVA}));
21032116
add_opt(common_arg(
21042117
{"--mmproj-url"}, "URL",
2105-
"URL to a multimodal projector file for LLaVA. see examples/llava/README.md",
2118+
"URL to a multimodal projector file. see examples/llava/README.md",
21062119
[](common_params & params, const std::string & value) {
21072120
params.mmproj.url = value;
21082121
}
21092122
).set_examples({LLAMA_EXAMPLE_LLAVA}));
2123+
add_opt(common_arg(
2124+
{"--no-mmproj"},
2125+
"explicitly disable multimodal projector, useful when using -hf",
2126+
[](common_params & params) {
2127+
params.no_mmproj = true;
2128+
}
2129+
).set_examples({LLAMA_EXAMPLE_LLAVA}));
21102130
add_opt(common_arg(
21112131
{"--image"}, "FILE",
21122132
"path to an image file. use with multimodal models. Specify multiple times for batching",
@@ -2381,6 +2401,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
23812401
add_opt(common_arg(
23822402
{"-hf", "-hfr", "--hf-repo"}, "<user>/<model>[:quant]",
23832403
"Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n"
2404+
"mmproj is also downloaded automatically if available. to disable, add --no-mmproj\n"
23842405
"example: unsloth/phi-4-GGUF:q4_k_m\n"
23852406
"(default: unused)",
23862407
[](common_params & params, const std::string & value) {

common/common.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ struct common_params {
342342

343343
// multimodal models (see examples/llava)
344344
struct common_params_model mmproj;
345+
bool no_mmproj = false; // explicitly disable multimodal model
345346
std::vector<std::string> image; // path to image file(s)
346347

347348
// embedding

0 commit comments

Comments (0)