@@ -371,6 +371,8 @@ enum llm_kv {
     LLM_KV_TOKENIZER_SUFFIX_ID,
     LLM_KV_TOKENIZER_MIDDLE_ID,
     LLM_KV_TOKENIZER_EOT_ID,
+
+    LLM_KV_TRAINING_TYPE,
 };

 static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
@@ -464,6 +466,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_TOKENIZER_SUFFIX_ID,  "tokenizer.ggml.suffix_token_id" },
     { LLM_KV_TOKENIZER_MIDDLE_ID,  "tokenizer.ggml.middle_token_id" },
     { LLM_KV_TOKENIZER_EOT_ID,     "tokenizer.ggml.eot_token_id"    },
+
+    { LLM_KV_TRAINING_TYPE,        "training.type"                  },
 };

 struct LLM_KV {
@@ -18519,8 +18523,6 @@ static void llama_lora_adapter_init_internal(struct llama_model * model, const c
     static const int n_out_tensors = 5; // see llama_model
     LLAMA_LOG_INFO("%s: applying lora adapter from '%s' - please wait ...\n", __func__, path_lora);

-    // TODO: check lora base model arch
-
     ggml_context * ctx = nullptr;
     struct gguf_init_params meta_gguf_params = {
         /* .no_alloc = */ false,
@@ -18532,6 +18534,25 @@ static void llama_lora_adapter_init_internal(struct llama_model * model, const c
         throw std::exception();
     }

+    // check metadata
+    {
+        auto get_kv_str = [&](std::string key) -> std::string {
+            std::vector<char> str_buf(32, 0); // we only get the arch, so no need big buffer here
+            int id = gguf_find_key(ctx_gguf, key.c_str());
+            return id < 0 ? "" : std::string(gguf_get_val_str(ctx_gguf, id));
+        };
+        LLM_KV llm_kv = LLM_KV(LLM_ARCH_UNKNOWN);
+        auto lora_arch_name = get_kv_str(llm_kv(LLM_KV_GENERAL_ARCHITECTURE));
+        auto lora_arch = llm_arch_from_string(lora_arch_name);
+        if (lora_arch != model->arch) {
+            throw std::runtime_error("model arch and LoRA arch mismatch");
+        }
+        auto train_type = get_kv_str(llm_kv(LLM_KV_TRAINING_TYPE));
+        if (train_type != "finetune_lora") {
+            throw std::runtime_error("expect training.type to be finetune_lora, but got: " + train_type);
+        }
+    }
+
     // calculate n_tensors_per_layer
     int n_tensors_per_layer = 0;
     {
@@ -18542,7 +18563,6 @@ static void llama_lora_adapter_init_internal(struct llama_model * model, const c
             if (il == 0) n_tensors_per_layer++;
         }
     }
-    // printf("n_tensors_per_layer %d\n", n_tensors_per_layer);

     // count layer buffer types
     std::map<ggml_backend_buffer_type_t, int> buft_tensor_count;
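The metadata check added above rejects an adapter before any tensors are read: general.architecture must match the loaded base model, and training.type must be the string "finetune_lora". As a hedged illustration of the same keys from the consumer side, here is a minimal standalone sketch (not part of this patch) that dumps training.type from an adapter file using only the public gguf calls; it assumes the era's ggml, where the gguf API was exposed via ggml.h.

    // inspect the training.type metadata key of a GGUF adapter file
    #include "ggml.h"

    #include <cstdio>

    int main(int argc, char ** argv) {
        if (argc < 2) {
            fprintf(stderr, "usage: %s adapter.gguf\n", argv[0]);
            return 1;
        }
        struct gguf_init_params params = {
            /* .no_alloc = */ true,    // metadata only, do not load tensor data
            /* .ctx      = */ nullptr,
        };
        struct gguf_context * ctx = gguf_init_from_file(argv[1], params);
        if (ctx == nullptr) {
            fprintf(stderr, "failed to read gguf metadata from %s\n", argv[1]);
            return 1;
        }
        // gguf_find_key returns a negative id when the key is absent
        const int id = gguf_find_key(ctx, "training.type");
        printf("training.type = %s\n", id < 0 ? "(key missing)" : gguf_get_val_str(ctx, id));
        gguf_free(ctx);
        return 0;
    }

Using no_alloc = true with a null ggml_context pointer mirrors what a metadata-only pass wants: the key-value header is parsed, but no buffers are allocated for the tensor payload.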