@@ -4653,16 +4653,7 @@ static void llm_load_vocab(
 
         // for now, only BPE models have pre-tokenizers
         if (vocab.type == LLAMA_VOCAB_TYPE_BPE) {
-            if (tokenizer_pre.empty()) {
-                LLAMA_LOG_WARN("%s: missing pre-tokenizer type, using: 'default'\n", __func__);
-                LLAMA_LOG_WARN("%s: \n", __func__);
-                LLAMA_LOG_WARN("%s: ************************************ \n", __func__);
-                LLAMA_LOG_WARN("%s: GENERATION QUALITY WILL BE DEGRADED! \n", __func__);
-                LLAMA_LOG_WARN("%s: CONSIDER REGENERATING THE MODEL \n", __func__);
-                LLAMA_LOG_WARN("%s: ************************************ \n", __func__);
-                LLAMA_LOG_WARN("%s: \n", __func__);
-                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
-            } else if (
+            if (
                     tokenizer_pre == "default") {
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
             } else if (
@@ -4715,7 +4706,8 @@ static void llm_load_vocab(
                     tokenizer_pre == "smaug-bpe") {
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_SMAUG;
             } else {
-                throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
+                LLAMA_LOG_WARN("%s: missing or unrecognized pre-tokenizer type, using: 'default'\n", __func__);
+                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
             }
         } else {
             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
@@ -5569,7 +5561,7 @@ static bool llm_load_tensors(
                         layer.attn_norm_2   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM_2, "weight", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
                         layer.attn_norm_2_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM_2, "bias", i),   {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
 
-                        layer.ffn_up = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff});
+                        layer.ffn_up   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd, n_ff});
                         layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
 
                         layer.ffn_down = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd});
@@ -6631,7 +6623,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
         }
     } catch (const std::exception & err) {
         LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what());
-        return -1;
+        throw;
     }
 
     return 0;
@@ -16254,16 +16246,23 @@ struct llama_model * llama_load_model_from_file(
         }
         model->rpc_servers.push_back(servers);
     }
-    int status = llama_model_load(path_model, *model, params);
-    GGML_ASSERT(status <= 0);
-    if (status < 0) {
-        if (status == -1) {
-            LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
-        } else if (status == -2) {
-            LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
+
+    try {
+        int status = llama_model_load(path_model, *model, params);
+        GGML_ASSERT(status <= 0);
+        if (status < 0) {
+            if (status == -1) {
+                LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
+            } else if (status == -2) {
+                LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
+            }
+            delete model;
+            return nullptr;
         }
+    } catch (...) {
+        LLAMA_LOG_ERROR("%s: exception loading model\n", __func__);
         delete model;
-        return nullptr;
+        throw;
     }
 
     return model;