Commit c2df36d

llama : consistently catch and throw only exceptions deriving from std::exception (#1599)
Co-authored-by: Georgi Gerganov <[email protected]>
1 parent 9d0693b commit c2df36d
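
The change replaces throws of bare std::string values (and of the std::string returned by format()) with std::runtime_error, and widens the corresponding catch sites from const std::string & to const std::exception &, reporting the error via what(). A minimal sketch of the resulting pattern follows; the names load_or_throw and load_model are made up for illustration and are not part of llama.cpp.

#include <cstdio>
#include <stdexcept>
#include <string>

// Hypothetical example, not code from this commit.
// Internal helper: signal failure with an exception type derived from std::exception.
static void load_or_throw(const std::string & path) {
    if (path.empty()) {
        throw std::runtime_error("model path is empty");
    }
    // ... real loading work would go here ...
}

// C-style API boundary: translate any std::exception into an error code,
// mirroring how llama_model_quantize() and llama_apply_lora_from_file()
// handle errors in the diff below.
int load_model(const char * path) {
    try {
        load_or_throw(path ? path : "");
        return 0;
    } catch (const std::exception & err) {
        fprintf(stderr, "failed to load model: %s\n", err.what());
        return 1;
    }
}

int main() {
    return load_model("");  // prints "failed to load model: model path is empty"
}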

File tree

1 file changed: +30 -29 lines changed


llama.cpp

Lines changed: 30 additions & 29 deletions
@@ -289,15 +289,15 @@ template <typename T>
 static T checked_mul(T a, T b) {
     T ret = a * b;
     if (a != 0 && ret / a != b) {
-        throw format("overflow multiplying %llu * %llu",
-                     (unsigned long long) a, (unsigned long long) b);
+        throw std::runtime_error(format("overflow multiplying %llu * %llu",
+                                        (unsigned long long) a, (unsigned long long) b));
     }
     return ret;
 }
 
 static size_t checked_div(size_t a, size_t b) {
     if (b == 0 || a % b != 0) {
-        throw format("error dividing %zu / %zu", a, b);
+        throw std::runtime_error(format("error dividing %zu / %zu", a, b));
     }
     return a / b;
 }
@@ -361,7 +361,7 @@ struct llama_load_tensor {
         const auto & first_shard = shards.at(0);
         for (const auto & shard : shards) {
             if (shard.type != first_shard.type) {
-                throw format("inconsistent tensor shard type in '%s'", name.c_str());
+                throw std::runtime_error(format("inconsistent tensor shard type in '%s'", name.c_str()));
             }
         }
         type = first_shard.type;
@@ -384,8 +384,8 @@ struct llama_load_tensor {
         const auto & first_shard = shards.at(0);
         for (const auto & shard : shards) {
             if (shard.ne != first_shard.ne) {
-                throw format("inconsistent tensor shard shape in '%s': first was %s, other was %s",
-                             name.c_str(), llama_format_tensor_shape(first_shard.ne).c_str(), llama_format_tensor_shape(shard.ne).c_str());
+                throw std::runtime_error(format("inconsistent tensor shard shape in '%s': first was %s, other was %s",
+                                                name.c_str(), llama_format_tensor_shape(first_shard.ne).c_str(), llama_format_tensor_shape(shard.ne).c_str()));
             }
         }
         ne = first_shard.ne;
@@ -463,8 +463,8 @@ struct llama_file_loader {
             }
         }
 
-        throw format("unknown (magic, version) combination: %08x, %08x; is this really a GGML file?",
-                     magic, version);
+        throw std::runtime_error(format("unknown (magic, version) combination: %08x, %08x; is this really a GGML file?",
+                                        magic, version));
     }
     void read_hparams() {
         hparams.n_vocab = file.read_u32();
@@ -504,7 +504,7 @@ struct llama_file_loader {
             file.read_raw(shard.ne.data(), sizeof(shard.ne[0]) * n_dims);
             std::string name = file.read_string(name_len);
             if (n_dims < 1 || n_dims > 2) {
-                throw format("llama.cpp: tensor '%s' should not be %u-dimensional", name.c_str(), n_dims);
+                throw std::runtime_error(format("llama.cpp: tensor '%s' should not be %u-dimensional", name.c_str(), n_dims));
             }
             switch (shard.type) {
             case GGML_TYPE_F32:
@@ -521,7 +521,7 @@ struct llama_file_loader {
             case GGML_TYPE_Q6_K:
                 break;
             default: {
-                throw format("unrecognized tensor type %u\n", shard.type);
+                throw std::runtime_error(format("unrecognized tensor type %u\n", shard.type));
             }
             }
 
@@ -630,7 +630,7 @@ struct llama_model_loader {
             auto * ith_file = new llama_file_loader(fname.c_str(), i, tensors_map);
             file_loaders.emplace_back(ith_file);
             if (ith_file->hparams != first_file->hparams) {
-                throw format("llama.cpp: hparams inconsistent between files");
+                throw std::runtime_error(format("llama.cpp: hparams inconsistent between files"));
             }
         }
         if (!llama_mmap::SUPPORTED) {
@@ -660,7 +660,7 @@ struct llama_model_loader {
     uint32_t guess_n_parts() const {
         auto it = tensors_map.name_to_idx.find("tok_embeddings.weight");
         if (it == tensors_map.name_to_idx.end()) {
-            throw std::string("missing tok_embeddings.weight");
+            throw std::runtime_error(std::string("missing tok_embeddings.weight"));
         }
         const llama_load_tensor & lt = tensors_map.tensors.at(it->second);
         return file_loaders.at(0)->hparams.n_embd / lt.shards.at(0).ne.at(0);
@@ -677,12 +677,12 @@ struct llama_model_loader {
     struct ggml_tensor * get_tensor(const std::string & name, const std::vector<uint32_t> & ne, ggml_backend backend) {
         auto it = tensors_map.name_to_idx.find(name);
         if (it == tensors_map.name_to_idx.end()) {
-            throw format("llama.cpp: tensor '%s' is missing from model", name.c_str());
+            throw std::runtime_error(std::runtime_error(format("llama.cpp: tensor '%s' is missing from model", name.c_str())));
         }
         llama_load_tensor & lt = tensors_map.tensors.at(it->second);
         if (lt.ne != ne) {
-            throw format("llama.cpp: tensor '%s' has wrong shape; expected %s, got %s",
-                         name.c_str(), llama_format_tensor_shape(ne).c_str(), llama_format_tensor_shape(lt.ne).c_str());
+            throw std::runtime_error(format("llama.cpp: tensor '%s' has wrong shape; expected %s, got %s",
+                                            name.c_str(), llama_format_tensor_shape(ne).c_str(), llama_format_tensor_shape(lt.ne).c_str()));
         }
 
         return get_tensor_for(lt, backend);
@@ -706,7 +706,7 @@ struct llama_model_loader {
 
     void done_getting_tensors() const {
         if (num_ggml_tensors_created != tensors_map.tensors.size()) {
-            throw std::string("llama.cpp: file contained more tensors than expected");
+            throw std::runtime_error(std::string("llama.cpp: file contained more tensors than expected"));
         }
     }
 
@@ -994,15 +994,15 @@ static void llama_model_load_internal(
         if (hparams.ftype != LLAMA_FTYPE_ALL_F32 &&
             hparams.ftype != LLAMA_FTYPE_MOSTLY_F16 &&
             hparams.ftype != LLAMA_FTYPE_MOSTLY_Q8_0) {
-            throw format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1405)");
+            throw std::runtime_error(format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1405)"));
         }
     }
 
     if (file_version < LLAMA_FILE_VERSION_GGJT_V3) {
         if (hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_0 ||
             hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_1 ||
             hparams.ftype == LLAMA_FTYPE_MOSTLY_Q8_0) {
-            throw format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1508)");
+            throw std::runtime_error(format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1508)"));
         }
     }
 
@@ -1033,7 +1033,7 @@ static void llama_model_load_internal(
 
         model.ctx = ggml_init(params);
         if (!model.ctx) {
-            throw format("ggml_init() failed");
+            throw std::runtime_error(format("ggml_init() failed"));
         }
     }
 
@@ -1214,8 +1214,8 @@ static bool llama_model_load(
         llama_model_load_internal(fname, lctx, n_ctx, n_gpu_layers, memory_type, use_mmap, use_mlock,
                                   vocab_only, progress_callback, progress_callback_user_data);
         return true;
-    } catch (const std::string & err) {
-        fprintf(stderr, "error loading model: %s\n", err.c_str());
+    } catch (const std::exception & err) {
+        fprintf(stderr, "error loading model: %s\n", err.what());
         return false;
     }
 }
@@ -2120,17 +2120,18 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         case LLAMA_FTYPE_MOSTLY_Q5_0: quantized_type = GGML_TYPE_Q5_0; break;
         case LLAMA_FTYPE_MOSTLY_Q5_1: quantized_type = GGML_TYPE_Q5_1; break;
         case LLAMA_FTYPE_MOSTLY_Q8_0: quantized_type = GGML_TYPE_Q8_0; break;
+
         // K-quants
-        case LLAMA_FTYPE_MOSTLY_Q2_K: quantized_type = GGML_TYPE_Q2_K; break;
+        case LLAMA_FTYPE_MOSTLY_Q2_K:   quantized_type = GGML_TYPE_Q2_K; break;
         case LLAMA_FTYPE_MOSTLY_Q3_K_S:
         case LLAMA_FTYPE_MOSTLY_Q3_K_M:
         case LLAMA_FTYPE_MOSTLY_Q3_K_L: quantized_type = GGML_TYPE_Q3_K; break;
         case LLAMA_FTYPE_MOSTLY_Q4_K_S:
         case LLAMA_FTYPE_MOSTLY_Q4_K_M: quantized_type = GGML_TYPE_Q4_K; break;
         case LLAMA_FTYPE_MOSTLY_Q5_K_S:
         case LLAMA_FTYPE_MOSTLY_Q5_K_M: quantized_type = GGML_TYPE_Q5_K; break;
-        case LLAMA_FTYPE_MOSTLY_Q6_K: quantized_type = GGML_TYPE_Q6_K; break;
-        default: throw format("invalid output file type %d\n", ftype);
+        case LLAMA_FTYPE_MOSTLY_Q6_K:   quantized_type = GGML_TYPE_Q6_K; break;
+        default: throw std::runtime_error(format("invalid output file type %d\n", ftype));
         }
 
     if (nthread <= 0) {
@@ -2231,7 +2232,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                 f32_data[i] = ggml_fp16_to_fp32(f16_data[i]);
             }
         } else {
-            throw format("type %s unsupported for integer quantization", ggml_type_name(tensor.type));
+            throw std::runtime_error(format("type %s unsupported for integer quantization", ggml_type_name(tensor.type)));
         }
 
         printf("quantizing .. ");
@@ -2433,8 +2434,8 @@ int llama_model_quantize(
     try {
         llama_model_quantize_internal(fname_inp, fname_out, ftype, nthread);
         return 0;
-    } catch (const std::string & err) {
-        fprintf(stderr, "%s: failed to quantize: %s\n", __func__, err.c_str());
+    } catch (const std::exception & err) {
+        fprintf(stderr, "%s: failed to quantize: %s\n", __func__, err.what());
         return 1;
     }
 }
@@ -2687,8 +2688,8 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
 int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lora, const char * path_base_model, int n_threads) {
     try {
         return llama_apply_lora_from_file_internal(ctx, path_lora, path_base_model, n_threads);
-    } catch (const std::string & err) {
-        fprintf(stderr, "%s: failed to apply lora adapter: %s\n", __func__, err.c_str());
+    } catch (const std::exception & err) {
+        fprintf(stderr, "%s: failed to apply lora adapter: %s\n", __func__, err.what());
         return 1;
     }
 }
