@@ -289,15 +289,15 @@ template <typename T>
 static T checked_mul(T a, T b) {
     T ret = a * b;
     if (a != 0 && ret / a != b) {
-        throw format("overflow multiplying %llu * %llu",
-                     (unsigned long long) a, (unsigned long long) b);
+        throw std::runtime_error(format("overflow multiplying %llu * %llu",
+                     (unsigned long long) a, (unsigned long long) b));
     }
     return ret;
 }
 
 static size_t checked_div(size_t a, size_t b) {
     if (b == 0 || a % b != 0) {
-        throw format("error dividing %zu / %zu", a, b);
+        throw std::runtime_error(format("error dividing %zu / %zu", a, b));
     }
     return a / b;
 }
@@ -361,7 +361,7 @@ struct llama_load_tensor {
         const auto & first_shard = shards.at(0);
         for (const auto & shard : shards) {
             if (shard.type != first_shard.type) {
-                throw format("inconsistent tensor shard type in '%s'", name.c_str());
+                throw std::runtime_error(format("inconsistent tensor shard type in '%s'", name.c_str()));
             }
         }
         type = first_shard.type;
@@ -384,8 +384,8 @@ struct llama_load_tensor {
         const auto & first_shard = shards.at(0);
         for (const auto & shard : shards) {
             if (shard.ne != first_shard.ne) {
-                throw format("inconsistent tensor shard shape in '%s': first was %s, other was %s",
-                             name.c_str(), llama_format_tensor_shape(first_shard.ne).c_str(), llama_format_tensor_shape(shard.ne).c_str());
+                throw std::runtime_error(format("inconsistent tensor shard shape in '%s': first was %s, other was %s",
+                             name.c_str(), llama_format_tensor_shape(first_shard.ne).c_str(), llama_format_tensor_shape(shard.ne).c_str()));
             }
         }
         ne = first_shard.ne;
@@ -463,8 +463,8 @@ struct llama_file_loader {
             }
         }
 
-        throw format("unknown (magic, version) combination: %08x, %08x; is this really a GGML file?",
-                     magic, version);
+        throw std::runtime_error(format("unknown (magic, version) combination: %08x, %08x; is this really a GGML file?",
+                     magic, version));
     }
     void read_hparams() {
         hparams.n_vocab = file.read_u32();
@@ -504,7 +504,7 @@ struct llama_file_loader {
             file.read_raw(shard.ne.data(), sizeof(shard.ne[0]) * n_dims);
             std::string name = file.read_string(name_len);
             if (n_dims < 1 || n_dims > 2) {
-                throw format("llama.cpp: tensor '%s' should not be %u-dimensional", name.c_str(), n_dims);
+                throw std::runtime_error(format("llama.cpp: tensor '%s' should not be %u-dimensional", name.c_str(), n_dims));
             }
             switch (shard.type) {
                 case GGML_TYPE_F32:
@@ -521,7 +521,7 @@ struct llama_file_loader {
                 case GGML_TYPE_Q6_K:
                     break;
                 default: {
-                    throw format("unrecognized tensor type %u\n", shard.type);
+                    throw std::runtime_error(format("unrecognized tensor type %u\n", shard.type));
                 }
             }
 
@@ -630,7 +630,7 @@ struct llama_model_loader {
             auto * ith_file = new llama_file_loader(fname.c_str(), i, tensors_map);
             file_loaders.emplace_back(ith_file);
             if (ith_file->hparams != first_file->hparams) {
-                throw format("llama.cpp: hparams inconsistent between files");
+                throw std::runtime_error(format("llama.cpp: hparams inconsistent between files"));
             }
         }
         if (!llama_mmap::SUPPORTED) {
@@ -660,7 +660,7 @@ struct llama_model_loader {
     uint32_t guess_n_parts() const {
         auto it = tensors_map.name_to_idx.find("tok_embeddings.weight");
         if (it == tensors_map.name_to_idx.end()) {
-            throw std::string("missing tok_embeddings.weight");
+            throw std::runtime_error(std::string("missing tok_embeddings.weight"));
         }
         const llama_load_tensor & lt = tensors_map.tensors.at(it->second);
         return file_loaders.at(0)->hparams.n_embd / lt.shards.at(0).ne.at(0);
@@ -677,12 +677,12 @@ struct llama_model_loader {
     struct ggml_tensor * get_tensor(const std::string & name, const std::vector<uint32_t> & ne, ggml_backend backend) {
         auto it = tensors_map.name_to_idx.find(name);
         if (it == tensors_map.name_to_idx.end()) {
-            throw format("llama.cpp: tensor '%s' is missing from model", name.c_str());
+            throw std::runtime_error(std::runtime_error(format("llama.cpp: tensor '%s' is missing from model", name.c_str())));
         }
         llama_load_tensor & lt = tensors_map.tensors.at(it->second);
         if (lt.ne != ne) {
-            throw format("llama.cpp: tensor '%s' has wrong shape; expected %s, got %s",
-                         name.c_str(), llama_format_tensor_shape(ne).c_str(), llama_format_tensor_shape(lt.ne).c_str());
+            throw std::runtime_error(format("llama.cpp: tensor '%s' has wrong shape; expected %s, got %s",
+                         name.c_str(), llama_format_tensor_shape(ne).c_str(), llama_format_tensor_shape(lt.ne).c_str()));
         }
 
         return get_tensor_for(lt, backend);
@@ -706,7 +706,7 @@ struct llama_model_loader {
 
     void done_getting_tensors() const {
         if (num_ggml_tensors_created != tensors_map.tensors.size()) {
-            throw std::string("llama.cpp: file contained more tensors than expected");
+            throw std::runtime_error(std::string("llama.cpp: file contained more tensors than expected"));
         }
     }
 
@@ -994,15 +994,15 @@ static void llama_model_load_internal(
         if (hparams.ftype != LLAMA_FTYPE_ALL_F32 &&
             hparams.ftype != LLAMA_FTYPE_MOSTLY_F16 &&
             hparams.ftype != LLAMA_FTYPE_MOSTLY_Q8_0) {
-            throw format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1405)");
+            throw std::runtime_error(format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1405)"));
         }
     }
 
     if (file_version < LLAMA_FILE_VERSION_GGJT_V3) {
         if (hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_0 ||
             hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_1 ||
             hparams.ftype == LLAMA_FTYPE_MOSTLY_Q8_0) {
-            throw format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1508)");
+            throw std::runtime_error(format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1508)"));
         }
     }
 
@@ -1033,7 +1033,7 @@ static void llama_model_load_internal(
 
         model.ctx = ggml_init(params);
         if (!model.ctx) {
-            throw format("ggml_init() failed");
+            throw std::runtime_error(format("ggml_init() failed"));
         }
     }
 
@@ -1214,8 +1214,8 @@ static bool llama_model_load(
         llama_model_load_internal(fname, lctx, n_ctx, n_gpu_layers, memory_type, use_mmap, use_mlock,
                                   vocab_only, progress_callback, progress_callback_user_data);
         return true;
-    } catch (const std::string & err) {
-        fprintf(stderr, "error loading model: %s\n", err.c_str());
+    } catch (const std::exception & err) {
+        fprintf(stderr, "error loading model: %s\n", err.what());
         return false;
     }
 }
@@ -2120,17 +2120,18 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         case LLAMA_FTYPE_MOSTLY_Q5_0: quantized_type = GGML_TYPE_Q5_0; break;
         case LLAMA_FTYPE_MOSTLY_Q5_1: quantized_type = GGML_TYPE_Q5_1; break;
         case LLAMA_FTYPE_MOSTLY_Q8_0: quantized_type = GGML_TYPE_Q8_0; break;
+
         // K-quants
-        case LLAMA_FTYPE_MOSTLY_Q2_K: quantized_type = GGML_TYPE_Q2_K; break;
+        case LLAMA_FTYPE_MOSTLY_Q2_K:   quantized_type = GGML_TYPE_Q2_K; break;
         case LLAMA_FTYPE_MOSTLY_Q3_K_S:
         case LLAMA_FTYPE_MOSTLY_Q3_K_M:
         case LLAMA_FTYPE_MOSTLY_Q3_K_L: quantized_type = GGML_TYPE_Q3_K; break;
         case LLAMA_FTYPE_MOSTLY_Q4_K_S:
         case LLAMA_FTYPE_MOSTLY_Q4_K_M: quantized_type = GGML_TYPE_Q4_K; break;
         case LLAMA_FTYPE_MOSTLY_Q5_K_S:
         case LLAMA_FTYPE_MOSTLY_Q5_K_M: quantized_type = GGML_TYPE_Q5_K; break;
-        case LLAMA_FTYPE_MOSTLY_Q6_K: quantized_type = GGML_TYPE_Q6_K; break;
-        default: throw format("invalid output file type %d\n", ftype);
+        case LLAMA_FTYPE_MOSTLY_Q6_K:   quantized_type = GGML_TYPE_Q6_K; break;
+        default: throw std::runtime_error(format("invalid output file type %d\n", ftype));
     }
 
     if (nthread <= 0) {
if (nthread <= 0 ) {
@@ -2231,7 +2232,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
2231
2232
f32_data[i] = ggml_fp16_to_fp32 (f16_data[i]);
2232
2233
}
2233
2234
} else {
2234
- throw format (" type %s unsupported for integer quantization" , ggml_type_name (tensor.type ));
2235
+ throw std::runtime_error ( format (" type %s unsupported for integer quantization" , ggml_type_name (tensor.type ) ));
2235
2236
}
2236
2237
2237
2238
printf (" quantizing .. " );
@@ -2433,8 +2434,8 @@ int llama_model_quantize(
     try {
         llama_model_quantize_internal(fname_inp, fname_out, ftype, nthread);
         return 0;
-    } catch (const std::string & err) {
-        fprintf(stderr, "%s: failed to quantize: %s\n", __func__, err.c_str());
+    } catch (const std::exception & err) {
+        fprintf(stderr, "%s: failed to quantize: %s\n", __func__, err.what());
         return 1;
     }
 }
@@ -2687,8 +2688,8 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
 int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lora, const char * path_base_model, int n_threads) {
     try {
         return llama_apply_lora_from_file_internal(ctx, path_lora, path_base_model, n_threads);
-    } catch (const std::string & err) {
-        fprintf(stderr, "%s: failed to apply lora adapter: %s\n", __func__, err.c_str());
+    } catch (const std::exception & err) {
+        fprintf(stderr, "%s: failed to apply lora adapter: %s\n", __func__, err.what());
         return 1;
     }
 }
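
The pattern applied throughout this diff: every error path switches from `throw format(...)` or `throw std::string(...)` to `throw std::runtime_error(format(...))`, so the `catch` sites can take `const std::exception &` and report via `err.what()`. The following is a minimal, self-contained sketch of that idiom, not the actual llama.cpp code; the `format()` helper below is a hypothetical stand-in for llama.cpp's printf-style formatter.

```cpp
#include <cstdarg>
#include <cstdio>
#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical stand-in for llama.cpp's printf-style format() helper.
static std::string format(const char * fmt, ...) {
    va_list ap, ap2;
    va_start(ap, fmt);
    va_copy(ap2, ap);
    const int size = vsnprintf(nullptr, 0, fmt, ap);  // measure required length
    std::vector<char> buf(size + 1);
    vsnprintf(buf.data(), buf.size(), fmt, ap2);       // format into the buffer
    va_end(ap2);
    va_end(ap);
    return std::string(buf.data(), size);
}

// Same contract as checked_div in the diff: throwing std::runtime_error
// (instead of a bare std::string) keeps the error inside the std::exception
// hierarchy.
static size_t checked_div(size_t a, size_t b) {
    if (b == 0 || a % b != 0) {
        throw std::runtime_error(format("error dividing %zu / %zu", a, b));
    }
    return a / b;
}

int main() {
    try {
        checked_div(7, 3); // not evenly divisible -> throws
    } catch (const std::exception & err) {
        // Caller-side pattern after this change: catch std::exception, print err.what().
        fprintf(stderr, "error: %s\n", err.what());
        return 1;
    }
    return 0;
}
```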