@@ -289,15 +289,15 @@ template <typename T>
 static T checked_mul(T a, T b) {
     T ret = a * b;
     if (a != 0 && ret / a != b) {
-        throw format("overflow multiplying %llu * %llu",
-                     (unsigned long long) a, (unsigned long long) b);
+        throw std::runtime_error(format("overflow multiplying %llu * %llu",
+                     (unsigned long long) a, (unsigned long long) b));
     }
     return ret;
 }
 
 static size_t checked_div(size_t a, size_t b) {
     if (b == 0 || a % b != 0) {
-        throw format("error dividing %zu / %zu", a, b);
+        throw std::runtime_error(format("error dividing %zu / %zu", a, b));
     }
     return a / b;
 }
@@ -361,7 +361,7 @@ struct llama_load_tensor {
         const auto & first_shard = shards.at(0);
         for (const auto & shard : shards) {
             if (shard.type != first_shard.type) {
-                throw format("inconsistent tensor shard type in '%s'", name.c_str());
+                throw std::runtime_error(format("inconsistent tensor shard type in '%s'", name.c_str()));
             }
         }
         type = first_shard.type;
@@ -384,8 +384,8 @@ struct llama_load_tensor {
         const auto & first_shard = shards.at(0);
         for (const auto & shard : shards) {
             if (shard.ne != first_shard.ne) {
-                throw format("inconsistent tensor shard shape in '%s': first was %s, other was %s",
-                             name.c_str(), llama_format_tensor_shape(first_shard.ne).c_str(), llama_format_tensor_shape(shard.ne).c_str());
+                throw std::runtime_error(format("inconsistent tensor shard shape in '%s': first was %s, other was %s",
+                             name.c_str(), llama_format_tensor_shape(first_shard.ne).c_str(), llama_format_tensor_shape(shard.ne).c_str()));
             }
         }
         ne = first_shard.ne;
@@ -463,8 +463,8 @@ struct llama_file_loader {
             }
         }
 
-        throw format("unknown (magic, version) combination: %08x, %08x; is this really a GGML file?",
-                     magic, version);
+        throw std::runtime_error(format("unknown (magic, version) combination: %08x, %08x; is this really a GGML file?",
+                     magic, version));
     }
     void read_hparams() {
         hparams.n_vocab = file.read_u32();
@@ -504,7 +504,7 @@ struct llama_file_loader {
             file.read_raw(shard.ne.data(), sizeof(shard.ne[0]) * n_dims);
             std::string name = file.read_string(name_len);
             if (n_dims < 1 || n_dims > 2) {
-                throw format("llama.cpp: tensor '%s' should not be %u-dimensional", name.c_str(), n_dims);
+                throw std::runtime_error(format("llama.cpp: tensor '%s' should not be %u-dimensional", name.c_str(), n_dims));
             }
             switch (shard.type) {
                 case GGML_TYPE_F32:
@@ -521,7 +521,7 @@ struct llama_file_loader {
                 case GGML_TYPE_Q6_K:
                     break;
                 default: {
-                    throw format("unrecognized tensor type %u\n", shard.type);
+                    throw std::runtime_error(format("unrecognized tensor type %u\n", shard.type));
                 }
             }
 
@@ -630,7 +630,7 @@ struct llama_model_loader {
             auto * ith_file = new llama_file_loader(fname.c_str(), i, tensors_map);
             file_loaders.emplace_back(ith_file);
             if (ith_file->hparams != first_file->hparams) {
-                throw format("llama.cpp: hparams inconsistent between files");
+                throw std::runtime_error(format("llama.cpp: hparams inconsistent between files"));
             }
         }
         if (!llama_mmap::SUPPORTED) {
@@ -660,7 +660,7 @@ struct llama_model_loader {
     uint32_t guess_n_parts() const {
         auto it = tensors_map.name_to_idx.find("tok_embeddings.weight");
         if (it == tensors_map.name_to_idx.end()) {
-            throw std::string("missing tok_embeddings.weight");
+            throw std::runtime_error(std::string("missing tok_embeddings.weight"));
         }
         const llama_load_tensor & lt = tensors_map.tensors.at(it->second);
         return file_loaders.at(0)->hparams.n_embd / lt.shards.at(0).ne.at(0);
@@ -677,12 +677,12 @@ struct llama_model_loader {
     struct ggml_tensor * get_tensor(const std::string & name, const std::vector<uint32_t> & ne, ggml_backend backend) {
         auto it = tensors_map.name_to_idx.find(name);
         if (it == tensors_map.name_to_idx.end()) {
-            throw format("llama.cpp: tensor '%s' is missing from model", name.c_str());
+            throw std::runtime_error(std::runtime_error(format("llama.cpp: tensor '%s' is missing from model", name.c_str())));
         }
         llama_load_tensor & lt = tensors_map.tensors.at(it->second);
         if (lt.ne != ne) {
-            throw format("llama.cpp: tensor '%s' has wrong shape; expected %s, got %s",
-                         name.c_str(), llama_format_tensor_shape(ne).c_str(), llama_format_tensor_shape(lt.ne).c_str());
+            throw std::runtime_error(format("llama.cpp: tensor '%s' has wrong shape; expected %s, got %s",
+                         name.c_str(), llama_format_tensor_shape(ne).c_str(), llama_format_tensor_shape(lt.ne).c_str()));
         }
 
         return get_tensor_for(lt, backend);
@@ -706,7 +706,7 @@ struct llama_model_loader {
 
     void done_getting_tensors() const {
         if (num_ggml_tensors_created != tensors_map.tensors.size()) {
-            throw std::string("llama.cpp: file contained more tensors than expected");
+            throw std::runtime_error(std::string("llama.cpp: file contained more tensors than expected"));
         }
     }
 
@@ -994,15 +994,15 @@ static void llama_model_load_internal(
         if (hparams.ftype != LLAMA_FTYPE_ALL_F32 &&
             hparams.ftype != LLAMA_FTYPE_MOSTLY_F16 &&
             hparams.ftype != LLAMA_FTYPE_MOSTLY_Q8_0) {
-            throw format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1405)");
+            throw std::runtime_error(format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1405)"));
         }
     }
 
     if (file_version < LLAMA_FILE_VERSION_GGJT_V3) {
         if (hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_0 ||
             hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_1 ||
             hparams.ftype == LLAMA_FTYPE_MOSTLY_Q8_0) {
-            throw format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1508)");
+            throw std::runtime_error(format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1508)"));
         }
     }
 
@@ -1033,7 +1033,7 @@ static void llama_model_load_internal(
 
         model.ctx = ggml_init(params);
         if (!model.ctx) {
-            throw format("ggml_init() failed");
+            throw std::runtime_error(format("ggml_init() failed"));
         }
     }
 
@@ -1214,8 +1214,8 @@ static bool llama_model_load(
         llama_model_load_internal(fname, lctx, n_ctx, n_gpu_layers, memory_type, use_mmap, use_mlock,
                                   vocab_only, progress_callback, progress_callback_user_data);
         return true;
-    } catch (const std::string & err) {
-        fprintf(stderr, "error loading model: %s\n", err.c_str());
+    } catch (const std::exception & err) {
+        fprintf(stderr, "error loading model: %s\n", err.what());
         return false;
     }
 }
@@ -2120,17 +2120,18 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         case LLAMA_FTYPE_MOSTLY_Q5_0: quantized_type = GGML_TYPE_Q5_0; break;
         case LLAMA_FTYPE_MOSTLY_Q5_1: quantized_type = GGML_TYPE_Q5_1; break;
         case LLAMA_FTYPE_MOSTLY_Q8_0: quantized_type = GGML_TYPE_Q8_0; break;
+
         // K-quants
-        case LLAMA_FTYPE_MOSTLY_Q2_K: quantized_type = GGML_TYPE_Q2_K; break;
+        case LLAMA_FTYPE_MOSTLY_Q2_K:   quantized_type = GGML_TYPE_Q2_K; break;
         case LLAMA_FTYPE_MOSTLY_Q3_K_S:
         case LLAMA_FTYPE_MOSTLY_Q3_K_M:
         case LLAMA_FTYPE_MOSTLY_Q3_K_L: quantized_type = GGML_TYPE_Q3_K; break;
         case LLAMA_FTYPE_MOSTLY_Q4_K_S:
         case LLAMA_FTYPE_MOSTLY_Q4_K_M: quantized_type = GGML_TYPE_Q4_K; break;
         case LLAMA_FTYPE_MOSTLY_Q5_K_S:
         case LLAMA_FTYPE_MOSTLY_Q5_K_M: quantized_type = GGML_TYPE_Q5_K; break;
-        case LLAMA_FTYPE_MOSTLY_Q6_K: quantized_type = GGML_TYPE_Q6_K; break;
-        default: throw format("invalid output file type %d\n", ftype);
+        case LLAMA_FTYPE_MOSTLY_Q6_K:   quantized_type = GGML_TYPE_Q6_K; break;
+        default: throw std::runtime_error(format("invalid output file type %d\n", ftype));
     }
 
     if (nthread <= 0) {
if (nthread <= 0 ) {
@@ -2231,7 +2232,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
2231
2232
f32_data[i] = ggml_fp16_to_fp32 (f16_data[i]);
2232
2233
}
2233
2234
} else {
2234
- throw format (" type %s unsupported for integer quantization" , ggml_type_name (tensor.type ));
2235
+ throw std::runtime_error ( format (" type %s unsupported for integer quantization" , ggml_type_name (tensor.type ) ));
2235
2236
}
2236
2237
2237
2238
printf (" quantizing .. " );
@@ -2433,8 +2434,8 @@ int llama_model_quantize(
     try {
         llama_model_quantize_internal(fname_inp, fname_out, ftype, nthread);
         return 0;
-    } catch (const std::string & err) {
-        fprintf(stderr, "%s: failed to quantize: %s\n", __func__, err.c_str());
+    } catch (const std::exception & err) {
+        fprintf(stderr, "%s: failed to quantize: %s\n", __func__, err.what());
         return 1;
     }
 }
@@ -2687,8 +2688,8 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
 int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lora, const char * path_base_model, int n_threads) {
     try {
         return llama_apply_lora_from_file_internal(ctx, path_lora, path_base_model, n_threads);
-    } catch (const std::string & err) {
-        fprintf(stderr, "%s: failed to apply lora adapter: %s\n", __func__, err.c_str());
+    } catch (const std::exception & err) {
+        fprintf(stderr, "%s: failed to apply lora adapter: %s\n", __func__, err.what());
         return 1;
     }
 }
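
The pattern applied throughout this diff: every error path switches from `throw format(...)` or `throw std::string(...)` to `throw std::runtime_error(format(...))`, so the `catch` sites can take `const std::exception &` and report via `err.what()`. The following is a minimal, self-contained sketch of that idiom, not the actual llama.cpp code; the `format()` helper below is a hypothetical stand-in for llama.cpp's printf-style formatter.

```cpp
#include <cstdarg>
#include <cstdio>
#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical stand-in for llama.cpp's printf-style format() helper.
static std::string format(const char * fmt, ...) {
    va_list ap, ap2;
    va_start(ap, fmt);
    va_copy(ap2, ap);
    const int size = vsnprintf(nullptr, 0, fmt, ap);  // measure required length
    std::vector<char> buf(size + 1);
    vsnprintf(buf.data(), buf.size(), fmt, ap2);       // format into the buffer
    va_end(ap2);
    va_end(ap);
    return std::string(buf.data(), size);
}

// Same contract as checked_div in the diff: throwing std::runtime_error
// (instead of a bare std::string) keeps the error inside the std::exception
// hierarchy.
static size_t checked_div(size_t a, size_t b) {
    if (b == 0 || a % b != 0) {
        throw std::runtime_error(format("error dividing %zu / %zu", a, b));
    }
    return a / b;
}

int main() {
    try {
        checked_div(7, 3); // not evenly divisible -> throws
    } catch (const std::exception & err) {
        // Caller-side pattern after this change: catch std::exception, print err.what().
        fprintf(stderr, "error: %s\n", err.what());
        return 1;
    }
    return 0;
}
```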