@@ -64,6 +64,33 @@ static std::string llama_model_ftype_name(llama_ftype ftype) {
     }
 }
 
+// return a list of splits for a given path
+// for example, given "<name>-00002-of-00004.gguf", returns list of all 4 splits
+static std::vector<std::string> llama_get_list_splits(const std::string & path, const int idx, const int n_split) {
+    std::vector<std::string> paths;
+    std::string split_prefix;
+    std::vector<char> buf(llama_path_max(), 0);
+
+    {
+        int ret = llama_split_prefix(buf.data(), buf.size(), path.c_str(), idx, n_split);
+        if (!ret) {
+            throw std::runtime_error(format("invalid split file name: %s", path.c_str()));
+        }
+        split_prefix = std::string(buf.data(), ret);
+    }
+
+    if (split_prefix.empty()) {
+        throw std::runtime_error(format("invalid split file: %s", path.c_str()));
+    }
+
+    for (int idx = 0; idx < n_split; ++idx) {
+        int ret = llama_split_path(buf.data(), buf.size(), split_prefix.c_str(), idx, n_split);
+        paths.push_back(std::string(buf.data(), ret));
+    }
+
+    return paths;
+}
+
 namespace GGUFMeta {
     template <typename T, gguf_type gt_, T (*gfun)(const gguf_context *, const int64_t)>
     struct GKV_Base_Type {
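For context, the helper added above relies on llama_split_prefix/llama_split_path to map between a split file name and its common prefix. Below is a minimal standalone sketch of the expansion loop at the end of the function, under the assumption that split files follow a 1-based "-%05d-of-%05d.gguf" naming scheme (inferred from the example in the comment, not guaranteed by this diff):

// Hypothetical sketch of the split expansion performed by llama_get_list_splits;
// the "-%05d-of-%05d.gguf" format string and 1-based numbering are assumptions.
#include <cstdio>
#include <string>
#include <vector>

static std::vector<std::string> expand_splits_sketch(const std::string & prefix, int n_split) {
    std::vector<std::string> paths;
    char buf[512];
    for (int idx = 0; idx < n_split; ++idx) {
        // e.g. "model-00001-of-00004.gguf" for idx == 0, n_split == 4
        std::snprintf(buf, sizeof(buf), "%s-%05d-of-%05d.gguf", prefix.c_str(), idx + 1, n_split);
        paths.push_back(buf);
    }
    return paths;
}

// expand_splits_sketch("model", 4) -> {"model-00001-of-00004.gguf", ..., "model-00004-of-00004.gguf"}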
@@ -466,27 +493,29 @@ llama_model_loader::llama_model_loader(
 
     // Load additional GGML contexts
     if (n_split > 1) {
+        // make sure the main file is loaded first
+        uint16_t idx = 0;
+        const std::string kv_split_no = llm_kv(LLM_KV_SPLIT_NO);
+        get_key(kv_split_no, idx);
+        if (idx != 0) {
+            throw std::runtime_error(format("illegal split file idx: %d (file: %s), model must be loaded with the first split", idx, fname.c_str()));
+        }
+
         // generate list of splits if needed
         if (splits.empty()) {
-            splits = llama_get_list_splits(fname, n_split);
+            splits = llama_get_list_splits(fname, idx, n_split);
         }
 
         // in case user give a custom list of splits, check if it matches the expected number
         if (n_split != (uint16_t)splits.size()) {
             throw std::runtime_error(format("invalid split count, given: %zu splits, but expected %d", splits.size(), n_split));
         }
 
-        uint16_t idx = 0;
-        const std::string kv_split_no = llm_kv(LLM_KV_SPLIT_NO);
-        get_key(kv_split_no, idx);
-        if (idx != 0) {
-            throw std::runtime_error(format("illegal split file idx: %d (file: %s), model must be loaded with the first split", idx, fname.c_str()));
-        }
-
         if (trace > 0) {
             LLAMA_LOG_INFO("%s: loading additional %d GGUFs\n", __func__, n_split);
         }
 
+        // load other splits
         for (idx = 1; idx < n_split; idx++) {
             const char * fname_split = splits[idx].c_str();
 
@@ -1093,28 +1122,3 @@ void llama_model_loader::print_info() const {
         LLAMA_LOG_INFO("%s: file size = %.2f GiB (%.2f BPW) \n", __func__, n_bytes/1024.0/1024.0/1024.0, n_bytes*8.0/n_elements);
     }
 }
-
-std::vector<std::string> llama_get_list_splits(const std::string & path, const int n_split) {
-    std::vector<std::string> paths;
-    std::string split_prefix;
-    std::vector<char> buf(llama_path_max(), 0);
-
-    // brute force to find the split prefix
-    for (int idx = 0; idx < n_split; ++idx) {
-        int ret = llama_split_prefix(buf.data(), buf.size(), path.c_str(), idx, n_split);
-        if (ret) {
-            split_prefix = std::string(buf.data(), ret);
-        }
-    }
-
-    if (split_prefix.empty()) {
-        throw std::runtime_error(format("invalid split file: %s", path.c_str()));
-    }
-
-    for (int idx = 0; idx < n_split; ++idx) {
-        int ret = llama_split_path(buf.data(), buf.size(), split_prefix.c_str(), idx, n_split);
-        paths.push_back(std::string(buf.data(), ret));
-    }
-
-    return paths;
-}
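For reference, the version removed above had to brute-force every split index because the file's own split number was unknown at that point, whereas the new static helper receives the idx read from LLM_KV_SPLIT_NO and calls llama_split_prefix exactly once. A rough sketch of what that prefix extraction is assumed to do (only the call signature is taken from the diff; the "-%05d-of-%05d.gguf" suffix is an assumption):

// Hypothetical sketch of the prefix extraction assumed to be done by llama_split_prefix:
// returns the prefix length if split_path ends with the expected "-%05d-of-%05d.gguf"
// suffix for this split index/count, and 0 otherwise (the pattern is an assumption).
#include <cstddef>
#include <cstdio>
#include <cstring>

static int split_prefix_sketch(char * dest, size_t maxlen, const char * split_path, int split_no, int split_count) {
    char postfix[64];
    std::snprintf(postfix, sizeof(postfix), "-%05d-of-%05d.gguf", split_no + 1, split_count);

    const size_t path_len = std::strlen(split_path);
    const size_t post_len = std::strlen(postfix);
    if (path_len <= post_len || std::strcmp(split_path + path_len - post_len, postfix) != 0) {
        return 0; // name does not match the split pattern for this idx/count
    }

    const size_t prefix_len = path_len - post_len;
    if (prefix_len + 1 > maxlen) {
        return 0; // not enough room for the prefix and terminator
    }
    std::memcpy(dest, split_path, prefix_len);
    dest[prefix_len] = '\0';
    return (int) prefix_len;
}

// split_prefix_sketch(buf, sizeof(buf), "model-00002-of-00004.gguf", 1, 4) -> 5 ("model")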