@@ -558,10 +558,10 @@ struct llama_model_loader {
     struct ggml_context * ggml_ctx = NULL;
     std::unique_ptr<llama_mmap> mapping;
 
-    llama_model_loader(const std::string & fname_base, bool use_mmap) {
+    llama_model_loader(const std::string & fname_base, bool use_mmap, bool vocab_only) {
         auto first_file = new llama_file_loader(fname_base.c_str(), 0, tensors_map);
         file_loaders.emplace_back(first_file);
-        uint32_t n_parts = guess_n_parts();
+        uint32_t n_parts = vocab_only ? 1 : guess_n_parts();
         for (uint32_t i = 1; i < n_parts; i++) {
             std::string fname = fname_base + "." + std::to_string(i);
             auto ith_file = new llama_file_loader(fname.c_str(), i, tensors_map);
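With the new flag, the constructor opens only the first file when vocab_only is set: the vocabulary lives in part 0, so the extra shard files named fname_base + ".1", ".2", ... never need to be touched. A standalone sketch of that naming and part-count logic follows; the base file name, the part count of 2, and the part_name() helper are illustrative stand-ins, not llama.cpp code.

// Standalone sketch: which files the loader would open for a multi-part
// model, and how vocab_only short-circuits the shard enumeration.
#include <cstdint>
#include <cstdio>
#include <string>

// Part 0 is the bare base name; later parts append ".1", ".2", ...
static std::string part_name(const std::string & fname_base, uint32_t i) {
    return i == 0 ? fname_base : fname_base + "." + std::to_string(i);
}

int main() {
    const std::string base = "ggml-model-f16.bin"; // hypothetical file name
    for (bool vocab_only : { false, true }) {
        // 2 stands in for whatever guess_n_parts() would return
        const uint32_t n_parts = vocab_only ? 1 : 2;
        std::printf("vocab_only=%d:\n", vocab_only);
        for (uint32_t i = 0; i < n_parts; i++) {
            std::printf("  would open %s\n", part_name(base, i).c_str());
        }
    }
    return 0;
}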
@@ -603,7 +603,6 @@ struct llama_model_loader {
         return file_loaders.at(0)->hparams.n_embd / lt.shards.at(0).ne.at(0);
     }
 
-
     void calc_sizes(size_t * ctx_size_p, size_t * mmapped_size_p) const {
         *ctx_size_p = *mmapped_size_p = 0;
         for (const llama_load_tensor & lt : tensors_map.tensors) {
@@ -817,7 +816,7 @@ static void llama_model_load_internal(
 
     lctx.t_start_us = ggml_time_us();
 
-    std::unique_ptr<llama_model_loader> ml(new llama_model_loader(fname, use_mmap));
+    std::unique_ptr<llama_model_loader> ml(new llama_model_loader(fname, use_mmap, vocab_only));
 
     lctx.vocab = std::move(ml->file_loaders.at(0)->vocab);
     auto & model = lctx.model;
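Here llama_model_load_internal simply forwards the vocab_only flag it already receives, which originates from the vocab_only field of llama_context_params. A hedged usage sketch, assuming the llama.h API of this era exposes llama_context_default_params(), llama_init_from_file(), llama_n_vocab(), and llama_free():

// Usage sketch: load only the vocabulary through the public API.
// Assumes the llama.h of this vintage; no tensor shards beyond part 0
// would be opened on this path.
#include <cstdio>
#include "llama.h"

int main() {
    llama_context_params params = llama_context_default_params();
    params.vocab_only = true; // reaches llama_model_load_internal(), then the loader

    llama_context * ctx = llama_init_from_file("ggml-model-f16.bin", params);
    if (ctx == NULL) {
        return 1;
    }
    std::printf("n_vocab = %d\n", llama_n_vocab(ctx));
    llama_free(ctx);
    return 0;
}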
@@ -1526,7 +1525,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         default: throw format("invalid quantization type %d\n", itype);
     };
 
-    std::unique_ptr<llama_model_loader> model_loader(new llama_model_loader(fname_inp.c_str(), /*use_mmap*/ false));
+    std::unique_ptr<llama_model_loader> model_loader(new llama_model_loader(fname_inp.c_str(), /*use_mmap*/ false,
+                                                                            /*vocab_only*/ false));
     llama_file_saver file_saver(fname_out.c_str(), model_loader->file_loaders.at(0).get(), (uint32_t) itype);
 
     size_t total_size_org = 0;