@@ -558,10 +558,10 @@ struct llama_model_loader {
     struct ggml_context * ggml_ctx = NULL;
     std::unique_ptr<llama_mmap> mapping;
 
-    llama_model_loader(const std::string & fname_base, bool use_mmap) {
+    llama_model_loader(const std::string & fname_base, bool use_mmap, bool vocab_only) {
         auto first_file = new llama_file_loader(fname_base.c_str(), 0, tensors_map);
         file_loaders.emplace_back(first_file);
-        uint32_t n_parts = guess_n_parts();
+        uint32_t n_parts = vocab_only ? 1 : guess_n_parts();
         for (uint32_t i = 1; i < n_parts; i++) {
             std::string fname = fname_base + "." + std::to_string(i);
             auto ith_file = new llama_file_loader(fname.c_str(), i, tensors_map);
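With the new flag, the constructor opens only the first file when vocab_only is set: the vocabulary lives in part 0, so the extra shard files named fname_base + ".1", ".2", ... never need to be touched. A standalone sketch of that naming and part-count logic follows; the base file name, the part count of 2, and the part_name() helper are illustrative stand-ins, not llama.cpp code.

// Standalone sketch: which files the loader would open for a multi-part
// model, and how vocab_only short-circuits the shard enumeration.
#include <cstdint>
#include <cstdio>
#include <string>

// Part 0 is the bare base name; later parts append ".1", ".2", ...
static std::string part_name(const std::string & fname_base, uint32_t i) {
    return i == 0 ? fname_base : fname_base + "." + std::to_string(i);
}

int main() {
    const std::string base = "ggml-model-f16.bin"; // hypothetical file name
    for (bool vocab_only : { false, true }) {
        // 2 stands in for whatever guess_n_parts() would return
        const uint32_t n_parts = vocab_only ? 1 : 2;
        std::printf("vocab_only=%d:\n", vocab_only);
        for (uint32_t i = 0; i < n_parts; i++) {
            std::printf("  would open %s\n", part_name(base, i).c_str());
        }
    }
    return 0;
}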
@@ -603,7 +603,6 @@ struct llama_model_loader {
         return file_loaders.at(0)->hparams.n_embd / lt.shards.at(0).ne.at(0);
     }
 
-
     void calc_sizes(size_t * ctx_size_p, size_t * mmapped_size_p) const {
         *ctx_size_p = *mmapped_size_p = 0;
         for (const llama_load_tensor & lt : tensors_map.tensors) {
@@ -817,7 +816,7 @@ static void llama_model_load_internal(
 
     lctx.t_start_us = ggml_time_us();
 
-    std::unique_ptr<llama_model_loader> ml(new llama_model_loader(fname, use_mmap));
+    std::unique_ptr<llama_model_loader> ml(new llama_model_loader(fname, use_mmap, vocab_only));
 
     lctx.vocab = std::move(ml->file_loaders.at(0)->vocab);
     auto & model = lctx.model;
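Here llama_model_load_internal simply forwards the vocab_only flag it already receives, which originates from the vocab_only field of llama_context_params. A hedged usage sketch, assuming the llama.h API of this era exposes llama_context_default_params(), llama_init_from_file(), llama_n_vocab(), and llama_free():

// Usage sketch: load only the vocabulary through the public API.
// Assumes the llama.h of this vintage; no tensor shards beyond part 0
// would be opened on this path.
#include <cstdio>
#include "llama.h"

int main() {
    llama_context_params params = llama_context_default_params();
    params.vocab_only = true; // reaches llama_model_load_internal(), then the loader

    llama_context * ctx = llama_init_from_file("ggml-model-f16.bin", params);
    if (ctx == NULL) {
        return 1;
    }
    std::printf("n_vocab = %d\n", llama_n_vocab(ctx));
    llama_free(ctx);
    return 0;
}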
@@ -1526,7 +1525,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         default: throw format("invalid quantization type %d\n", itype);
     };
 
-    std::unique_ptr<llama_model_loader> model_loader(new llama_model_loader(fname_inp.c_str(), /*use_mmap*/ false));
+    std::unique_ptr<llama_model_loader> model_loader(new llama_model_loader(fname_inp.c_str(), /*use_mmap*/ false,
+                                                                            /*vocab_only*/ false));
     llama_file_saver file_saver(fname_out.c_str(), model_loader->file_loaders.at(0).get(), (uint32_t) itype);
 
     size_t total_size_org = 0;