
Commit e4bb976

Remove multiple file loaders
1 parent d8147f2 commit e4bb976

File tree

1 file changed: +14 -26 lines changed

llama.cpp

Lines changed: 14 additions & 26 deletions
@@ -368,7 +368,6 @@ struct llama_load_tensor_shard {
     std::vector<uint32_t> ne;
     size_t size;
     enum ggml_type type;
-    size_t file_idx;
     size_t file_off;

     void calc_size() {
@@ -427,13 +426,13 @@ struct llama_file_loader {
     llama_hparams hparams;
     llama_vocab vocab;

-    llama_file_loader(const char * fname, size_t file_idx, llama_load_tensors_map & tensors_map)
+    llama_file_loader(const char * fname, llama_load_tensors_map & tensors_map)
         : file(fname, "rb") {
         fprintf(stderr, "llama.cpp: loading model from %s\n", fname);
         read_magic();
         read_hparams();
         read_vocab();
-        read_tensor_metadata(file_idx, tensors_map);
+        read_tensor_metadata(tensors_map);
     }
     void read_magic() {
         uint32_t magic = file.read_u32();
@@ -490,7 +489,7 @@ struct llama_file_loader {
             tok_score.score = score;
         }
     }
-    void read_tensor_metadata(size_t file_idx, llama_load_tensors_map & tensors_map) {
+    void read_tensor_metadata(llama_load_tensors_map & tensors_map) {
         while (file.tell() < file.size) {
             llama_load_tensor_shard shard;
             uint32_t n_dims = file.read_u32();
@@ -525,7 +524,7 @@ struct llama_file_loader {
                 // skip to the next multiple of 32 bytes
                 file.seek(-static_cast<ptrdiff_t>(file.tell()) & 31, SEEK_CUR);
             }
-            shard.file_idx = file_idx;
+
             shard.file_off = file.tell();

             shard.calc_size();
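Side note on the unchanged alignment line above: -static_cast<ptrdiff_t>(file.tell()) & 31 computes how many padding bytes are needed to reach the next multiple of 32, which works because 32 is a power of two. A minimal standalone sketch of that arithmetic (pad_to_32 is an illustrative name, not from the commit):

#include <cassert>
#include <cstddef>

// Bytes of padding needed to round `pos` up to the next multiple of 32.
// (-pos) & 31 is equivalent to (32 - pos % 32) % 32.
static std::size_t pad_to_32(std::size_t pos) {
    return static_cast<std::size_t>(-static_cast<std::ptrdiff_t>(pos)) & 31;
}

int main() {
    assert(pad_to_32(0)  == 0);   // already aligned
    assert(pad_to_32(1)  == 31);  // 1 + 31 = 32
    assert(pad_to_32(33) == 31);  // 33 + 31 = 64
    assert(pad_to_32(64) == 0);
    return 0;
}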
@@ -610,25 +609,15 @@ struct llama_file_saver {
 };

 struct llama_model_loader {
-    std::vector<std::unique_ptr<llama_file_loader>> file_loaders;
+    std::unique_ptr<llama_file_loader> file_loader;
     llama_load_tensors_map tensors_map;
     bool use_mmap;
     size_t num_ggml_tensors_created = 0;
     struct ggml_context * ggml_ctx = NULL;
     std::unique_ptr<llama_mmap> mapping;

     llama_model_loader(const std::string & fname_base, bool use_mmap, bool vocab_only) {
-        auto * first_file = new llama_file_loader(fname_base.c_str(), 0, tensors_map);
-        file_loaders.emplace_back(first_file);
-        uint32_t n_parts = vocab_only ? 1 : guess_n_parts();
-        for (uint32_t i = 1; i < n_parts; i++) {
-            std::string fname = fname_base + "." + std::to_string(i);
-            auto * ith_file = new llama_file_loader(fname.c_str(), i, tensors_map);
-            file_loaders.emplace_back(ith_file);
-            if (ith_file->hparams != first_file->hparams) {
-                throw std::runtime_error(format("llama.cpp: hparams inconsistent between files"));
-            }
-        }
+        file_loader = std::unique_ptr<llama_file_loader>(new llama_file_loader(fname_base.c_str(), tensors_map));
         if (!llama_mmap::SUPPORTED) {
             use_mmap = false;
         }
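The constructor now owns exactly one llama_file_loader instead of a vector of them, and it builds the unique_ptr with the C++11-compatible form std::unique_ptr<T>(new T(...)) rather than std::make_unique (C++14). A minimal sketch of that ownership shape, using stand-in types (FileLoader and ModelLoader are placeholders, not the real classes):

#include <memory>
#include <string>

struct FileLoader {                         // stand-in for llama_file_loader
    explicit FileLoader(const std::string & fname) { (void) fname; }
};

struct ModelLoader {                        // stand-in for llama_model_loader
    std::unique_ptr<FileLoader> file_loader;          // single owner; previously a vector

    explicit ModelLoader(const std::string & fname_base)
        : file_loader(new FileLoader(fname_base)) {   // C++11-friendly construction
    }
};

int main() {
    ModelLoader ml("model.bin");            // hypothetical file name
    return 0;
}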
@@ -657,7 +646,7 @@ struct llama_model_loader {
             throw std::runtime_error(std::string("missing tok_embeddings.weight"));
         }
         const llama_load_tensor & lt = tensors_map.tensors.at(it->second);
-        return file_loaders.at(0)->hparams.n_embd / lt.first_shard.ne.at(0);
+        return file_loader->hparams.n_embd / lt.first_shard.ne.at(0);
     }

     void calc_sizes(size_t * ctx_size_p, size_t * mmapped_size_p) const {
@@ -723,7 +712,7 @@ struct llama_model_loader {
         }

         if (use_mmap) {
-            mapping.reset(new llama_mmap(&file_loaders.at(0)->file, prefetch_size, ggml_is_numa()));
+            mapping.reset(new llama_mmap(&file_loader->file, prefetch_size, ggml_is_numa()));
             if (lmlock) {
                 lmlock->init(mapping->addr);
             }
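For context on this path: llama_mmap maps the model file into memory and lmlock optionally pins it so it cannot be paged out. The underlying POSIX pattern is roughly mmap plus mlock; the sketch below shows that generic pattern, not the llama_mmap implementation (map_and_lock is a made-up helper, and error handling is minimal):

#include <fcntl.h>
#include <stddef.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

// Map a file read-only and try to pin it in RAM. Returns nullptr on failure.
static void * map_and_lock(const char * path, size_t * out_size) {
    int fd = open(path, O_RDONLY);
    if (fd < 0) return nullptr;
    struct stat st;
    if (fstat(fd, &st) != 0) { close(fd); return nullptr; }
    void * addr = mmap(nullptr, (size_t) st.st_size, PROT_READ, MAP_SHARED, fd, 0);
    close(fd);                              // the mapping stays valid after close
    if (addr == MAP_FAILED) return nullptr;
    mlock(addr, (size_t) st.st_size);       // best effort: may fail under RLIMIT_MEMLOCK
    *out_size = (size_t) st.st_size;
    return addr;
}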
@@ -781,7 +770,7 @@ struct llama_model_loader {
         if (use_mmap) {
             lt.data = (uint8_t *) mapping->addr + lt.first_shard.file_off;
         } else {
-            llama_file & file = file_loaders.at(lt.first_shard.file_idx)->file;
+            llama_file & file = file_loader->file;
             file.seek(lt.first_shard.file_off, SEEK_SET);
             file.read_raw(lt.data, lt.size);
         }
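The branch above shows the two ways tensor data is materialized: with mmap, the pointer is simply the mapping base plus the tensor's file offset; without it, the file is seeked to that offset and read into a buffer. A rough sketch of the same two paths with stand-in names (tensor_data, base, off, and buf are illustrative, not llama.cpp variables):

#include <cstdint>
#include <cstdio>
#include <vector>

// Return a pointer to `size` bytes of tensor data located at offset `off`.
static uint8_t * tensor_data(bool use_mmap, uint8_t * base, long off, std::size_t size,
                             std::FILE * f, std::vector<uint8_t> & buf) {
    if (use_mmap) {
        return base + off;                  // data is already in the mapped file
    }
    buf.resize(size);
    std::fseek(f, off, SEEK_SET);           // position at the tensor's offset
    std::size_t got = std::fread(buf.data(), 1, size, f);
    return got == size ? buf.data() : nullptr;
}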
@@ -986,10 +975,10 @@ static void llama_model_load_internal(

     std::unique_ptr<llama_model_loader> ml(new llama_model_loader(fname, use_mmap, vocab_only));

-    vocab = std::move(ml->file_loaders.at(0)->vocab);
-    model.hparams = ml->file_loaders.at(0)->hparams;
+    vocab = std::move(ml->file_loader->vocab);
+    model.hparams = ml->file_loader->hparams;
     model.n_gpu_layers = n_gpu_layers;
-    llama_file_version file_version = ml->file_loaders.at(0)->file_version;
+    llama_file_version file_version = ml->file_loader->file_version;
     auto & hparams = model.hparams;

     {
@@ -1023,7 +1012,6 @@ static void llama_model_load_internal(
         fprintf(stderr, "%s: n_rot = %u\n", __func__, hparams.n_rot);
         fprintf(stderr, "%s: ftype = %u (%s)\n", __func__, hparams.ftype, llama_ftype_name(hparams.ftype));
         fprintf(stderr, "%s: n_ff = %u\n", __func__, n_ff);
-        fprintf(stderr, "%s: n_parts = %zu\n", __func__, ml->file_loaders.size());
         fprintf(stderr, "%s: model size = %s\n", __func__, llama_model_type_name(model.type));
     }

@@ -2370,7 +2358,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s

     std::unique_ptr<llama_model_loader> model_loader(new llama_model_loader(fname_inp, /*use_mmap*/ false,
                                                                             /*vocab_only*/ false));
-    llama_file_saver file_saver(fname_out.c_str(), model_loader->file_loaders.at(0).get(), params->ftype);
+    llama_file_saver file_saver(fname_out.c_str(), model_loader->file_loader.get(), params->ftype);

 #ifdef GGML_USE_K_QUANTS
     int n_attention_wv = 0;
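Passing model_loader->file_loader.get() hands llama_file_saver a non-owning raw pointer while the unique_ptr retains ownership. A tiny illustration of that idiom with placeholder types (Loader and Saver are not the real classes):

#include <memory>

struct Loader {};                           // placeholder for llama_file_loader
struct Saver {
    explicit Saver(Loader * borrowed) : loader(borrowed) {}  // borrows, does not own
    Loader * loader;
};

int main() {
    std::unique_ptr<Loader> loader(new Loader());
    Saver saver(loader.get());              // .get(): non-owning view of the same object
    return 0;                               // the unique_ptr frees the Loader here, not Saver
}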
@@ -2820,7 +2808,7 @@ int llama_apply_lora_from_file_internal(const struct llama_model & model, const

         // maybe this should in llama_model_loader
         if (model_loader->use_mmap) {
-            model_loader->mapping.reset(new llama_mmap(&model_loader->file_loaders.at(0)->file, /* prefetch */ 0, ggml_is_numa()));
+            model_loader->mapping.reset(new llama_mmap(&model_loader->file_loader->file, /* prefetch */ 0, ggml_is_numa()));
         }
     }
