
Commit 7675266

Remove llama_load_tensor_shard class
1 parent e4bb976 commit 7675266
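
This commit collapses the one remaining shard into the tensor record itself: the ne, type, and file_off fields that used to live on llama_load_tensor_shard now sit directly on llama_load_tensor, and calc_all() reduces to the size computation. A minimal sketch of the resulting struct, condensed from the diff below (llama_calc_tensor_size is the existing helper named in the hunk header; <string> and <vector> are assumed to be included elsewhere in llama.cpp):

    // Sketch of the consolidated struct after this change (condensed from the diff).
    // Fields that previously lived on llama_load_tensor_shard are now direct members.
    struct llama_load_tensor {
        std::string name;
        enum ggml_type type = GGML_TYPE_F32;
        std::vector<uint32_t> ne;   // tensor dimensions, read straight from the file header
        size_t file_off;            // absolute offset of the tensor data in the model file
        size_t size;                // byte size, derived from ne and type
        struct ggml_tensor * ggml_tensor = NULL;
        uint8_t * data;

        llama_load_tensor(const std::string & name) : name(name) {}

        void calc_all() {
            size = llama_calc_tensor_size(ne, type);  // no per-shard bookkeeping left
        }
    };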

1 file changed: +19 −43 lines changed


llama.cpp

Lines changed: 19 additions & 43 deletions
@@ -364,44 +364,18 @@ static size_t llama_calc_tensor_size(const std::vector<uint32_t> & ne, enum ggml
     return size / ggml_blck_size(type);
 }
 
-struct llama_load_tensor_shard {
-    std::vector<uint32_t> ne;
-    size_t size;
-    enum ggml_type type;
-    size_t file_off;
-
-    void calc_size() {
-        size = llama_calc_tensor_size(ne, type);
-    }
-};
-
 struct llama_load_tensor {
-    llama_load_tensor_shard first_shard;
-
     std::string name;
     enum ggml_type type = GGML_TYPE_F32;
     std::vector<uint32_t> ne;
+    size_t file_off;
     size_t size;
     struct ggml_tensor * ggml_tensor = NULL;
     uint8_t * data;
 
     llama_load_tensor(const std::string & name) : name(name) {}
 
     void calc_all() {
-        calc_type();
-        calc_ne();
-        calc_size();
-    }
-
-    void calc_type() {
-        type = first_shard.type;
-    }
-
-    void calc_ne() {
-        ne = first_shard.ne;
-    }
-
-    void calc_size() {
         size = llama_calc_tensor_size(ne, type);
     }
 };
@@ -491,17 +465,17 @@ struct llama_file_loader {
     }
     void read_tensor_metadata(llama_load_tensors_map & tensors_map) {
         while (file.tell() < file.size) {
-            llama_load_tensor_shard shard;
             uint32_t n_dims = file.read_u32();
             uint32_t name_len = file.read_u32();
-            shard.type = (enum ggml_type) file.read_u32();
-            shard.ne.resize(n_dims);
-            file.read_raw(shard.ne.data(), sizeof(shard.ne[0]) * n_dims);
+            ggml_type type = (enum ggml_type) file.read_u32();
+            std::vector<uint32_t> ne;
+            ne.resize(n_dims);
+            file.read_raw(ne.data(), sizeof(ne[0]) * n_dims);
             std::string name = file.read_string(name_len);
             if (n_dims < 1 || n_dims > 2) {
                 throw std::runtime_error(format("llama.cpp: tensor '%s' should not be %u-dimensional", name.c_str(), n_dims));
             }
-            switch (shard.type) {
+            switch (type) {
                 case GGML_TYPE_F32:
                 case GGML_TYPE_F16:
                 case GGML_TYPE_Q4_0:
@@ -516,7 +490,7 @@ struct llama_file_loader {
                 case GGML_TYPE_Q6_K:
                     break;
                 default: {
-                    throw std::runtime_error(format("unrecognized tensor type %u\n", shard.type));
+                    throw std::runtime_error(format("unrecognized tensor type %u\n", type));
                 }
             }
 
@@ -525,11 +499,6 @@ struct llama_file_loader {
                 file.seek(-static_cast<ptrdiff_t>(file.tell()) & 31, SEEK_CUR);
             }
 
-            shard.file_off = file.tell();
-
-            shard.calc_size();
-            file.seek(shard.size, SEEK_CUR);
-
             auto it = tensors_map.name_to_idx.find(name);
             size_t idx;
             if (it != tensors_map.name_to_idx.end()) {
@@ -539,7 +508,14 @@ struct llama_file_loader {
                 idx = tensors_map.tensors.size() - 1;
                 tensors_map.name_to_idx.emplace(name, idx);
             }
-            tensors_map.tensors.at(idx).first_shard = shard;
+            auto tensor = tensors_map.tensors.at(idx);
+
+            tensor.ne = ne;
+            tensor.type = type;
+            tensor.file_off = file.tell();
+
+            tensor.calc_all();
+            file.seek(tensor.size, SEEK_CUR);
         }
     }
 };
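
Condensed, the new metadata pass reads each tensor header into locals, records them on the map entry, and skips over the tensor data. The sketch below is a simplification of the hunks above, not verbatim code: find_or_insert is a hypothetical stand-in for the name_to_idx lookup/insert block, and the entry is bound by reference so the fields land on the stored tensor rather than on a copy.

    // Condensed sketch of the single-file metadata pass (simplified from the diff).
    // llama_load_tensors_map, llama_file, and calc_all() are existing llama.cpp pieces.
    void read_tensor_metadata(llama_load_tensors_map & tensors_map) {
        while (file.tell() < file.size) {
            uint32_t n_dims   = file.read_u32();
            uint32_t name_len = file.read_u32();
            ggml_type type    = (enum ggml_type) file.read_u32();

            std::vector<uint32_t> ne(n_dims);
            file.read_raw(ne.data(), sizeof(ne[0]) * n_dims);
            std::string name = file.read_string(name_len);

            // ... dimension/type validation and optional 32-byte alignment seek ...

            size_t idx = find_or_insert(tensors_map, name);           // hypothetical helper for brevity
            llama_load_tensor & tensor = tensors_map.tensors.at(idx); // bind by reference to fill the stored entry
            tensor.ne       = ne;
            tensor.type     = type;
            tensor.file_off = file.tell();
            tensor.calc_all();                 // derives tensor.size from ne and type
            file.seek(tensor.size, SEEK_CUR);  // skip past the data to the next tensor header
        }
    }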
@@ -633,7 +609,7 @@ struct llama_model_loader {
 
     bool alignment_prevents_mmap() {
         for (const llama_load_tensor & lt : tensors_map.tensors) {
-            if (lt.first_shard.file_off & 3) {
+            if (lt.file_off & 3) {
                 return true;
             }
         }
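
The mmap eligibility check only changes its field access: file_off & 3 keeps the two low bits of the offset, so it is non-zero exactly when the tensor data does not start on a 4-byte boundary. A tiny standalone illustration (the offsets are invented for the example):

    #include <cstdio>

    // Illustration of the 4-byte alignment test used above; the offsets are made up.
    int main() {
        size_t offsets[] = {64, 96, 130, 4096};
        for (size_t off : offsets) {
            // off & 3 keeps the two lowest bits, so it is non-zero exactly when
            // off is not a multiple of 4 (the case that prevents mmap loading).
            std::printf("%zu -> %s\n", off, (off & 3) ? "misaligned" : "aligned");
        }
        return 0;
    }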
@@ -646,7 +622,7 @@ struct llama_model_loader {
             throw std::runtime_error(std::string("missing tok_embeddings.weight"));
         }
         const llama_load_tensor & lt = tensors_map.tensors.at(it->second);
-        return file_loader->hparams.n_embd / lt.first_shard.ne.at(0);
+        return file_loader->hparams.n_embd / lt.ne.at(0);
     }
 
     void calc_sizes(size_t * ctx_size_p, size_t * mmapped_size_p) const {
@@ -768,10 +744,10 @@ struct llama_model_loader {
 
     void load_data_for(llama_load_tensor & lt) {
         if (use_mmap) {
-            lt.data = (uint8_t *) mapping->addr + lt.first_shard.file_off;
+            lt.data = (uint8_t *) mapping->addr + lt.file_off;
         } else {
             llama_file & file = file_loader->file;
-            file.seek(lt.first_shard.file_off, SEEK_SET);
+            file.seek(lt.file_off, SEEK_SET);
             file.read_raw(lt.data, lt.size);
         }
 
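After the change, load_data_for consumes lt.file_off directly: with mmap the tensor's data pointer is just the mapping base plus the recorded offset, otherwise the loader seeks to that offset and reads lt.size bytes into the destination buffer. A commented sketch of the two paths, simplified from the hunk above (use_mmap, mapping, and file_loader are the surrounding loader's members):

    // Sketch of the two load paths after this change (error handling omitted).
    void load_data_for(llama_load_tensor & lt) {
        if (use_mmap) {
            // mmap path: point straight into the mapped file, no copy.
            lt.data = (uint8_t *) mapping->addr + lt.file_off;
        } else {
            // read path: seek to the recorded offset and read the tensor bytes.
            llama_file & file = file_loader->file;
            file.seek(lt.file_off, SEEK_SET);
            file.read_raw(lt.data, lt.size);
        }
    }
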