
Commit 712c127

Simplify load logic
1 parent 7675266 commit 712c127

File tree

1 file changed: +11 -30 lines changed


llama.cpp

Lines changed: 11 additions & 30 deletions
@@ -372,12 +372,6 @@ struct llama_load_tensor {
     size_t size;
     struct ggml_tensor * ggml_tensor = NULL;
     uint8_t * data;
-
-    llama_load_tensor(const std::string & name) : name(name) {}
-
-    void calc_all() {
-        size = llama_calc_tensor_size(ne, type);
-    }
 };
 
 struct llama_load_tensors_map {
@@ -465,17 +459,17 @@ struct llama_file_loader {
     }
     void read_tensor_metadata(llama_load_tensors_map & tensors_map) {
         while (file.tell() < file.size) {
+            llama_load_tensor tensor;
             uint32_t n_dims = file.read_u32();
             uint32_t name_len = file.read_u32();
-            ggml_type type = (enum ggml_type) file.read_u32();
-            std::vector<uint32_t> ne;
-            ne.resize(n_dims);
-            file.read_raw(ne.data(), sizeof(ne[0]) * n_dims);
+            tensor.type = (enum ggml_type) file.read_u32();
+            tensor.ne.resize(n_dims);
+            file.read_raw(tensor.ne.data(), sizeof(tensor.ne[0]) * n_dims);
             std::string name = file.read_string(name_len);
             if (n_dims < 1 || n_dims > 2) {
                 throw std::runtime_error(format("llama.cpp: tensor '%s' should not be %u-dimensional", name.c_str(), n_dims));
             }
-            switch (type) {
+            switch (tensor.type) {
                 case GGML_TYPE_F32:
                 case GGML_TYPE_F16:
                 case GGML_TYPE_Q4_0:
@@ -490,7 +484,7 @@ struct llama_file_loader {
                 case GGML_TYPE_Q6_K:
                     break;
                 default: {
-                    throw std::runtime_error(format("unrecognized tensor type %u\n", type));
+                    throw std::runtime_error(format("unrecognized tensor type %u\n", tensor.type));
                 }
             }
 
@@ -499,23 +493,13 @@ struct llama_file_loader {
                 file.seek(-static_cast<ptrdiff_t>(file.tell()) & 31, SEEK_CUR);
             }
 
-            auto it = tensors_map.name_to_idx.find(name);
-            size_t idx;
-            if (it != tensors_map.name_to_idx.end()) {
-                idx = it->second;
-            } else {
-                tensors_map.tensors.emplace_back(name);
-                idx = tensors_map.tensors.size() - 1;
-                tensors_map.name_to_idx.emplace(name, idx);
-            }
-            auto tensor = tensors_map.tensors.at(idx);
-
-            tensor.ne = ne;
-            tensor.type = type;
             tensor.file_off = file.tell();
-
-            tensor.calc_all();
+            tensor.name = name;
+            tensor.size = llama_calc_tensor_size(tensor.ne, tensor.type);
             file.seek(tensor.size, SEEK_CUR);
+
+            tensors_map.tensors.push_back(tensor);
+            tensors_map.name_to_idx[name] = tensors_map.tensors.size() - 1;
         }
     }
 };
@@ -602,9 +586,6 @@ struct llama_model_loader {
             use_mmap = false;
         }
         this->use_mmap = use_mmap;
-        for (llama_load_tensor & lt : tensors_map.tensors) {
-            lt.calc_all();
-        }
     }
 
     bool alignment_prevents_mmap() {
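
The diff above collapses the old find-or-create indexing into a single pass: each tensor record is read into a local llama_load_tensor, its size is computed on the spot, and the finished struct is appended to the vector and indexed by name. Below is a minimal, self-contained sketch of that pattern; the stand-in types and values (Tensor, TensorsMap, calc_tensor_size, the hard-coded records) are illustrative assumptions, not llama.cpp code.

// Sketch of the simplified load pattern: fill a local struct per record,
// then push_back and index by name -- no lookup-or-create step required.
#include <cstdint>
#include <cstdio>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

struct Tensor {
    std::string name;
    std::vector<uint32_t> ne;   // dimensions
    size_t file_off = 0;        // offset of the tensor data in the file
    size_t size = 0;            // size of the tensor data in bytes
};

struct TensorsMap {
    std::vector<Tensor> tensors;
    std::unordered_map<std::string, size_t> name_to_idx;
};

// Stand-in size calculation for a single fixed element type (F32).
static size_t calc_tensor_size(const std::vector<uint32_t> & ne) {
    size_t n = 1;
    for (uint32_t d : ne) {
        n *= d;
    }
    return n * sizeof(float);
}

// Read one tensor record, append it, and index it by name.
static void read_one(TensorsMap & map, const std::string & name,
                     std::vector<uint32_t> ne, size_t file_off) {
    Tensor tensor;               // local object, filled in field by field
    tensor.name     = name;
    tensor.ne       = std::move(ne);
    tensor.file_off = file_off;
    tensor.size     = calc_tensor_size(tensor.ne);

    map.tensors.push_back(tensor);
    map.name_to_idx[tensor.name] = map.tensors.size() - 1;
}

int main() {
    TensorsMap map;
    read_one(map, "tok_embeddings.weight", {4096, 32000}, 128);
    read_one(map, "output_norm.weight",    {4096},        128 + map.tensors[0].size);

    for (const Tensor & t : map.tensors) {
        std::printf("%-24s %zu bytes at offset %zu\n",
                    t.name.c_str(), t.size, t.file_off);
    }
    return 0;
}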

0 commit comments
