
Commit 89e4caa

llama : save number of parameters and the size in llama_model (#10286)
fixes #10285
1 parent: 74d73dc
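
For context (not part of the commit page itself): the two public accessors affected by this change are declared in llama.h, and after this commit they return totals cached at load time instead of recomputing them per call. The declarations below are reproduced as a reference and are unchanged by the commit.

    // Public accessors whose implementations this commit rewires to cached fields
    // (declarations as in llama.h; shown here for context only):
    LLAMA_API uint64_t llama_model_size    (const struct llama_model * model); // total tensor size, bytes
    LLAMA_API uint64_t llama_model_n_params(const struct llama_model * model); // total parameter count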


src/llama.cpp

Lines changed: 18 additions & 13 deletions
@@ -2907,9 +2907,15 @@ struct llama_model {
     // for quantize-stats only
     std::vector<std::pair<std::string, struct ggml_tensor *>> tensors_by_name;
 
-    int64_t t_load_us = 0;
+    int64_t t_load_us  = 0;
     int64_t t_start_us = 0;
 
+    // total number of parameters in the model
+    uint64_t n_elements = 0;
+
+    // total size of all the tensors in the model in bytes
+    size_t  n_bytes    = 0;
+
     // keep track of loaded lora adapters
     std::set<struct llama_lora_adapter *> lora_adapters;

@@ -4275,8 +4281,8 @@ struct llama_model_loader {
     int n_tensors = 0;
     int n_created = 0;
 
-    int64_t n_elements = 0;
-    size_t  n_bytes    = 0;
+    uint64_t n_elements = 0;
+    size_t   n_bytes    = 0;
 
     bool use_mmap = false;
     bool check_tensors;
@@ -5344,6 +5350,11 @@ static const char * llama_model_vocab_type_name(enum llama_vocab_type type){
     }
 }
 
+static void llm_load_stats(llama_model_loader & ml, llama_model & model) {
+    model.n_elements = ml.n_elements;
+    model.n_bytes = ml.n_bytes;
+}
+
 static void llm_load_arch(llama_model_loader & ml, llama_model & model) {
     model.arch = ml.get_arch();
     if (model.arch == LLM_ARCH_UNKNOWN) {
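
Note (added for context): llm_load_stats only copies totals the loader has already counted while reading the file. A rough sketch of where those loader-side numbers come from, illustrative only and not the actual llama_model_loader code; `weights` is a hypothetical container of the loader's tensor pointers:

    // Illustrative sketch: accumulate the totals that llm_load_stats()
    // later copies into llama_model. ggml_nelements()/ggml_nbytes() are
    // the real ggml helpers; the loop itself is a simplification.
    for (const struct ggml_tensor * t : weights) {
        n_elements += ggml_nelements(t); // parameters in this tensor
        n_bytes    += ggml_nbytes(t);    // bytes occupied by this tensor
    }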
@@ -9256,6 +9267,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
         throw std::runtime_error("error loading model vocabulary: " + std::string(e.what()));
     }
 
+    llm_load_stats(ml, model);
     llm_load_print_meta(ml, model);
 
     if (model.vocab.type != LLAMA_VOCAB_TYPE_NONE &&
@@ -18601,6 +18613,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     llama_model model;
     llm_load_arch(ml, model);
     llm_load_hparams(ml, model);
+    llm_load_stats(ml, model);
 
     struct quantize_state_internal qs(model, params);
 
@@ -19953,19 +19966,11 @@ int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t bu
 }
 
 uint64_t llama_model_size(const struct llama_model * model) {
-    uint64_t size = 0;
-    for (const auto & it : model->tensors_by_name) {
-        size += ggml_nbytes(it.second);
-    }
-    return size;
+    return model->n_bytes;
 }
 
 uint64_t llama_model_n_params(const struct llama_model * model) {
-    uint64_t nparams = 0;
-    for (const auto & it : model->tensors_by_name) {
-        nparams += ggml_nelements(it.second);
-    }
-    return nparams;
+    return model->n_elements;
 }
 
 struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const char * name) {
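
A minimal usage sketch (added for context, with a hypothetical "model.gguf" path): after this commit both accessors are constant-time reads of the cached fields rather than loops over tensors_by_name, which the struct comment marks as populated for quantize-stats only.

    // Minimal sketch, assuming llama.h of this era and a model file on disk.
    #include <cinttypes>
    #include <cstdio>
    #include "llama.h"

    int main(void) {
        llama_model_params mparams = llama_model_default_params();
        llama_model * model = llama_load_model_from_file("model.gguf", mparams); // hypothetical path
        if (model == nullptr) {
            return 1;
        }
        // Both calls now return the totals cached by llm_load_stats() at load time.
        printf("n_params = %" PRIu64 ", size = %" PRIu64 " bytes\n",
               llama_model_n_params(model), llama_model_size(model));
        llama_free_model(model);
        return 0;
    }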
