Commit 0d12d86

FirstTimeEZ authored and arthw committed
llama : save number of parameters and the size in llama_model (ggml-org#10286)
fixes ggml-org#10285
1 parent 157e7f3 commit 0d12d86

File tree: 1 file changed, +18 -13 lines

src/llama.cpp

Lines changed: 18 additions & 13 deletions
@@ -2911,9 +2911,15 @@ struct llama_model {
     // for quantize-stats only
     std::vector<std::pair<std::string, struct ggml_tensor *>> tensors_by_name;
 
-    int64_t t_load_us = 0;
+    int64_t t_load_us  = 0;
     int64_t t_start_us = 0;
 
+    // total number of parameters in the model
+    uint64_t n_elements = 0;
+
+    // total size of all the tensors in the model in bytes
+    size_t  n_bytes     = 0;
+
     // keep track of loaded lora adapters
     std::set<struct llama_lora_adapter *> lora_adapters;

@@ -4279,8 +4285,8 @@ struct llama_model_loader {
     int n_tensors = 0;
     int n_created = 0;
 
-    int64_t n_elements = 0;
-    size_t  n_bytes    = 0;
+    uint64_t n_elements = 0;
+    size_t   n_bytes    = 0;
 
     bool use_mmap = false;
     bool check_tensors;

@@ -5348,6 +5354,11 @@ static const char * llama_model_vocab_type_name(enum llama_vocab_type type){
     }
 }
 
+static void llm_load_stats(llama_model_loader & ml, llama_model & model) {
+    model.n_elements = ml.n_elements;
+    model.n_bytes    = ml.n_bytes;
+}
+
 static void llm_load_arch(llama_model_loader & ml, llama_model & model) {
     model.arch = ml.get_arch();
     if (model.arch == LLM_ARCH_UNKNOWN) {
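
The new llm_load_stats() helper only copies totals that llama_model_loader already tracks while it enumerates tensors. As a minimal sketch of that bookkeeping, assuming ggml's ggml_nelements()/ggml_nbytes() size helpers (the loader_stats/compute_stats names below are illustrative, not code from this commit):

#include "ggml.h"

#include <cstdint>
#include <vector>

// illustrative sketch: walk every tensor once and sum its element
// count and byte size, mirroring the totals the loader caches
struct loader_stats {
    uint64_t n_elements = 0; // total parameter count across all tensors
    size_t   n_bytes    = 0; // total tensor data size in bytes
};

static loader_stats compute_stats(const std::vector<const ggml_tensor *> & tensors) {
    loader_stats st;
    for (const ggml_tensor * t : tensors) {
        st.n_elements += ggml_nelements(t); // number of elements in t
        st.n_bytes    += ggml_nbytes(t);    // bytes occupied by t's data
    }
    return st;
}
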
@@ -9265,6 +9276,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
         throw std::runtime_error("error loading model vocabulary: " + std::string(e.what()));
     }
 
+    llm_load_stats(ml, model);
     llm_load_print_meta(ml, model);
 
     if (model.vocab.type != LLAMA_VOCAB_TYPE_NONE &&
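
For context, the load-time printout that follows this call reports the same totals in human-readable units (parameters in billions, model size in GiB). A self-contained sketch of that formatting, with an illustrative helper name and made-up example numbers:

#include <cstdint>
#include <cstdio>

// format totals the way llama.cpp's load-time log reports them
// (illustrative helper, not the actual llm_load_print_meta code)
static void print_model_stats(uint64_t n_elements, uint64_t n_bytes) {
    printf("model params = %.2f B\n",   n_elements / 1e9);
    printf("model size   = %.2f GiB\n", n_bytes / (1024.0 * 1024.0 * 1024.0));
}

int main() {
    // ballpark figures for a 7B model quantized to ~4.8 bits per weight
    print_model_stats(7241732096ull, 4368439296ull);
    return 0;
}
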
@@ -18610,6 +18622,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     llama_model model;
     llm_load_arch(ml, model);
     llm_load_hparams(ml, model);
+    llm_load_stats(ml, model);
 
     struct quantize_state_internal qs(model, params);
 
@@ -19962,19 +19975,11 @@ int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t bu
 }
 
 uint64_t llama_model_size(const struct llama_model * model) {
-    uint64_t size = 0;
-    for (const auto & it : model->tensors_by_name) {
-        size += ggml_nbytes(it.second);
-    }
-    return size;
+    return model->n_bytes;
 }
 
 uint64_t llama_model_n_params(const struct llama_model * model) {
-    uint64_t nparams = 0;
-    for (const auto & it : model->tensors_by_name) {
-        nparams += ggml_nelements(it.second);
-    }
-    return nparams;
+    return model->n_elements;
 }
 
 struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const char * name) {
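
With the totals cached at load time, both public accessors become O(1) field reads instead of per-tensor loops over tensors_by_name. A usage sketch against the llama.h C API as of this commit (llama_load_model_from_file() and friends; treat the exact entry points as assumptions and adjust to your checkout):

#include "llama.h"

#include <cstdio>

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <model.gguf>\n", argv[0]);
        return 1;
    }

    llama_backend_init();

    // load the model; llm_load_stats() fills n_elements/n_bytes on the way
    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_load_model_from_file(argv[1], mparams);
    if (model == NULL) {
        fprintf(stderr, "failed to load model: %s\n", argv[1]);
        return 1;
    }

    // both calls now return the cached totals in constant time
    printf("parameters: %llu\n",       (unsigned long long) llama_model_n_params(model));
    printf("size:       %llu bytes\n", (unsigned long long) llama_model_size(model));

    llama_free_model(model);
    llama_backend_free();
    return 0;
}
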
