@@ -2911,9 +2911,15 @@ struct llama_model {
     // for quantize-stats only
     std::vector<std::pair<std::string, struct ggml_tensor *>> tensors_by_name;
 
-    int64_t t_load_us = 0;
+    int64_t t_load_us  = 0;
     int64_t t_start_us = 0;
 
+    // total number of parameters in the model
+    uint64_t n_elements = 0;
+
+    // total size of all the tensors in the model in bytes
+    size_t n_bytes = 0;
+
     // keep track of loaded lora adapters
     std::set<struct llama_lora_adapter *> lora_adapters;
 
@@ -4279,8 +4285,8 @@ struct llama_model_loader {
     int n_tensors = 0;
     int n_created = 0;
 
-    int64_t n_elements = 0;
-    size_t n_bytes = 0;
+    uint64_t n_elements = 0;
+    size_t n_bytes = 0;
 
     bool use_mmap = false;
     bool check_tensors;
@@ -5348,6 +5354,11 @@ static const char * llama_model_vocab_type_name(enum llama_vocab_type type){
     }
 }
 
+static void llm_load_stats(llama_model_loader & ml, llama_model & model) {
+    model.n_elements = ml.n_elements;
+    model.n_bytes = ml.n_bytes;
+}
+
 static void llm_load_arch(llama_model_loader & ml, llama_model & model) {
     model.arch = ml.get_arch();
     if (model.arch == LLM_ARCH_UNKNOWN) {
@@ -9265,6 +9276,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
         throw std::runtime_error("error loading model vocabulary: " + std::string(e.what()));
     }
 
+    llm_load_stats(ml, model);
     llm_load_print_meta(ml, model);
 
     if (model.vocab.type != LLAMA_VOCAB_TYPE_NONE &&
@@ -18610,6 +18622,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     llama_model model;
     llm_load_arch(ml, model);
     llm_load_hparams(ml, model);
+    llm_load_stats(ml, model);
 
     struct quantize_state_internal qs(model, params);
 
@@ -19962,19 +19975,11 @@ int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t bu
 }
 
 uint64_t llama_model_size(const struct llama_model * model) {
-    uint64_t size = 0;
-    for (const auto & it : model->tensors_by_name) {
-        size += ggml_nbytes(it.second);
-    }
-    return size;
+    return model->n_bytes;
 }
 
 uint64_t llama_model_n_params(const struct llama_model * model) {
-    uint64_t nparams = 0;
-    for (const auto & it : model->tensors_by_name) {
-        nparams += ggml_nelements(it.second);
-    }
-    return nparams;
+    return model->n_elements;
}
 
 struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const char * name) {
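
Note: with this change, llama_model_size() and llama_model_n_params() become O(1) reads of the totals cached by llm_load_stats() at load time, instead of walking tensors_by_name on every call. Below is a minimal caller-side sketch of the affected public API; the program name and model path are placeholders, and the calls are the standard llama.h entry points of this vintage (llama_load_model_from_file, etc.).

#include "llama.h"

#include <cinttypes>
#include <cstdio>

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <model.gguf>\n", argv[0]);
        return 1;
    }

    llama_backend_init();

    // llm_load_stats() fills model.n_elements / model.n_bytes during this call
    llama_model * model = llama_load_model_from_file(argv[1], llama_model_default_params());
    if (model == NULL) {
        fprintf(stderr, "failed to load model: %s\n", argv[1]);
        return 1;
    }

    // previously these iterated over every tensor; now they return the cached totals
    printf("n_params: %" PRIu64 "\n", llama_model_n_params(model));
    printf("n_bytes:  %" PRIu64 "\n", llama_model_size(model));

    llama_free_model(model);
    llama_backend_free();
    return 0;
}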