@@ -2907,9 +2907,15 @@ struct llama_model {
     // for quantize-stats only
     std::vector<std::pair<std::string, struct ggml_tensor *>> tensors_by_name;
 
-    int64_t t_load_us = 0;
+    int64_t t_load_us  = 0;
     int64_t t_start_us = 0;
 
+    // total number of parameters in the model
+    uint64_t n_elements = 0;
+
+    // total size of all the tensors in the model in bytes
+    size_t n_bytes = 0;
+
 
     // keep track of loaded lora adapters
     std::set<struct llama_lora_adapter *> lora_adapters;
@@ -4275,8 +4281,8 @@ struct llama_model_loader {
     int n_tensors = 0;
     int n_created = 0;
 
-    int64_t n_elements = 0;
-    size_t  n_bytes    = 0;
+    uint64_t n_elements = 0;
+    size_t   n_bytes    = 0;
 
     bool use_mmap = false;
     bool check_tensors;
@@ -5344,6 +5350,11 @@ static const char * llama_model_vocab_type_name(enum llama_vocab_type type){
     }
 }
 
+static void llm_load_stats(llama_model_loader & ml, llama_model & model) {
+    model.n_elements = ml.n_elements;
+    model.n_bytes = ml.n_bytes;
+}
+
 static void llm_load_arch(llama_model_loader & ml, llama_model & model) {
     model.arch = ml.get_arch();
     if (model.arch == LLM_ARCH_UNKNOWN) {
@@ -9256,6 +9267,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
         throw std::runtime_error("error loading model vocabulary: " + std::string(e.what()));
     }
 
+    llm_load_stats(ml, model);
     llm_load_print_meta(ml, model);
 
     if (model.vocab.type != LLAMA_VOCAB_TYPE_NONE &&
@@ -18601,6 +18613,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     llama_model model;
     llm_load_arch(ml, model);
     llm_load_hparams(ml, model);
+    llm_load_stats(ml, model);
 
     struct quantize_state_internal qs(model, params);
 
@@ -19953,19 +19966,11 @@ int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t bu
 }
 
 uint64_t llama_model_size(const struct llama_model * model) {
-    uint64_t size = 0;
-    for (const auto & it : model->tensors_by_name) {
-        size += ggml_nbytes(it.second);
-    }
-    return size;
+    return model->n_bytes;
 }
 
 uint64_t llama_model_n_params(const struct llama_model * model) {
-    uint64_t nparams = 0;
-    for (const auto & it : model->tensors_by_name) {
-        nparams += ggml_nelements(it.second);
-    }
-    return nparams;
+    return model->n_elements;
 }
 
 struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const char * name) {
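
Usage note (not part of the commit): after this change, llama_model_size() and llama_model_n_params() return the totals cached by llm_load_stats() at load time instead of recomputing them from tensors_by_name (which the struct comment marks as quantize-stats-only). A minimal sketch of querying them through the public C API, assuming the llama.cpp API of this period (llama_backend_init, llama_model_default_params, llama_load_model_from_file, llama_free_model) and a caller-supplied GGUF path:

// minimal sketch, not part of the commit: print the cached model stats
#include <cinttypes>
#include <cstdio>

#include "llama.h"

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <model.gguf>\n", argv[0]);
        return 1;
    }

    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_load_model_from_file(argv[1], mparams);
    if (model == NULL) {
        fprintf(stderr, "failed to load model: %s\n", argv[1]);
        llama_backend_free();
        return 1;
    }

    // O(1) reads of the stats captured by llm_load_stats() during load
    printf("n_params: %" PRIu64 "\n", llama_model_n_params(model));
    printf("n_bytes : %" PRIu64 "\n", llama_model_size(model));

    llama_free_model(model);
    llama_backend_free();
    return 0;
}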