Commit 5a0a2c5

llama.cpp : print actual model size
1 parent 42f8fe1 commit 5a0a2c5

File tree

1 file changed, 13 insertions(+), 1 deletion(-)


llama.cpp

Lines changed: 13 additions & 1 deletion
@@ -1023,6 +1023,7 @@ struct llama_model_loader {
     int n_kv = 0;
     int n_tensors = 0;
     int n_created = 0;
+    size_t n_tot_elements = 0;
 
     bool use_mmap = false;
 
@@ -1047,6 +1048,16 @@ struct llama_model_loader {
 
         file_version = (enum llama_file_version) gguf_get_version(ctx_gguf);
 
+        for (int i = 0; i < n_tensors; i++) {
+            const char * name = gguf_get_tensor_name(ctx_gguf, i);
+            struct ggml_tensor * t = ggml_get_tensor(ctx_meta, name);
+            size_t elem = 1;
+            for (int j = 0; j < t->n_dims; j++) {
+                elem *= t->ne[j];
+            }
+            n_tot_elements += elem;
+        }
+
         // print meta data
         // TODO: make optional
         {
@@ -1413,7 +1424,8 @@ static void llama_model_load_internal(
         LLAMA_LOG_INFO("%s: freq_base = %.1f\n", __func__, hparams.rope_freq_base);
         LLAMA_LOG_INFO("%s: freq_scale = %g\n", __func__, hparams.rope_freq_scale);
         LLAMA_LOG_INFO("%s: ftype = %u (%s)\n", __func__, hparams.ftype, llama_ftype_name(hparams.ftype));
-        LLAMA_LOG_INFO("%s: model size = %s\n", __func__, llama_model_type_name(model.type));
+        LLAMA_LOG_INFO("%s: model size = %.2f B\n", __func__, ml->n_tot_elements*1e-9);
+
     }
 
     if (vocab_only) {
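For context: the added loop computes each tensor's element count as the product of its dimensions (t->ne[0] * ... * t->ne[n_dims-1]) and accumulates the sum in n_tot_elements; the new log line then scales that total by 1e-9 to report the model's parameter count in billions. Below is a minimal, self-contained C sketch of the same arithmetic. MockTensor is a hypothetical stand-in for ggml_tensor (not a real ggml type), and the two example tensors are made up for illustration:

#include <stdio.h>
#include <stddef.h>

// hypothetical stand-in for ggml_tensor: only the fields the loop touches
struct MockTensor {
    int    n_dims;
    size_t ne[4]; // elements per dimension, mirroring ggml_tensor::ne
};

int main(void) {
    // two example tensors: a 4096x4096 weight matrix and a 4096-element bias
    struct MockTensor tensors[2] = {
        { 2, { 4096, 4096, 1, 1 } },
        { 1, { 4096, 1, 1, 1 } },
    };

    size_t n_tot_elements = 0;
    for (int i = 0; i < 2; i++) {
        size_t elem = 1; // product of this tensor's dimensions
        for (int j = 0; j < tensors[i].n_dims; j++) {
            elem *= tensors[i].ne[j];
        }
        n_tot_elements += elem;
    }

    // same formatting as the new log line: total elements * 1e-9 = billions
    printf("model size = %.2f B\n", n_tot_elements * 1e-9);
    return 0;
}

Note that ggml also exposes ggml_nelements(), which returns the same per-tensor element count that the committed loop computes by hand.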
