Skip to content

Commit 154725c

Browse files
authored
llama-bench : add model sizes (#2771)
* llama-bench : add model sizes
* more compact markdown output
* back to GiB
* adjust column sizes
1 parent 12e2e33 commit 154725c

File tree

3 files changed

+74
-10
lines changed

3 files changed

+74
-10
lines changed

examples/llama-bench/llama-bench.cpp

Lines changed: 52 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -441,6 +441,8 @@ struct test {
441441
static const std::string gpu_info;
442442
std::string model_filename;
443443
std::string model_type;
444+
uint64_t model_size;
445+
uint64_t model_n_params;
444446
int n_batch;
445447
int n_threads;
446448
bool f32_kv;
@@ -457,8 +459,10 @@ struct test {
457459
test(const cmd_params_instance & inst, const llama_model * lmodel, const llama_context * ctx) {
458460
model_filename = inst.model;
459461
char buf[128];
460-
llama_model_type(lmodel, buf, sizeof(buf));
462+
llama_model_desc(lmodel, buf, sizeof(buf));
461463
model_type = buf;
464+
model_size = llama_model_size(lmodel);
465+
model_n_params = llama_model_n_params(lmodel);
462466
n_batch = inst.n_batch;
463467
n_threads = inst.n_threads;
464468
f32_kv = inst.f32_kv;
@@ -524,7 +528,7 @@ struct test {
524528
"build_commit", "build_number",
525529
"cuda", "opencl", "metal", "gpu_blas", "blas",
526530
"cpu_info", "gpu_info",
527-
"model_filename", "model_type",
531+
"model_filename", "model_type", "model_size", "model_n_params",
528532
"n_batch", "n_threads", "f16_kv",
529533
"n_gpu_layers", "main_gpu", "mul_mat_q", "low_vram", "tensor_split",
530534
"n_prompt", "n_gen", "test_time",
@@ -538,6 +542,7 @@ struct test {
538542

539543
static field_type get_field_type(const std::string & field) {
540544
if (field == "build_number" || field == "n_batch" || field == "n_threads" ||
545+
field == "model_size" || field == "model_n_params" ||
541546
field == "n_gpu_layers" || field == "main_gpu" ||
542547
field == "n_prompt" || field == "n_gen" ||
543548
field == "avg_ns" || field == "stddev_ns") {
@@ -573,7 +578,7 @@ struct test {
573578
build_commit, std::to_string(build_number),
574579
std::to_string(cuda), std::to_string(opencl), std::to_string(metal), std::to_string(gpu_blas), std::to_string(blas),
575580
cpu_info, gpu_info,
576-
model_filename, model_type,
581+
model_filename, model_type, std::to_string(model_size), std::to_string(model_n_params),
577582
std::to_string(n_batch), std::to_string(n_threads), std::to_string(!f32_kv),
578583
std::to_string(n_gpu_layers), std::to_string(main_gpu), std::to_string(mul_mat_q), std::to_string(low_vram), tensor_split_str,
579584
std::to_string(n_prompt), std::to_string(n_gen), test_time,
@@ -709,8 +714,15 @@ struct markdown_printer : public printer {
709714
return -30;
710715
}
711716
if (field == "t/s") {
712-
return 15;
717+
return 16;
713718
}
719+
if (field == "size" || field == "params") {
720+
return 10;
721+
}
722+
if (field == "n_gpu_layers") {
723+
return 3;
724+
}
725+
714726
int width = std::max((int)field.length(), 10);
715727

716728
if (test::get_field_type(field) == test::STRING) {
@@ -719,9 +731,28 @@ struct markdown_printer : public printer {
719731
return width;
720732
}
721733

734+
static std::string get_field_display_name(const std::string & field) {
735+
if (field == "n_gpu_layers") {
736+
return "ngl";
737+
}
738+
if (field == "n_threads") {
739+
return "threads";
740+
}
741+
if (field == "mul_mat_q") {
742+
return "mmq";
743+
}
744+
if (field == "tensor_split") {
745+
return "ts";
746+
}
747+
return field;
748+
}
749+
722750
void print_header(const cmd_params & params) override {
723751
// select fields to print
724-
fields = { "model", "backend" };
752+
fields.push_back("model");
753+
fields.push_back("size");
754+
fields.push_back("params");
755+
fields.push_back("backend");
725756
bool is_cpu_backend = test::get_backend() == "CPU" || test::get_backend() == "BLAS";
726757
if (!is_cpu_backend) {
727758
fields.push_back("n_gpu_layers");
@@ -752,7 +783,7 @@ struct markdown_printer : public printer {
752783

753784
fprintf(fout, "|");
754785
for (const auto & field : fields) {
755-
fprintf(fout, " %*s |", get_field_width(field), field.c_str());
786+
fprintf(fout, " %*s |", get_field_width(field), get_field_display_name(field).c_str());
756787
}
757788
fprintf(fout, "\n");
758789
fprintf(fout, "|");
@@ -769,12 +800,26 @@ struct markdown_printer : public printer {
769800
fprintf(fout, "|");
770801
for (const auto & field : fields) {
771802
std::string value;
803+
char buf[128];
772804
if (field == "model") {
773805
value = t.model_type;
806+
} else if (field == "size") {
807+
if (t.model_size < 1024*1024*1024) {
808+
snprintf(buf, sizeof(buf), "%.2f MiB", t.model_size / 1024.0 / 1024.0);
809+
} else {
810+
snprintf(buf, sizeof(buf), "%.2f GiB", t.model_size / 1024.0 / 1024.0 / 1024.0);
811+
}
812+
value = buf;
813+
} else if (field == "params") {
814+
if (t.model_n_params < 1000*1000*1000) {
815+
snprintf(buf, sizeof(buf), "%.2f M", t.model_n_params / 1e6);
816+
} else {
817+
snprintf(buf, sizeof(buf), "%.2f B", t.model_n_params / 1e9);
818+
}
819+
value = buf;
774820
} else if (field == "backend") {
775821
value = test::get_backend();
776822
} else if (field == "test") {
777-
char buf[128];
778823
if (t.n_prompt > 0 && t.n_gen == 0) {
779824
snprintf(buf, sizeof(buf), "pp %d", t.n_prompt);
780825
} else if (t.n_gen > 0 && t.n_prompt == 0) {
@@ -785,7 +830,6 @@ struct markdown_printer : public printer {
785830
}
786831
value = buf;
787832
} else if (field == "t/s") {
788-
char buf[128];
789833
snprintf(buf, sizeof(buf), "%.2f ± %.2f", t.avg_ts(), t.stdev_ts());
790834
value = buf;
791835
} else if (vmap.find(field) != vmap.end()) {

llama.cpp

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5297,13 +5297,29 @@ int llama_model_n_embd(const struct llama_model * model) {
52975297
return model->hparams.n_embd;
52985298
}
52995299

5300-
int llama_model_type(const struct llama_model * model, char * buf, size_t buf_size) {
5300+
int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
53015301
return snprintf(buf, buf_size, "%s %s %s",
53025302
model->name.c_str(),
53035303
llama_model_type_name(model->type),
53045304
llama_model_ftype_name(model->ftype).c_str());
53055305
}
53065306

5307+
uint64_t llama_model_size(const struct llama_model * model) {
5308+
uint64_t size = 0;
5309+
for (const auto & it : model->tensors_by_name) {
5310+
size += ggml_nbytes(it.second);
5311+
}
5312+
return size;
5313+
}
5314+
5315+
uint64_t llama_model_n_params(const struct llama_model * model) {
5316+
uint64_t nparams = 0;
5317+
for (const auto & it : model->tensors_by_name) {
5318+
nparams += ggml_nelements(it.second);
5319+
}
5320+
return nparams;
5321+
}
5322+
53075323
int llama_model_quantize(
53085324
const char * fname_inp,
53095325
const char * fname_out,

llama.h

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -254,7 +254,11 @@ extern "C" {
254254
LLAMA_API int llama_model_n_embd (const struct llama_model * model);
255255

256256
// Get a string describing the model type
257-
LLAMA_API int llama_model_type(const struct llama_model * model, char * buf, size_t buf_size);
257+
LLAMA_API int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size);
258+
// Returns the total size of all the tensors in the model in bytes
259+
LLAMA_API uint64_t llama_model_size(const struct llama_model * model);
260+
// Returns the total number of parameters in the model
261+
LLAMA_API uint64_t llama_model_n_params(const struct llama_model * model);
258262

259263
// Returns 0 on success
260264
LLAMA_API int llama_model_quantize(

0 commit comments

Comments (0)