@@ -441,6 +441,8 @@ struct test {
441
441
static const std::string gpu_info;
442
442
std::string model_filename;
443
443
std::string model_type;
444
+ uint64_t model_size;
445
+ uint64_t model_n_params;
444
446
int n_batch;
445
447
int n_threads;
446
448
bool f32_kv;
@@ -457,8 +459,10 @@ struct test {
457
459
test (const cmd_params_instance & inst, const llama_model * lmodel, const llama_context * ctx) {
458
460
model_filename = inst.model ;
459
461
char buf[128 ];
460
- llama_model_type (lmodel, buf, sizeof (buf));
462
+ llama_model_desc (lmodel, buf, sizeof (buf));
461
463
model_type = buf;
464
+ model_size = llama_model_size (lmodel);
465
+ model_n_params = llama_model_n_params (lmodel);
462
466
n_batch = inst.n_batch ;
463
467
n_threads = inst.n_threads ;
464
468
f32_kv = inst.f32_kv ;
@@ -524,7 +528,7 @@ struct test {
524
528
" build_commit" , " build_number" ,
525
529
" cuda" , " opencl" , " metal" , " gpu_blas" , " blas" ,
526
530
" cpu_info" , " gpu_info" ,
527
- " model_filename" , " model_type" ,
531
+ " model_filename" , " model_type" , " model_size " , " model_n_params " ,
528
532
" n_batch" , " n_threads" , " f16_kv" ,
529
533
" n_gpu_layers" , " main_gpu" , " mul_mat_q" , " low_vram" , " tensor_split" ,
530
534
" n_prompt" , " n_gen" , " test_time" ,
@@ -538,6 +542,7 @@ struct test {
538
542
539
543
static field_type get_field_type (const std::string & field) {
540
544
if (field == " build_number" || field == " n_batch" || field == " n_threads" ||
545
+ field == " model_size" || field == " model_n_params" ||
541
546
field == " n_gpu_layers" || field == " main_gpu" ||
542
547
field == " n_prompt" || field == " n_gen" ||
543
548
field == " avg_ns" || field == " stddev_ns" ) {
@@ -573,7 +578,7 @@ struct test {
573
578
build_commit, std::to_string (build_number),
574
579
std::to_string (cuda), std::to_string (opencl), std::to_string (metal), std::to_string (gpu_blas), std::to_string (blas),
575
580
cpu_info, gpu_info,
576
- model_filename, model_type,
581
+ model_filename, model_type, std::to_string (model_size), std::to_string (model_n_params),
577
582
std::to_string (n_batch), std::to_string (n_threads), std::to_string (!f32_kv),
578
583
std::to_string (n_gpu_layers), std::to_string (main_gpu), std::to_string (mul_mat_q), std::to_string (low_vram), tensor_split_str,
579
584
std::to_string (n_prompt), std::to_string (n_gen), test_time,
@@ -709,8 +714,15 @@ struct markdown_printer : public printer {
709
714
return -30 ;
710
715
}
711
716
if (field == " t/s" ) {
712
- return 15 ;
717
+ return 16 ;
713
718
}
719
+ if (field == " size" || field == " params" ) {
720
+ return 10 ;
721
+ }
722
+ if (field == " n_gpu_layers" ) {
723
+ return 3 ;
724
+ }
725
+
714
726
int width = std::max ((int )field.length (), 10 );
715
727
716
728
if (test::get_field_type (field) == test::STRING) {
@@ -719,9 +731,28 @@ struct markdown_printer : public printer {
719
731
return width;
720
732
}
721
733
734
// Returns the label to print in a table header for the given field name.
//
// A handful of long field names are replaced by short aliases; any other
// name is returned unchanged. Matching is exact and case-sensitive.
//
// NOTE(review): the literals below are reproduced exactly as they appear in
// this view, including the space after the opening quote. That padding looks
// like a rendering artifact rather than intended content -- confirm against
// the real file before relying on it.
static std::string get_field_display_name(const std::string & field) {
    struct alias {
        const char * name;   // field name as passed in by the caller
        const char * label;  // shortened text to display instead
    };

    static const alias aliases[] = {
        { " n_gpu_layers", " ngl"     },
        { " n_threads",    " threads" },
        { " mul_mat_q",    " mmq"     },
        { " tensor_split", " ts"      },
    };

    for (const auto & entry : aliases) {
        if (field == entry.name) {
            return entry.label;
        }
    }

    // No alias registered: display the field under its own name.
    return field;
}
749
+
722
750
void print_header (const cmd_params & params) override {
723
751
// select fields to print
724
- fields = { " model" , " backend" };
752
+ fields.push_back (" model" );
753
+ fields.push_back (" size" );
754
+ fields.push_back (" params" );
755
+ fields.push_back (" backend" );
725
756
bool is_cpu_backend = test::get_backend () == " CPU" || test::get_backend () == " BLAS" ;
726
757
if (!is_cpu_backend) {
727
758
fields.push_back (" n_gpu_layers" );
@@ -752,7 +783,7 @@ struct markdown_printer : public printer {
752
783
753
784
fprintf (fout, " |" );
754
785
for (const auto & field : fields) {
755
- fprintf (fout, " %*s |" , get_field_width (field), field.c_str ());
786
+ fprintf (fout, " %*s |" , get_field_width (field), get_field_display_name ( field) .c_str ());
756
787
}
757
788
fprintf (fout, " \n " );
758
789
fprintf (fout, " |" );
@@ -769,12 +800,26 @@ struct markdown_printer : public printer {
769
800
fprintf (fout, " |" );
770
801
for (const auto & field : fields) {
771
802
std::string value;
803
+ char buf[128 ];
772
804
if (field == " model" ) {
773
805
value = t.model_type ;
806
+ } else if (field == " size" ) {
807
+ if (t.model_size < 1024 *1024 *1024 ) {
808
+ snprintf (buf, sizeof (buf), " %.2f MiB" , t.model_size / 1024.0 / 1024.0 );
809
+ } else {
810
+ snprintf (buf, sizeof (buf), " %.2f GiB" , t.model_size / 1024.0 / 1024.0 / 1024.0 );
811
+ }
812
+ value = buf;
813
+ } else if (field == " params" ) {
814
+ if (t.model_n_params < 1000 *1000 *1000 ) {
815
+ snprintf (buf, sizeof (buf), " %.2f M" , t.model_n_params / 1e6 );
816
+ } else {
817
+ snprintf (buf, sizeof (buf), " %.2f B" , t.model_n_params / 1e9 );
818
+ }
819
+ value = buf;
774
820
} else if (field == " backend" ) {
775
821
value = test::get_backend ();
776
822
} else if (field == " test" ) {
777
- char buf[128 ];
778
823
if (t.n_prompt > 0 && t.n_gen == 0 ) {
779
824
snprintf (buf, sizeof (buf), " pp %d" , t.n_prompt );
780
825
} else if (t.n_gen > 0 && t.n_prompt == 0 ) {
@@ -785,7 +830,6 @@ struct markdown_printer : public printer {
785
830
}
786
831
value = buf;
787
832
} else if (field == " t/s" ) {
788
- char buf[128 ];
789
833
snprintf (buf, sizeof (buf), " %.2f ± %.2f" , t.avg_ts (), t.stdev_ts ());
790
834
value = buf;
791
835
} else if (vmap.find (field) != vmap.end ()) {
0 commit comments