2 files changed: +10 −5

@@ -233,16 +233,13 @@ int main(int argc, char ** argv) {
     const std::string fname(argv[1]);
     const std::string mode (argv[2]);
 
-    GGML_ASSERT((mode == "r" || mode == "w" || mode == "q") && "mode must be r, w or q");
+    GGML_ASSERT((mode == "r" || mode == "w") && "mode must be r or w");
 
     if (mode == "w") {
         GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file");
     } else if (mode == "r") {
         GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file");
         GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file");
-    } else if (mode == "q") {
-        llama_model_quantize_params params = llama_model_quantize_default_params();
-        llama_model_quantize(fname.c_str(), "quant.gguf", &params);
     }
 
     return 0;
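
Note: with the "q" mode gone, the gguf example no longer pulls in llama.cpp's quantization path. Anyone who relied on `gguf <file> q` can call the public llama.h API directly instead; a minimal standalone sketch mirroring the removed branch (the hard-coded output name "quant.gguf" is carried over from the deleted code, and the defaults are whatever llama_model_quantize_default_params() selects):

// quantize_sketch.cpp — standalone replacement for the removed "q" mode,
// assuming the llama.h API as it exists in this revision.
#include "llama.h"

#include <cstdio>

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s input.gguf\n", argv[0]);
        return 1;
    }

    // same defaults the removed branch used; adjust params.ftype,
    // params.nthread, etc. before quantizing if needed
    llama_model_quantize_params params = llama_model_quantize_default_params();

    // writes the quantized model to quant.gguf, as the old "q" mode did;
    // returns 0 on success
    return llama_model_quantize(argv[1], "quant.gguf", &params);
}
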
@@ -1022,6 +1022,7 @@ struct llama_model_loader {
     int n_kv      = 0;
     int n_tensors = 0;
     int n_created = 0;
+    size_t n_tot_elements = 0;
 
     bool use_mmap = false;
 
@@ -1046,6 +1047,12 @@ struct llama_model_loader {
 
         file_version = (enum llama_file_version) gguf_get_version(ctx_gguf);
 
+        for (int i = 0; i < n_tensors; i++) {
+            const char * name = gguf_get_tensor_name(ctx_gguf, i);
+            struct ggml_tensor * t = ggml_get_tensor(ctx_meta, name);
+            n_tot_elements += ggml_nelements(t);
+        }
+
         // print meta data
         // TODO: make optional
         {
@@ -1416,7 +1423,8 @@ static void llama_model_load_internal(
     LLAMA_LOG_INFO("%s: n_ff       = %u\n",   __func__, hparams.n_ff);
     LLAMA_LOG_INFO("%s: freq_base  = %.1f\n", __func__, hparams.rope_freq_base);
     LLAMA_LOG_INFO("%s: freq_scale = %g\n",   __func__, hparams.rope_freq_scale);
-    LLAMA_LOG_INFO("%s: model size = %s\n",   __func__, llama_model_type_name(model.type));
+    LLAMA_LOG_INFO("%s: model type = %s\n",   __func__, llama_model_type_name(model.type));
+    LLAMA_LOG_INFO("%s: model size = %.2f B\n", __func__, ml->n_tot_elements*1e-9);
 
     // TODO: print number of tensors for each quantization
 }
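
Note: the counting loop runs over the loader's no_alloc metadata context, so ggml_nelements() reads only tensor shapes and no weight data is touched. The new log line scales the count by 1e-9, so the "B" unit means billions of parameters, not bytes (e.g. 6.74e9 elements prints as "model size = 6.74 B"). The same idiom works outside the loader; a sketch assuming the public gguf/ggml API of this revision (count_params is a hypothetical helper):

// count_params.cpp — standalone version of the loader's new element counting.
#include "ggml.h"

#include <cstdio>

int64_t count_params(const char * fname) {
    struct ggml_context * ctx_meta = NULL;

    struct gguf_init_params params = {
        /*.no_alloc =*/ true,      // metadata only: tensor shapes, no weight data
        /*.ctx      =*/ &ctx_meta,
    };

    struct gguf_context * ctx_gguf = gguf_init_from_file(fname, params);
    if (!ctx_gguf) {
        return -1;
    }

    // same loop the loader now runs: look up each tensor's metadata
    // and accumulate its element count
    int64_t n_tot_elements = 0;
    const int n_tensors = gguf_get_n_tensors(ctx_gguf);
    for (int i = 0; i < n_tensors; i++) {
        const char * name = gguf_get_tensor_name(ctx_gguf, i);
        struct ggml_tensor * t = ggml_get_tensor(ctx_meta, name);
        n_tot_elements += ggml_nelements(t);
    }

    gguf_free(ctx_gguf);
    ggml_free(ctx_meta);

    // same formatting as the new log line: element count in billions
    printf("model size = %.2f B\n", n_tot_elements*1e-9);

    return n_tot_elements;
}
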