Skip to content

Commit 5d04440

Browse files
authored
Merge branch 'gguf' into gguf-convert
2 parents 39362f3 + d6fd53a commit 5d04440

File tree

2 files changed

+10
-5
lines changed

2 files changed

+10
-5
lines changed

examples/gguf/gguf.cpp

Lines changed: 1 addition & 4 deletions
@@ -233,16 +233,13 @@ int main(int argc, char ** argv) {
     const std::string fname(argv[1]);
     const std::string mode (argv[2]);

-    GGML_ASSERT((mode == "r" || mode == "w" || mode == "q") && "mode must be r, w or q");
+    GGML_ASSERT((mode == "r" || mode == "w") && "mode must be r or w");

     if (mode == "w") {
         GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file");
     } else if (mode == "r") {
         GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file");
         GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file");
-    } else if (mode == "q") {
-        llama_model_quantize_params params = llama_model_quantize_default_params();
-        llama_model_quantize(fname.c_str(), "quant.gguf", &params);
     }

     return 0;

llama.cpp

Lines changed: 9 additions & 1 deletion

@@ -1022,6 +1022,7 @@ struct llama_model_loader {
     int n_kv      = 0;
     int n_tensors = 0;
     int n_created = 0;
+    size_t n_tot_elements = 0;

     bool use_mmap = false;
@@ -1046,6 +1047,12 @@ struct llama_model_loader {

     file_version = (enum llama_file_version) gguf_get_version(ctx_gguf);

+    for (int i = 0; i < n_tensors; i++) {
+        const char * name = gguf_get_tensor_name(ctx_gguf, i);
+        struct ggml_tensor * t = ggml_get_tensor(ctx_meta, name);
+        n_tot_elements += ggml_nelements(t);
+    }
+
     // print meta data
     // TODO: make optional
     {
@@ -1416,7 +1423,8 @@ static void llama_model_load_internal(
     LLAMA_LOG_INFO("%s: n_ff       = %u\n", __func__, hparams.n_ff);
     LLAMA_LOG_INFO("%s: freq_base  = %.1f\n", __func__, hparams.rope_freq_base);
     LLAMA_LOG_INFO("%s: freq_scale = %g\n", __func__, hparams.rope_freq_scale);
-    LLAMA_LOG_INFO("%s: model size = %s\n", __func__, llama_model_type_name(model.type));
+    LLAMA_LOG_INFO("%s: model type = %s\n", __func__, llama_model_type_name(model.type));
+    LLAMA_LOG_INFO("%s: model size = %.2f B\n", __func__, ml->n_tot_elements*1e-9);

     // TODO: print number of tensors for each quantization
 }

0 commit comments

Comments
 (0)