@@ -1121,6 +1121,16 @@ struct llama_model_loader {
                     } break;
             }
 
+        // this is a way to mark that we have "guessed" the file type
+        ftype = (llama_ftype) (ftype | LLAMA_FTYPE_GUESSED);
+
+        {
+            const int kid = gguf_find_key(ctx_gguf, "general.file_type");
+            if (kid >= 0) {
+                ftype = (llama_ftype) gguf_get_val_u32(ctx_gguf, kid);
+            }
+        }
+
         for (int i = 0; i < n_kv; i++) {
             const char * name           = gguf_get_key(ctx_gguf, i);
             const enum gguf_type type   = gguf_get_kv_type(ctx_gguf, i);
@@ -1323,7 +1333,11 @@ struct llama_model_loader {
 // load LLaMA models
 //
 
-const char * llama_model_ftype_name(enum llama_ftype ftype) {
+std::string llama_model_ftype_name(enum llama_ftype ftype) {
+    if (ftype & LLAMA_FTYPE_GUESSED) {
+        return llama_model_ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";
+    }
+
     switch (ftype) {
         case LLAMA_FTYPE_ALL_F32:     return "all F32";
         case LLAMA_FTYPE_MOSTLY_F16:  return "mostly F16";
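Aside: this works because `LLAMA_FTYPE_GUESSED` is a single spare flag bit (1024 in `llama.h`) ORed on top of the real ftype, so the base type survives an OR / AND-NOT round trip. A minimal standalone sketch of that round trip (hypothetical test code, not part of this commit):

// sketch: the "guessed" marker is one flag bit on top of the real ftype
#include <cassert>

enum llama_ftype {
    LLAMA_FTYPE_ALL_F32    = 0,
    LLAMA_FTYPE_MOSTLY_F16 = 1,
    LLAMA_FTYPE_GUESSED    = 1024, // flag bit, not a file type of its own
};

int main() {
    llama_ftype ftype = LLAMA_FTYPE_MOSTLY_F16;

    // loader path: mark the value as guessed from the tensor types
    ftype = (llama_ftype) (ftype | LLAMA_FTYPE_GUESSED);
    assert(ftype & LLAMA_FTYPE_GUESSED);

    // printer path: strip the flag to recover the underlying type
    assert((llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED) == LLAMA_FTYPE_MOSTLY_F16);
    return 0;
}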
@@ -1552,7 +1566,7 @@ static void llama_model_load_internal(
     LLAMA_LOG_INFO("%s: freq_base    = %.1f\n", __func__, hparams.rope_freq_base);
     LLAMA_LOG_INFO("%s: freq_scale   = %g\n",   __func__, hparams.rope_freq_scale);
     LLAMA_LOG_INFO("%s: model type   = %s\n",   __func__, llama_model_type_name(model.type));
-    LLAMA_LOG_INFO("%s: model ftype  = %s\n",   __func__, llama_model_ftype_name(model.ftype));
+    LLAMA_LOG_INFO("%s: model ftype  = %s\n",   __func__, llama_model_ftype_name(model.ftype).c_str());
     LLAMA_LOG_INFO("%s: model size   = %.2f B\n", __func__, ml->n_elements*1e-9);
 
     // general kv
@@ -3620,6 +3634,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     // copy the KV pairs from the input file
     gguf_set_kv     (ctx_out, model_loader->ctx_gguf);
     gguf_set_val_u32(ctx_out, "general.quantization_version", GGML_QNT_VERSION);
+    gguf_set_val_u32(ctx_out, "general.file_type", ftype);
 
 #ifdef GGML_USE_K_QUANTS
     int n_attention_wv = 0;
@@ -4471,7 +4486,7 @@ int llama_model_n_embd(const struct llama_model * model) {
 }
 
 int llama_model_type(const struct llama_model * model, char * buf, size_t buf_size) {
-    return snprintf(buf, buf_size, "LLaMA %s %s", llama_model_type_name(model->type), llama_model_ftype_name(model->ftype));
+    return snprintf(buf, buf_size, "LLaMA %s %s", llama_model_type_name(model->type), llama_model_ftype_name(model->ftype).c_str());
 }
 
 int llama_model_quantize(
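Net effect: quantized outputs now carry an explicit `general.file_type` KV, and the loader prefers it over the guess. A minimal standalone reader using the same gguf API that appears in the diff (`gguf_init_from_file`, `gguf_find_key`, `gguf_get_val_u32`); a sketch only, with error handling kept minimal:

#include <cstdio>
#include "ggml.h" // gguf_* API used throughout the diff

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s model.gguf\n", argv[0]);
        return 1;
    }

    // metadata only - no tensor data is allocated
    struct gguf_init_params params = { /*.no_alloc =*/ true, /*.ctx =*/ NULL };
    struct gguf_context * ctx = gguf_init_from_file(argv[1], params);
    if (!ctx) {
        fprintf(stderr, "failed to load %s\n", argv[1]);
        return 1;
    }

    const int kid = gguf_find_key(ctx, "general.file_type");
    if (kid >= 0) {
        printf("general.file_type = %u\n", gguf_get_val_u32(ctx, kid));
    } else {
        printf("general.file_type missing - loader would fall back to guessing\n");
    }

    gguf_free(ctx);
    return 0;
}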