Commit bee1f0e

llama : add ftype meta info to the model

ggml-ci

1 parent ef3f333

File tree

3 files changed (+24, -3 lines)

gguf.py
llama.cpp
llama.h

gguf.py

Lines changed: 4 additions & 0 deletions

@@ -26,6 +26,7 @@
 KEY_GENERAL_LICENSE        = "general.license"
 KEY_GENERAL_SOURCE_URL     = "general.source.url"
 KEY_GENERAL_SOURCE_HF_REPO = "general.source.hugginface.repository"
+KEY_GENERAL_FILE_TYPE      = "general.file_type"

 # LLM
 KEY_LLM_CONTEXT_LENGTH     = "{arch}.context_length"

@@ -595,6 +596,9 @@ def add_source_url(self, url: str):
     def add_source_hf_repo(self, repo: str):
         self.add_string(KEY_GENERAL_SOURCE_HF_REPO, repo)

+    def add_file_type(self, ftype: int):
+        self.add_uint32(KEY_GENERAL_FILE_TYPE, ftype)
+
     def add_name(self, name: str):
         self.add_string(KEY_GENERAL_NAME, name)

llama.cpp

Lines changed: 18 additions & 3 deletions

@@ -1121,6 +1121,16 @@ struct llama_model_loader {
                 } break;
         }

+        // this is a way to mark that we have "guessed" the file type
+        ftype = (llama_ftype) (ftype | LLAMA_FTYPE_GUESSED);
+
+        {
+            const int kid = gguf_find_key(ctx_gguf, "general.file_type");
+            if (kid >= 0) {
+                ftype = (llama_ftype) gguf_get_val_u32(ctx_gguf, kid);
+            }
+        }
+
        for (int i = 0; i < n_kv; i++) {
            const char * name         = gguf_get_key(ctx_gguf, i);
            const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
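The pattern in the hunk above: the ftype inferred from the tensor contents is first tagged with LLAMA_FTYPE_GUESSED, and the tag survives only when the file carries no explicit general.file_type key. A minimal standalone sketch of the same pattern (the enum values mirror llama.h; the metadata lookup is stubbed out with placeholder variables):

#include <cstdint>
#include <cstdio>

enum llama_ftype {
    LLAMA_FTYPE_MOSTLY_F16 = 1,
    LLAMA_FTYPE_GUESSED    = 1024, // flag bit, not a concrete file type
};

int main() {
    // start from a guess derived from the tensor types
    llama_ftype ftype = (llama_ftype) (LLAMA_FTYPE_MOSTLY_F16 | LLAMA_FTYPE_GUESSED);

    // stand-ins for gguf_find_key()/gguf_get_val_u32() on the model file
    const bool     have_meta = true;
    const uint32_t meta_val  = LLAMA_FTYPE_MOSTLY_F16;

    // explicit metadata replaces the guess outright, which also clears the flag
    if (have_meta) {
        ftype = (llama_ftype) meta_val;
    }

    printf("guessed: %s\n", (ftype & LLAMA_FTYPE_GUESSED) ? "yes" : "no");
    return 0;
}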
@@ -1323,7 +1333,11 @@ struct llama_model_loader {
 // load LLaMA models
 //

-const char * llama_model_ftype_name(enum llama_ftype ftype) {
+std::string llama_model_ftype_name(enum llama_ftype ftype) {
+    if (ftype & LLAMA_FTYPE_GUESSED) {
+        return llama_model_ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";
+    }
+
     switch (ftype) {
         case LLAMA_FTYPE_ALL_F32:    return "all F32";
         case LLAMA_FTYPE_MOSTLY_F16: return "mostly F16";
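The return type changes from const char * to std::string because the "(guessed)" variant is composed at runtime: a static string table no longer covers every case, and returning c_str() of a local would dangle. A compilable sketch of the recursion, with the enum and switch reduced to two cases for brevity (ftype_name stands in for the real function):

#include <cstdio>
#include <string>

enum llama_ftype { LLAMA_FTYPE_MOSTLY_F16 = 1, LLAMA_FTYPE_GUESSED = 1024 };

static std::string ftype_name(enum llama_ftype ftype) {
    // strip the flag, name the underlying type, then append the suffix
    if (ftype & LLAMA_FTYPE_GUESSED) {
        return ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";
    }
    switch (ftype) {
        case LLAMA_FTYPE_MOSTLY_F16: return "mostly F16";
        default:                     return "unknown";
    }
}

int main() {
    const auto f = (enum llama_ftype) (LLAMA_FTYPE_MOSTLY_F16 | LLAMA_FTYPE_GUESSED);
    // the temporary std::string outlives the full printf expression,
    // so calling .c_str() inline is safe -- caching the pointer is not
    printf("%s\n", ftype_name(f).c_str()); // prints: mostly F16 (guessed)
    return 0;
}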
@@ -1552,7 +1566,7 @@ static void llama_model_load_internal(
     LLAMA_LOG_INFO("%s: freq_base  = %.1f\n", __func__, hparams.rope_freq_base);
     LLAMA_LOG_INFO("%s: freq_scale = %g\n",   __func__, hparams.rope_freq_scale);
     LLAMA_LOG_INFO("%s: model type = %s\n",   __func__, llama_model_type_name(model.type));
-    LLAMA_LOG_INFO("%s: model ftype = %s\n",  __func__, llama_model_ftype_name(model.ftype));
+    LLAMA_LOG_INFO("%s: model ftype = %s\n",  __func__, llama_model_ftype_name(model.ftype).c_str());
     LLAMA_LOG_INFO("%s: model size = %.2f B\n", __func__, ml->n_elements*1e-9);

     // general kv
@@ -3620,6 +3634,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     // copy the KV pairs from the input file
     gguf_set_kv     (ctx_out, model_loader->ctx_gguf);
     gguf_set_val_u32(ctx_out, "general.quantization_version", GGML_QNT_VERSION);
+    gguf_set_val_u32(ctx_out, "general.file_type", ftype);

 #ifdef GGML_USE_K_QUANTS
     int n_attention_wv = 0;
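With this line, quantization records the output ftype explicitly, so requantized models no longer take the "guessed" path on load. A sketch of the write/read round trip, assuming the gguf C API as declared in ggml.h around this commit (gguf_init_empty, gguf_set_val_u32, gguf_write_to_file, gguf_init_from_file, gguf_find_key, gguf_get_val_u32, gguf_free):

#include <cstdio>
#include "ggml.h"

int main() {
    // write: store the file type the same way llama_model_quantize_internal does
    struct gguf_context * ctx_out = gguf_init_empty();
    gguf_set_val_u32(ctx_out, "general.file_type", 1 /* LLAMA_FTYPE_MOSTLY_F16 */);
    gguf_write_to_file(ctx_out, "meta.gguf", /*only_meta =*/ true);
    gguf_free(ctx_out);

    // read: recover it the same way llama_model_loader does
    struct gguf_init_params params = { /*.no_alloc =*/ true, /*.ctx =*/ NULL };
    struct gguf_context * ctx_in = gguf_init_from_file("meta.gguf", params);
    const int kid = gguf_find_key(ctx_in, "general.file_type");
    if (kid >= 0) {
        printf("general.file_type = %u\n", gguf_get_val_u32(ctx_in, kid));
    }
    gguf_free(ctx_in);
    return 0;
}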
@@ -4471,7 +4486,7 @@ int llama_model_n_embd(const struct llama_model * model) {
 }

 int llama_model_type(const struct llama_model * model, char * buf, size_t buf_size) {
-    return snprintf(buf, buf_size, "LLaMA %s %s", llama_model_type_name(model->type), llama_model_ftype_name(model->ftype));
+    return snprintf(buf, buf_size, "LLaMA %s %s", llama_model_type_name(model->type), llama_model_ftype_name(model->ftype).c_str());
 }

 int llama_model_quantize(

llama.h

Lines changed: 2 additions & 0 deletions

@@ -103,6 +103,8 @@ extern "C" {
         LLAMA_FTYPE_MOSTLY_Q5_K_S = 16, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_Q5_K_M = 17, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_Q6_K   = 18, // except 1d tensors
+
+        LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
     };

     typedef struct llama_token_data {
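Using 1024 for the flag works because every concrete LLAMA_FTYPE_* value is small (18 at most as of this commit), so bit 10 never collides with a real file type and masking it off recovers the original value exactly. A standalone check of that arithmetic (the enum values mirror llama.h):

#include <cassert>

enum llama_ftype {
    LLAMA_FTYPE_MOSTLY_Q6_K = 18,   // highest concrete value as of this commit
    LLAMA_FTYPE_GUESSED     = 1024, // bit 10: disjoint from all of the above
};

int main() {
    // the flag bit does not overlap any concrete file type ...
    static_assert((LLAMA_FTYPE_MOSTLY_Q6_K & LLAMA_FTYPE_GUESSED) == 0, "flag collides with a file type");

    // ... so tagging and untagging round-trips losslessly
    const llama_ftype tagged = (llama_ftype) (LLAMA_FTYPE_MOSTLY_Q6_K | LLAMA_FTYPE_GUESSED);
    assert((tagged & ~LLAMA_FTYPE_GUESSED) == LLAMA_FTYPE_MOSTLY_Q6_K);
    return 0;
}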
