Skip to content

Commit 5d054a4

Browse files
committed
fix(llama.cpp): Use separate switch clause for granite in llm_load_hparams

Branch: GraniteLM

Signed-off-by: Gabe Goodhart <[email protected]>
1 parent 65c5bb9 commit 5d054a4

File tree

1 file changed

+15
-9
lines changed

1 file changed

+15
-9
lines changed

src/llama.cpp

Lines changed: 15 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5438,7 +5438,6 @@ static void llm_load_hparams(
54385438
// arch-specific KVs
54395439
switch (model.arch) {
54405440
case LLM_ARCH_LLAMA:
5441-
case LLM_ARCH_GRANITE:
54425441
{
54435442
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
54445443

@@ -5455,20 +5454,13 @@ static void llm_load_hparams(
54555454
// granite uses a vocab with len 49152
54565455
case 32: model.type = hparams.n_vocab == 49152 ? e_model::MODEL_3B : (hparams.n_vocab < 40000 ? e_model::MODEL_7B : e_model::MODEL_8B); break;
54575456
case 36: model.type = e_model::MODEL_8B; break; // granite
5458-
case 40: model.type = (hparams.n_vocab == 49152 || hparams.n_vocab == 49156) ? e_model::MODEL_3B : e_model::MODEL_13B; break;
5457+
case 40: model.type = e_model::MODEL_13B; break;
54595458
case 48: model.type = e_model::MODEL_34B; break;
54605459
case 60: model.type = e_model::MODEL_30B; break;
54615460
case 80: model.type = hparams.n_head() == hparams.n_head_kv() ? e_model::MODEL_65B : e_model::MODEL_70B; break;
54625461
default: model.type = e_model::MODEL_UNKNOWN;
54635462
}
54645463
}
5465-
// Extra multipliers for Granite architecture
5466-
if (model.arch == LLM_ARCH_GRANITE) {
5467-
ml.get_key(LLM_KV_LOGIT_SCALE, hparams.f_logit_scale);
5468-
ml.get_key(LLM_KV_RESIDUAL_SCALE, hparams.f_residual_scale);
5469-
ml.get_key(LLM_KV_EMBEDDING_SCALE, hparams.f_embedding_scale);
5470-
ml.get_key(LLM_KV_ATTENTION_SCALE, hparams.f_attention_scale);
5471-
}
54725464
} break;
54735465
case LLM_ARCH_MINICPM:
54745466
{
@@ -6059,6 +6051,20 @@ static void llm_load_hparams(
60596051
default: model.type = e_model::MODEL_UNKNOWN;
60606052
}
60616053
} break;
6054+
case LLM_ARCH_GRANITE:
6055+
{
6056+
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
6057+
ml.get_key(LLM_KV_LOGIT_SCALE, hparams.f_logit_scale);
6058+
ml.get_key(LLM_KV_RESIDUAL_SCALE, hparams.f_residual_scale);
6059+
ml.get_key(LLM_KV_EMBEDDING_SCALE, hparams.f_embedding_scale);
6060+
ml.get_key(LLM_KV_ATTENTION_SCALE, hparams.f_attention_scale);
6061+
6062+
switch (hparams.n_layer) {
6063+
case 40: model.type = e_model::MODEL_3B; break;
6064+
// Add additional layer/vocab/etc checks here for other model sizes
6065+
default: model.type = e_model::MODEL_UNKNOWN;
6066+
}
6067+
} break;
60626068
default: (void)0;
60636069
}
60646070

0 commit comments

Comments
 (0)