@@ -5438,7 +5438,6 @@ static void llm_load_hparams(
     // arch-specific KVs
     switch (model.arch) {
         case LLM_ARCH_LLAMA:
-        case LLM_ARCH_GRANITE:
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
@@ -5455,20 +5454,13 @@ static void llm_load_hparams(
                     // granite uses a vocab with len 49152
                     case 32: model.type = hparams.n_vocab == 49152 ? e_model::MODEL_3B : (hparams.n_vocab < 40000 ? e_model::MODEL_7B : e_model::MODEL_8B); break;
                     case 36: model.type = e_model::MODEL_8B; break; // granite
-                    case 40: model.type = (hparams.n_vocab == 49152 || hparams.n_vocab == 49156) ? e_model::MODEL_3B : e_model::MODEL_13B; break;
+                    case 40: model.type = e_model::MODEL_13B; break;
                     case 48: model.type = e_model::MODEL_34B; break;
                     case 60: model.type = e_model::MODEL_30B; break;
                     case 80: model.type = hparams.n_head() == hparams.n_head_kv() ? e_model::MODEL_65B : e_model::MODEL_70B; break;
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
-
-                // Extra multipliers for Granite architecture
-                if (model.arch == LLM_ARCH_GRANITE) {
-                    ml.get_key(LLM_KV_LOGIT_SCALE, hparams.f_logit_scale);
-                    ml.get_key(LLM_KV_RESIDUAL_SCALE, hparams.f_residual_scale);
-                    ml.get_key(LLM_KV_EMBEDDING_SCALE, hparams.f_embedding_scale);
-                    ml.get_key(LLM_KV_ATTENTION_SCALE, hparams.f_attention_scale);
-                }
             } break;
         case LLM_ARCH_MINICPM:
             {
@@ -6059,6 +6051,20 @@ static void llm_load_hparams(
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;
+        case LLM_ARCH_GRANITE:
+            {
+                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
+                ml.get_key(LLM_KV_LOGIT_SCALE, hparams.f_logit_scale);
+                ml.get_key(LLM_KV_RESIDUAL_SCALE, hparams.f_residual_scale);
+                ml.get_key(LLM_KV_EMBEDDING_SCALE, hparams.f_embedding_scale);
+                ml.get_key(LLM_KV_ATTENTION_SCALE, hparams.f_attention_scale);
+
+                switch (hparams.n_layer) {
+                    case 40: model.type = e_model::MODEL_3B; break;
+                    // Add additional layer/vocab/etc checks here for other model sizes
+                    default: model.type = e_model::MODEL_UNKNOWN;
+                }
+            } break;
         default: (void)0;
     }
0 commit comments