@@ -225,6 +225,7 @@ enum llm_arch {
     LLM_ARCH_COMMAND_R,
     LLM_ARCH_DBRX,
     LLM_ARCH_OLMO,
+    LLM_ARCH_GRANITE_SMALL,
     LLM_ARCH_UNKNOWN,
 };

@@ -261,6 +262,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_COMMAND_R,       "command-r"     },
     { LLM_ARCH_DBRX,            "dbrx"          },
     { LLM_ARCH_OLMO,            "olmo"          },
+    { LLM_ARCH_GRANITE_SMALL,   "granite-small" },
     { LLM_ARCH_UNKNOWN,         "(unknown)"     },
 };

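The two hunks above register the new architecture: an enum value plus the "granite-small" string that a converted GGUF file carries in its general.architecture metadata. Below is a minimal, self-contained sketch of the name-to-enum reverse lookup such a table enables; llm_arch_from_name is an illustrative helper, not llama.cpp's own function.

    // Sketch only: name-to-enum reverse lookup over a table shaped like
    // LLM_ARCH_NAMES. llm_arch_from_name is hypothetical, not llama.cpp API.
    #include <cstdio>
    #include <map>
    #include <string>

    enum llm_arch { LLM_ARCH_OLMO, LLM_ARCH_GRANITE_SMALL, LLM_ARCH_UNKNOWN };

    static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
        { LLM_ARCH_OLMO,          "olmo"          },
        { LLM_ARCH_GRANITE_SMALL, "granite-small" },
        { LLM_ARCH_UNKNOWN,       "(unknown)"     },
    };

    // Map the general.architecture string from a GGUF header back to the
    // enum; unknown names fall through to LLM_ARCH_UNKNOWN.
    static llm_arch llm_arch_from_name(const std::string & name) {
        for (const auto & kv : LLM_ARCH_NAMES) {
            if (kv.second == name) {
                return kv.first;
            }
        }
        return LLM_ARCH_UNKNOWN;
    }

    int main() {
        printf("%d\n", llm_arch_from_name("granite-small") == LLM_ARCH_GRANITE_SMALL); // prints 1
    }
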
@@ -1036,6 +1038,32 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
             { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
         },
     },
+    {
+        LLM_ARCH_GRANITE_SMALL,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_EXP,    "blk.%d.ffn_gate.%d" },
+            { LLM_TENSOR_FFN_DOWN_EXP,    "blk.%d.ffn_down.%d" },
+            { LLM_TENSOR_FFN_UP_EXP,      "blk.%d.ffn_up.%d" },
+            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
+        },
+    },
     {
         LLM_ARCH_UNKNOWN,
         {

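The tensor-name table above mirrors the LLaMA layout: per-block names carry a %d slot for the block index, and the _EXP/_EXPS entries are the per-expert variants used by MoE-style checkpoints. Below is a minimal sketch of how such a template expands into a concrete tensor name; the tn() callable seen later in this diff appends a suffix such as "weight" in a similar way, but format_tensor_name here is illustrative only.

    // Sketch only: expands a template like "blk.%d.attn_q" into a concrete
    // tensor name such as "blk.0.attn_q.weight". Not llama.cpp's tn() helper.
    #include <cstdio>
    #include <string>

    static std::string format_tensor_name(const char * templ, int block, const char * suffix) {
        char buf[256];
        snprintf(buf, sizeof(buf), templ, block);   // fill the %d block index
        return std::string(buf) + "." + suffix;     // append ".weight" / ".bias"
    }

    int main() {
        // "blk.%d.attn_q" -> "blk.0.attn_q.weight"
        printf("%s\n", format_tensor_name("blk.%d.attn_q", 0, "weight").c_str());
    }
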
@@ -4288,6 +4316,16 @@ static void llm_load_hparams(
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;
+        case LLM_ARCH_GRANITE_SMALL:
+            {
+                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
+
+                switch (hparams.n_layer) {
+                    case 32: model.type = e_model::MODEL_3B; break;
+                    case 36: model.type = e_model::MODEL_8B; break;
+                    default: model.type = e_model::MODEL_UNKNOWN;
+                }
+            } break;
         default: (void)0;
     }

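This hunk reads the RMS-norm epsilon from the GGUF metadata and infers the model size from the layer count: 32 layers map to the 3B variant, 36 to the 8B variant, and anything else is left as unknown. The same heuristic as a standalone helper, for illustration only (granite_small_size_label is not part of llama.cpp):

    // Sketch only: mirrors the layer-count heuristic above.
    #include <cstdint>
    #include <cstdio>

    static const char * granite_small_size_label(uint32_t n_layer) {
        switch (n_layer) {
            case 32: return "3B";       // 32 transformer blocks -> 3B variant
            case 36: return "8B";       // 36 transformer blocks -> 8B variant
            default: return "unknown";  // anything else stays unclassified
        }
    }

    int main() {
        printf("%s\n", granite_small_size_label(36)); // prints "8B"
    }
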
@@ -4397,6 +4435,9 @@ static void llm_load_vocab(
     } else {
         if (tokenizer_model == "gpt2") {
             vocab.type = LLAMA_VOCAB_TYPE_BPE;
+            if (model.arch == LLM_ARCH_GRANITE_SMALL) {
+                vocab.add_space_prefix = false;
+            }
         } else {
             LLAMA_LOG_WARN("%s: unknown tokenizer: '%s'", __func__, tokenizer_model.c_str());
             LLAMA_LOG_WARN("%s: using default tokenizer: 'llama'", __func__);

@@ -4967,6 +5008,7 @@ static bool llm_load_tensors(
         case LLM_ARCH_LLAMA:
         case LLM_ARCH_REFACT:
         case LLM_ARCH_MINICPM:
+        case LLM_ARCH_GRANITE_SMALL:
             {
                 model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});

@@ -10668,6 +10710,7 @@ static struct ggml_cgraph * llama_build_graph(

     switch (model.arch) {
         case LLM_ARCH_LLAMA:
+        case LLM_ARCH_GRANITE_SMALL:
             {
                 result = llm.build_llama();
             } break;

@@ -15811,6 +15854,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
         case LLM_ARCH_PHI3:
         case LLM_ARCH_GEMMA:
         case LLM_ARCH_STARCODER2:
+        case LLM_ARCH_GRANITE_SMALL:
             return LLAMA_ROPE_TYPE_NEOX;

         // all model arches should be listed explicitly here

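The last three hunks reuse the existing LLaMA code paths: tensor creation in llm_load_tensors, the build_llama() compute graph, and the NeoX RoPE type. With those in place, a converted granite-small GGUF should load through the ordinary public API. A hedged usage sketch follows; the model path is a placeholder and the function names are taken from llama.h as of roughly this revision, so check them against the header you build with.

    // Sketch only: loads a converted GGUF through the public llama.h API.
    // "granite-small.gguf" is a placeholder path.
    #include <cstdio>
    #include "llama.h"

    int main() {
        llama_backend_init();

        llama_model_params mparams = llama_model_default_params();
        llama_model * model = llama_load_model_from_file("granite-small.gguf", mparams);
        if (model == NULL) {
            fprintf(stderr, "failed to load model\n");
            return 1;
        }

        llama_context_params cparams = llama_context_default_params();
        llama_context * ctx = llama_new_context_with_model(model, cparams);
        if (ctx == NULL) {
            fprintf(stderr, "failed to create context\n");
            llama_free_model(model);
            return 1;
        }

        // ... tokenize, llama_decode(), and sample as with any llama-family model ...

        llama_free(ctx);
        llama_free_model(model);
        llama_backend_free();
        return 0;
    }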