Skip to content

Commit 5b2ef0d

Browse files
committed
llama: add support for small granite models
It works only for the small models (3B and 8B). The convert-hf-to-gguf.py script uses the vocabulary size of the Granite models to detect Granite and set the correct configuration. Signed-off-by: Giuseppe Scrivano <[email protected]>
1 parent e82b74d commit 5b2ef0d

File tree

2 files changed

+17
-5
lines changed

2 files changed

+17
-5
lines changed

convert-hf-to-gguf.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1322,6 +1322,12 @@ def set_gguf_parameters(self):
13221322
if "add_prefix_space" in tokenizer_config_json:
13231323
self.gguf_writer.add_add_space_prefix(tokenizer_config_json["add_prefix_space"])
13241324

1325+
# Apply to granite small models only
1326+
if self.hparams.get("vocab_size", 32000) == 49152:
1327+
self.gguf_writer.add_add_bos_token(False)
1328+
self.gguf_writer.add_rope_type(gguf.RopeType.NEOX)
1329+
self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
1330+
13251331
@staticmethod
13261332
def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
13271333
if n_head_kv is not None and n_head != n_head_kv:
@@ -1336,10 +1342,12 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
13361342
n_head = self.hparams["num_attention_heads"]
13371343
n_kv_head = self.hparams.get("num_key_value_heads")
13381344

1339-
if name.endswith("q_proj.weight"):
1340-
data_torch = LlamaModel.permute(data_torch, n_head, n_head)
1341-
if name.endswith("k_proj.weight"):
1342-
data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
1345+
# Skip for granite models
1346+
if self.hparams.get("vocab_size", 32000) != 49152:
1347+
if name.endswith("q_proj.weight"):
1348+
data_torch = LlamaModel.permute(data_torch, n_head, n_head)
1349+
if name.endswith("k_proj.weight"):
1350+
data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
13431351

13441352
# process the experts separately
13451353
if name.find("block_sparse_moe.experts") != -1:

llama.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4001,7 +4001,9 @@ static void llm_load_hparams(
40014001
switch (hparams.n_layer) {
40024002
case 22: model.type = e_model::MODEL_1B; break;
40034003
case 26: model.type = e_model::MODEL_3B; break;
4004-
case 32: model.type = hparams.n_vocab < 40000 ? e_model::MODEL_7B : e_model::MODEL_8B; break;
4004+
// granite uses a vocab with len 49152
4005+
case 32: model.type = hparams.n_vocab == 49152 ? e_model::MODEL_3B : (hparams.n_vocab < 40000 ? e_model::MODEL_7B : e_model::MODEL_8B); break;
4006+
case 36: model.type = e_model::MODEL_8B; break; // granite
40054007
case 40: model.type = e_model::MODEL_13B; break;
40064008
case 48: model.type = e_model::MODEL_34B; break;
40074009
case 60: model.type = e_model::MODEL_30B; break;
@@ -4271,6 +4273,8 @@ static void llm_load_hparams(
42714273
case 30: model.type = e_model::MODEL_3B; break;
42724274
case 32: model.type = e_model::MODEL_7B; break;
42734275
case 40: model.type = e_model::MODEL_15B; break;
4276+
case 52: model.type = e_model::MODEL_20B; break; // granite
4277+
case 88: model.type = e_model::MODEL_34B; break; // granite
42744278
default: model.type = e_model::MODEL_UNKNOWN;
42754279
}
42764280
} break;

0 commit comments

Comments (0)