Skip to content

Commit ed4851c

Browse files
committed
feat(convert_hf_to_gguf): Add GraniteMoeModel
GraniteMoe has the same configuration deltas as Granite.

Branch: GraniteMoE

Signed-off-by: Gabe Goodhart <[email protected]>
1 parent b9c7052 commit ed4851c

File tree

3 files changed

+18
-9
lines changed

3 files changed

+18
-9
lines changed

convert_hf_to_gguf.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4102,14 +4102,23 @@ def set_gguf_parameters(self):
         # consistency
         if attention_scale := self.hparams.get("attention_multiplier"):
             self.gguf_writer.add_attention_scale(attention_scale)
+            logger.info("gguf: (granite) attention_scale = %s", attention_scale)
         if embedding_scale := self.hparams.get("embedding_multiplier"):
             self.gguf_writer.add_embedding_scale(embedding_scale)
+            logger.info("gguf: (granite) embedding_scale = %s", embedding_scale)
         if residual_scale := self.hparams.get("residual_multiplier"):
             self.gguf_writer.add_residual_scale(residual_scale)
-        if logits_scaling := self.hparams.get("logits_scaling"):
-            self.gguf_writer.add_logit_scale(logits_scaling)
+            logger.info("gguf: (granite) residual_scale = %s", residual_scale)
+        if logits_scale := self.hparams.get("logits_scaling"):
+            self.gguf_writer.add_logit_scale(logits_scale)
+            logger.info("gguf: (granite) logits_scale = %s", logits_scale)


+@Model.register("GraniteMoeForCausalLM")
+class GraniteMoeModel(GraniteModel):
+    """Conversion for IBM's GraniteMoeForCausalLM"""
+    model_arch = gguf.MODEL_ARCH.GRANITE_MOE
+

 ###### CONVERSION LOGIC ######

 # tree of lazy tensors

gguf-py/gguf/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1253,7 +1253,7 @@ class MODEL_TENSOR(IntEnum):
             MODEL_TENSOR.ATTN_V,
             MODEL_TENSOR.ATTN_OUT,
             MODEL_TENSOR.FFN_NORM,
-            MODEL_TENSOR.FFN_GATE_EXP,
+            MODEL_TENSOR.FFN_GATE_INP,
             MODEL_TENSOR.FFN_DOWN_EXP,
             MODEL_TENSOR.FFN_UP_EXP,
         ],

gguf-py/gguf/tensor_mapping.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -251,11 +251,12 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.FFN_GATE_INP: (
-            "layers.{bid}.feed_forward.gate",             # mixtral
-            "model.layers.{bid}.block_sparse_moe.gate",   # mixtral
-            "model.layers.{bid}.mlp.gate",                # qwen2moe olmoe
-            "transformer.decoder_layer.{bid}.router",     # Grok
-            "transformer.blocks.{bid}.ffn.router.layer",  # dbrx
+            "layers.{bid}.feed_forward.gate",                   # mixtral
+            "model.layers.{bid}.block_sparse_moe.gate",         # mixtral
+            "model.layers.{bid}.mlp.gate",                      # qwen2moe olmoe
+            "transformer.decoder_layer.{bid}.router",           # Grok
+            "transformer.blocks.{bid}.ffn.router.layer",        # dbrx
+            "model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe
         ),

         MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
@@ -329,7 +330,6 @@ class TensorNameMap:
             "transformer.decoder_layer.{bid}.moe.linear",  # Grok (merged)
             "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
             "model.layers.{bid}.mlp.experts.gate_proj",    # qwen2moe olmoe (merged)
-            "model.layers.{bid}.block_sparse_moe.router.layer",  # granitemoe
         ),

         MODEL_TENSOR.FFN_GATE_SHEXP: (

0 commit comments

Comments
 (0)