Skip to content

Commit 65c5bb9

Browse files
committed
fix(convert_hf_to_gguf/gguf-py): _multiplier -> _scale
The transformers parameter names containing _multiplier will now be converted to their _scale equivalents during conversion. Branch: GraniteLM Signed-off-by: Gabe Goodhart <[email protected]>
1 parent 0bdf04e commit 65c5bb9

File tree

3 files changed

+20
-18
lines changed

3 files changed

+20
-18
lines changed

convert_hf_to_gguf.py

Lines changed: 11 additions & 9 deletions
Original file line number · Diff line number · Diff line change
@@ -4090,20 +4090,22 @@ def set_gguf_parameters(self):
40904090
40914091
- No head_dim support
40924092
- New multiplier params:
4093-
- attention_multiplier
4094-
- embedding_multiplier
4095-
- residual_multiplier
4093+
- attention_scale
4094+
- embedding_scale
4095+
- residual_scale
40964096
- logits_scaling
40974097
"""
40984098
if head_dim := self.hparams.pop("head_dim", None):
40994099
logger.warning("Ignoring head_dim (%s) from config for Granite", head_dim)
41004100
super().set_gguf_parameters()
4101-
if attention_multiplier := self.hparams.get("attention_multiplier"):
4102-
self.gguf_writer.add_attention_multiplier(attention_multiplier)
4103-
if embedding_multiplier := self.hparams.get("embedding_multiplier"):
4104-
self.gguf_writer.add_embedding_multiplier(embedding_multiplier)
4105-
if residual_multiplier := self.hparams.get("residual_multiplier"):
4106-
self.gguf_writer.add_residual_multiplier(residual_multiplier)
4101+
# NOTE: Convert _multiplier params to _scale params for naming
4102+
# consistency
4103+
if attention_scale := self.hparams.get("attention_multiplier"):
4104+
self.gguf_writer.add_attention_scale(attention_scale)
4105+
if embedding_scale := self.hparams.get("embedding_multiplier"):
4106+
self.gguf_writer.add_embedding_scale(embedding_scale)
4107+
if residual_scale := self.hparams.get("residual_multiplier"):
4108+
self.gguf_writer.add_residual_scale(residual_scale)
41074109
if logits_scaling := self.hparams.get("logits_scaling"):
41084110
self.gguf_writer.add_logit_scale(logits_scaling)
41094111

gguf-py/gguf/constants.py

Lines changed: 3 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -97,8 +97,8 @@ class LLM:
9797
RESCALE_EVERY_N_LAYERS = "{arch}.rescale_every_n_layers"
9898
TIME_MIX_EXTRA_DIM = "{arch}.time_mix_extra_dim"
9999
TIME_DECAY_EXTRA_DIM = "{arch}.time_decay_extra_dim"
100-
RESIDUAL_MULTIPLIER = "{arch}.residual_multiplier"
101-
EMBEDDING_MULTIPLIER = "{arch}.embedding_multiplier"
100+
RESIDUAL_SCALE = "{arch}.residual_scale"
101+
EMBEDDING_SCALE = "{arch}.embedding_scale"
102102

103103
class Attention:
104104
HEAD_COUNT = "{arch}.attention.head_count"
@@ -114,7 +114,7 @@ class Attention:
114114
KV_LORA_RANK = "{arch}.attention.kv_lora_rank"
115115
REL_BUCKETS_COUNT = "{arch}.attention.relative_buckets_count"
116116
SLIDING_WINDOW = "{arch}.attention.sliding_window"
117-
MULTIPLIER = "{arch}.attention.multiplier"
117+
SCALE = "{arch}.attention.scale"
118118

119119
class Rope:
120120
DIMENSION_COUNT = "{arch}.rope.dimension_count"

gguf-py/gguf/gguf_writer.py

Lines changed: 6 additions & 6 deletions
Original file line number · Diff line number · Diff line change
@@ -679,11 +679,11 @@ def add_time_mix_extra_dim(self, dim: int) -> None:
679679
def add_time_decay_extra_dim(self, dim: int) -> None:
680680
self.add_uint32(Keys.LLM.TIME_DECAY_EXTRA_DIM.format(arch=self.arch), dim)
681681

682-
def add_residual_multiplier(self, value: float) -> None:
683-
self.add_float32(Keys.LLM.RESIDUAL_MULTIPLIER.format(arch=self.arch), value)
682+
def add_residual_scale(self, value: float) -> None:
683+
self.add_float32(Keys.LLM.RESIDUAL_SCALE.format(arch=self.arch), value)
684684

685-
def add_embedding_multiplier(self, value: float) -> None:
686-
self.add_float32(Keys.LLM.EMBEDDING_MULTIPLIER.format(arch=self.arch), value)
685+
def add_embedding_scale(self, value: float) -> None:
686+
self.add_float32(Keys.LLM.EMBEDDING_SCALE.format(arch=self.arch), value)
687687

688688
def add_wkv_head_size(self, size: int) -> None:
689689
self.add_uint32(Keys.WKV.HEAD_SIZE.format(arch=self.arch), size)
@@ -709,8 +709,8 @@ def add_relative_attn_buckets_count(self, value: int) -> None:
709709
def add_sliding_window(self, value: int) -> None:
710710
self.add_uint32(Keys.Attention.SLIDING_WINDOW.format(arch=self.arch), value)
711711

712-
def add_attention_multiplier(self, value: float) -> None:
713-
self.add_float32(Keys.Attention.MULTIPLIER.format(arch=self.arch), value)
712+
def add_attention_scale(self, value: float) -> None:
713+
self.add_float32(Keys.Attention.SCALE.format(arch=self.arch), value)
714714

715715
def add_pooling_type(self, value: PoolingType) -> None:
716716
self.add_uint32(Keys.LLM.POOLING_TYPE.format(arch=self.arch), value.value)

0 commit comments

Comments (0)