Commit e171663

Nexesenex and nopperl committed
Add support for Chameleon ggml-org#8543
Co-Authored-By: nopperl <[email protected]>
1 parent 41ed02f commit e171663

8 files changed: 347 additions & 2 deletions

convert_hf_to_gguf.py

Lines changed: 45 additions & 0 deletions
@@ -608,6 +608,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "855059429035d75a914d1eda9f10a876752e281a054a7a3d421ef0533e5b6249":
             # ref: https://huggingface.co/HuggingFaceTB/SmolLM-135M
             res = "smollm"
+        if chkhsh == "60824e3c0d9401f89943cbb2fff727f0e2d4c545ba4df2d6e4f09a6db0f5b450":
+            # ref: https://huggingface.co/facebook/chameleon-7b
+            res = "chameleon"
 
         if res is None:
             logger.warning("\n")
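
For context, the checksum matched above is how the converter detects the pre-tokenizer: it hashes the token IDs the Hugging Face tokenizer produces for a fixed probe string. A minimal sketch of that mechanism (an assumption based on get_vocab_base_pre; the probe text itself is elided):

from hashlib import sha256
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("facebook/chameleon-7b")
chktxt = "..."  # fixed probe string defined in convert_hf_to_gguf.py (not reproduced here)
chkhsh = sha256(str(tokenizer.encode(chktxt)).encode()).hexdigest()
# with the real probe string, this is expected to equal the
# "60824e3c..." value matched above
print(chkhsh)
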
@@ -3751,6 +3754,48 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         name = name.removeprefix("transformer.")
         return [(self.map_tensor_name(name), data_torch)]
 
+
+@Model.register("ChameleonForCausalLM")
+class ChameleonModel(Model):
+    model_arch = gguf.MODEL_ARCH.CHAMELEON
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_swin_norm(self.hparams.get("swin_norm", False))
+
+    def set_vocab(self):
+        self._set_vocab_gpt2()
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # ignore image tokenizer for now
+        # TODO: remove this once image support is implemented for Chameleon
+        if name.startswith("model.vqmodel"):
+            return []
+
+        n_head = self.hparams["num_attention_heads"]
+        n_kv_head = self.hparams.get("num_key_value_heads")
+        hidden_dim = self.hparams.get("hidden_size")
+
+        if name.endswith(("q_proj.weight", "q_proj.bias")):
+            data_torch = LlamaModel.permute(data_torch, n_head, n_head)
+        if name.endswith(("k_proj.weight", "k_proj.bias")):
+            data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
+        if name.endswith(("q_norm.weight", "q_norm.bias")):
+            data_torch = ChameleonModel._reverse_hf_permute(data_torch, n_head, hidden_dim)
+        if name.endswith(("k_norm.weight", "k_norm.bias")):
+            data_torch = ChameleonModel._reverse_hf_permute(data_torch, n_kv_head, hidden_dim)
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+    # see: https://github.com/huggingface/transformers/blob/72fb02c47dbbe1999ae105319f24631cad6e2e00/src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py#L176-L203
+    @staticmethod
+    def _reverse_hf_permute(data_torch, n_heads, hidden_dim):
+        head_dim = hidden_dim // n_heads
+        data_torch = data_torch[0].view(2, head_dim // 2).t().reshape(1, -1)
+        data_torch = data_torch.repeat_interleave(n_heads, 0)
+        return data_torch
+
+
 ###### CONVERSION LOGIC ######
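
To illustrate what _reverse_hf_permute does, here is a toy check (not part of the commit; the shapes and values are invented): it re-interleaves the two rotary halves of a norm weight row and then repeats the result once per head.

import torch

head_dim, n_heads = 4, 2

# HF-permuted per-head norm weight (toy values): rotary halves split as [0, 2 | 1, 3]
w_hf = torch.tensor([[0., 2., 1., 3.]])                    # assumed shape (1, head_dim)

head = w_hf[0].view(2, head_dim // 2).t().reshape(1, -1)   # re-interleave -> [[0., 1., 2., 3.]]
w_out = head.repeat_interleave(n_heads, 0)                 # one copy per head -> (n_heads, head_dim)
print(w_out)
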

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
@@ -94,6 +94,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "codeshell", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/WisdomShell/CodeShell-7B", },
     {"name": "tekken", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistralai/Mistral-Nemo-Base-2407", },
     {"name": "smollm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/HuggingFaceTB/SmolLM-135M", },
+    {"name": "chameleon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/facebook/chameleon-7b", },
 ]

gguf-py/gguf/constants.py

Lines changed: 19 additions & 0 deletions
@@ -94,6 +94,7 @@ class LLM:
         DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
         ATTN_LOGIT_SOFTCAPPING = "{arch}.attn_logit_softcapping"
         FINAL_LOGIT_SOFTCAPPING = "{arch}.final_logit_softcapping"
+        SWIN_NORM = "{arch}.swin_norm"
 
     class Attention:
         HEAD_COUNT = "{arch}.attention.head_count"

@@ -219,6 +220,7 @@ class MODEL_ARCH(IntEnum):
     T5 = auto()
     T5ENCODER = auto()
     JAIS = auto()
+    CHAMELEON = auto()
 
 
 class MODEL_TENSOR(IntEnum):

@@ -347,6 +349,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.T5: "t5",
     MODEL_ARCH.T5ENCODER: "t5encoder",
     MODEL_ARCH.JAIS: "jais",
+    MODEL_ARCH.CHAMELEON: "chameleon",
 }
 
 TENSOR_NAMES: dict[MODEL_TENSOR, str] = {

@@ -1065,6 +1068,22 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_GATE,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.CHAMELEON: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     # TODO
 }
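
As a quick sanity check of the new constants (an illustration, not part of the diff), the enum value resolves to the "chameleon" architecture string and carries the tensor list declared above:

import gguf

arch = gguf.MODEL_ARCH.CHAMELEON
print(gguf.MODEL_ARCH_NAMES[arch])                                 # "chameleon"
print(gguf.MODEL_TENSOR.ATTN_Q_NORM in gguf.MODEL_TENSORS[arch])   # True
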

gguf-py/gguf/gguf_writer.py

Lines changed: 3 additions & 0 deletions
@@ -670,6 +670,9 @@ def add_expert_shared_count(self, count: int) -> None:
     def add_expert_weights_scale(self, value: float) -> None:
         self.add_float32(Keys.LLM.EXPERT_WEIGHTS_SCALE.format(arch=self.arch), value)
 
+    def add_swin_norm(self, value: bool) -> None:
+        self.add_bool(Keys.LLM.SWIN_NORM.format(arch=self.arch), value)
+
     def add_layer_norm_eps(self, value: float) -> None:
         self.add_float32(Keys.Attention.LAYERNORM_EPS.format(arch=self.arch), value)
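
A hedged usage sketch of the new writer method (the file name and arch string below are placeholders, not taken from the PR); it stores the flag as a boolean key-value pair under the "{arch}.swin_norm" key:

import gguf

writer = gguf.GGUFWriter("chameleon-example.gguf", "chameleon")
writer.add_swin_norm(True)      # serialized as the bool KV "chameleon.swin_norm"
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.close()
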

gguf-py/gguf/tensor_mapping.py

Lines changed: 2 additions & 2 deletions
@@ -364,7 +364,7 @@ class TensorNameMap:
         MODEL_TENSOR.ATTN_Q_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
             "model.layers.{bid}.self_attn.q_layernorm", # persimmon
-            "model.layers.{bid}.self_attn.q_norm", # cohere
+            "model.layers.{bid}.self_attn.q_norm", # cohere chameleon
             "transformer.blocks.{bid}.attn.q_ln", # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
             "transformer.layers.{bid}.attn.q_norm", # openelm

@@ -373,7 +373,7 @@ class TensorNameMap:
         MODEL_TENSOR.ATTN_K_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
             "model.layers.{bid}.self_attn.k_layernorm", # persimmon
-            "model.layers.{bid}.self_attn.k_norm", # cohere
+            "model.layers.{bid}.self_attn.k_norm", # cohere chameleon
             "transformer.blocks.{bid}.attn.k_ln", # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
             "transformer.layers.{bid}.attn.k_norm", # openelm

include/llama.h

Lines changed: 1 addition & 0 deletions
@@ -93,6 +93,7 @@ extern "C" {
         LLAMA_VOCAB_PRE_TYPE_TEKKEN = 20,
         LLAMA_VOCAB_PRE_TYPE_SMOLLM = 21,
         LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22,
+        LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 23,
     };
 
     enum llama_rope_type {

src/llama-vocab.cpp

Lines changed: 14 additions & 0 deletions
@@ -660,6 +660,20 @@ struct llm_tokenizer_bpe {
                     "[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))*((?=[\\p{L}])([^A-Z]))+|[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))+((?=[\\p{L}])([^A-Z]))*|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
                 };
                 break;
+            case LLAMA_VOCAB_PRE_TYPE_CHAMELEON:
+                // Note: in theory, the special token (sentinel and image token) regex_exprs below
+                // are unnecessary, as they are split in `tokenizer_st_partition` anyway.
+                // However, since the upstream pre-tokenizer uses them, they are also
+                // included here (see https://huggingface.co/facebook/chameleon-7b).
+                regex_exprs = {
+                    "<sentinel:[0-9]+>",  // Sentinel tokens
+                    "(IMGIMG)((A|B|C|D|E|F|G|H|I){1,4})Z",  // Image tokens
+                    "([\\t\\n]| | )",  // directly from tokenizer.json
+                    "\\p{N}", // Individual digits
+                    "[\\p{P}!-/:-@\\[-`{-~]",  // Punctuation, Isolated
+                    "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
+                };
+                break;
             default:
                 // default regex for BPE tokenization pre-processing
                 regex_exprs = {
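
As a quick illustration of the new pre-tokenizer patterns (not part of the commit), the sentinel and image-token expressions can be exercised with Python's third-party regex module, which supports the same \p{...} classes used elsewhere in the list:

import regex  # third-party module: pip install regex

img = regex.compile(r"(IMGIMG)((A|B|C|D|E|F|G|H|I){1,4})Z")
print(bool(img.fullmatch("IMGIMGABZ")))   # True: one to four letters from A-I before the Z
print(bool(img.fullmatch("IMGIMGXYZ")))   # False: X and Y fall outside A-I

print(regex.findall(r"<sentinel:[0-9]+>", "foo <sentinel:12> bar"))  # ['<sentinel:12>']
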
