Skip to content

Commit 7e1ae37

Browse files
authored
py : fix internlm2-hf convert to gguf (#5305)
* py : fix internlm2-hf convert to gguf * ggml-ci
1 parent 6fdfa2e commit 7e1ae37

File tree

1 file changed

+27
-2
lines changed

1 file changed

+27
-2
lines changed

convert-hf-to-gguf.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1416,8 +1416,32 @@ def set_vocab(self):
14161416
self.gguf_writer.add_add_space_prefix(add_prefix)
14171417

14181418
special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
1419+
old_eos = special_vocab.special_token_ids["eos"]
1420+
if "chat" in os.path.basename(self.dir_model.absolute()):
1421+
# For the chat model, we replace the eos with '<|im_end|>'.
1422+
special_vocab.special_token_ids["eos"] = self._try_get_sft_eos(tokenizer)
1423+
print(f"Replace eos:{old_eos} with a special token:{special_vocab.special_token_ids['eos']} \
1424+
in chat mode so that the conversation can end normally.")
1425+
14191426
special_vocab.add_to_gguf(self.gguf_writer)
14201427

1428+
def _try_get_sft_eos(self, tokenizer):
    """Return the token id that should replace eos for a chat model.

    The tokenizer is probed with the literal strings ``'[UNUSED_TOKEN_145]'``
    and ``'<|im_end|>'``. Exactly one of them must encode to a single token;
    that token's id is returned.

    Args:
        tokenizer: Object whose ``encode(text)`` returns a sized, indexable
            sequence of token ids.

    Returns:
        The only token id produced by whichever probe string encodes to
        exactly one token.

    Raises:
        AssertionError: If neither probe string, or both of them, encode to
            exactly one token.
    """
    unused_145_list = tokenizer.encode('[UNUSED_TOKEN_145]')
    im_end_list = tokenizer.encode('<|im_end|>')
    unused_is_single = len(unused_145_list) == 1
    im_end_is_single = len(im_end_list) == 1

    # Raised explicitly instead of using an `assert` statement so the check is
    # not stripped under `python -O`; without it the function could hit an
    # unbound local (neither matches) or silently pick one (both match).
    # AssertionError is kept so the exception type seen by callers is unchanged.
    if unused_is_single == im_end_is_single:
        raise AssertionError(
            "expected exactly one of '[UNUSED_TOKEN_145]' and '<|im_end|>' to "
            f"encode to a single token, got {len(unused_145_list)} and "
            f"{len(im_end_list)} token(s) respectively"
        )

    return unused_145_list[0] if unused_is_single else im_end_list[0]
1437+
1438+
def _hf_permute_qk(self, weights, n_head: int, n_head_kv: int):
    """Reorder the rows of a q/k weight within each head.

    The leading axis of ``weights`` is viewed as ``(heads, 2, rows_per_half)``,
    the two inner axes are swapped, and the result is reshaped back to the
    original shape. Any trailing axes are carried along unchanged.

    Args:
        weights: Array/tensor supporting ``reshape``, ``swapaxes`` and ``shape``.
        n_head: Head count used when ``n_head_kv`` is ``None`` or equal to it.
        n_head_kv: Head count used instead of ``n_head`` when it is not
            ``None`` and differs from ``n_head``.

    Returns:
        The permuted weights, with the same shape as ``weights``.
    """
    head_count = n_head
    if n_head_kv is not None and n_head != n_head_kv:
        head_count = n_head_kv

    full_shape = weights.shape
    rows_per_half = full_shape[0] // head_count // 2
    trailing_dims = full_shape[1:]

    grouped = weights.reshape(head_count, 2, rows_per_half, *trailing_dims)
    interleaved = grouped.swapaxes(1, 2)
    return interleaved.reshape(full_shape)
1444+
14211445
def set_gguf_parameters(self):
14221446
self.gguf_writer.add_name("InternLM2")
14231447
self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
@@ -1486,8 +1510,9 @@ def write_tensors(self):
14861510
qkv = data_torch
14871511
qkv = rearrange(qkv.T, " o (g n i) ->o g n i", g=num_groups, n=q_per_kv + 2, i=head_dim)
14881512
q, k, v = qkv[..., : q_per_kv, :], qkv[..., q_per_kv: q_per_kv + 1, :], qkv[..., q_per_kv + 1: q_per_kv + 2, :]
1489-
q = rearrange(q, " o g n i -> o (g n i)").T
1490-
k = rearrange(k, " o g n i -> o (g n i)").T
1513+
# The model weights of q and k require additional reshape.
1514+
q = self._hf_permute_qk(rearrange(q, " o g n i -> o (g n i)").T, num_heads, num_heads)
1515+
k = self._hf_permute_qk(rearrange(k, " o g n i -> o (g n i)").T, num_heads, num_kv_heads)
14911516
v = rearrange(v, " o g n i -> o (g n i)").T
14921517
self.post_write_tensors(tensor_map, f"model.layers.{bid}.attention.wq.weight", q)
14931518
self.post_write_tensors(tensor_map, f"model.layers.{bid}.attention.wk.weight", k)

0 commit comments

Comments
 (0)