@@ -1416,8 +1416,32 @@ def set_vocab(self):
         self.gguf_writer.add_add_space_prefix(add_prefix)
 
         special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+        old_eos = special_vocab.special_token_ids["eos"]
+        if "chat" in os.path.basename(self.dir_model.absolute()):
+            # For the chat model, we replace the eos with '<|im_end|>'.
+            special_vocab.special_token_ids["eos"] = self._try_get_sft_eos(tokenizer)
+            print(f"Replace eos:{old_eos} with a special token:{special_vocab.special_token_ids['eos']} \
+in chat mode so that the conversation can end normally.")
+
         special_vocab.add_to_gguf(self.gguf_writer)
 
+    def _try_get_sft_eos(self, tokenizer):
+        unused_145_list = tokenizer.encode('[UNUSED_TOKEN_145]')
+        im_end_list = tokenizer.encode('<|im_end|>')
+        assert (len(unused_145_list) == 1) ^ (len(im_end_list) == 1)
+        if len(unused_145_list) == 1:
+            eos_token = unused_145_list[0]
+        if len(im_end_list) == 1:
+            eos_token = im_end_list[0]
+        return eos_token
+
+    def _hf_permute_qk(self, weights, n_head: int, n_head_kv: int):
+        if n_head_kv is not None and n_head != n_head_kv:
+            n_head = n_head_kv
+        return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
+                .swapaxes(1, 2)
+                .reshape(weights.shape))
+
     def set_gguf_parameters(self):
         self.gguf_writer.add_name("InternLM2")
         self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
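The `_hf_permute_qk` helper added above appears to mirror the `permute` helper used by the llama.cpp LLaMA converters: it reorders the rows of a Q/K projection so that each head's two rotary half-blocks are interleaved pairwise. A minimal sketch of the effect, using numpy and purely illustrative toy sizes (2 heads, head_dim 4; not taken from this PR):

import numpy as np

# Toy sizes, for illustration only (not from this PR).
n_head, head_dim = 2, 4
hidden = n_head * head_dim
w = np.arange(hidden * hidden).reshape(hidden, hidden)

def hf_permute_qk(weights, n_head, n_head_kv):
    # Same reshape/swapaxes/reshape sequence as the method added in the diff above.
    if n_head_kv is not None and n_head != n_head_kv:
        n_head = n_head_kv
    return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
            .swapaxes(1, 2)
            .reshape(weights.shape))

permuted = hf_permute_qk(w, n_head, n_head)
# Rows come out in the order 0,2,1,3 (then 4,6,5,7): each head's two
# half-blocks of rows are interleaved pairwise.
print((permuted[:, 0] // hidden).reshape(n_head, head_dim))
# [[0 2 1 3]
#  [4 6 5 7]]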
@@ -1486,8 +1510,9 @@ def write_tensors(self):
                 qkv = data_torch
                 qkv = rearrange(qkv.T, " o (g n i) ->o g n i", g=num_groups, n=q_per_kv + 2, i=head_dim)
                 q, k, v = qkv[..., : q_per_kv, :], qkv[..., q_per_kv: q_per_kv + 1, :], qkv[..., q_per_kv + 1: q_per_kv + 2, :]
-                q = rearrange(q, " o g n i -> o (g n i)").T
-                k = rearrange(k, " o g n i -> o (g n i)").T
+                # The model weights of q and k require an additional reshape.
+                q = self._hf_permute_qk(rearrange(q, " o g n i -> o (g n i)").T, num_heads, num_heads)
+                k = self._hf_permute_qk(rearrange(k, " o g n i -> o (g n i)").T, num_heads, num_kv_heads)
                 v = rearrange(v, " o g n i -> o (g n i)").T
                 self.post_write_tensors(tensor_map, f"model.layers.{bid}.attention.wq.weight", q)
                 self.post_write_tensors(tensor_map, f"model.layers.{bid}.attention.wk.weight", k)