Commit fc05540

Add fix for adding bos to added special tokens
Parent: d146334

File tree: 1 file changed (+3, -1)

convert_hf_to_gguf.py

Lines changed: 3 additions & 1 deletion
@@ -527,7 +527,9 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
             if token in added_vocab:
                 # We need to manually encode and decode the added tokens in case special characters
                 # used for `\n` / `\t` have been manually added in the added tokens
-                token = tokenizer.decode(tokenizer.encode(token))
+                if len(token) == 1:
+                    token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False))
+
                 if tokenizer.added_tokens_decoder[i].special or self.does_token_look_special(token):
                     toktypes.append(gguf.TokenType.CONTROL)
                 else:
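For context on why the flag matters: Hugging Face tokenizers insert special tokens such as BOS by default when `encode()` is called, so the old round trip `tokenizer.decode(tokenizer.encode(token))` could turn an added token like "\n" into something like "<s> \n". Passing `add_special_tokens=False` keeps the round trip lossless, and the new `len(token) == 1` guard limits the re-encoding to the single-character `\n` / `\t` case the comment describes. A minimal sketch of the behaviour, assuming a Llama-style tokenizer that prepends BOS (the checkpoint name below is only an illustrative assumption, not taken from the commit):

# Sketch of the BOS round-trip issue this commit works around.
# The tokenizer name is illustrative; any BOS-prepending tokenizer shows the same effect.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")

token = "\n"  # a single-character added token, as in the converter

# Default encode() inserts special tokens, so BOS leaks into the round trip.
print(repr(tokenizer.decode(tokenizer.encode(token))))
# -> something like '<s> \n'

# With add_special_tokens=False the round trip returns just the token text.
print(repr(tokenizer.decode(tokenizer.encode(token, add_special_tokens=False))))
# -> '\n'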
