Skip to content

Commit 1caa20f

Browse files
committed
convert_hf : reduce usages of UNKNOWN for InternLM2
This makes the changes from #8321 more consistent with the other changes made here.
1 parent afa6119 commit 1caa20f

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

convert_hf_to_gguf.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2189,7 +2189,7 @@ def set_vocab(self):
21892189
toktype = SentencePieceTokenTypes.BYTE
21902190
# take care of unused raw token
21912191
if piece.startswith('[UNUSED'):
2192-
toktype = SentencePieceTokenTypes.UNKNOWN
2192+
toktype = SentencePieceTokenTypes.UNUSED
21932193

21942194
tokens.append(text)
21952195
scores.append(score)
@@ -2219,7 +2219,7 @@ def set_vocab(self):
22192219
if token == chat_eos_token:
22202220
chat_eos_token_id = token_id
22212221
token = token.encode("utf-8")
2222-
if toktypes[token_id] != SentencePieceTokenTypes.UNKNOWN:
2222+
if toktypes[token_id] != SentencePieceTokenTypes.UNUSED:
22232223
assert(tokens[token_id] == token)
22242224
tokens[token_id] = token
22252225
scores[token_id] = -1000.0
@@ -2238,7 +2238,7 @@ def set_vocab(self):
22382238
if token == chat_eos_token:
22392239
chat_eos_token_id = token_id
22402240
token = token.encode("utf-8")
2241-
if toktypes[token_id] != SentencePieceTokenTypes.UNKNOWN:
2241+
if toktypes[token_id] != SentencePieceTokenTypes.UNUSED:
22422242
assert(tokens[token_id] == token)
22432243
tokens[token_id] = token
22442244
scores[token_id] = -1000.0

0 commit comments

Comments
 (0)