Skip to content

Commit 8083e20

Browse files
committed
More vocab conversion fixes
1 parent 08959c8 commit 8083e20

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

convert-llama-ggmlv3-to-gguf.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -181,15 +181,15 @@ def add_vocab(self, gguf_writer):
181181
print(f'* Adding {hp.n_vocab} vocab item(s)')
182182
toktypes = []
183183
for (tokid, (vbytes, vscore)) in enumerate(self.model.vocab.items):
184-
tt = 1
185-
if len(vbytes) > 0 and vbytes[0] == 32:
186-
vbytes = vbytes.replace(b' ', b'\xe2\x96\x81')
187-
elif len(vbytes) == 0:
188-
tt = 3
184+
tt = 1 # Normal
185+
if len(vbytes) == 0:
186+
tt = 3 # Control
189187
elif tokid >= 3 and tokid <= 258 and len(vbytes) == 1:
190188
hv = hex(vbytes[0])[2:].upper()
191189
vbytes = bytes(f'<0x{hv}>', encoding = 'UTF-8')
192-
tt = 6
190+
tt = 6 # Byte
191+
else:
192+
vbytes = vbytes.replace(b' ', b'\xe2\x96\x81')
193193
toktypes.append(tt)
194194
tokens.append(vbytes)
195195
scores.append(vscore)

0 commit comments

Comments
 (0)