File tree Expand file tree Collapse file tree 1 file changed +20
-0
lines changed Expand file tree Collapse file tree 1 file changed +20
-0
lines changed Original file line number Diff line number Diff line change @@ -1380,6 +1380,11 @@ def n_vocab(self) -> int:
1380
1380
assert self .ctx is not None
1381
1381
return llama_cpp .llama_n_vocab (self .ctx )
1382
1382
1383
def tokenizer(self) -> "LlamaTokenizer":
    """Build a `LlamaTokenizer` that wraps this instance.

    Asserts that ``self.ctx`` is set before constructing the wrapper.
    """
    assert self.ctx is not None
    wrapper = LlamaTokenizer(self)
    return wrapper
1387
+
1383
1388
@staticmethod
1384
1389
def token_eos () -> int :
1385
1390
"""Return the end-of-sequence token."""
@@ -1410,3 +1415,18 @@ def longest_token_prefix(a: Sequence[int], b: Sequence[int]):
1410
1415
else :
1411
1416
break
1412
1417
return longest_prefix
1418
+
1419
+
1420
class LlamaTokenizer:
    """Thin str <-> token-id adapter that delegates to a `Llama` instance."""

    def __init__(self, llama: Llama):
        # Keep a reference to the object whose tokenize/detokenize we call.
        self.llama = llama

    def encode(self, text: str) -> List[int]:
        """Tokenize *text* after encoding it as UTF-8.

        Characters that cannot be encoded are dropped (``errors="ignore"``).
        """
        raw = text.encode("utf-8", errors="ignore")
        return self.llama.tokenize(raw)

    def decode(self, tokens: List[int]) -> str:
        """Detokenize *tokens* and decode the result as UTF-8.

        Undecodable byte sequences are dropped (``errors="ignore"``).
        """
        raw = self.llama.detokenize(tokens)
        return raw.decode("utf-8", errors="ignore")

    @classmethod
    def from_ggml_file(cls, path: str) -> "LlamaTokenizer":
        """Create a tokenizer from the model file at *path*.

        The backing `Llama` is constructed with ``vocab_only=True``.
        """
        vocab_model = Llama(model_path=path, vocab_only=True)
        return cls(vocab_model)
You can’t perform that action at this time.
0 commit comments