14
14
class BaseLlamaTokenizer (abc .ABC ):
15
15
@abc .abstractmethod
16
16
def tokenize (
17
- self , text : bytes , add_bos : bool = True , special : bool = True
17
+ self , vocab : llama_cpp . llama_vocab_p , text : bytes , add_bos : bool = True , special : bool = True
18
18
) -> List [int ]:
19
19
"""Tokenize the text into tokens.
20
20
@@ -28,6 +28,7 @@ def tokenize(
28
28
@abc .abstractmethod
29
29
def detokenize (
30
30
self ,
31
+ vocab :llama_cpp .llama_vocab_p ,
31
32
tokens : List [int ],
32
33
prev_tokens : Optional [List [int ]] = None ,
33
34
special : bool = False ,
@@ -47,17 +48,18 @@ def __init__(self, llama: llama_cpp.Llama):
47
48
self ._model = llama ._model # type: ignore
48
49
49
50
def tokenize(
    self,
    vocab: llama_cpp.llama_vocab_p,
    text: bytes,
    add_bos: bool = True,
    special: bool = True,
) -> List[int]:
    """Tokenize ``text`` into a list of token ids.

    Thin delegation to the wrapped model object held in ``self._model``;
    the ``vocab`` handle is passed straight through to it.

    Args:
        vocab: Vocabulary handle (``llama_cpp.llama_vocab_p``) used by the
            underlying model tokenizer.
        text: UTF-8 encoded bytes to tokenize.
        add_bos: Whether to prepend the beginning-of-sequence token.
        special: Whether to parse special/control tokens in ``text``.

    Returns:
        The token ids produced by the underlying model's tokenizer.
    """
    # NOTE(review): assumes self._model.tokenize matches this
    # (vocab, text, add_bos=..., special=...) signature — per the call below.
    return self._model.tokenize(vocab, text, add_bos=add_bos, special=special)
53
54
54
55
def detokenize(
    self,
    vocab: llama_cpp.llama_vocab_p,
    tokens: List[int],
    prev_tokens: Optional[List[int]] = None,
    special: bool = False,
) -> bytes:
    """Convert ``tokens`` back into bytes via the wrapped model.

    Args:
        vocab: Vocabulary handle (``llama_cpp.llama_vocab_p``) used by the
            underlying model detokenizer.
        tokens: Token ids to convert back to bytes.
        prev_tokens: Accepted for interface compatibility but NOT forwarded
            to the underlying model — it has no effect here.
        special: Whether to render special/control tokens in the output.

    Returns:
        The detokenized byte string.
    """
    # prev_tokens is deliberately not passed through (see docstring).
    return self._model.detokenize(vocab, tokens, special=special)
61
63
62
64
def encode (
63
65
self , text : str , add_bos : bool = True , special : bool = True
0 commit comments