Commit baec8ff

add missing params in llama_tokenizer.py
1 parent 1162207 commit baec8ff

1 file changed: +6 −4 lines changed

llama_cpp/llama_tokenizer.py

Lines changed: 6 additions & 4 deletions
@@ -14,7 +14,7 @@
 class BaseLlamaTokenizer(abc.ABC):
     @abc.abstractmethod
     def tokenize(
-        self, text: bytes, add_bos: bool = True, special: bool = True
+        self, vocab:llama_cpp.llama_vocab_p, text: bytes, add_bos: bool = True, special: bool = True
     ) -> List[int]:
         """Tokenize the text into tokens.

@@ -28,6 +28,7 @@ def tokenize(
     @abc.abstractmethod
     def detokenize(
         self,
+        vocab:llama_cpp.llama_vocab_p,
         tokens: List[int],
         prev_tokens: Optional[List[int]] = None,
         special: bool = False,
@@ -47,17 +48,18 @@ def __init__(self, llama: llama_cpp.Llama):
         self._model = llama._model  # type: ignore

     def tokenize(
-        self, text: bytes, add_bos: bool = True, special: bool = True
+        self, vocab:llama_cpp.llama_vocab_p, text: bytes, add_bos: bool = True, special: bool = True
     ) -> List[int]:
-        return self._model.tokenize(text, add_bos=add_bos, special=special)
+        return self._model.tokenize(vocab, text, add_bos=add_bos, special=special)

     def detokenize(
         self,
+        vocab:llama_cpp.llama_vocab_p,
         tokens: List[int],
         prev_tokens: Optional[List[int]] = None,
         special: bool = False,
     ) -> bytes:
-        return self._model.detokenize(tokens, special=special)
+        return self._model.detokenize(vocab, tokens, special=special)

     def encode(
         self, text: str, add_bos: bool = True, special: bool = True
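
In practice, the change means both tokenizer methods now take the vocab pointer as their first positional argument and forward it to the underlying model. A minimal sketch of a custom tokenizer written against the updated BaseLlamaTokenizer interface follows; the byte-per-token logic is purely illustrative and not part of this commit.

from typing import List, Optional

import llama_cpp
from llama_cpp.llama_tokenizer import BaseLlamaTokenizer


class ByteTokenizer(BaseLlamaTokenizer):
    """Toy tokenizer matching the updated abstract signatures.

    The vocab argument is accepted (and ignored here) only to satisfy the
    new interface; a real implementation would forward it to llama.cpp's
    tokenizer, as LlamaTokenizer does above.
    """

    def tokenize(
        self, vocab: llama_cpp.llama_vocab_p, text: bytes, add_bos: bool = True, special: bool = True
    ) -> List[int]:
        # Stand-in logic: one "token" per byte of input.
        return list(text)

    def detokenize(
        self,
        vocab: llama_cpp.llama_vocab_p,
        tokens: List[int],
        prev_tokens: Optional[List[int]] = None,
        special: bool = False,
    ) -> bytes:
        # Stand-in logic: invert the byte-per-token mapping above.
        return bytes(tokens)

Passing the vocab explicitly mirrors the upstream llama.cpp C API, where tokenization operates on a llama_vocab handle rather than the whole model, so callers of tokenize/detokenize now supply that pointer alongside the text or tokens.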
