
Commit db2a845

Add the vocab params fix patch
1 parent e9edb78 commit db2a845

File tree: 4 files changed, +18 -15

llama_cpp/llama.py

Lines changed: 7 additions & 6 deletions
@@ -1430,7 +1430,7 @@ def logit_bias_processor(
                         )
                     )
                     top_logprob = {
-                        self.detokenize([i]).decode(
+                        self.detokenize(self._vocab, [i]).decode(
                             "utf-8", errors="ignore"
                         ): logprob
                         for logprob, i in sorted_logprobs[:logprobs]
@@ -1559,6 +1559,7 @@ def logit_bias_processor(
                         )
                         text_offset = len(prompt) + len(
                             self.detokenize(
+                                self._vocab,
                                 completion_tokens[:returned_tokens],
                                 prev_tokens=prompt_tokens
                                 + completion_tokens[:returned_tokens],
@@ -1574,7 +1575,7 @@ def logit_bias_processor(
                             )
                         )
                         top_logprob = {
-                            self.detokenize([i]).decode("utf-8", errors="ignore"): logprob
+                            self.detokenize(self._vocab, [i]).decode("utf-8", errors="ignore"): logprob
                             for logprob, i in sorted_logprobs[:logprobs]
                         }
                         top_logprob.update({token_str: current_logprobs[int(token)]})
@@ -1617,7 +1618,7 @@ def logit_bias_processor(
                     "model": model_name,
                     "choices": [
                         {
-                            "text": self.detokenize([token]).decode(
+                            "text": self.detokenize(self._vocab, [token]).decode(
                                 "utf-8", errors="ignore"
                             ),
                             "index": 0,
@@ -1680,7 +1681,7 @@ def logit_bias_processor(
                 all_tokens = completion_tokens
 
             all_token_strs = [
-                self.detokenize([token], prev_tokens=all_tokens[:i]).decode(
+                self.detokenize(self._vocab, [token], prev_tokens=all_tokens[:i]).decode(
                     "utf-8", errors="ignore"
                 )
                 for i, token in enumerate(all_tokens)
@@ -1695,7 +1696,7 @@ def logit_bias_processor(
                 text_offsets.append(
                     text_offset
                     + len(
-                        self.detokenize(all_tokens[:idx]).decode(
+                        self.detokenize(self._vocab, all_tokens[:idx]).decode(
                             "utf-8", errors="ignore"
                         )
                     )
@@ -1708,7 +1709,7 @@ def logit_bias_processor(
                 )
                 token_logprobs.append(logprobs_token[int(token)])
                 top_logprob: Optional[Dict[str, float]] = {
-                    self.detokenize([i], prev_tokens=all_tokens[:idx]).decode(
+                    self.detokenize(self._vocab, [i], prev_tokens=all_tokens[:idx]).decode(
                         "utf-8", errors="ignore"
                     ): logprob
                     for logprob, i in sorted_logprobs[:logprobs]
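Every detokenize call in this file now passes the instance's cached vocab handle as the leading argument. A minimal round-trip sketch of the post-patch calling convention (the model path is hypothetical, and _vocab is the private handle this patch assumes is set on the Llama instance):

# Sketch only: illustrates the patched tokenize/detokenize signatures,
# which take the vocab handle before the text or token list.
from llama_cpp import Llama

llm = Llama(model_path="model.gguf")  # hypothetical model file
ids = llm.tokenize(llm._vocab, b"hello world", add_bos=True, special=False)
text = llm.detokenize(llm._vocab, ids).decode("utf-8", errors="ignore")
print(text)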

llama_cpp/llama_chat_format.py

Lines changed: 5 additions & 4 deletions
@@ -594,7 +594,7 @@ def chat_completion_handler(
         tool_choice=tool_choice,
     )
     prompt = llama.tokenize(
-        vocab=llama.llama_model_get_vocab(model),
+        vocab=llama.llama_model_get_vocab(llama.model),
         text=result.prompt.encode("utf-8"),
         add_bos=not result.added_special,
         special=True,
@@ -2813,8 +2813,8 @@ def __call__(
         text = template.render(
             messages=messages,
             add_generation_prompt=True,
-            eos_token=llama.detokenize([llama.token_eos()]),
-            bos_token=llama.detokenize([llama.token_bos()]),
+            eos_token=llama.detokenize(vocab=llama.llama_model_get_vocab(llama.model), tokens=[llama.token_eos()]),
+            bos_token=llama.detokenize(vocab=llama.llama_model_get_vocab(llama.model), tokens=[llama.token_bos()]),
         )
         split_text = self.split_text_on_image_urls(text, image_urls)
 
@@ -2828,7 +2828,8 @@ def __call__(
         for type_, value in split_text:
             if type_ == "text":
                 tokens = llama.tokenize(
-                    value.encode("utf8"), add_bos=False, special=True
+                    vocab=llama.llama_model_get_vocab(llama.model),
+                    text=value.encode("utf8"), add_bos=False, special=True
                 )
                 if llama.n_tokens + len(tokens) > llama.n_ctx():
                     raise ValueError(
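The recurring pattern in this file is to resolve the vocab from the underlying model with llama_model_get_vocab at the call site. A hedged sketch of that lookup-then-call flow, reusing one handle for both directions (llama is assumed to be a loaded Llama instance, as in the hunks above):

# Sketch: fetch the vocab handle once, then pass it explicitly to
# tokenize and detokenize, mirroring the patched handler.
vocab = llama.llama_model_get_vocab(llama.model)
prompt_ids = llama.tokenize(vocab=vocab, text=b"Hello", add_bos=True, special=True)
bos_bytes = llama.detokenize(vocab=vocab, tokens=[llama.token_bos()])

Hoisting the lookup into a local like this also avoids repeating llama_model_get_vocab(llama.model) at each call, which the third hunk still does once per text segment.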

llama_cpp/llama_tokenizer.py

Lines changed: 4 additions & 4 deletions
@@ -62,14 +62,14 @@ def detokenize(
         return self._model.detokenize(vocab, tokens, special=special)
 
     def encode(
-        self, text: str, add_bos: bool = True, special: bool = True
+        self, vocab: llama_cpp.llama_vocab_p, text: str, add_bos: bool = True, special: bool = True
     ) -> List[int]:
         return self.tokenize(
-            text.encode("utf-8", errors="ignore"), add_bos=add_bos, special=special
+            vocab, text.encode("utf-8", errors="ignore"), add_bos=add_bos, special=special
         )
 
-    def decode(self, tokens: List[int]) -> str:
-        return self.detokenize(tokens).decode("utf-8", errors="ignore")
+    def decode(self, vocab: llama_cpp.llama_vocab_p, tokens: List[int]) -> str:
+        return self.detokenize(vocab, tokens).decode("utf-8", errors="ignore")
 
     @classmethod
     def from_ggml_file(cls, path: str) -> "LlamaTokenizer":
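LlamaTokenizer.encode and decode now thread the vocab handle through to tokenize/detokenize. A short usage sketch under the patched signatures (the model path and the _vocab attribute are assumptions carried over from the llama.py hunks):

# Sketch: post-patch tokenizer round trip; the vocab handle comes first.
from llama_cpp import Llama
from llama_cpp.llama_tokenizer import LlamaTokenizer

llm = Llama(model_path="model.gguf")  # hypothetical model file
tokenizer = LlamaTokenizer(llm)
vocab = llm._vocab  # cached handle, per this patch
ids = tokenizer.encode(vocab, "hello", add_bos=True, special=True)
print(tokenizer.decode(vocab, ids))  # ~"hello", possibly with a leading space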

llama_cpp/server/app.py

Lines changed: 2 additions & 1 deletion
@@ -227,9 +227,10 @@ def _logit_bias_tokens_to_input_ids(
     logit_bias: Dict[str, float],
 ) -> Dict[str, float]:
     to_bias: Dict[str, float] = {}
+    vocab = llama.llama_model_get_vocab(llama.model)
     for token, score in logit_bias.items():
         token = token.encode("utf-8")
-        for input_id in llama.tokenize(token, add_bos=False, special=True):
+        for input_id in llama.tokenize(vocab, token, add_bos=False, special=True):
             to_bias[str(input_id)] = score
     return to_bias
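Here the vocab lookup is hoisted above the loop so every biased token string is tokenized against the same handle. An isolated sketch of the same shape (the bias mapping is hypothetical; llama is a loaded Llama instance as elsewhere):

# Sketch: resolve the vocab once, then tokenize each biased token
# string against it, as the patched helper does.
vocab = llama.llama_model_get_vocab(llama.model)
to_bias: dict = {}
for token, score in {"yes": 2.0, "no": -2.0}.items():  # hypothetical biases
    for input_id in llama.tokenize(vocab, token.encode("utf-8"), add_bos=False, special=True):
        to_bias[str(input_id)] = score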
