
Commit 7fc7bc3

Remove usage of eval_tokens for cache check
1 parent fe331ec commit 7fc7bc3

File tree

1 file changed: +2 -2 lines changed


llama_cpp/llama.py

Lines changed: 2 additions & 2 deletions
@@ -735,10 +735,10 @@ def _create_completion(
             try:
                 cache_item = self.cache[prompt_tokens]
                 cache_prefix_len = Llama.longest_token_prefix(
-                    cache_item.eval_tokens, prompt_tokens
+                    cache_item.input_ids.tolist(), prompt_tokens
                 )
                 eval_prefix_len = Llama.longest_token_prefix(
-                    self.eval_tokens, prompt_tokens
+                    self._input_ids.tolist(), prompt_tokens
                 )
                 if cache_prefix_len > eval_prefix_len:
                     self.load_state(cache_item)
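
Below is a minimal, hypothetical sketch of the comparison this hunk feeds into, assuming Llama.longest_token_prefix simply counts how many leading tokens two sequences share and that the .tolist() calls convert array-backed token buffers into plain Python lists. The token IDs are invented for illustration only.

from typing import Sequence


def longest_token_prefix(a: Sequence[int], b: Sequence[int]) -> int:
    """Count the leading tokens that a and b have in common."""
    n = 0
    for x, y in zip(a, b):
        if x != y:
            break
        n += 1
    return n


prompt_tokens = [1, 15043, 29892, 3186]          # tokens of the new prompt
cached_tokens = [1, 15043, 29892, 3186, 29991]   # stands in for cache_item.input_ids.tolist()
context_tokens = [1, 15043]                      # stands in for self._input_ids.tolist()

cache_prefix_len = longest_token_prefix(cached_tokens, prompt_tokens)  # 4
eval_prefix_len = longest_token_prefix(context_tokens, prompt_tokens)  # 2

# The cached state is only restored when it covers a longer prefix of the
# prompt than the tokens already evaluated in the current context.
if cache_prefix_len > eval_prefix_len:
    print("would call self.load_state(cache_item)")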
