Skip to content

Commit 136476e

Browse files
Fix prompt cache saving and chat-persistent rollover (#1678)
* Fix prompt cache saving and chat-persistent rollover (fixes #1670) * clang-tidy Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent ffb06a3 commit 136476e

File tree

1 file changed

+7
-6
lines changed

1 file changed

+7
-6
lines changed

examples/main/main.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,13 @@ int main(int argc, char ** argv) {
202202
}
203203
}
204204

205+
// if we will use the cache for the full prompt without reaching the end of the cache, force
206+
// reevaluation of the last token to recalculate the cached logits
207+
if (!embd_inp.empty() && n_matching_session_tokens == embd_inp.size() &&
208+
session_tokens.size() > embd_inp.size()) {
209+
session_tokens.resize(embd_inp.size() - 1);
210+
}
211+
205212
// number of tokens to keep when resetting context
206213
if (params.n_keep < 0 || params.n_keep > (int) embd_inp.size() || params.instruct) {
207214
params.n_keep = (int)embd_inp.size();
@@ -360,12 +367,6 @@ int main(int argc, char ** argv) {
360367
}
361368
}
362369
if (i > 0) {
363-
// check if we've used up all the prompt but not all cached tokens
364-
if (embd.size() == i && n_session_consumed < (int) session_tokens.size()) {
365-
// force reevaluation of the last token to recalculate logits
366-
i--;
367-
n_past--;
368-
}
369370
embd.erase(embd.begin(), embd.begin() + i);
370371
}
371372
}

0 commit comments

Comments
 (0)