File tree Expand file tree Collapse file tree 1 file changed +7
-6
lines changed Expand file tree Collapse file tree 1 file changed +7
-6
lines changed Original file line number Diff line number Diff line change @@ -202,6 +202,13 @@ int main(int argc, char ** argv) {
202
202
}
203
203
}
204
204
205
+ // if we will use the cache for the full prompt without reaching the end of the cache, force
206
+ // reevaluation of the last token to recalculate the cached logits
207
+ if (!embd_inp.empty () && n_matching_session_tokens == embd_inp.size () &&
208
+ session_tokens.size () > embd_inp.size ()) {
209
+ session_tokens.resize (embd_inp.size () - 1 );
210
+ }
211
+
205
212
// number of tokens to keep when resetting context
206
213
if (params.n_keep < 0 || params.n_keep > (int ) embd_inp.size () || params.instruct ) {
207
214
params.n_keep = (int )embd_inp.size ();
@@ -360,12 +367,6 @@ int main(int argc, char ** argv) {
360
367
}
361
368
}
362
369
if (i > 0 ) {
363
- // check if we've used up all the prompt but not all cached tokens
364
- if (embd.size () == i && n_session_consumed < (int ) session_tokens.size ()) {
365
- // force revaluation of the last token to recalculate logits
366
- i--;
367
- n_past--;
368
- }
369
370
embd.erase (embd.begin (), embd.begin () + i);
370
371
}
371
372
}
You can’t perform that action at this time.
0 commit comments