@@ -2479,7 +2479,6 @@ static bool llama_kv_cache_init(
2479
2479
static bool llama_kv_cache_find_slot(
2480
2480
struct llama_kv_cache & cache,
2481
2481
const struct llama_batch & batch) {
2482
- const uint32_t n_ctx = cache.size;
2483
2482
const uint32_t n_tokens = batch.n_tokens;
2484
2483
2485
2484
if (cache.recurrent) {
@@ -2530,16 +2529,16 @@ static bool llama_kv_cache_find_slot(
2530
2529
}
2531
2530
// otherwise, one cell per token.
2532
2531
2533
- if (n_tokens > n_ctx ) {
2534
- LLAMA_LOG_ERROR("%s: n_tokens=%d > n_ctx =%d\n", __func__, n_tokens, n_ctx );
2532
+ if (n_tokens > cache.size ) {
2533
+ LLAMA_LOG_ERROR("%s: n_tokens=%d > cache.size =%d\n", __func__, n_tokens, cache.size );
2535
2534
return false;
2536
2535
}
2537
2536
2538
2537
uint32_t n_tested = 0;
2539
2538
2540
2539
while (true) {
2541
- if (cache.head + n_tokens > n_ctx ) {
2542
- n_tested += n_ctx - cache.head;
2540
+ if (cache.head + n_tokens > cache.size ) {
2541
+ n_tested += cache.size - cache.head;
2543
2542
cache.head = 0;
2544
2543
continue;
2545
2544
}
@@ -2558,7 +2557,7 @@ static bool llama_kv_cache_find_slot(
2558
2557
break;
2559
2558
}
2560
2559
2561
- if (n_tested >= n_ctx ) {
2560
+ if (n_tested >= cache.size ) {
2562
2561
//LLAMA_LOG_ERROR("%s: failed to find a slot for %d tokens\n", __func__, n_tokens);
2563
2562
return false;
2564
2563
}
0 commit comments