Skip to content

Commit 2e4adb4

Browse files
committed
llama : fix integer signedness mixing
1 parent: 22504ec · commit: 2e4adb4

File tree

1 file changed

+5 additions, -4 deletions (lines changed)

1 file changed

+5
-4
lines changed

src/llama.cpp

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3561,7 +3561,7 @@ static bool llama_kv_cache_find_slot(
35613561
}
35623562

35633563
// gather and re-order
3564-
for (int32_t s = 0; s < n_seqs; ++s) {
3564+
for (uint32_t s = 0; s < n_seqs; ++s) {
35653565
int32_t dst_id = s + min;
35663566
int32_t src_id = cache.cells[batch.seq_id[s][0]].tail;
35673567
if (dst_id != src_id) {
@@ -3588,15 +3588,15 @@ static bool llama_kv_cache_find_slot(
35883588
int32_t cell_id = s + min;
35893589
llama_kv_cell & cell = cache.cells[cell_id];
35903590

3591-
if (last_pos != cell.pos + n_seq_tokens) {
3591+
if (last_pos != cell.pos + (llama_pos) n_seq_tokens) {
35923592
// What should happen when the pos backtracks or skips a value?
35933593
// Clearing the state mid-batch would require special-casing which isn't done.
35943594
LLAMA_LOG_WARN("%s: non-consecutive token position %d after %d for sequence %d\n",
35953595
__func__, last_pos, cell.pos, batch.seq_id[s][0]);
35963596
}
35973597
cell.pos = last_pos;
35983598
cell.seq_id.clear();
3599-
for (uint32_t j = 0; j < batch.n_seq_id[s]; ++ j) {
3599+
for (int32_t j = 0; j < batch.n_seq_id[s]; ++j) {
36003600
const llama_seq_id seq_id = batch.seq_id[s][j];
36013601
cell.seq_id.insert(seq_id);
36023602
cache.cells[seq_id].tail = cell_id;
@@ -3803,7 +3803,7 @@ static void llama_kv_cache_seq_keep(struct llama_kv_cache & cache, llama_seq_id
38033803
uint32_t new_head = cache.size;
38043804

38053805
for (uint32_t i = 0; i < cache.size; ++i) {
3806-
if (cache.recurrent && i != seq_id) {
3806+
if (cache.recurrent && (llama_seq_id) i != seq_id) {
38073807
cache.cells[i].tail = -1;
38083808
}
38093809
if (!cache.cells[i].has_seq_id(seq_id)) {
@@ -8992,6 +8992,7 @@ static struct ggml_tensor * llm_build_mamba(
89928992

89938993
// {n_embd, n_seq_tokens, n_seqs} => {n_embd, n_tokens}
89948994
cur = ggml_reshape_2d(ctx, cur, cur->ne[0], n_seq_tokens * n_seqs);
8995+
cb(cur, "mamba_out", il);
89958996

89968997
return cur;
89978998
}

0 commit comments

Comments
 (0)