
Commit 1b53231

llama : remove reference of memory during encode
ggml-ci
1 parent: 7ad547b

1 file changed: +4 −6

src/llama-context.cpp

Lines changed: 4 additions & 6 deletions
@@ -179,8 +179,8 @@ llama_context::llama_context(
     // init the memory module
     if (!hparams.vocab_only) {
         llama_memory_params params_mem = {
-            /*.type_k =*/ params.type_k,
-            /*.type_v =*/ params.type_v,
+            /*.type_k =*/ params.type_k,
+            /*.type_v =*/ params.type_v,
         };

         memory.reset(model.create_memory(cparams, params_mem));

@@ -997,11 +997,9 @@ int llama_context::encode(llama_batch & inp_batch) {
         return -1;
     }

-    llama_kv_cache * kv_self = static_cast<llama_kv_cache *>(memory.get());
-
     // temporary allocate memory for the input batch if needed
-    // TODO: this is incorrect for multiple sequences because get_pos_max() is the maximum across all sequences
-    llama_batch_allocr batch_allocr(inp_batch, inp_batch.pos ? -1 : kv_self->get_pos_max() + 1);
+    // note: during encode, we always pass the full sequence starting from pos = 0
+    llama_batch_allocr batch_allocr(inp_batch, inp_batch.pos ? -1 : 0);

     const llama_batch & batch = batch_allocr.batch;
     const int32_t n_tokens = batch.n_tokens;
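
The removed code pulled a starting position out of the KV cache (get_pos_max() + 1), which is only meaningful for decoding; the encoder always processes the full sequence, so positions can simply start at 0 whenever the caller does not supply them. Dropping the kv_self lookup also removes the last use of the memory module inside encode(), which is what the commit title refers to. Below is a minimal sketch of that positioning behavior, using hypothetical simplified types rather than the actual llama_batch_allocr implementation:

// Minimal sketch (hypothetical types, not the actual llama.cpp API) of how a
// batch allocator can fill in token positions when the caller leaves them unset.
// p0 == -1 means the caller already provided positions; otherwise positions are
// assigned sequentially starting at p0. With this commit, encode passes p0 = 0.
#include <cstdint>
#include <vector>

struct batch_view {
    std::vector<int32_t> token; // token ids
    std::vector<int32_t> pos;   // empty when the caller did not set positions
};

static void assign_positions(batch_view & batch, int32_t p0) {
    if (p0 < 0) {
        return; // positions were supplied by the caller, keep them as-is
    }
    batch.pos.resize(batch.token.size());
    for (size_t i = 0; i < batch.token.size(); ++i) {
        batch.pos[i] = p0 + (int32_t) i; // encode: 0, 1, 2, ...
    }
}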
