1 file changed: +4 −6 lines changed

@@ -179,8 +179,8 @@ llama_context::llama_context(
     // init the memory module
     if (!hparams.vocab_only) {
         llama_memory_params params_mem = {
-            /*.type_k =*/ params.type_k,
-            /*.type_v =*/ params.type_v,
+            /*.type_k =*/ params.type_k,
+            /*.type_v =*/ params.type_v,
         };

         memory.reset(model.create_memory(cparams, params_mem));
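
The `type_k`/`type_v` fields above are taken from the context parameters supplied by the caller and select the tensor types used for the K and V cache. A minimal usage sketch, assuming the public llama.cpp API (`llama_context_default_params`, `llama_init_from_model`) and that the chosen cache types are supported by the backend in use:

```cpp
#include "llama.h"

// Sketch only: pick the KV cache tensor types before creating a context.
// The constructor shown in the hunk above copies these values into
// llama_memory_params when it creates the memory module.
llama_context * make_ctx(llama_model * model) {
    llama_context_params cparams = llama_context_default_params();
    cparams.type_k = GGML_TYPE_Q8_0; // quantized K cache (assumes backend support)
    cparams.type_v = GGML_TYPE_F16;  // F16 V cache
    return llama_init_from_model(model, cparams);
}
```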
@@ -997,11 +997,9 @@ int llama_context::encode(llama_batch & inp_batch) {
         return -1;
     }

-    llama_kv_cache * kv_self = static_cast<llama_kv_cache *>(memory.get());
-
     // temporary allocate memory for the input batch if needed
-    // TODO: this is incorrect for multiple sequences because get_pos_max() is the maximum across all sequences
-    llama_batch_allocr batch_allocr(inp_batch, inp_batch.pos ? -1 : kv_self->get_pos_max() + 1);
+    // note: during encode, we always pass the full sequence starting from pos = 0
+    llama_batch_allocr batch_allocr(inp_batch, inp_batch.pos ? -1 : 0);

     const llama_batch & batch = batch_allocr.batch;
     const int32_t n_tokens = batch.n_tokens;
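
As the removed TODO notes, deriving the default position from `kv_self->get_pos_max() + 1` was incorrect for batches containing multiple sequences, since that maximum is taken across all of them. During encode the whole sequence is passed anyway, so implicit positions can simply start at 0. A rough, self-contained sketch of what an allocator might do with that default when the caller passes no explicit `pos` array; this is an illustration only, not the actual `llama_batch_allocr` implementation:

```cpp
#include <cstdint>
#include <vector>

using llama_pos = int32_t;

// If the batch carries no explicit positions, fill them in sequentially
// starting from the default p0. With the change above, encode() passes
// p0 == 0, so the positions become 0, 1, ..., n_tokens - 1.
static std::vector<llama_pos> default_positions(int32_t n_tokens, llama_pos p0) {
    std::vector<llama_pos> pos(n_tokens);
    for (int32_t i = 0; i < n_tokens; ++i) {
        pos[i] = p0 + i;
    }
    return pos;
}
```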