1 file changed: +5 -2 lines changed
Original file line number | Diff line number | Diff line change
@@ -8040,7 +8040,7 @@ static int llama_decode_internal(
8040
8040
//
8041
8041
// abs(cell[i0].pos - cell[i1].pos) <= compress_delta
8042
8042
//
8043
- // - move the KV cache to the Host memory for easier maniiplation
8043
+ // - move the KV cache to the host memory for easier manipulation
8044
8044
// - processing is done layer-by-layer
8045
8045
// - convert the KV data to F32
8046
8046
// - merge the KV data (different ways to merge)
@@ -8269,11 +8269,14 @@ static void llama_kv_cache_compress_internal(struct llama_context & lctx) {
8269
8269
}
8270
8270
8271
8271
// copy the KV cache to the host memory and reshuffle the cells to the beginning of the cache
8272
- // removing any empty segments that may have been left by previous KV cache operations
8272
+ // this way we eliminate any empty segments that may have been left by previous KV cache operations
8273
+ //
8273
8274
// TODO: optimizations are possible:
8274
8275
// - multiple threads
8275
8276
// - avoid copying to the host memory when already there
8277
+ //
8276
8278
// TODO: can we do all this on-device?
8279
+ //
8277
8280
static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
8278
8281
auto & kv_self = lctx.kv_self;
8279
8282
You can’t perform that action at this time.
0 commit comments