Skip to content

Commit 1b6aeb8

Browse files
committed
llama : comments
1 parent d141c74 commit 1b6aeb8

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

llama.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8040,7 +8040,7 @@ static int llama_decode_internal(
8040 8040
//
8041 8041
// abs(cell[i0].pos - cell[i1].pos) <= compress_delta
8042 8042
//
8043-
// - move the KV cache to the Host memory for easier maniiplation
8043+
// - move the KV cache to the host memory for easier manipulation
8044 8044
// - processing is done layer-by-layer
8045 8045
// - convert the KV data to F32
8046 8046
// - merge the KV data (different ways to merge)
@@ -8269,11 +8269,14 @@ static void llama_kv_cache_compress_internal(struct llama_context & lctx) {
8269 8269
}
8270 8270

8271 8271
// copy the KV cache to the host memory and reshuffle the cells to the beginning of the cache
8272-
// removing any empty segments that may have been left by previous KV cache operations
8272+
// this way we eliminate any empty segments that may have been left by previous KV cache operations
8273+
//
8273 8274
// TODO: optimizations are possible:
8274 8275
// - multiple threads
8275 8276
// - avoid copying to the host memory when already there
8277+
//
8276 8278
// TODO: can we do all this on-device?
8279+
//
8277 8280
static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
8278 8281
auto & kv_self = lctx.kv_self;
8279 8282

0 commit comments

Comments
 (0)