Skip to content

Commit abde521

Browse files
committed
llama : don't attempt to serialize empty KV cache
Signed-off-by: Jared Van Bortel <[email protected]>
1 parent 7118c15 commit abde521

File tree

1 file changed: +3 −3 lines changed

llama.cpp

Lines changed: 3 additions & 3 deletions
```diff
@@ -10850,7 +10850,7 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
     data_ctx->write(&kv_size, sizeof(kv_size));
     data_ctx->write(&kv_used, sizeof(kv_used));

-    if (kv_buf_size) {
+    if (kv_buf_size && kv_head) {
         const size_t elt_size = ggml_element_size(kv_self.k_l[0]);

         std::vector<uint8_t> tmp_buf;
@@ -10961,9 +10961,9 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
     memcpy(&kv_size, inp, sizeof(kv_size)); inp += sizeof(kv_size);
     memcpy(&kv_used, inp, sizeof(kv_used)); inp += sizeof(kv_used);

-    if (kv_buf_size) {
-        GGML_ASSERT(kv_self.total_size() == kv_buf_size);
+    GGML_ASSERT(kv_self.total_size() == kv_buf_size);

+    if (kv_buf_size && kv_head) {
         const size_t elt_size = ggml_element_size(kv_self.k_l[0]);

         for (int il = 0; il < (int) n_layer; ++il) {
```

0 commit comments

Comments
 (0)