@@ -201,25 +201,6 @@ def llama_apply_lora_from_file(
 _lib.llama_apply_lora_from_file.restype = c_int
 
 
-# Returns the KV cache that will contain the context for the
-# ongoing prediction with the model.
-def llama_get_kv_cache(ctx: llama_context_p):
-    return _lib.llama_get_kv_cache(ctx)
-
-
-_lib.llama_get_kv_cache.argtypes = [llama_context_p]
-_lib.llama_get_kv_cache.restype = POINTER(c_uint8)
-
-
-# Returns the size of the KV cache
-def llama_get_kv_cache_size(ctx: llama_context_p) -> c_size_t:
-    return _lib.llama_get_kv_cache_size(ctx)
-
-
-_lib.llama_get_kv_cache_size.argtypes = [llama_context_p]
-_lib.llama_get_kv_cache_size.restype = c_size_t
-
-
 # Returns the number of tokens in the KV cache
 def llama_get_kv_cache_token_count(ctx: llama_context_p) -> c_int:
     return _lib.llama_get_kv_cache_token_count(ctx)
@@ -229,17 +210,6 @@ def llama_get_kv_cache_token_count(ctx: llama_context_p) -> c_int:
 _lib.llama_get_kv_cache_token_count.restype = c_int
 
 
-# Sets the KV cache containing the current context for the model
-def llama_set_kv_cache(
-    ctx: llama_context_p, kv_cache, n_size: c_size_t, n_token_count: c_int
-):
-    return _lib.llama_set_kv_cache(ctx, kv_cache, n_size, n_token_count)
-
-
-_lib.llama_set_kv_cache.argtypes = [llama_context_p, POINTER(c_uint8), c_size_t, c_int]
-_lib.llama_set_kv_cache.restype = None
-
-
 # Returns the size in bytes of the state (rng, logits, embedding and kv_cache)
 def llama_get_state_size(ctx: llama_context_p) -> c_size_t:
     return _lib.llama_get_state_size(ctx)
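
For orientation: this change removes the deprecated raw KV cache accessors (llama_get_kv_cache, llama_get_kv_cache_size, llama_set_kv_cache) in favor of treating the context state as an opaque blob whose size is reported by llama_get_state_size. Below is a minimal sketch of saving and restoring that state, assuming the module also binds llama_copy_state_data and llama_set_state_data from llama.h; neither binding appears in these hunks, so treat both calls as assumptions:

from ctypes import c_uint8

def save_state(ctx: llama_context_p) -> bytes:
    # State covers rng, logits, embedding and kv_cache, per the comment above.
    n_bytes = llama_get_state_size(ctx)
    buf = (c_uint8 * n_bytes)()           # writable byte buffer of exactly that size
    llama_copy_state_data(ctx, buf)       # assumed binding: serialize state into buf
    return bytes(buf)

def restore_state(ctx: llama_context_p, state: bytes) -> None:
    buf = (c_uint8 * len(state)).from_buffer_copy(state)
    llama_set_state_data(ctx, buf)        # assumed binding: load state from buf

Sizing the buffer from llama_get_state_size before copying keeps callers independent of the blob's internal layout, which is presumably why the per-component KV cache accessors were dropped.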