@@ -201,25 +201,6 @@ def llama_apply_lora_from_file(
 _lib.llama_apply_lora_from_file.restype = c_int
 
 
-# Returns the KV cache that will contain the context for the
-# ongoing prediction with the model.
-def llama_get_kv_cache(ctx: llama_context_p):
-    return _lib.llama_get_kv_cache(ctx)
-
-
-_lib.llama_get_kv_cache.argtypes = [llama_context_p]
-_lib.llama_get_kv_cache.restype = POINTER(c_uint8)
-
-
-# Returns the size of the KV cache
-def llama_get_kv_cache_size(ctx: llama_context_p) -> c_size_t:
-    return _lib.llama_get_kv_cache_size(ctx)
-
-
-_lib.llama_get_kv_cache_size.argtypes = [llama_context_p]
-_lib.llama_get_kv_cache_size.restype = c_size_t
-
-
 # Returns the number of tokens in the KV cache
 def llama_get_kv_cache_token_count(ctx: llama_context_p) -> c_int:
     return _lib.llama_get_kv_cache_token_count(ctx)
@@ -229,17 +210,6 @@ def llama_get_kv_cache_token_count(ctx: llama_context_p) -> c_int:
 _lib.llama_get_kv_cache_token_count.restype = c_int
 
 
-# Sets the KV cache containing the current context for the model
-def llama_set_kv_cache(
-    ctx: llama_context_p, kv_cache, n_size: c_size_t, n_token_count: c_int
-):
-    return _lib.llama_set_kv_cache(ctx, kv_cache, n_size, n_token_count)
-
-
-_lib.llama_set_kv_cache.argtypes = [llama_context_p, POINTER(c_uint8), c_size_t, c_int]
-_lib.llama_set_kv_cache.restype = None
-
-
 # Returns the size in bytes of the state (rng, logits, embedding and kv_cache)
 def llama_get_state_size(ctx: llama_context_p) -> c_size_t:
     return _lib.llama_get_state_size(ctx)
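
For orientation: this change removes the deprecated raw KV cache accessors (llama_get_kv_cache, llama_get_kv_cache_size, llama_set_kv_cache) in favor of treating the context state as an opaque blob whose size is reported by llama_get_state_size. Below is a minimal sketch of saving and restoring that state, assuming the module also binds llama_copy_state_data and llama_set_state_data from llama.h; neither binding appears in these hunks, so treat both calls as assumptions:

from ctypes import c_uint8

def save_state(ctx: llama_context_p) -> bytes:
    # State covers rng, logits, embedding and kv_cache, per the comment above.
    n_bytes = llama_get_state_size(ctx)
    buf = (c_uint8 * n_bytes)()           # writable byte buffer of exactly that size
    llama_copy_state_data(ctx, buf)       # assumed binding: serialize state into buf
    return bytes(buf)

def restore_state(ctx: llama_context_p, state: bytes) -> None:
    buf = (c_uint8 * len(state)).from_buffer_copy(state)
    llama_set_state_data(ctx, buf)        # assumed binding: load state from buf

Sizing the buffer from llama_get_state_size before copying keeps callers independent of the blob's internal layout, which is presumably why the per-component KV cache accessors were dropped.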