Commit 04959f1

feat: Update llama_cpp.py bindings

Parent: 35c980e

File tree

1 file changed: +12 -1 lines


llama_cpp/llama_cpp.py

Lines changed: 12 additions & 1 deletion
@@ -468,11 +468,13 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:
 # LLAMA_POOLING_TYPE_NONE = 0,
 # LLAMA_POOLING_TYPE_MEAN = 1,
 # LLAMA_POOLING_TYPE_CLS = 2,
+# LLAMA_POOLING_TYPE_LAST = 3,
 # };
 LLAMA_POOLING_TYPE_UNSPECIFIED = -1
 LLAMA_POOLING_TYPE_NONE = 0
 LLAMA_POOLING_TYPE_MEAN = 1
 LLAMA_POOLING_TYPE_CLS = 2
+LLAMA_POOLING_TYPE_LAST = 3
 
 # enum llama_split_mode {
 #     LLAMA_SPLIT_MODE_NONE = 0, // single GPU
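The new LLAMA_POOLING_TYPE_LAST constant mirrors the llama.cpp enum value for last-token pooling. A minimal sketch of how it might be selected through the low-level bindings (not part of this commit; "model.gguf" is a placeholder path, and the embeddings/pooling_type fields assume the llama_context_params layout of this version):

    import llama_cpp

    # Sketch: build a context that returns last-token-pooled sequence embeddings.
    llama_cpp.llama_backend_init()

    model_params = llama_cpp.llama_model_default_params()
    model = llama_cpp.llama_load_model_from_file(b"model.gguf", model_params)

    ctx_params = llama_cpp.llama_context_default_params()
    ctx_params.embeddings = True                                 # return embeddings
    ctx_params.pooling_type = llama_cpp.LLAMA_POOLING_TYPE_LAST  # new constant from this commit

    ctx = llama_cpp.llama_new_context_with_model(model, ctx_params)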
@@ -761,7 +763,6 @@ class llama_model_params(ctypes.Structure):
 
 # enum llama_rope_scaling_type rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type`
 # enum llama_pooling_type pooling_type; // whether to pool (sum) embedding results by sequence id
-#                                       // (ignored if no pooling layer)
 
 # // ref: https://github.com/ggerganov/llama.cpp/pull/2054
 # float rope_freq_base; // RoPE base frequency, 0 = from model
@@ -2316,6 +2317,16 @@ def llama_n_threads_batch(ctx: llama_context_p, /) -> int:
     ...
 
 
+# // Set whether the model is in embeddings mode or not
+# // If true, embeddings will be returned but logits will not
+# LLAMA_API void llama_set_embeddings(struct llama_context * ctx, bool embeddings);
+@ctypes_function("llama_set_embeddings", [llama_context_p_ctypes, ctypes.c_bool], None)
+def llama_set_embeddings(ctx: llama_context_p, embeddings: bool, /):
+    """Set whether the model is in embeddings mode or not.
+    If true, embeddings will be returned but logits will not."""
+    ...
+
+
 # // Set whether to use causal attention or not
 # // If set to true, the model will only attend to the past tokens
 # LLAMA_API void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn);
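Unlike the embeddings flag in llama_context_params, the new llama_set_embeddings binding flips an existing context at runtime, so one context can alternate between embedding extraction and text generation. A short sketch, reusing the ctx handle from the earlier snippet:

    # Sketch: toggle the context between embeddings and logits at runtime.
    llama_cpp.llama_set_embeddings(ctx, True)   # llama_decode now fills embeddings, not logits
    # ... run llama_decode and read the embeddings here ...
    llama_cpp.llama_set_embeddings(ctx, False)  # restore logits for generation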
