
Commit ac188a2

Added low level grammar API

1 parent 91bf8fa

2 files changed: +1365 −0 lines changed


llama_cpp/llama_cpp.py

Lines changed: 34 additions & 0 deletions
@@ -1157,6 +1157,23 @@ def llama_sample_temperature(
 _lib.llama_sample_temperature.restype = None


+# LLAMA_API void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * candidates, const struct llama_grammar * grammar);
+def llama_sample_grammar(
+    ctx: llama_context_p,
+    candidates,  # type: _Pointer[llama_token_data_array]
+    grammar,  # type: llama_grammar_p
+):
+    return _lib.llama_sample_grammar(ctx, candidates, grammar)
+
+
+_lib.llama_sample_grammar.argtypes = [
+    llama_context_p,
+    llama_token_data_array_p,
+    llama_grammar_p,
+]
+_lib.llama_sample_grammar.restype = None
+
+
 # @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
 # @param candidates A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.
 # @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.
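
Usage note (not part of the commit): the binding above follows the same pattern as the other low-level sampling wrappers in llama_cpp.py, where a llama_token_data_array built from the current logits is passed by pointer. Below is a minimal sketch of a call, assuming ctx is an initialized llama_context_p, grammar is a llama_grammar_p obtained from the grammar-construction API (not shown in this hunk), and the pre-existing llama_n_vocab, llama_get_logits, llama_token_data, llama_token_data_array, llama_token_data_p, and llama_sample_token bindings. The helper name sample_with_grammar is hypothetical.

    import ctypes

    import llama_cpp

    def sample_with_grammar(ctx, grammar):
        # Hypothetical helper (not in the commit): sample one token
        # with the candidate set constrained by the grammar.
        n_vocab = llama_cpp.llama_n_vocab(ctx)
        logits = llama_cpp.llama_get_logits(ctx)

        # Build a llama_token_data_array over the current logits.
        data = (llama_cpp.llama_token_data * n_vocab)(
            *(
                llama_cpp.llama_token_data(id=i, logit=logits[i], p=0.0)
                for i in range(n_vocab)
            )
        )
        candidates = llama_cpp.llama_token_data_array(
            data=ctypes.cast(data, llama_cpp.llama_token_data_p),
            size=n_vocab,
            sorted=False,
        )
        candidates_p = ctypes.pointer(candidates)

        # Mask out candidates the grammar cannot accept, then sample.
        llama_cpp.llama_sample_grammar(ctx, candidates_p, grammar)
        return llama_cpp.llama_sample_token(ctx, candidates_p)

In llama.cpp, llama_sample_grammar rejects candidates the grammar cannot accept (their logits are pushed to -inf), so the subsequent sampler only ever picks grammar-legal tokens.
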
@@ -1244,6 +1261,23 @@ def llama_sample_token(
 _lib.llama_sample_token.restype = llama_token


+# /// @details Accepts the sampled token into the grammar
+# LLAMA_API void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar * grammar, llama_token token);
+def llama_grammar_accept_token(
+    ctx: llama_context_p,
+    grammar: llama_grammar_p,
+    token: llama_token,
+) -> None:
+    _lib.llama_grammar_accept_token(ctx, grammar, token)
+
+
+_lib.llama_grammar_accept_token.argtypes = [
+    llama_context_p,
+    llama_grammar_p,
+    llama_token,
+]
+_lib.llama_grammar_accept_token.restype = None
+
 # Performance information