Skip to content

Commit 774e9f5

Browse files
committed
Added support for . (any character) token in grammar engine.
1 parent 7672ade commit 774e9f5

File tree

3 files changed

+24
-2
lines changed

3 files changed

+24
-2
lines changed

common/grammar-parser.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,10 @@ namespace grammar_parser {
197197
throw std::runtime_error(std::string("expecting ')' at ") + pos);
198198
}
199199
pos = parse_space(pos + 1, is_nested);
200+
} else if (*pos == '.') { // any char
201+
last_sym_start = out_elements.size();
202+
out_elements.push_back({LLAMA_GRETYPE_CHAR_ANY, 0});
203+
pos = parse_space(pos + 1, is_nested);
200204
} else if (*pos == '*' || *pos == '+' || *pos == '?') { // repetition operator
201205
if (last_sym_start == out_elements.size()) {
202206
throw std::runtime_error(std::string("expecting preceding item to */+/? at ") + pos);
@@ -325,6 +329,7 @@ namespace grammar_parser {
325329
case LLAMA_GRETYPE_CHAR_NOT: return true;
326330
case LLAMA_GRETYPE_CHAR_ALT: return true;
327331
case LLAMA_GRETYPE_CHAR_RNG_UPPER: return true;
332+
case LLAMA_GRETYPE_CHAR_ANY: return true;
328333
default: return false;
329334
}
330335
}
@@ -339,6 +344,7 @@ namespace grammar_parser {
339344
case LLAMA_GRETYPE_CHAR_NOT: fprintf(file, "CHAR_NOT"); break;
340345
case LLAMA_GRETYPE_CHAR_RNG_UPPER: fprintf(file, "CHAR_RNG_UPPER"); break;
341346
case LLAMA_GRETYPE_CHAR_ALT: fprintf(file, "CHAR_ALT"); break;
347+
case LLAMA_GRETYPE_CHAR_ANY: fprintf(file, "CHAR_ANY"); break;
342348
}
343349
switch (elem.type) {
344350
case LLAMA_GRETYPE_END:
@@ -350,6 +356,7 @@ namespace grammar_parser {
350356
case LLAMA_GRETYPE_CHAR_NOT:
351357
case LLAMA_GRETYPE_CHAR_RNG_UPPER:
352358
case LLAMA_GRETYPE_CHAR_ALT:
359+
case LLAMA_GRETYPE_CHAR_ANY:
353360
fprintf(file, "(\"");
354361
print_grammar_char(file, elem.value);
355362
fprintf(file, "\") ");
@@ -407,11 +414,15 @@ namespace grammar_parser {
407414
}
408415
print_grammar_char(file, elem.value);
409416
break;
417+
case LLAMA_GRETYPE_CHAR_ANY:
418+
fprintf(file, ".");
419+
break;
410420
}
411421
if (is_char_element(elem)) {
412422
switch (rule[i + 1].type) {
413423
case LLAMA_GRETYPE_CHAR_ALT:
414424
case LLAMA_GRETYPE_CHAR_RNG_UPPER:
425+
case LLAMA_GRETYPE_CHAR_ANY:
415426
break;
416427
default:
417428
fprintf(file, "] ");

llama.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13631,7 +13631,7 @@ static std::pair<bool, const llama_grammar_element *> llama_grammar_match_char(
1363113631
const uint32_t chr) {
1363213632

1363313633
bool found = false;
13634-
bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR;
13634+
bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR || pos->type == LLAMA_GRETYPE_CHAR_ANY;
1363513635

1363613636
GGML_ASSERT(is_positive_char || pos->type == LLAMA_GRETYPE_CHAR_NOT); // NOLINT
1363713637

@@ -13640,6 +13640,10 @@ static std::pair<bool, const llama_grammar_element *> llama_grammar_match_char(
1364013640
// inclusive range, e.g. [a-z]
1364113641
found = found || (pos->value <= chr && chr <= pos[1].value);
1364213642
pos += 2;
13643+
} else if (pos->type == LLAMA_GRETYPE_CHAR_ANY) {
13644+
// Any character matches "."
13645+
found = true;
13646+
pos += 1;
1364313647
} else {
1364413648
// exact char match, e.g. [a] or "a"
1364513649
found = found || pos->value == chr;
@@ -13657,7 +13661,7 @@ static bool llama_grammar_match_partial_char(
1365713661
const llama_grammar_element * pos,
1365813662
const llama_partial_utf8 partial_utf8) {
1365913663

13660-
bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR;
13664+
bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR || pos->type == LLAMA_GRETYPE_CHAR_ANY;
1366113665
GGML_ASSERT(is_positive_char || pos->type == LLAMA_GRETYPE_CHAR_NOT);
1366213666

1366313667
uint32_t partial_value = partial_utf8.value;
@@ -13687,6 +13691,9 @@ static bool llama_grammar_match_partial_char(
1368713691
return is_positive_char;
1368813692
}
1368913693
pos += 2;
13694+
} else if (pos->type == LLAMA_GRETYPE_CHAR_ANY) {
13695+
// Any character matches "."
13696+
return true;
1369013697
} else {
1369113698
// exact char match, e.g. [a] or "a"
1369213699
if (low <= pos->value && pos->value <= high) {
@@ -13747,6 +13754,7 @@ static void llama_grammar_advance_stack(
1374713754
}
1374813755
case LLAMA_GRETYPE_CHAR:
1374913756
case LLAMA_GRETYPE_CHAR_NOT:
13757+
case LLAMA_GRETYPE_CHAR_ANY:
1375013758
if (std::find(new_stacks.begin(), new_stacks.end(), stack) == new_stacks.end()) {
1375113759
// only add the stack if it's not a duplicate of one we already have
1375213760
new_stacks.emplace_back(stack);

llama.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,9 @@ extern "C" {
365365
// modifies a preceding LLAMA_GRETYPE_CHAR or
366366
// LLAMA_GRETYPE_CHAR_RNG_UPPER to add an alternate char to match ([ab], [a-zA])
367367
LLAMA_GRETYPE_CHAR_ALT = 6,
368+
369+
// any character (.)
370+
LLAMA_GRETYPE_CHAR_ANY = 7,
368371
};
369372

370373
typedef struct llama_grammar_element {

0 commit comments

Comments
 (0)