Skip to content

Commit d0c0083

Browse files
committed
Added support for . (any character) token in grammar engine.
1 parent a143c04 commit d0c0083

File tree

3 files changed

+24
-2
lines changed

3 files changed

+24
-2
lines changed

common/grammar-parser.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,10 @@ namespace grammar_parser {
266266
throw std::runtime_error(std::string("expecting ')' at ") + pos);
267267
}
268268
pos = parse_space(pos + 1, is_nested);
269+
} else if (*pos == '.') { // any char
270+
last_sym_start = out_elements.size();
271+
out_elements.push_back({LLAMA_GRETYPE_CHAR_ANY, 0});
272+
pos = parse_space(pos + 1, is_nested);
269273
} else if (*pos == '*') {
270274
pos = parse_space(pos + 1, is_nested);
271275
handle_repetitions(0, -1);
@@ -401,6 +405,7 @@ namespace grammar_parser {
401405
case LLAMA_GRETYPE_CHAR_NOT: return true;
402406
case LLAMA_GRETYPE_CHAR_ALT: return true;
403407
case LLAMA_GRETYPE_CHAR_RNG_UPPER: return true;
408+
case LLAMA_GRETYPE_CHAR_ANY: return true;
404409
default: return false;
405410
}
406411
}
@@ -415,6 +420,7 @@ namespace grammar_parser {
415420
case LLAMA_GRETYPE_CHAR_NOT: fprintf(file, "CHAR_NOT"); break;
416421
case LLAMA_GRETYPE_CHAR_RNG_UPPER: fprintf(file, "CHAR_RNG_UPPER"); break;
417422
case LLAMA_GRETYPE_CHAR_ALT: fprintf(file, "CHAR_ALT"); break;
423+
case LLAMA_GRETYPE_CHAR_ANY: fprintf(file, "CHAR_ANY"); break;
418424
}
419425
switch (elem.type) {
420426
case LLAMA_GRETYPE_END:
@@ -426,6 +432,7 @@ namespace grammar_parser {
426432
case LLAMA_GRETYPE_CHAR_NOT:
427433
case LLAMA_GRETYPE_CHAR_RNG_UPPER:
428434
case LLAMA_GRETYPE_CHAR_ALT:
435+
case LLAMA_GRETYPE_CHAR_ANY:
429436
fprintf(file, "(\"");
430437
print_grammar_char(file, elem.value);
431438
fprintf(file, "\") ");
@@ -483,11 +490,15 @@ namespace grammar_parser {
483490
}
484491
print_grammar_char(file, elem.value);
485492
break;
493+
case LLAMA_GRETYPE_CHAR_ANY:
494+
fprintf(file, ".");
495+
break;
486496
}
487497
if (is_char_element(elem)) {
488498
switch (rule[i + 1].type) {
489499
case LLAMA_GRETYPE_CHAR_ALT:
490500
case LLAMA_GRETYPE_CHAR_RNG_UPPER:
501+
case LLAMA_GRETYPE_CHAR_ANY:
491502
break;
492503
default:
493504
fprintf(file, "] ");

llama.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13640,7 +13640,7 @@ static std::pair<bool, const llama_grammar_element *> llama_grammar_match_char(
1364013640
const uint32_t chr) {
1364113641

1364213642
bool found = false;
13643-
bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR;
13643+
bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR || pos->type == LLAMA_GRETYPE_CHAR_ANY;
1364413644

1364513645
GGML_ASSERT(is_positive_char || pos->type == LLAMA_GRETYPE_CHAR_NOT); // NOLINT
1364613646

@@ -13649,6 +13649,10 @@ static std::pair<bool, const llama_grammar_element *> llama_grammar_match_char(
1364913649
// inclusive range, e.g. [a-z]
1365013650
found = found || (pos->value <= chr && chr <= pos[1].value);
1365113651
pos += 2;
13652+
} else if (pos->type == LLAMA_GRETYPE_CHAR_ANY) {
13653+
// Any character matches "."
13654+
found = true;
13655+
pos += 1;
1365213656
} else {
1365313657
// exact char match, e.g. [a] or "a"
1365413658
found = found || pos->value == chr;
@@ -13666,7 +13670,7 @@ static bool llama_grammar_match_partial_char(
1366613670
const llama_grammar_element * pos,
1366713671
const llama_partial_utf8 partial_utf8) {
1366813672

13669-
bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR;
13673+
bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR || pos->type == LLAMA_GRETYPE_CHAR_ANY;
1367013674
GGML_ASSERT(is_positive_char || pos->type == LLAMA_GRETYPE_CHAR_NOT);
1367113675

1367213676
uint32_t partial_value = partial_utf8.value;
@@ -13696,6 +13700,9 @@ static bool llama_grammar_match_partial_char(
1369613700
return is_positive_char;
1369713701
}
1369813702
pos += 2;
13703+
} else if (pos->type == LLAMA_GRETYPE_CHAR_ANY) {
13704+
// Any character matches "."
13705+
return true;
1369913706
} else {
1370013707
// exact char match, e.g. [a] or "a"
1370113708
if (low <= pos->value && pos->value <= high) {
@@ -13756,6 +13763,7 @@ static void llama_grammar_advance_stack(
1375613763
}
1375713764
case LLAMA_GRETYPE_CHAR:
1375813765
case LLAMA_GRETYPE_CHAR_NOT:
13766+
case LLAMA_GRETYPE_CHAR_ANY:
1375913767
if (std::find(new_stacks.begin(), new_stacks.end(), stack) == new_stacks.end()) {
1376013768
// only add the stack if it's not a duplicate of one we already have
1376113769
new_stacks.emplace_back(stack);

llama.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,9 @@ extern "C" {
365365
// modifies a preceding LLAMA_GRETYPE_CHAR or
366366
// LLAMA_GRETYPE_CHAR_RNG_UPPER to add an alternate char to match ([ab], [a-zA])
367367
LLAMA_GRETYPE_CHAR_ALT = 6,
368+
369+
// any character (.)
370+
LLAMA_GRETYPE_CHAR_ANY = 7,
368371
};
369372

370373
typedef struct llama_grammar_element {

0 commit comments

Comments
 (0)