Skip to content

Commit 66c2c93

Browse files
authored
grammar : fix JSON Schema for string regex with top-level alt. (#9903)
Prior to this commit, using a JSON Schema containing a string with a `pattern` regular expression that uses top-level alternation (e.g. `"pattern": "^A|B|C|D$"`) would result in invalid JSON output from the constrained sampling grammar, because it ended up creating a grammar rule like this for the string:

```
thing ::= "\"" "A" | "B" | "C" | "D" "\"" space
```

Note that this rule will only match a starting quote for the "A" case, and will only match an ending quote for the "D" case, so this rule will always produce invalid JSON when used for sampling (that is, the JSON will always be lacking the starting quote, the ending quote, or both).

This was fixed in a simple way by adding parentheses to the generated rule (for all string pattern rules, to keep it simple), such that the new generated rule looks like this (correct):

```
thing ::= "\"" ("A" | "B" | "C" | "D") "\"" space
```
1 parent 10433e8 commit 66c2c93

File tree

4 files changed

+20
-7
lines changed

4 files changed

+20
-7
lines changed

common/json-schema-to-grammar.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -611,7 +611,7 @@ class SchemaConverter {
611611
}
612612
return join_seq();
613613
};
614-
return _add_rule(name, "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space");
614+
return _add_rule(name, "\"\\\"\" (" + to_rule(transform()) + ") \"\\\"\" space");
615615
}
616616

617617
/*

examples/json_schema_to_grammar.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -540,7 +540,7 @@ def join_seq():
540540
return self._add_rule(
541541
name,
542542
to_rule(transform()) if self._raw_pattern \
543-
else "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space")
543+
else "\"\\\"\" (" + to_rule(transform()) + ") \"\\\"\" space")
544544

545545

546546
def _resolve_ref(self, ref):

examples/server/public/json-schema-to-grammar.mjs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -529,7 +529,7 @@ export class SchemaConverter {
529529
return joinSeq();
530530
};
531531

532-
return this._addRule(name, "\"\\\"\" " + toRule(transform()) + " \"\\\"\" space")
532+
return this._addRule(name, "\"\\\"\" (" + toRule(transform()) + ") \"\\\"\" space")
533533
}
534534

535535
_notStrings(strings) {

tests/test-json-schema-to-grammar.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -696,7 +696,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
696696
"pattern": "^abc?d*efg+(hij)?kl$"
697697
})""",
698698
R"""(
699-
root ::= "\"" "ab" "c"? "d"* "ef" "g"+ ("hij")? "kl" "\"" space
699+
root ::= "\"" ("ab" "c"? "d"* "ef" "g"+ ("hij")? "kl") "\"" space
700700
space ::= | " " | "\n" [ \t]{0,20}
701701
)"""
702702
});
@@ -709,7 +709,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
709709
"pattern": "^\\[\\]\\{\\}\\(\\)\\|\\+\\*\\?$"
710710
})""",
711711
R"""(
712-
root ::= "\"" "[]{}()|+*?" "\"" space
712+
root ::= "\"" ("[]{}()|+*?") "\"" space
713713
space ::= | " " | "\n" [ \t]{0,20}
714714
)"""
715715
});
@@ -722,7 +722,20 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
722722
"pattern": "^\"$"
723723
})""",
724724
R"""(
725-
root ::= "\"" "\"" "\"" space
725+
root ::= "\"" ("\"") "\"" space
726+
space ::= | " " | "\n" [ \t]{0,20}
727+
)"""
728+
});
729+
730+
test({
731+
SUCCESS,
732+
"regexp with top-level alternation",
733+
R"""({
734+
"type": "string",
735+
"pattern": "^A|B|C|D$"
736+
})""",
737+
R"""(
738+
root ::= "\"" ("A" | "B" | "C" | "D") "\"" space
726739
space ::= | " " | "\n" [ \t]{0,20}
727740
)"""
728741
});
@@ -736,7 +749,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
736749
})""",
737750
R"""(
738751
dot ::= [^\x0A\x0D]
739-
root ::= "\"" ("(" root-1{1,3} ")")? root-1{3,3} "-" root-1{4,4} " " "a"{3,5} "nd" dot dot dot "\"" space
752+
root ::= "\"" (("(" root-1{1,3} ")")? root-1{3,3} "-" root-1{4,4} " " "a"{3,5} "nd" dot dot dot) "\"" space
740753
root-1 ::= [0-9]
741754
space ::= | " " | "\n" [ \t]{0,20}
742755
)"""

0 commit comments

Comments
 (0)