Skip to content

Commit 757c4df

Browse files
author
ochafik
committed
json: unified properties order across optional & required
1 parent 139cc62 commit 757c4df

File tree

5 files changed

+147
-103
lines changed

5 files changed

+147
-103
lines changed

common/json-schema-to-grammar.cpp

Lines changed: 23 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -700,10 +700,9 @@ class SchemaConverter {
700700
const std::string & name,
701701
const json & additional_properties)
702702
{
703-
std::vector<std::string> required_props;
704-
std::vector<std::string> optional_props;
705-
std::unordered_map<std::string, std::string> prop_kv_rule_names;
706703
std::vector<std::string> prop_names;
704+
prop_names.reserve(properties.size() + 1);
705+
std::unordered_map<std::string, std::string> prop_kv_rule_names;
707706
for (const auto & kv : properties) {
708707
const auto &prop_name = kv.first;
709708
const auto &prop_schema = kv.second;
@@ -713,11 +712,6 @@ class SchemaConverter {
713712
name + (name.empty() ? "" : "-") + prop_name + "-kv",
714713
format_literal(json(prop_name).dump()) + " space \":\" space " + prop_rule_name
715714
);
716-
if (required.find(prop_name) != required.end()) {
717-
required_props.push_back(prop_name);
718-
} else {
719-
optional_props.push_back(prop_name);
720-
}
721715
prop_names.push_back(prop_name);
722716
}
723717
if ((additional_properties.is_boolean() && additional_properties.get<bool>()) || additional_properties.is_object()) {
@@ -731,35 +725,27 @@ class SchemaConverter {
731725
: _add_rule(sub_name + "-k", _not_strings(prop_names));
732726
std::string kv_rule = _add_rule(sub_name + "-kv", key_rule + " \":\" space " + value_rule);
733727
prop_kv_rule_names["*"] = kv_rule;
734-
optional_props.push_back("*");
728+
prop_names.push_back("*");
735729
}
736730

737731
std::string rule = "\"{\" space ";
738-
for (size_t i = 0; i < required_props.size(); i++) {
739-
if (i > 0) {
740-
rule += " \",\" space ";
741-
}
742-
rule += prop_kv_rule_names[required_props[i]];
743-
}
744-
745-
if (!optional_props.empty()) {
746-
rule += " (";
747-
if (!required_props.empty()) {
748-
rule += " \",\" space ( ";
749-
}
750-
732+
if (!prop_kv_rule_names.empty()) {
751733
std::function<std::string(const std::vector<std::string> &, bool)> get_recursive_refs = [&](const std::vector<std::string> & ks, bool first_is_optional) {
752734
std::string res;
753735
if (ks.empty()) {
754736
return res;
755737
}
756738
std::string k = ks[0];
757739
std::string kv_rule_name = prop_kv_rule_names[k];
758-
std::string comma_ref = "( \",\" space " + kv_rule_name + " )";
740+
std::string comma_ref = "\",\" space " + kv_rule_name;
759741
if (first_is_optional) {
760-
res = comma_ref + (k == "*" ? "*" : "?");
742+
if (required.find(k) == required.end()) {
743+
res = "( " + comma_ref + (k == "*" ? " )*" : " )?");
744+
} else {
745+
res = comma_ref;
746+
}
761747
} else {
762-
res = kv_rule_name + (k == "*" ? " " + comma_ref + "*" : "");
748+
res = kv_rule_name + (k == "*" ? " ( " + comma_ref + " )*" : "");
763749
}
764750
if (ks.size() > 1) {
765751
res += " " + _add_rule(
@@ -770,16 +756,21 @@ class SchemaConverter {
770756
return res;
771757
};
772758

773-
for (size_t i = 0; i < optional_props.size(); i++) {
774-
if (i > 0) {
775-
rule += " | ";
759+
std::vector<std::string> alternatives;
760+
auto has_required = false;
761+
for (size_t i = 0; i < prop_names.size(); i++) {
762+
alternatives.push_back(get_recursive_refs(std::vector<std::string>(prop_names.begin() + i, prop_names.end()), false));
763+
if (required.find(prop_names[i]) != required.end()) {
764+
has_required = true;
765+
break;
776766
}
777-
rule += get_recursive_refs(std::vector<std::string>(optional_props.begin() + i, optional_props.end()), false);
778767
}
779-
if (!required_props.empty()) {
780-
rule += " )";
768+
auto alts = join(alternatives.begin(), alternatives.end(), " | ");
769+
if (alternatives.size() > 1 || !has_required) {
770+
rule += "( " + alts + (has_required ? " )" : " )?");
771+
} else {
772+
rule += alts;
781773
}
782-
rule += " )?";
783774
}
784775

785776
rule += " \"}\" space";

examples/json_schema_to_grammar.py

Lines changed: 31 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import sys
77
from typing import Any, List, Optional, Set, Tuple, Union
88

9+
910
def _build_repetition(item_rule, min_items, max_items, separator_rule=None):
1011

1112
if min_items == 0 and max_items == 1:
@@ -677,63 +678,68 @@ def _add_primitive(self, name: str, rule: BuiltinRule):
677678
return n
678679

679680
def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Optional[Union[bool, Any]]):
680-
prop_order = self._prop_order
681-
# sort by position in prop_order (if specified) then by original order
682-
sorted_props = [kv[0] for _, kv in sorted(enumerate(properties), key=lambda ikv: (prop_order.get(ikv[1][0], len(prop_order)), ikv[0]))]
683-
684681
prop_kv_rule_names = {}
682+
prop_names = []
685683
for prop_name, prop_schema in properties:
686684
prop_rule_name = self.visit(prop_schema, f'{name}{"-" if name else ""}{prop_name}')
687685
prop_kv_rule_names[prop_name] = self._add_rule(
688686
f'{name}{"-" if name else ""}{prop_name}-kv',
689687
fr'{self._format_literal(json.dumps(prop_name))} space ":" space {prop_rule_name}'
690688
)
691-
required_props = [k for k in sorted_props if k in required]
692-
optional_props = [k for k in sorted_props if k not in required]
689+
prop_names.append(prop_name)
690+
691+
prop_order = self._prop_order
692+
if prop_order:
693+
# sort by position in prop_order (if specified) then by original order
694+
prop_names.sort(key=lambda k: (prop_order.get(k, float('inf')), prop_names.index(k)))
693695

694696
if additional_properties is not None and additional_properties != False:
695697
sub_name = f'{name}{"-" if name else ""}additional'
696698
value_rule = self.visit(additional_properties, f'{sub_name}-value') if isinstance(additional_properties, dict) else \
697699
self._add_primitive('value', PRIMITIVE_RULES['value'])
698-
key_rule = self._add_primitive('string', PRIMITIVE_RULES['string']) if not sorted_props \
699-
else self._add_rule(f'{sub_name}-k', self._not_strings(sorted_props))
700+
key_rule = self._add_primitive('string', PRIMITIVE_RULES['string']) if not prop_names \
701+
else self._add_rule(f'{sub_name}-k', self._not_strings(prop_names))
700702

701703
prop_kv_rule_names["*"] = self._add_rule(
702704
f'{sub_name}-kv',
703705
f'{key_rule} ":" space {value_rule}'
704706
)
705-
optional_props.append("*")
707+
prop_names.append("*")
706708

707709
rule = '"{" space '
708-
rule += ' "," space '.join(prop_kv_rule_names[k] for k in required_props)
709-
710-
if optional_props:
711-
rule += ' ('
712-
if required_props:
713-
rule += ' "," space ( '
714710

711+
if prop_kv_rule_names:
715712
def get_recursive_refs(ks, first_is_optional):
716713
[k, *rest] = ks
717714
kv_rule_name = prop_kv_rule_names[k]
718-
comma_ref = f'( "," space {kv_rule_name} )'
715+
comma_ref = f'"," space {kv_rule_name}'
719716
if first_is_optional:
720-
res = comma_ref + ('*' if k == '*' else '?')
717+
if k not in required:
718+
res = '( ' + comma_ref + (' )*' if k == '*' else ' )?')
719+
else:
720+
res = comma_ref
721721
else:
722-
res = kv_rule_name + (' ' + comma_ref + "*" if k == '*' else '')
722+
res = kv_rule_name + (' ( ' + comma_ref + " )*" if k == '*' else '')
723723
if len(rest) > 0:
724724
res += ' ' + self._add_rule(
725725
f'{name}{"-" if name else ""}{k}-rest',
726726
get_recursive_refs(rest, first_is_optional=True)
727727
)
728728
return res
729729

730-
rule += ' | '.join(
731-
get_recursive_refs(optional_props[i:], first_is_optional=False)
732-
for i in range(len(optional_props))
733-
)
734-
if required_props:
735-
rule += ' )'
736-
rule += ' )?'
730+
alternatives = []
731+
has_required = False
732+
for i, k in enumerate(prop_names):
733+
alternatives.append(get_recursive_refs(prop_names[i:], first_is_optional=False))
734+
if k in required:
735+
has_required = True
736+
break
737+
738+
alts = ' | '.join(alternatives)
739+
if len(alternatives) > 1 or not has_required:
740+
rule += '( ' + alts + (' )' if has_required else ' )?')
741+
else:
742+
rule += alts
737743

738744
rule += ' "}" space'
739745

examples/server/public/json-schema-to-grammar.mjs

Lines changed: 39 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -732,24 +732,26 @@ export class SchemaConverter {
732732
}
733733

734734
_buildObjectRule(properties, required, name, additionalProperties) {
735-
const propOrder = this._propOrder;
736-
// sort by position in prop_order (if specified) then by original order
737-
const sortedProps = properties.map(([k]) => k).sort((a, b) => {
738-
const orderA = propOrder[a] || Infinity;
739-
const orderB = propOrder[b] || Infinity;
740-
return orderA - orderB || properties.findIndex(([k]) => k === a) - properties.findIndex(([k]) => k === b);
741-
});
742-
743735
const propKvRuleNames = {};
736+
const propNames = []
744737
for (const [propName, propSchema] of properties) {
745738
const propRuleName = this.visit(propSchema, `${name ?? ''}${name ? '-' : ''}${propName}`);
746739
propKvRuleNames[propName] = this._addRule(
747740
`${name ?? ''}${name ? '-' : ''}${propName}-kv`,
748741
`${this._formatLiteral(JSON.stringify(propName))} space ":" space ${propRuleName}`
749742
);
743+
propNames.push(propName);
744+
}
745+
746+
const propOrder = this._propOrder;
747+
if (Object.keys(propOrder).length > 0) {
748+
// sort by position in prop_order (if specified) then by original order
749+
propNames.sort((a, b) => {
750+
const orderA = propOrder[a] || Infinity;
751+
const orderB = propOrder[b] || Infinity;
752+
return orderA - orderB || properties.findIndex(([k]) => k === a) - properties.findIndex(([k]) => k === b);
753+
});
750754
}
751-
const requiredProps = sortedProps.filter(k => required.has(k));
752-
const optionalProps = sortedProps.filter(k => !required.has(k));
753755

754756
if (additionalProperties) {
755757
const subName = `${name ?? ''}${name ? '-' : ''}additional`;
@@ -758,33 +760,32 @@ export class SchemaConverter {
758760
: this._addPrimitive('value', PRIMITIVE_RULES['value']);
759761

760762
const key_rule =
761-
sortedProps.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string'])
762-
: this._addRule(`${subName}-k`, this._notStrings(sortedProps));
763+
propNames.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string'])
764+
: this._addRule(`${subName}-k`, this._notStrings(propNames));
763765

764766
propKvRuleNames['*'] = this._addRule(
765767
`${subName}-kv`,
766768
`${key_rule} ":" space ${valueRule}`);
767-
optionalProps.push('*');
769+
propNames.push('*');
768770
}
769771

770772
let rule = '"{" space ';
771-
rule += requiredProps.map(k => propKvRuleNames[k]).join(' "," space ');
772-
773-
if (optionalProps.length > 0) {
774-
rule += ' (';
775-
if (requiredProps.length > 0) {
776-
rule += ' "," space ( ';
777-
}
778773

774+
if (propNames.length > 0) {
779775
const getRecursiveRefs = (ks, firstIsOptional) => {
780776
const [k, ...rest] = ks;
781777
const kvRuleName = propKvRuleNames[k];
782778
let res;
783-
const commaRef = `( "," space ${kvRuleName} )`;
779+
const commaRef = `"," space ${kvRuleName}`;
784780
if (firstIsOptional) {
785-
res = commaRef + (k === '*' ? '*' : '?');
781+
// res = commaRef + (k === '*' ? '*' : '?');
782+
if (!required.has(k)) {
783+
res = `( ${commaRef} )${k === '*' ? '*' : '?'}`;
784+
} else {
785+
res = commaRef;
786+
}
786787
} else {
787-
res = kvRuleName + (k === '*' ? ' ' + commaRef + '*' : '');
788+
res = kvRuleName + (k === '*' ? ' ( ' + commaRef + ' )*' : '');
788789
}
789790
if (rest.length > 0) {
790791
res += ' ' + this._addRule(
@@ -795,11 +796,22 @@ export class SchemaConverter {
795796
return res;
796797
};
797798

798-
rule += optionalProps.map((_, i) => getRecursiveRefs(optionalProps.slice(i), false)).join(' | ');
799-
if (requiredProps.length > 0) {
800-
rule += ' )';
799+
let hasRequired = false;
800+
const alternatives = [];
801+
for (let i = 0; i < propNames.length; i++) {
802+
alternatives.push(getRecursiveRefs(propNames.slice(i), false));
803+
if (required.has(propNames[i])) {
804+
hasRequired = true;
805+
break;
806+
}
807+
}
808+
809+
const alts = alternatives.join(' | ');
810+
if (alternatives.length > 1 || !hasRequired) {
811+
rule += `( ${alts} )${hasRequired ? '' : '?'}`;
812+
} else {
813+
rule += alts;
801814
}
802-
rule += ' )?';
803815
}
804816

805817
rule += ' "}" space';

tests/test-grammar-integration.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1112,6 +1112,33 @@ static void test_json_schema() {
11121112
}
11131113
);
11141114

1115+
test_schema(
1116+
"object property order",
1117+
// Schema
1118+
R"""({
1119+
"type": "object",
1120+
"properties": {
1121+
"a": { "type": "integer" },
1122+
"b": { "type": "integer" },
1123+
"c": { "type": "integer" },
1124+
"d": { "type": "integer" }
1125+
},
1126+
"required": ["b", "d"]
1127+
})""",
1128+
// Passing strings
1129+
{
1130+
R"""({ "b": 0, "d": 0 })""",
1131+
R"""({ "b": 0, "d": 0, "E": -1 })""",
1132+
R"""({ "a": 0, "b": 0, "c": 0, "d": 0 })""",
1133+
R"""({ "a": 0, "b": 0, "c": 0, "d": 0, "E": -1 })""",
1134+
},
1135+
// Failing strings
1136+
{
1137+
R"""({ "E": -1, "b": 0, "d": 0 })""",
1138+
R"""({ "b": 0, "d": 0, "a": 0 })""",
1139+
}
1140+
);
1141+
11151142
test_schema(
11161143
"additional properties can't override other properties",
11171144
R"""({

0 commit comments

Comments
 (0)