Skip to content

Commit 3bcc4ea

Browse files
authored
Add small validator utility for PEG grammars (GH-23519)
1 parent 7865f51 commit 3bcc4ea

File tree

3 files changed

+106
-0
lines changed

3 files changed

+106
-0
lines changed
Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,51 @@
1+
import unittest
2+
from test import test_tools
3+
4+
test_tools.skip_if_missing('peg_generator')
5+
with test_tools.imports_under_tool('peg_generator'):
6+
from pegen.grammar_parser import GeneratedParser as GrammarParser
7+
from pegen.validator import SubRuleValidator, ValidationError
8+
from pegen.testutil import parse_string
9+
from pegen.grammar import Grammar
10+
11+
12+
class TestPegen(unittest.TestCase):
    """Exercise SubRuleValidator on small grammars with and without collisions."""

    # NOTE(review): the whitespace inside the grammar literals below was
    # reconstructed (alternatives indented under their rule) -- confirm it
    # matches the committed file.

    def _build(self, grammar_source: str):
        # Parse the grammar text and pair it with a fresh validator.
        grammar: Grammar = parse_string(grammar_source, GrammarParser)
        return grammar, SubRuleValidator(grammar)

    @staticmethod
    def _validate_every_rule(grammar, validator) -> None:
        # Feed each rule of the grammar to the validator, in grammar order.
        for rule_name, rule in grammar.rules.items():
            validator.validate_rule(rule_name, rule)

    def test_rule_with_no_collision(self) -> None:
        # Neither alternative is a textual prefix of the other: no error.
        grammar_source = """
        start: bad_rule
        sum:
            | NAME '-' NAME
            | NAME '+' NAME
        """
        grammar, validator = self._build(grammar_source)
        self._validate_every_rule(grammar, validator)

    def test_rule_with_simple_collision(self) -> None:
        # The second alternative starts with the whole first alternative.
        grammar_source = """
        start: bad_rule
        sum:
            | NAME '+' NAME
            | NAME '+' NAME ';'
        """
        grammar, validator = self._build(grammar_source)
        with self.assertRaises(ValidationError):
            self._validate_every_rule(grammar, validator)

    def test_rule_with_collision_after_some_other_rules(self) -> None:
        # The colliding pair is the first and last alternative, with
        # unrelated alternatives in between.
        grammar_source = """
        start: bad_rule
        sum:
            | NAME '+' NAME
            | NAME '*' NAME ';'
            | NAME '-' NAME
            | NAME '+' NAME ';'
        """
        grammar, validator = self._build(grammar_source)
        with self.assertRaises(ValidationError):
            self._validate_every_rule(grammar, validator)

Tools/peg_generator/pegen/__main__.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
from typing import Tuple
1515

1616
from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator
17+
from pegen.validator import validate_grammar
1718

1819

1920
def generate_c_code(
@@ -128,6 +129,8 @@ def main() -> None:
128129
grammar, parser, tokenizer, gen = args.func(args)
129130
t1 = time.time()
130131

132+
validate_grammar(grammar)
133+
131134
if not args.quiet:
132135
if args.verbose:
133136
print("Raw Grammar:")
Lines changed: 52 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,52 @@
1+
from pegen import grammar
2+
from pegen.grammar import (
3+
Alt,
4+
Cut,
5+
Gather,
6+
GrammarVisitor,
7+
Group,
8+
Lookahead,
9+
NamedItem,
10+
NameLeaf,
11+
NegativeLookahead,
12+
Opt,
13+
PositiveLookahead,
14+
Repeat0,
15+
Repeat1,
16+
Rhs,
17+
Rule,
18+
StringLeaf,
19+
)
20+
21+
class ValidationError(Exception):
    """Raised by grammar validators when a rule fails a check."""
23+
24+
class GrammarValidator(GrammarVisitor):
    """Base class for checks that walk a grammar one rule at a time.

    While a rule is being visited its name is available as ``self.rulename``
    (used, for example, when building error messages); outside of
    ``validate_rule`` the attribute is ``None``.
    """

    def __init__(self, grammar: grammar.Grammar):
        self.grammar = grammar
        # Name of the rule currently being validated, or None when idle.
        self.rulename = None

    def validate_rule(self, rulename: str, node: Rule):
        """Visit *node* with ``self.rulename`` set to *rulename*.

        Any exception raised by the visit propagates to the caller.
        """
        self.rulename = rulename
        try:
            self.visit(node)
        finally:
            # Reset even when the visit raises, so a validator that is
            # reused after a failure does not carry a stale rule name.
            self.rulename = None
33+
34+
35+
class SubRuleValidator(GrammarValidator):
    """Flag alternatives that begin with the full text of an earlier one.

    For every ``Rhs`` visited, each alternative is compared with all the
    alternatives that follow it; a match raises ``ValidationError``.
    """

    def visit_Rhs(self, node: Rhs) -> None:
        """Check every ordered pair of alternatives in *node*."""
        for index, alt in enumerate(node.alts):
            # Only later alternatives can collide with this one.
            for other_alt in node.alts[index + 1:]:
                self.check_intersection(alt, other_alt)

    def check_intersection(self, first_alt: Alt, second_alt: Alt) -> None:
        """Raise if ``str(second_alt)`` starts with ``str(first_alt)``.

        Raises:
            ValidationError: naming the current rule and *second_alt*.
        """
        # NOTE(review): this is a plain textual prefix test on the str()
        # forms, so it does not respect item boundaries -- confirm that is
        # acceptable for names sharing a prefix.
        if str(second_alt).startswith(str(first_alt)):
            raise ValidationError(
                f"In {self.rulename} there is an alternative that will "
                f"never be visited:\n{second_alt}")
47+
48+
def validate_grammar(the_grammar: grammar.Grammar):
    """Run each direct ``GrammarValidator`` subclass over *the_grammar*.

    One validator instance is created per subclass and is handed every
    ``(name, rule)`` pair from ``the_grammar.rules``. Exceptions raised by a
    validator propagate to the caller.
    """
    for checker_type in GrammarValidator.__subclasses__():
        checker = checker_type(the_grammar)
        for name, rule_node in the_grammar.rules.items():
            checker.validate_rule(name, rule_node)

0 commit comments

Comments
 (0)