Skip to content

Commit 7ba08ff

Browse files
authored
bpo-40334: use the TOKENS file when checking dangling rules (GH-19849)
1 parent 6bd99d5 commit 7ba08ff

File tree

5 files changed

+31
-15
lines changed

5 files changed

+31
-15
lines changed

Tools/peg_generator/pegen/build.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,8 @@
1717

1818
MOD_DIR = pathlib.Path(__file__).resolve().parent
1919

20+
TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]
21+
2022

2123
def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
2224
flags = sysconfig.get_config_var(compiler_flags)
@@ -112,7 +114,8 @@ def build_parser(
112114
return grammar, parser, tokenizer
113115

114116

115-
def generate_token_definitions(tokens: IO[str]) -> Tuple[Dict[str, int], Set[str]]:
117+
def generate_token_definitions(tokens: IO[str]) -> TokenDefinitions:
118+
all_tokens = {}
116119
exact_tokens = {}
117120
non_exact_tokens = set()
118121
numbers = itertools.count(0)
@@ -129,13 +132,15 @@ def generate_token_definitions(tokens: IO[str]) -> Tuple[Dict[str, int], Set[str
129132
if len(pieces) == 1:
130133
(token,) = pieces
131134
non_exact_tokens.add(token)
135+
all_tokens[index] = token
132136
elif len(pieces) == 2:
133-
_, op = pieces
137+
token, op = pieces
134138
exact_tokens[op.strip("'")] = index
139+
all_tokens[index] = token
135140
else:
136141
raise ValueError(f"Unexpected line found in Tokens file: {line}")
137142

138-
return exact_tokens, non_exact_tokens
143+
return all_tokens, exact_tokens, non_exact_tokens
139144

140145

141146
def build_c_generator(
@@ -149,10 +154,10 @@ def build_c_generator(
149154
skip_actions: bool = False,
150155
) -> ParserGenerator:
151156
with open(tokens_file, "r") as tok_file:
152-
exact_tok, non_exact_tok = generate_token_definitions(tok_file)
157+
all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
153158
with open(output_file, "w") as file:
154159
gen: ParserGenerator = CParserGenerator(
155-
grammar, exact_tok, non_exact_tok, file, skip_actions=skip_actions
160+
grammar, all_tokens, exact_tok, non_exact_tok, file, skip_actions=skip_actions
156161
)
157162
gen.generate(grammar_file)
158163

Tools/peg_generator/pegen/c_generator.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -265,13 +265,14 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
265265
def __init__(
266266
self,
267267
grammar: grammar.Grammar,
268+
tokens: Dict[int, str],
268269
exact_tokens: Dict[str, int],
269270
non_exact_tokens: Set[str],
270271
file: Optional[IO[Text]],
271272
debug: bool = False,
272273
skip_actions: bool = False,
273274
):
274-
super().__init__(grammar, file)
275+
super().__init__(grammar, tokens, file)
275276
self.callmakervisitor: CCallMakerVisitor = CCallMakerVisitor(
276277
self, exact_tokens, non_exact_tokens
277278
)

Tools/peg_generator/pegen/parser_generator.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
import contextlib
2-
import token
32
from abc import abstractmethod
43

54
from typing import AbstractSet, Dict, IO, Iterator, List, Optional, Set, Text, Tuple
@@ -19,11 +18,12 @@
1918

2019

2120
class RuleCheckingVisitor(GrammarVisitor):
22-
def __init__(self, rules: Dict[str, Rule]):
21+
def __init__(self, rules: Dict[str, Rule], tokens: Dict[int, str]):
2322
self.rules = rules
23+
self.tokens = tokens
2424

2525
def visit_NameLeaf(self, node: NameLeaf) -> None:
26-
if node.value not in self.rules and node.value not in token.tok_name.values():
26+
if node.value not in self.rules and node.value not in self.tokens.values():
2727
# TODO: Add line/col info to (leaf) nodes
2828
raise GrammarError(f"Dangling reference to rule {node.value!r}")
2929

@@ -32,12 +32,13 @@ class ParserGenerator:
3232

3333
callmakervisitor: GrammarVisitor
3434

35-
def __init__(self, grammar: Grammar, file: Optional[IO[Text]]):
35+
def __init__(self, grammar: Grammar, tokens: Dict[int, str], file: Optional[IO[Text]]):
3636
self.grammar = grammar
37+
self.tokens = tokens
3738
self.rules = grammar.rules
3839
if "trailer" not in grammar.metas and "start" not in self.rules:
3940
raise GrammarError("Grammar without a trailer must have a 'start' rule")
40-
checker = RuleCheckingVisitor(self.rules)
41+
checker = RuleCheckingVisitor(self.rules, self.tokens)
4142
for rule in self.rules.values():
4243
checker.visit(rule)
4344
self.file = file

Tools/peg_generator/pegen/python_generator.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import token
12
from typing import Any, Dict, Optional, IO, Text, Tuple
23

34
from pegen.grammar import (
@@ -123,8 +124,13 @@ def visit_Cut(self, node: Cut) -> Tuple[str, str]:
123124

124125

125126
class PythonParserGenerator(ParserGenerator, GrammarVisitor):
126-
def __init__(self, grammar: grammar.Grammar, file: Optional[IO[Text]]):
127-
super().__init__(grammar, file)
127+
def __init__(
128+
self,
129+
grammar: grammar.Grammar,
130+
file: Optional[IO[Text]],
131+
tokens: Dict[int, str] = token.tok_name,
132+
):
133+
super().__init__(grammar, tokens, file)
128134
self.callmakervisitor = PythonCallMakerVisitor(self)
129135

130136
def generate(self, filename: str) -> None:

Tools/peg_generator/pegen/testutil.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
from pegen.python_generator import PythonParserGenerator
1818
from pegen.tokenizer import Tokenizer
1919

20+
ALL_TOKENS = token.tok_name
2021
EXACT_TOKENS = token.EXACT_TOKEN_TYPES # type: ignore
2122
NON_EXACT_TOKENS = {
2223
name for index, name in token.tok_name.items() if index not in EXACT_TOKENS.values()
@@ -76,7 +77,7 @@ def import_file(full_name: str, path: str) -> Any:
7677

7778
def generate_c_parser_source(grammar: Grammar) -> str:
7879
out = io.StringIO()
79-
genr = CParserGenerator(grammar, EXACT_TOKENS, NON_EXACT_TOKENS, out)
80+
genr = CParserGenerator(grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, out)
8081
genr.generate("<string>")
8182
return out.getvalue()
8283

@@ -96,7 +97,9 @@ def generate_parser_c_extension(
9697
assert not os.listdir(path)
9798
source = path / "parse.c"
9899
with open(source, "w") as file:
99-
genr = CParserGenerator(grammar, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug)
100+
genr = CParserGenerator(
101+
grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug
102+
)
100103
genr.generate("parse.c")
101104
compile_c_extension(str(source), build_dir=str(path))
102105

0 commit comments

Comments
 (0)