Skip to content

Commit 45cf5db

Browse files
authored
Allow pgen to produce a DOT format dump of the grammar (GH-18005)
Originally suggested by Anthony Shaw.
1 parent 65a5ce2 commit 45cf5db

File tree

3 files changed

+46
-2
lines changed

3 files changed

+46
-2
lines changed

Parser/pgen/__main__.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,19 @@ def main():
2121
)
2222

2323
parser.add_argument("--verbose", "-v", action="count")
24+
parser.add_argument(
25+
"--graph",
26+
type=argparse.FileType("w"),
27+
action="store",
28+
metavar="GRAPH_OUTPUT_FILE",
29+
help="Dumps a DOT representation of the generated automata in a file",
30+
)
31+
2432
args = parser.parse_args()
2533

26-
p = ParserGenerator(args.grammar, args.tokens, verbose=args.verbose)
34+
p = ParserGenerator(
35+
args.grammar, args.tokens, verbose=args.verbose, graph_file=args.graph
36+
)
2737
grammar = p.make_grammar()
2838
grammar.produce_graminit_h(args.graminit_h.write)
2939
grammar.produce_graminit_c(args.graminit_c.write)

Parser/pgen/automata.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,26 @@ def dump(self, writer=print):
4848
else:
4949
writer(" %s -> %d" % (label, j))
5050

51+
def dump_graph(self, writer):
    """Dump a DOT representation of the NFA.

    States are numbered in the order they are first reached from
    ``self.start``; the state that is ``self.end`` is tagged "(final)".
    Arcs whose label is ``None`` are drawn dotted and labelled with an
    epsilon; for every other arc, single quotes in the label are replaced
    by double quotes before it is written out.

    ``writer`` is called once per emitted chunk of text; each chunk
    already ends with a newline.
    """
    writer('digraph %s_nfa {\n' % self.name)
    # The worklist doubles as the numbering table: a state's position in
    # ``todo`` is its node id. Appending while enumerating is deliberate,
    # so newly discovered states get visited by this same loop.
    todo = [self.start]
    for i, state in enumerate(todo):
        final_tag = "(final)" if state is self.end else ""
        writer(' %d [label="State %d %s"];\n' % (i, i, final_tag))
        for arc in state.arcs:
            label = arc.label
            target = arc.target
            # One scan instead of ``in`` followed by ``index``.
            try:
                j = todo.index(target)
            except ValueError:
                j = len(todo)
                todo.append(target)
            if label is None:
                writer(" %d -> %d [style=dotted label=ε];\n" % (i, j))
            else:
                writer(" %d -> %d [label=%s];\n" % (i, j, label.replace("'", '"')))
    writer('}\n')
70+
5171

5272
class NFAArc:
5373
"""An arc representing a transition between two NFA states.
@@ -301,6 +321,15 @@ def dump(self, writer=print):
301321
for label, next in sorted(state.arcs.items()):
302322
writer(" %s -> %d" % (label, self.states.index(next)))
303323

324+
def dump_graph(self, writer):
    """Dump a DOT representation of the DFA.

    Each entry of ``self.states`` becomes a node numbered by its position
    in that list; states whose ``is_final`` is truthy are tagged
    "(final)". Outgoing arcs are emitted in sorted order of
    ``state.arcs.items()``, with single quotes in each label replaced by
    double quotes.

    ``writer`` is called once per emitted chunk of text; each chunk
    already ends with a newline.
    """
    writer('digraph %s_dfa {\n' % self.name)
    for i, state in enumerate(self.states):
        final_tag = "(final)" if state.is_final else ""
        writer(' %d [label="State %d %s"];\n' % (i, i, final_tag))
        for label, target in sorted(state.arcs.items()):
            # The target's node id is its position in the state list.
            j = self.states.index(target)
            writer(" %d -> %d [label=%s];\n" % (i, j, label.replace("'", '"')))
    writer('}\n')
332+
304333

305334
class DFAState(object):
306335
"""A state of a DFA

Parser/pgen/pgen.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ def __repr__(self):
130130

131131

132132
class ParserGenerator(object):
133-
def __init__(self, grammar_file, token_file, verbose=False):
133+
def __init__(self, grammar_file, token_file, verbose=False, graph_file=None):
134134
with open(grammar_file) as f:
135135
self.grammar = f.read()
136136
with open(token_file) as tok_file:
@@ -141,6 +141,7 @@ def __init__(self, grammar_file, token_file, verbose=False):
141141
self.opmap["<>"] = "NOTEQUAL"
142142
self.verbose = verbose
143143
self.filename = grammar_file
144+
self.graph_file = graph_file
144145
self.dfas, self.startsymbol = self.create_dfas()
145146
self.first = {} # map from symbol name to set of tokens
146147
self.calculate_first_sets()
@@ -152,11 +153,15 @@ def create_dfas(self):
152153
if self.verbose:
153154
print("Dump of NFA for", nfa.name)
154155
nfa.dump()
156+
if self.graph_file is not None:
157+
nfa.dump_graph(self.graph_file.write)
155158
dfa = DFA.from_nfa(nfa)
156159
if self.verbose:
157160
print("Dump of DFA for", dfa.name)
158161
dfa.dump()
159162
dfa.simplify()
163+
if self.graph_file is not None:
164+
dfa.dump_graph(self.graph_file.write)
160165
rule_to_dfas[dfa.name] = dfa
161166

162167
if start_nonterminal is None:

0 commit comments

Comments
 (0)