3
3
4
4
# Pgen imports
5
5
from . import grammar , token , tokenize
6
+ import collections
7
+
6
8
7
9
class PgenGrammar(grammar.Grammar):
    """Grammar subclass that can also emit the generated C sources
    (``graminit.h`` / ``graminit.c``) used by CPython's parser.

    Every ``produce_*`` / ``print_*`` method takes a ``writer`` callable
    (typically a file's ``write`` method) and emits generated C text
    through it.
    """

    def produce_graminit_h(self, writer):
        """Emit one #define per non-terminal symbol number."""
        writer("/* Generated by Parser/pgen2 */\n\n")
        for number, symbol in self.number2symbol.items():
            writer("#define {} {}\n".format(symbol, number))

    def produce_graminit_c(self, writer):
        """Emit the complete graminit.c: arc/state arrays, the dfas and
        labels tables, and the _PyParser_Grammar struct tying them together."""
        writer("/* Generated by Parser/pgen2 */\n\n")

        writer('#include "pgenheaders.h"\n')
        writer('#include "grammar.h"\n')
        writer("grammar _PyParser_Grammar;\n")

        self.print_dfas(writer)
        self.print_labels(writer)

        writer("grammar _PyParser_Grammar = {\n")
        writer("    {n_dfas},\n".format(n_dfas=len(self.dfas)))
        writer("    dfas,\n")
        writer("    {{{n_labels}, labels}},\n".format(n_labels=len(self.labels)))
        writer("    {start_number}\n".format(start_number=self.start))
        writer("};\n")

    def print_labels(self, writer):
        """Emit the static ``labels`` array: (label number, name-or-0)."""
        writer(
            "static label labels[{n_labels}] = {{\n".format(n_labels=len(self.labels))
        )
        for label, name in self.labels:
            if name is None:
                writer("    {{{label}, 0}},\n".format(label=label))
            else:
                writer(
                    '    {{{label}, "{label_name}"}},\n'.format(
                        label=label, label_name=name
                    )
                )
        writer("};\n")

    def print_dfas(self, writer):
        """Emit the state arrays followed by the ``dfas`` table.

        Each dfa entry ends with its FIRST set encoded as a bitmask over
        label numbers, written as one octal escape per byte.
        """
        self.print_states(writer)
        writer("static dfa dfas[{}] = {{\n".format(len(self.dfas)))
        for dfaindex, dfa_elem in enumerate(self.dfas.items()):
            symbol, (dfa, first_sets) = dfa_elem
            writer(
                '    {{{dfa_symbol}, "{symbol_name}", '.format(
                    dfa_symbol=symbol, symbol_name=self.number2symbol[symbol]
                )
                + "0, {n_states}, states_{dfa_index},\n".format(
                    n_states=len(dfa), dfa_index=dfaindex
                )
            )
            writer('     "')

            # One bit per label: bit i is set iff label i is in the FIRST set.
            # NOTE(review): dropped an unused list comprehension over
            # self.labels (dead code), and renamed the loop variable from
            # ``token`` -- it shadowed the module-level ``token`` import.
            bitset = bytearray((len(self.labels) >> 3) + 1)
            for ilabel in first_sets:
                bitset[ilabel >> 3] |= 1 << (ilabel & 7)
            for byte in bitset:
                writer("\\%03o" % (byte & 0xFF))
            writer('"},\n')
        writer("};\n")

    def print_states(self, write):
        """Emit one ``states_<dfa>`` array per dfa, preceded by its arcs."""
        for dfaindex, dfa in enumerate(self.states):
            self.print_arcs(write, dfaindex, dfa)
            write(
                "static state states_{dfa_index}[{n_states}] = {{\n".format(
                    dfa_index=dfaindex, n_states=len(dfa)
                )
            )
            for stateindex, state in enumerate(dfa):
                narcs = len(state)
                write(
                    "    {{{n_arcs}, arcs_{dfa_index}_{state_index}}},\n".format(
                        n_arcs=narcs, dfa_index=dfaindex, state_index=stateindex
                    )
                )
            write("};\n")

    def print_arcs(self, write, dfaindex, states):
        """Emit one ``arcs_<dfa>_<state>`` array per state of one dfa."""
        for stateindex, state in enumerate(states):
            narcs = len(state)
            write(
                "static arc arcs_{dfa_index}_{state_index}[{n_arcs}] = {{\n".format(
                    dfa_index=dfaindex, state_index=stateindex, n_arcs=narcs
                )
            )
            for a, b in state:
                write(
                    "    {{{from_label}, {to_state}}},\n".format(
                        from_label=a, to_state=b
                    )
                )
            write("};\n")
9
104
10
105
class ParserGenerator (object ):
11
106
12
- def __init__ (self , filename , stream = None ):
107
+ def __init__ (self , filename , stream = None , verbose = False ):
13
108
close_stream = None
14
109
if stream is None :
15
110
stream = open (filename )
16
111
close_stream = stream .close
112
+ self .verbose = verbose
17
113
self .filename = filename
18
114
self .stream = stream
19
115
self .generator = tokenize .generate_tokens (stream .readline )
@@ -27,14 +123,14 @@ def __init__(self, filename, stream=None):
27
123
def make_grammar (self ):
28
124
c = PgenGrammar ()
29
125
names = list (self .dfas .keys ())
30
- names .sort ()
31
126
names .remove (self .startsymbol )
32
127
names .insert (0 , self .startsymbol )
33
128
for name in names :
34
129
i = 256 + len (c .symbol2number )
35
130
c .symbol2number [name ] = i
36
131
c .number2symbol [i ] = name
37
132
for name in names :
133
+ self .make_label (c , name )
38
134
dfa = self .dfas [name ]
39
135
states = []
40
136
for state in dfa :
@@ -47,15 +143,30 @@ def make_grammar(self):
47
143
c .states .append (states )
48
144
c .dfas [c .symbol2number [name ]] = (states , self .make_first (c , name ))
49
145
c .start = c .symbol2number [self .startsymbol ]
146
+
147
+ if self .verbose :
148
+ print ("" )
149
+ print ("Grammar summary" )
150
+ print ("===============" )
151
+
152
+ print ("- {n_labels} labels" .format (n_labels = len (c .labels )))
153
+ print ("- {n_dfas} dfas" .format (n_dfas = len (c .dfas )))
154
+ print ("- {n_tokens} tokens" .format (n_tokens = len (c .tokens )))
155
+ print ("- {n_keywords} keywords" .format (n_keywords = len (c .keywords )))
156
+ print (
157
+ "- Start symbol: {start_symbol}" .format (
158
+ start_symbol = c .number2symbol [c .start ]
159
+ )
160
+ )
50
161
return c
51
162
52
163
def make_first(self, c, name):
    """Translate the raw FIRST set of rule *name* into a set of ilabels
    registered in grammar *c*.

    Labels are visited in sorted order so make_label assigns ilabel
    numbers deterministically.
    """
    # XXX a duplicate-ilabel assert used to live here; it failed on <> vs !=
    return {self.make_label(c, label) for label in sorted(self.first[name])}
60
171
61
172
def make_label (self , c , label ):
@@ -106,17 +217,20 @@ def make_label(self, c, label):
106
217
107
218
def addfirstsets(self):
    """Make sure self.first has an entry for every rule, computing the
    FIRST set of any rule that does not have one yet."""
    for name in list(self.dfas.keys()):
        if name not in self.first:
            self.calcfirst(name)

        if not self.verbose:
            continue
        print("First set for {dfa_name}".format(dfa_name=name))
        for item in self.first[name]:
            print("    - {terminal}".format(terminal=item))
114
228
115
229
def calcfirst (self , name ):
116
230
dfa = self .dfas [name ]
117
231
self .first [name ] = None # dummy to detect left recursion
118
232
state = dfa [0 ]
119
- totalset = {}
233
+ totalset = set ()
120
234
overlapcheck = {}
121
235
for label , next in state .arcs .items ():
122
236
if label in self .dfas :
@@ -130,8 +244,8 @@ def calcfirst(self, name):
130
244
totalset .update (fset )
131
245
overlapcheck [label ] = fset
132
246
else :
133
- totalset [ label ] = 1
134
- overlapcheck [label ] = {label : 1 }
247
+ totalset . add ( label )
248
+ overlapcheck [label ] = {label }
135
249
inverse = {}
136
250
for label , itsfirst in overlapcheck .items ():
137
251
for symbol in itsfirst :
@@ -143,20 +257,24 @@ def calcfirst(self, name):
143
257
self .first [name ] = totalset
144
258
145
259
def parse (self ):
146
- dfas = {}
260
+ dfas = collections . OrderedDict ()
147
261
startsymbol = None
148
262
# MSTART: (NEWLINE | RULE)* ENDMARKER
149
263
while self .type != token .ENDMARKER :
150
264
while self .type == token .NEWLINE :
151
265
self .gettoken ()
152
266
# RULE: NAME ':' RHS NEWLINE
153
267
name = self .expect (token .NAME )
268
+ if self .verbose :
269
+ print ("Processing rule {dfa_name}" .format (dfa_name = name ))
154
270
self .expect (token .OP , ":" )
155
271
a , z = self .parse_rhs ()
156
272
self .expect (token .NEWLINE )
157
- #self.dump_nfa(name, a, z)
273
+ if self .verbose :
274
+ self .dump_nfa (name , a , z )
158
275
dfa = self .make_dfa (a , z )
159
- #self.dump_dfa(name, dfa)
276
+ if self .verbose :
277
+ self .dump_dfa (name , dfa )
160
278
oldlen = len (dfa )
161
279
self .simplify_dfa (dfa )
162
280
newlen = len (dfa )
@@ -174,14 +292,14 @@ def make_dfa(self, start, finish):
174
292
assert isinstance (start , NFAState )
175
293
assert isinstance (finish , NFAState )
176
294
def closure (state ):
177
- base = {}
295
+ base = set ()
178
296
addclosure (state , base )
179
297
return base
180
298
def addclosure (state , base ):
181
299
assert isinstance (state , NFAState )
182
300
if state in base :
183
301
return
184
- base [ state ] = 1
302
+ base . add ( state )
185
303
for label , next in state .arcs :
186
304
if label is None :
187
305
addclosure (next , base )
@@ -191,7 +309,7 @@ def addclosure(state, base):
191
309
for nfastate in state .nfaset :
192
310
for label , next in nfastate .arcs :
193
311
if label is not None :
194
- addclosure (next , arcs .setdefault (label , {} ))
312
+ addclosure (next , arcs .setdefault (label , set () ))
195
313
for label , nfaset in sorted (arcs .items ()):
196
314
for st in states :
197
315
if st .nfaset == nfaset :
@@ -347,7 +465,7 @@ def addarc(self, next, label=None):
347
465
class DFAState (object ):
348
466
349
467
def __init__ (self , nfaset , final ):
350
- assert isinstance (nfaset , dict )
468
+ assert isinstance (nfaset , set )
351
469
assert isinstance (next (iter (nfaset )), NFAState )
352
470
assert isinstance (final , NFAState )
353
471
self .nfaset = nfaset
@@ -381,6 +499,6 @@ def __eq__(self, other):
381
499
382
500
__hash__ = None # For Py3 compatibility.
383
501
384
def generate_grammar(filename="Grammar.txt", verbose=False):
    """Parse the grammar file *filename* and return the built PgenGrammar.

    When *verbose* is true the generator prints its intermediate
    NFA/DFA/FIRST-set information while parsing.
    """
    return ParserGenerator(filename, verbose=verbose).make_grammar()
0 commit comments