@@ -1,40 +1,23 @@
-import os
-import sys
 import collections
-import importlib.machinery
+import tokenize  # from stdlib
 
-# Use Lib/token.py and Lib/tokenize.py to obtain the tokens. To maintain this
-# compatible with older versions of Python, we need to make sure that we only
-# import these two files (and not any of the dependencies of these files).
-
-CURRENT_FOLDER_LOCATION = os.path.dirname(os.path.realpath(__file__))
-LIB_LOCATION = os.path.realpath(os.path.join(CURRENT_FOLDER_LOCATION, '..', '..', 'Lib'))
-TOKEN_LOCATION = os.path.join(LIB_LOCATION, 'token.py')
-TOKENIZE_LOCATION = os.path.join(LIB_LOCATION, 'tokenize.py')
-
-token = importlib.machinery.SourceFileLoader('token',
-                                             TOKEN_LOCATION).load_module()
-# Add token to the module cache so tokenize.py uses that excact one instead of
-# the one in the stdlib of the interpreter executing this file.
-sys.modules['token'] = token
-tokenize = importlib.machinery.SourceFileLoader('tokenize',
-                                                TOKENIZE_LOCATION).load_module()
-
-from . import grammar
+from . import grammar, token
 
 class ParserGenerator(object):
 
-    def __init__(self, filename, stream=None, verbose=False):
+    def __init__(self, grammar_file, token_file, stream=None, verbose=False):
         close_stream = None
         if stream is None:
-            stream = open(filename)
+            stream = open(grammar_file)
             close_stream = stream.close
-        self.tokens = token
-        self.opmap = token.EXACT_TOKEN_TYPES
+        with open(token_file) as tok_file:
+            token_lines = tok_file.readlines()
+        self.tokens = dict(token.generate_tokens(token_lines))
+        self.opmap = dict(token.generate_opmap(token_lines))
         # Manually add <> so it does not collide with !=
-        self.opmap['<>'] = self.tokens.NOTEQUAL
+        self.opmap['<>'] = "NOTEQUAL"
         self.verbose = verbose
-        self.filename = filename
+        self.filename = grammar_file
         self.stream = stream
         self.generator = tokenize.generate_tokens(stream.readline)
         self.gettoken()  # Initialize lookahead
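
The rewritten constructor above relies on two helpers from the package-local token module (pulled in by the new "from . import grammar, token" line) that are not shown in this diff: generate_tokens() and generate_opmap(). The sketch below is only an assumption about how such helpers could parse a token-definition file whose non-comment lines give a token name, optionally followed by the quoted operator it corresponds to (for example: NOTEQUAL '!='); the real helpers may differ.

import itertools


def generate_tokens(token_lines):
    # Yield (token_name, token_number) pairs, numbering tokens in file order,
    # so dict(generate_tokens(lines)) maps names such as 'NAME' to ints.
    numbers = itertools.count()
    for line in token_lines:
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        yield (line.split()[0], next(numbers))


def generate_opmap(token_lines):
    # Yield (operator_string, token_name) pairs for lines that also define an
    # operator, so dict(generate_opmap(lines)) maps '!=' to 'NOTEQUAL'.
    for line in token_lines:
        pieces = line.strip().split()
        if len(pieces) == 2 and not pieces[0].startswith('#'):
            name, op = pieces
            yield (op.strip("'"), name)

Under this scheme the opmap values are token names rather than numeric codes, which is why the hunk above stores "NOTEQUAL" for '<>' and why make_label() below resolves an operator in two steps: opmap first, then tokens.
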
@@ -108,9 +91,9 @@ def make_label(self, c, label):
                     return ilabel
             else:
                 # A named token (NAME, NUMBER, STRING)
-                itoken = getattr(self.tokens, label, None)
+                itoken = self.tokens.get(label, None)
                 assert isinstance(itoken, int), label
-                assert itoken in self.tokens.tok_name, label
+                assert itoken in self.tokens.values(), label
                 if itoken in c.tokens:
                     return c.tokens[itoken]
                 else:
@@ -126,12 +109,13 @@ def make_label(self, c, label):
                 if value in c.keywords:
                     return c.keywords[value]
                 else:
-                    c.labels.append((self.tokens.NAME, value))
+                    c.labels.append((self.tokens["NAME"], value))
                     c.keywords[value] = ilabel
                     return ilabel
             else:
                 # An operator (any non-numeric token)
-                itoken = self.opmap[value] # Fails if unknown token
+                tok_name = self.opmap[value] # Fails if unknown token
+                itoken = self.tokens[tok_name]
                 if itoken in c.tokens:
                     return c.tokens[itoken]
                 else:
@@ -184,16 +168,16 @@ def parse(self):
         dfas = collections.OrderedDict()
         startsymbol = None
         # MSTART: (NEWLINE | RULE)* ENDMARKER
-        while self.type != self.tokens.ENDMARKER:
-            while self.type == self.tokens.NEWLINE:
+        while self.type != tokenize.ENDMARKER:
+            while self.type == tokenize.NEWLINE:
                 self.gettoken()
             # RULE: NAME ':' RHS NEWLINE
-            name = self.expect(self.tokens.NAME)
+            name = self.expect(tokenize.NAME)
             if self.verbose:
                 print("Processing rule {dfa_name}".format(dfa_name=name))
-            self.expect(self.tokens.OP, ":")
+            self.expect(tokenize.OP, ":")
             a, z = self.parse_rhs()
-            self.expect(self.tokens.NEWLINE)
+            self.expect(tokenize.NEWLINE)
             if self.verbose:
                 self.dump_nfa(name, a, z)
             dfa = self.make_dfa(a, z)
@@ -309,7 +293,7 @@ def parse_alt(self):
         # ALT: ITEM+
         a, b = self.parse_item()
         while (self.value in ("(", "[") or
-               self.type in (self.tokens.NAME, self.tokens.STRING)):
+               self.type in (tokenize.NAME, tokenize.STRING)):
             c, d = self.parse_item()
             b.addarc(c)
             b = d
@@ -320,7 +304,7 @@ def parse_item(self):
         if self.value == "[":
             self.gettoken()
             a, z = self.parse_rhs()
-            self.expect(self.tokens.OP, "]")
+            self.expect(tokenize.OP, "]")
             a.addarc(z)
             return a, z
         else:
@@ -340,9 +324,9 @@ def parse_atom(self):
         if self.value == "(":
             self.gettoken()
             a, z = self.parse_rhs()
-            self.expect(self.tokens.OP, ")")
+            self.expect(tokenize.OP, ")")
             return a, z
-        elif self.type in (self.tokens.NAME, self.tokens.STRING):
+        elif self.type in (tokenize.NAME, tokenize.STRING):
             a = NFAState()
             z = NFAState()
             a.addarc(z, self.value)
@@ -365,7 +349,7 @@ def gettoken(self):
         while tup[0] in (tokenize.COMMENT, tokenize.NL):
             tup = next(self.generator)
         self.type, self.value, self.begin, self.end, self.line = tup
-        #print self.tokens['tok_name'][self.type], repr(self.value)
+        # print(getattr(tokenize, 'tok_name')[self.type], repr(self.value))
 
     def raise_error(self, msg, *args):
         if args:
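
Taken together, after this change self.tokens maps token names to numeric codes and self.opmap maps operator strings to token names. A hypothetical call site, purely illustrative; the import path and file locations are assumptions, not part of this diff:

from pgen.pgen import ParserGenerator  # assumed import path for this package

p = ParserGenerator('Grammar/Grammar', 'Grammar/Tokens')
print(p.tokens['NAME'])  # numeric code assigned to the NAME token
print(p.opmap['!='])     # the token name string, e.g. 'NOTEQUAL'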