@@ -11,9 +11,10 @@ fn new_str_hash[V]() -> map.hashmap[str,V] {
11
11
12
12
// Interface of the lexer's character source. It exposes the current
// character plus one character of lookahead, and the position of the
// current character.
type reader = obj {
    // True when the current character is the end-of-input marker.
    fn is_eof() -> bool;
    // The current character (does not advance).
    fn curr() -> char;
    // The lookahead character following the current one (does not advance).
    fn next() -> char;
    // Advances by one character: the lookahead becomes current.
    fn bump();
    // Position of the current character as (filename, line, col).
    fn get_curr_pos() -> tup(str, uint, uint);
    // Keyword table: maps a keyword string to its token.
    fn get_keywords() -> hashmap[str, token.token];
    // Reserved-word table, keyed by string.
    fn get_reserved() -> hashmap[str, ()];
};
@@ -23,6 +24,7 @@ fn new_reader(stdio_reader rdr, str filename) -> reader
23
24
obj reader ( stdio_reader rdr,
24
25
str filename,
25
26
mutable char c,
27
+ mutable char n,
26
28
mutable uint line,
27
29
mutable uint col,
28
30
hashmap[ str, token. token ] keywords,
@@ -32,22 +34,33 @@ fn new_reader(stdio_reader rdr, str filename) -> reader
32
34
ret c == ( -1 ) as char ;
33
35
}
34
36
35
- fn get_pos ( ) -> tup ( str , uint , uint ) {
37
+ fn get_curr_pos ( ) -> tup ( str , uint , uint ) {
36
38
ret tup ( filename, line, col) ;
37
39
}
38
40
39
- fn peek ( ) -> char {
41
+ fn curr ( ) -> char {
40
42
ret c;
41
43
}
42
44
45
+ fn next ( ) -> char {
46
+ ret n;
47
+ }
48
+
43
49
fn bump ( ) {
44
- c = rdr. getc ( ) as char ;
50
+ c = n;
51
+
52
+ if ( c == ( -1 ) as char ) {
53
+ ret;
54
+ }
55
+
45
56
if ( c == '\n' ) {
46
57
line += 1 u;
47
58
col = 0 u;
48
59
} else {
49
60
col += 1 u;
50
61
}
62
+
63
+ n = rdr. getc ( ) as char ;
51
64
}
52
65
53
66
fn get_keywords ( ) -> hashmap [ str, token. token ] {
@@ -82,8 +95,8 @@ fn new_reader(stdio_reader rdr, str filename) -> reader
82
95
keywords. insert ( "ret" , token. RET ( ) ) ;
83
96
keywords. insert ( "be" , token. BE ( ) ) ;
84
97
85
- ret reader( rdr, filename, rdr. getc ( ) as char , 1 u , 1 u ,
86
- keywords, reserved) ;
98
+ ret reader( rdr, filename, rdr. getc ( ) as char , rdr . getc ( ) as char ,
99
+ 1 u , 1 u , keywords, reserved) ;
87
100
}
88
101
89
102
@@ -116,201 +129,196 @@ fn is_whitespace(char c) -> bool {
116
129
ret c == ' ' || c == '\t' || c == '\r' || c == '\n' ;
117
130
}
118
131
119
- fn consume_any_whitespace ( stdio_reader rdr, char c) -> char {
120
- auto c1 = c;
121
- while ( is_whitespace ( c1) ) {
122
- c1 = rdr. getc ( ) as char ;
132
+ fn consume_any_whitespace ( reader rdr) {
133
+ while ( is_whitespace ( rdr. curr ( ) ) ) {
134
+ rdr. bump ( ) ;
123
135
}
124
- be consume_any_line_comment ( rdr, c1 ) ;
136
+ be consume_any_line_comment ( rdr) ;
125
137
}
126
138
127
- fn consume_any_line_comment ( stdio_reader rdr, char c) -> char {
128
- auto c1 = c;
129
- if ( c1 == '/' ) {
130
- auto c2 = rdr. getc ( ) as char ;
131
- if ( c2 == '/' ) {
132
- while ( c1 != '\n' ) {
133
- c1 = rdr. getc ( ) as char ;
139
+ fn consume_any_line_comment ( reader rdr) {
140
+ if ( rdr. curr ( ) == '/' ) {
141
+ if ( rdr. next ( ) == '/' ) {
142
+ while ( rdr. curr ( ) != '\n' ) {
143
+ rdr. bump ( ) ;
134
144
}
135
145
// Restart whitespace munch.
136
- be consume_any_whitespace ( rdr, c1 ) ;
146
+ be consume_any_whitespace ( rdr) ;
137
147
}
138
148
}
139
- ret c;
140
149
}
141
150
142
- fn next_token ( stdio_reader rdr) -> token . token {
143
- auto eof = ( -1 ) as char ;
144
- auto c = rdr. getc ( ) as char ;
151
+ fn next_token ( reader rdr) -> token . token {
145
152
auto accum_str = "" ;
146
153
auto accum_int = 0 ;
147
154
148
- fn next ( stdio_reader rdr) -> char {
149
- ret rdr. getc ( ) as char ;
150
- }
151
-
152
- fn forget ( stdio_reader rdr, char c) {
153
- rdr. ungetc ( c as int ) ;
154
- }
155
+ consume_any_whitespace ( rdr) ;
155
156
156
- c = consume_any_whitespace ( rdr, c ) ;
157
+ if ( rdr. is_eof ( ) ) { ret token . EOF ( ) ; }
157
158
158
- if ( c == eof ) { ret token . EOF ( ) ; }
159
+ auto c = rdr . curr ( ) ;
159
160
160
161
if ( is_alpha ( c) ) {
161
- while ( is_alpha ( c) ) {
162
+ while ( is_alpha ( rdr. curr ( ) ) ) {
163
+ c = rdr. curr ( ) ;
162
164
accum_str += ( c as u8 ) ;
163
- c = next ( rdr) ;
165
+ rdr. bump ( ) ;
164
166
}
165
- forget ( rdr, c) ;
166
167
ret token. IDENT ( accum_str) ;
167
168
}
168
169
169
170
if ( is_dec_digit ( c) ) {
170
171
if ( c == '0' ) {
172
+ log "fixme: leading zero" ;
173
+ fail;
171
174
} else {
172
175
while ( is_dec_digit ( c) ) {
176
+ c = rdr. curr ( ) ;
173
177
accum_int *= 10 ;
174
178
accum_int += ( c as int ) - ( '0' as int ) ;
175
- c = next ( rdr) ;
179
+ rdr. bump ( ) ;
176
180
}
177
- forget ( rdr, c) ;
178
181
ret token. LIT_INT ( accum_int) ;
179
182
}
180
183
}
181
184
182
185
183
- fn op_or_opeq ( stdio_reader rdr, char c2,
184
- token . op op) -> token . token {
185
- if ( c2 == '=' ) {
186
+ fn op_or_opeq ( reader rdr, token . op op) -> token . token {
187
+ rdr. bump ( ) ;
188
+ if ( rdr. next ( ) == '=' ) {
189
+ rdr. bump ( ) ;
186
190
ret token. OPEQ ( op) ;
187
191
} else {
188
- forget ( rdr, c2) ;
189
192
ret token. OP ( op) ;
190
193
}
191
194
}
192
195
193
196
alt ( c) {
194
197
// One-byte tokens.
195
- case ( ';' ) { ret token. SEMI ( ) ; }
196
- case ( ',' ) { ret token. COMMA ( ) ; }
197
- case ( '.' ) { ret token. DOT ( ) ; }
198
- case ( '(' ) { ret token. LPAREN ( ) ; }
199
- case ( ')' ) { ret token. RPAREN ( ) ; }
200
- case ( '{' ) { ret token. LBRACE ( ) ; }
201
- case ( '}' ) { ret token. RBRACE ( ) ; }
202
- case ( '[' ) { ret token. LBRACKET ( ) ; }
203
- case ( ']' ) { ret token. RBRACKET ( ) ; }
204
- case ( '@' ) { ret token. AT ( ) ; }
205
- case ( '#' ) { ret token. POUND ( ) ; }
198
+ case ( ';' ) { rdr . bump ( ) ; ret token. SEMI ( ) ; }
199
+ case ( ',' ) { rdr . bump ( ) ; ret token. COMMA ( ) ; }
200
+ case ( '.' ) { rdr . bump ( ) ; ret token. DOT ( ) ; }
201
+ case ( '(' ) { rdr . bump ( ) ; ret token. LPAREN ( ) ; }
202
+ case ( ')' ) { rdr . bump ( ) ; ret token. RPAREN ( ) ; }
203
+ case ( '{' ) { rdr . bump ( ) ; ret token. LBRACE ( ) ; }
204
+ case ( '}' ) { rdr . bump ( ) ; ret token. RBRACE ( ) ; }
205
+ case ( '[' ) { rdr . bump ( ) ; ret token. LBRACKET ( ) ; }
206
+ case ( ']' ) { rdr . bump ( ) ; ret token. RBRACKET ( ) ; }
207
+ case ( '@' ) { rdr . bump ( ) ; ret token. AT ( ) ; }
208
+ case ( '#' ) { rdr . bump ( ) ; ret token. POUND ( ) ; }
206
209
207
210
// Multi-byte tokens.
208
211
case ( '=' ) {
209
- auto c2 = next ( rdr) ;
210
- if ( c2 == '=' ) {
212
+ if ( rdr. next ( ) == '=' ) {
213
+ rdr. bump ( ) ;
214
+ rdr. bump ( ) ;
211
215
ret token. OP ( token. EQEQ ( ) ) ;
212
216
} else {
213
- forget ( rdr, c2 ) ;
217
+ rdr. bump ( ) ;
214
218
ret token. OP ( token. EQ ( ) ) ;
215
219
}
216
220
}
217
221
218
222
case ( '\'' ) {
219
- // FIXME: general utf8-consumption support.
220
- auto c2 = next ( rdr) ;
223
+ rdr . bump ( ) ;
224
+ auto c2 = rdr. curr ( ) ;
221
225
if ( c2 == '\\' ) {
222
- c2 = next ( rdr) ;
223
- alt ( c2) {
224
- case ( 'n' ) { c2 = '\n' ; }
225
- case ( 'r' ) { c2 = '\r' ; }
226
- case ( 't' ) { c2 = '\t' ; }
227
- case ( '\\' ) { c2 = '\\' ; }
228
- case ( '\'' ) { c2 = '\'' ; }
226
+ alt ( rdr. next ( ) ) {
227
+ case ( 'n' ) { rdr. bump ( ) ; c2 = '\n' ; }
228
+ case ( 'r' ) { rdr. bump ( ) ; c2 = '\r' ; }
229
+ case ( 't' ) { rdr. bump ( ) ; c2 = '\t' ; }
230
+ case ( '\\' ) { rdr. bump ( ) ; c2 = '\\' ; }
231
+ case ( '\'' ) { rdr. bump ( ) ; c2 = '\'' ; }
229
232
// FIXME: unicode numeric escapes.
230
- case ( _ ) {
233
+ case ( c2 ) {
231
234
log "unknown character escape" ;
232
235
log c2;
233
236
fail;
234
237
}
235
238
}
236
239
}
237
- if ( next ( rdr) != '\'' ) {
240
+
241
+ if ( rdr. next ( ) != '\'' ) {
238
242
log "unterminated character constant" ;
239
243
fail;
240
244
}
245
+ rdr. bump ( ) ;
246
+ rdr. bump ( ) ;
241
247
ret token. LIT_CHAR ( c2) ;
242
248
}
243
249
244
250
case ( '"' ) {
251
+ rdr. bump ( ) ;
245
252
// FIXME: general utf8-consumption support.
246
- auto c2 = next ( rdr) ;
247
- while ( c2 != '"' ) {
248
- alt ( c2) {
253
+ while ( rdr. curr ( ) != '"' ) {
254
+ alt ( rdr. curr ( ) ) {
249
255
case ( '\\' ) {
250
- c2 = next ( rdr) ;
251
- alt ( c2) {
252
- case ( 'n' ) { accum_str += '\n' as u8 ; }
253
- case ( 'r' ) { accum_str += '\r' as u8 ; }
254
- case ( 't' ) { accum_str += '\t' as u8 ; }
255
- case ( '\\' ) { accum_str += '\\' as u8 ; }
256
- case ( '"' ) { accum_str += '"' as u8 ; }
256
+ alt ( rdr. next ( ) ) {
257
+ case ( 'n' ) { rdr. bump ( ) ; accum_str += '\n' as u8 ; }
258
+ case ( 'r' ) { rdr. bump ( ) ; accum_str += '\r' as u8 ; }
259
+ case ( 't' ) { rdr. bump ( ) ; accum_str += '\t' as u8 ; }
260
+ case ( '\\' ) { rdr. bump ( ) ; accum_str += '\\' as u8 ; }
261
+ case ( '"' ) { rdr. bump ( ) ; accum_str += '"' as u8 ; }
257
262
// FIXME: unicode numeric escapes.
258
- case ( _ ) {
263
+ case ( c2 ) {
259
264
log "unknown string escape" ;
260
265
log c2;
261
266
fail;
262
267
}
263
268
}
264
269
}
265
270
case ( _) {
266
- accum_str += c2 as u8 ;
271
+ accum_str += rdr . curr ( ) as u8 ;
267
272
}
268
273
}
269
- c2 = next ( rdr) ;
274
+ rdr. bump ( ) ;
270
275
}
276
+ rdr. bump ( ) ;
271
277
ret token. LIT_STR ( accum_str) ;
272
278
}
273
279
274
280
case ( '-' ) {
275
- auto c2 = next ( rdr) ;
276
- if ( c2 == '>' ) {
281
+ if ( rdr. next ( ) == '>' ) {
282
+ rdr. bump ( ) ;
283
+ rdr. bump ( ) ;
277
284
ret token. RARROW ( ) ;
278
285
} else {
279
- ret op_or_opeq ( rdr, c2 , token. MINUS ( ) ) ;
286
+ ret op_or_opeq ( rdr, token. MINUS ( ) ) ;
280
287
}
281
288
}
282
289
283
290
case ( '&' ) {
284
- auto c2 = next ( rdr) ;
285
- if ( c2 == '&' ) {
291
+ if ( rdr. next ( ) == '&' ) {
292
+ rdr. bump ( ) ;
293
+ rdr. bump ( ) ;
286
294
ret token. OP ( token. ANDAND ( ) ) ;
287
295
} else {
288
- ret op_or_opeq ( rdr, c2 , token. AND ( ) ) ;
296
+ ret op_or_opeq ( rdr, token. AND ( ) ) ;
289
297
}
290
298
}
291
299
292
300
case ( '+' ) {
293
- ret op_or_opeq ( rdr, next ( rdr ) , token. PLUS ( ) ) ;
301
+ ret op_or_opeq ( rdr, token. PLUS ( ) ) ;
294
302
}
295
303
296
304
case ( '*' ) {
297
- ret op_or_opeq ( rdr, next ( rdr ) , token. STAR ( ) ) ;
305
+ ret op_or_opeq ( rdr, token. STAR ( ) ) ;
298
306
}
299
307
300
308
case ( '/' ) {
301
- ret op_or_opeq ( rdr, next ( rdr ) , token. STAR ( ) ) ;
309
+ ret op_or_opeq ( rdr, token. STAR ( ) ) ;
302
310
}
303
311
304
312
case ( '!' ) {
305
- ret op_or_opeq ( rdr, next ( rdr ) , token. NOT ( ) ) ;
313
+ ret op_or_opeq ( rdr, token. NOT ( ) ) ;
306
314
}
307
315
308
316
case ( '^' ) {
309
- ret op_or_opeq ( rdr, next ( rdr ) , token. CARET ( ) ) ;
317
+ ret op_or_opeq ( rdr, token. CARET ( ) ) ;
310
318
}
311
319
312
320
case ( '%' ) {
313
- ret op_or_opeq ( rdr, next ( rdr ) , token. PERCENT ( ) ) ;
321
+ ret op_or_opeq ( rdr, token. PERCENT ( ) ) ;
314
322
}
315
323
316
324
}
0 commit comments