Skip to content

Commit 56cfb78

Browse files
committed
---
yaml --- r: 616 b: refs/heads/master c: 1428b59 h: refs/heads/master v: v3
1 parent c5e481f commit 56cfb78

File tree

3 files changed

+104
-95
lines changed

3 files changed

+104
-95
lines changed

[refs]

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
---
2-
refs/heads/master: 88c9759347dddb61cb9e9a1e9d524b365857cf67
2+
refs/heads/master: 1428b59a19610d838d7849c7ac518c40281fddd1

trunk/src/comp/fe/lexer.rs

Lines changed: 100 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@ fn new_str_hash[V]() -> map.hashmap[str,V] {
1111

1212
type reader = obj {
1313
fn is_eof() -> bool;
14-
fn peek() -> char;
14+
fn curr() -> char;
15+
fn next() -> char;
1516
fn bump();
16-
fn get_pos() -> tup(str,uint,uint);
17+
fn get_curr_pos() -> tup(str,uint,uint);
1718
fn get_keywords() -> hashmap[str,token.token];
1819
fn get_reserved() -> hashmap[str,()];
1920
};
@@ -23,6 +24,7 @@ fn new_reader(stdio_reader rdr, str filename) -> reader
2324
obj reader(stdio_reader rdr,
2425
str filename,
2526
mutable char c,
27+
mutable char n,
2628
mutable uint line,
2729
mutable uint col,
2830
hashmap[str,token.token] keywords,
@@ -32,22 +34,33 @@ fn new_reader(stdio_reader rdr, str filename) -> reader
3234
ret c == (-1) as char;
3335
}
3436

35-
fn get_pos() -> tup(str,uint,uint) {
37+
fn get_curr_pos() -> tup(str,uint,uint) {
3638
ret tup(filename, line, col);
3739
}
3840

39-
fn peek() -> char {
41+
fn curr() -> char {
4042
ret c;
4143
}
4244

45+
fn next() -> char {
46+
ret n;
47+
}
48+
4349
fn bump() {
44-
c = rdr.getc() as char;
50+
c = n;
51+
52+
if (c == (-1) as char) {
53+
ret;
54+
}
55+
4556
if (c == '\n') {
4657
line += 1u;
4758
col = 0u;
4859
} else {
4960
col += 1u;
5061
}
62+
63+
n = rdr.getc() as char;
5164
}
5265

5366
fn get_keywords() -> hashmap[str,token.token] {
@@ -82,8 +95,8 @@ fn new_reader(stdio_reader rdr, str filename) -> reader
8295
keywords.insert("ret", token.RET());
8396
keywords.insert("be", token.BE());
8497

85-
ret reader(rdr, filename, rdr.getc() as char, 1u, 1u,
86-
keywords, reserved);
98+
ret reader(rdr, filename, rdr.getc() as char, rdr.getc() as char,
99+
1u, 1u, keywords, reserved);
87100
}
88101

89102

@@ -116,201 +129,196 @@ fn is_whitespace(char c) -> bool {
116129
ret c == ' ' || c == '\t' || c == '\r' || c == '\n';
117130
}
118131

119-
fn consume_any_whitespace(stdio_reader rdr, char c) -> char {
120-
auto c1 = c;
121-
while (is_whitespace(c1)) {
122-
c1 = rdr.getc() as char;
132+
fn consume_any_whitespace(reader rdr) {
133+
while (is_whitespace(rdr.curr())) {
134+
rdr.bump();
123135
}
124-
be consume_any_line_comment(rdr, c1);
136+
be consume_any_line_comment(rdr);
125137
}
126138

127-
fn consume_any_line_comment(stdio_reader rdr, char c) -> char {
128-
auto c1 = c;
129-
if (c1 == '/') {
130-
auto c2 = rdr.getc() as char;
131-
if (c2 == '/') {
132-
while (c1 != '\n') {
133-
c1 = rdr.getc() as char;
139+
fn consume_any_line_comment(reader rdr) {
140+
if (rdr.curr() == '/') {
141+
if (rdr.next() == '/') {
142+
while (rdr.curr() != '\n') {
143+
rdr.bump();
134144
}
135145
// Restart whitespace munch.
136-
be consume_any_whitespace(rdr, c1);
146+
be consume_any_whitespace(rdr);
137147
}
138148
}
139-
ret c;
140149
}
141150

142-
fn next_token(stdio_reader rdr) -> token.token {
143-
auto eof = (-1) as char;
144-
auto c = rdr.getc() as char;
151+
fn next_token(reader rdr) -> token.token {
145152
auto accum_str = "";
146153
auto accum_int = 0;
147154

148-
fn next(stdio_reader rdr) -> char {
149-
ret rdr.getc() as char;
150-
}
151-
152-
fn forget(stdio_reader rdr, char c) {
153-
rdr.ungetc(c as int);
154-
}
155+
consume_any_whitespace(rdr);
155156

156-
c = consume_any_whitespace(rdr, c);
157+
if (rdr.is_eof()) { ret token.EOF(); }
157158

158-
if (c == eof) { ret token.EOF(); }
159+
auto c = rdr.curr();
159160

160161
if (is_alpha(c)) {
161-
while (is_alpha(c)) {
162+
while (is_alpha(rdr.curr())) {
163+
c = rdr.curr();
162164
accum_str += (c as u8);
163-
c = next(rdr);
165+
rdr.bump();
164166
}
165-
forget(rdr, c);
166167
ret token.IDENT(accum_str);
167168
}
168169

169170
if (is_dec_digit(c)) {
170171
if (c == '0') {
172+
log "fixme: leading zero";
173+
fail;
171174
} else {
172175
while (is_dec_digit(c)) {
176+
c = rdr.curr();
173177
accum_int *= 10;
174178
accum_int += (c as int) - ('0' as int);
175-
c = next(rdr);
179+
rdr.bump();
176180
}
177-
forget(rdr, c);
178181
ret token.LIT_INT(accum_int);
179182
}
180183
}
181184

182185

183-
fn op_or_opeq(stdio_reader rdr, char c2,
184-
token.op op) -> token.token {
185-
if (c2 == '=') {
186+
fn op_or_opeq(reader rdr, token.op op) -> token.token {
187+
rdr.bump();
188+
if (rdr.next() == '=') {
189+
rdr.bump();
186190
ret token.OPEQ(op);
187191
} else {
188-
forget(rdr, c2);
189192
ret token.OP(op);
190193
}
191194
}
192195

193196
alt (c) {
194197
// One-byte tokens.
195-
case (';') { ret token.SEMI(); }
196-
case (',') { ret token.COMMA(); }
197-
case ('.') { ret token.DOT(); }
198-
case ('(') { ret token.LPAREN(); }
199-
case (')') { ret token.RPAREN(); }
200-
case ('{') { ret token.LBRACE(); }
201-
case ('}') { ret token.RBRACE(); }
202-
case ('[') { ret token.LBRACKET(); }
203-
case (']') { ret token.RBRACKET(); }
204-
case ('@') { ret token.AT(); }
205-
case ('#') { ret token.POUND(); }
198+
case (';') { rdr.bump(); ret token.SEMI(); }
199+
case (',') { rdr.bump(); ret token.COMMA(); }
200+
case ('.') { rdr.bump(); ret token.DOT(); }
201+
case ('(') { rdr.bump(); ret token.LPAREN(); }
202+
case (')') { rdr.bump(); ret token.RPAREN(); }
203+
case ('{') { rdr.bump(); ret token.LBRACE(); }
204+
case ('}') { rdr.bump(); ret token.RBRACE(); }
205+
case ('[') { rdr.bump(); ret token.LBRACKET(); }
206+
case (']') { rdr.bump(); ret token.RBRACKET(); }
207+
case ('@') { rdr.bump(); ret token.AT(); }
208+
case ('#') { rdr.bump(); ret token.POUND(); }
206209

207210
// Multi-byte tokens.
208211
case ('=') {
209-
auto c2 = next(rdr);
210-
if (c2 == '=') {
212+
if (rdr.next() == '=') {
213+
rdr.bump();
214+
rdr.bump();
211215
ret token.OP(token.EQEQ());
212216
} else {
213-
forget(rdr, c2);
217+
rdr.bump();
214218
ret token.OP(token.EQ());
215219
}
216220
}
217221

218222
case ('\'') {
219-
// FIXME: general utf8-consumption support.
220-
auto c2 = next(rdr);
223+
rdr.bump();
224+
auto c2 = rdr.curr();
221225
if (c2 == '\\') {
222-
c2 = next(rdr);
223-
alt (c2) {
224-
case ('n') { c2 = '\n'; }
225-
case ('r') { c2 = '\r'; }
226-
case ('t') { c2 = '\t'; }
227-
case ('\\') { c2 = '\\'; }
228-
case ('\'') { c2 = '\''; }
226+
alt (rdr.next()) {
227+
case ('n') { rdr.bump(); c2 = '\n'; }
228+
case ('r') { rdr.bump(); c2 = '\r'; }
229+
case ('t') { rdr.bump(); c2 = '\t'; }
230+
case ('\\') { rdr.bump(); c2 = '\\'; }
231+
case ('\'') { rdr.bump(); c2 = '\''; }
229232
// FIXME: unicode numeric escapes.
230-
case (_) {
233+
case (c2) {
231234
log "unknown character escape";
232235
log c2;
233236
fail;
234237
}
235238
}
236239
}
237-
if (next(rdr) != '\'') {
240+
241+
if (rdr.next() != '\'') {
238242
log "unterminated character constant";
239243
fail;
240244
}
245+
rdr.bump();
246+
rdr.bump();
241247
ret token.LIT_CHAR(c2);
242248
}
243249

244250
case ('"') {
251+
rdr.bump();
245252
// FIXME: general utf8-consumption support.
246-
auto c2 = next(rdr);
247-
while (c2 != '"') {
248-
alt (c2) {
253+
while (rdr.curr() != '"') {
254+
alt (rdr.curr()) {
249255
case ('\\') {
250-
c2 = next(rdr);
251-
alt (c2) {
252-
case ('n') { accum_str += '\n' as u8; }
253-
case ('r') { accum_str += '\r' as u8; }
254-
case ('t') { accum_str += '\t' as u8; }
255-
case ('\\') { accum_str += '\\' as u8; }
256-
case ('"') { accum_str += '"' as u8; }
256+
alt (rdr.next()) {
257+
case ('n') { rdr.bump(); accum_str += '\n' as u8; }
258+
case ('r') { rdr.bump(); accum_str += '\r' as u8; }
259+
case ('t') { rdr.bump(); accum_str += '\t' as u8; }
260+
case ('\\') { rdr.bump(); accum_str += '\\' as u8; }
261+
case ('"') { rdr.bump(); accum_str += '"' as u8; }
257262
// FIXME: unicode numeric escapes.
258-
case (_) {
263+
case (c2) {
259264
log "unknown string escape";
260265
log c2;
261266
fail;
262267
}
263268
}
264269
}
265270
case (_) {
266-
accum_str += c2 as u8;
271+
accum_str += rdr.curr() as u8;
267272
}
268273
}
269-
c2 = next(rdr);
274+
rdr.bump();
270275
}
276+
rdr.bump();
271277
ret token.LIT_STR(accum_str);
272278
}
273279

274280
case ('-') {
275-
auto c2 = next(rdr);
276-
if (c2 == '>') {
281+
if (rdr.next() == '>') {
282+
rdr.bump();
283+
rdr.bump();
277284
ret token.RARROW();
278285
} else {
279-
ret op_or_opeq(rdr, c2, token.MINUS());
286+
ret op_or_opeq(rdr, token.MINUS());
280287
}
281288
}
282289

283290
case ('&') {
284-
auto c2 = next(rdr);
285-
if (c2 == '&') {
291+
if (rdr.next() == '&') {
292+
rdr.bump();
293+
rdr.bump();
286294
ret token.OP(token.ANDAND());
287295
} else {
288-
ret op_or_opeq(rdr, c2, token.AND());
296+
ret op_or_opeq(rdr, token.AND());
289297
}
290298
}
291299

292300
case ('+') {
293-
ret op_or_opeq(rdr, next(rdr), token.PLUS());
301+
ret op_or_opeq(rdr, token.PLUS());
294302
}
295303

296304
case ('*') {
297-
ret op_or_opeq(rdr, next(rdr), token.STAR());
305+
ret op_or_opeq(rdr, token.STAR());
298306
}
299307

300308
case ('/') {
301-
ret op_or_opeq(rdr, next(rdr), token.STAR());
309+
ret op_or_opeq(rdr, token.STAR());
302310
}
303311

304312
case ('!') {
305-
ret op_or_opeq(rdr, next(rdr), token.NOT());
313+
ret op_or_opeq(rdr, token.NOT());
306314
}
307315

308316
case ('^') {
309-
ret op_or_opeq(rdr, next(rdr), token.CARET());
317+
ret op_or_opeq(rdr, token.CARET());
310318
}
311319

312320
case ('%') {
313-
ret op_or_opeq(rdr, next(rdr), token.PERCENT());
321+
ret op_or_opeq(rdr, token.PERCENT());
314322
}
315323

316324
}

trunk/src/comp/fe/parser.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ state type parser =
88

99
fn new_parser(str path) -> parser {
1010
state obj stdio_parser(mutable token.token tok,
11-
_io.stdio_reader rdr)
11+
lexer.reader rdr)
1212
{
1313
state fn peek() -> token.token {
1414
ret tok;
@@ -17,7 +17,8 @@ fn new_parser(str path) -> parser {
1717
tok = lexer.next_token(rdr);
1818
}
1919
}
20-
auto rdr = _io.new_stdio_reader(path);
20+
auto srdr = _io.new_stdio_reader(path);
21+
auto rdr = lexer.new_reader(srdr, path);
2122
ret stdio_parser(lexer.next_token(rdr), rdr);
2223
}
2324

0 commit comments

Comments
 (0)