
Commit 094d31f

Make lexer buffer the whole file
This way, it won't have to go through a bunch of calls for each byte fetched.
Parent commit: cae703c
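
For context: the old reader pulled characters one at a time with rdr.read_char(), while the new one reads the whole stream once with read_whole_stream() and then indexes into the buffered string. A minimal sketch of that trade-off in present-day Rust (the diff itself is in the early-2011 dialect of the language; the function names below are illustrative only, not part of the change):

    use std::io::Read;

    // Per-byte fetching: every byte costs a call through the reader.
    fn count_bytes_per_call(mut r: impl Read) -> std::io::Result<usize> {
        let mut n = 0;
        let mut byte = [0u8; 1];
        while r.read(&mut byte)? == 1 {
            n += 1;
        }
        Ok(n)
    }

    // Whole-stream buffering: one read up front, then cheap in-memory iteration.
    fn count_chars_buffered(mut r: impl Read) -> std::io::Result<usize> {
        let mut buf = String::new();
        r.read_to_string(&mut buf)?;
        Ok(buf.chars().count())
    }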

2 files changed: +74, -59 lines

src/comp/front/lexer.rs

Lines changed: 66 additions & 59 deletions
@@ -11,88 +11,95 @@ import util.common;
 import util.common.new_str_hash;

 state type reader = state obj {
-        fn is_eof() -> bool;
-        fn curr() -> char;
-        fn next() -> char;
-        impure fn bump();
-        fn mark();
-        fn get_filename() -> str;
-        fn get_mark_pos() -> common.pos;
-        fn get_curr_pos() -> common.pos;
-        fn get_keywords() -> hashmap[str,token.token];
-        fn get_reserved() -> hashmap[str,()];
+    fn is_eof() -> bool;
+    fn curr() -> char;
+    fn next() -> char;
+    impure fn init();
+    impure fn bump();
+    fn mark();
+    fn get_filename() -> str;
+    fn get_mark_pos() -> common.pos;
+    fn get_curr_pos() -> common.pos;
+    fn get_keywords() -> hashmap[str,token.token];
+    fn get_reserved() -> hashmap[str,()];
 };

 impure fn new_reader(io.reader rdr, str filename) -> reader
 {
-    state obj reader(io.reader rdr,
+    state obj reader(str file,
                      str filename,
-                     mutable char c,
-                     mutable char n,
+                     uint len,
+                     mutable uint pos,
+                     mutable char ch,
                      mutable uint mark_line,
                      mutable uint mark_col,
                      mutable uint line,
                      mutable uint col,
                      hashmap[str,token.token] keywords,
                      hashmap[str,()] reserved) {

-            fn is_eof() -> bool {
-                ret c == (-1) as char;
-            }
+        fn is_eof() -> bool {
+            ret ch == -1 as char;
+        }

-            fn get_curr_pos() -> common.pos {
-                ret rec(line=line, col=col);
-            }
+        fn get_curr_pos() -> common.pos {
+            ret rec(line=line, col=col);
+        }

-            fn get_mark_pos() -> common.pos {
-                ret rec(line=mark_line, col=mark_col);
-            }
+        fn get_mark_pos() -> common.pos {
+            ret rec(line=mark_line, col=mark_col);
+        }

-            fn get_filename() -> str {
-                ret filename;
-            }
+        fn get_filename() -> str {
+            ret filename;
+        }

-            fn curr() -> char {
-                ret c;
-            }
+        fn curr() -> char {
+            ret ch;
+        }

-            fn next() -> char {
-                ret n;
+        fn next() -> char {
+            if (pos < len) {ret _str.char_at(file, pos);}
+            else {ret -1 as char;}
+        }
+
+        impure fn init() {
+            if (pos < len) {
+                auto next = _str.char_range_at(file, pos);
+                pos = next._1;
+                ch = next._0;
             }
+        }

-            impure fn bump() {
-
-                let char prev = c;
-
-                c = n;
-
-                if (c == (-1) as char) {
-                    ret;
-                }
-
-                if (prev == '\n') {
+        impure fn bump() {
+            if (pos < len) {
+                if (ch == '\n') {
                     line += 1u;
                     col = 0u;
                 } else {
                     col += 1u;
                 }
-
-                n = rdr.read_char();
+                auto next = _str.char_range_at(file, pos);
+                pos = next._1;
+                ch = next._0;
+            } else {
+                ch = -1 as char;
             }
+        }

-            fn mark() {
-                mark_line = line;
-                mark_col = col;
-            }
+        fn mark() {
+            mark_line = line;
+            mark_col = col;
+        }

-            fn get_keywords() -> hashmap[str,token.token] {
-                ret keywords;
-            }
+        fn get_keywords() -> hashmap[str,token.token] {
+            ret keywords;
+        }

-            fn get_reserved() -> hashmap[str,()] {
-                ret reserved;
+        fn get_reserved() -> hashmap[str,()] {
+            ret reserved;
         }
+    }

     auto keywords = new_str_hash[token.token]();

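The new reader keeps the whole source in file, a byte offset pos, and the current character ch; _str.char_range_at(file, pos) is used as a pair of the decoded character (._0) and the byte offset just past it (._1), which is what init() and bump() store. A rough present-day Rust analogue of that cursor, with line/column tracking omitted and all names hypothetical:

    // Hypothetical analogue of the buffered reader state: `src` holds the whole
    // file, `pos` is the byte offset of the next char, `ch` is the current char
    // (None once the end of the file is reached).
    struct Reader {
        src: String,
        pos: usize,
        ch: Option<char>,
    }

    impl Reader {
        // Like init()/bump(): decode the char at `pos`, then advance `pos`
        // past it (the two values the diff reads out of char_range_at).
        fn bump(&mut self) {
            match self.src[self.pos..].chars().next() {
                Some(c) => {
                    self.ch = Some(c);
                    self.pos += c.len_utf8();
                }
                None => self.ch = None, // plays the role of `ch = -1 as char`
            }
        }

        // Like next(): peek at the upcoming char without advancing.
        fn peek(&self) -> Option<char> {
            self.src[self.pos..].chars().next()
        }
    }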

@@ -208,13 +215,14 @@ impure fn new_reader(io.reader rdr, str filename) -> reader
     reserved.insert("m128", ()); // IEEE 754-2008 'decimal128'
     reserved.insert("dec", ()); // One of m32, m64, m128

-    ret reader(rdr, filename, rdr.read_char(),
-               rdr.read_char(), 1u, 0u, 1u, 0u, keywords, reserved);
+    auto file = _str.unsafe_from_bytes(rdr.read_whole_stream());
+    auto rd = reader(file, filename, _str.byte_len(file), 0u, -1 as char,
+                     1u, 0u, 1u, 0u, keywords, reserved);
+    rd.init();
+    ret rd;
 }


-
-
 fn in_range(char c, char lo, char hi) -> bool {
     ret lo <= c && c <= hi;
 }
@@ -689,7 +697,6 @@ impure fn next_token(reader rdr) -> token.token {

         case ('"') {
             rdr.bump();
-            // FIXME: general utf8-consumption support.
             while (rdr.curr() != '"') {
                 alt (rdr.curr()) {
                     case ('\\') {
@@ -850,7 +857,7 @@ impure fn read_block_comment(reader rdr) -> cmnt {

 impure fn gather_comments(str path) -> vec[cmnt] {
     auto srdr = io.file_reader(path);
-    auto rdr = lexer.new_reader(srdr, path);
+    auto rdr = new_reader(srdr, path);
     let vec[cmnt] comments = vec();
     while (!rdr.is_eof()) {
         while (true) {

src/lib/io.rs

Lines changed: 8 additions & 0 deletions
@@ -41,6 +41,7 @@ type reader =
         impure fn read_le_uint(uint size) -> uint;
         impure fn read_le_int(uint size) -> int;
         impure fn read_be_uint(uint size) -> uint;
+        impure fn read_whole_stream() -> vec[u8];

         impure fn seek(int offset, seek_style whence);
         impure fn tell() -> uint; // FIXME: eventually u64
@@ -170,6 +171,13 @@ state obj new_reader(buf_reader rdr) {
         }
         ret val;
     }
+    impure fn read_whole_stream() -> vec[u8] {
+        let vec[u8] buf = vec();
+        while (!rdr.eof()) {
+            buf += rdr.read(2048u);
+        }
+        ret buf;
+    }
     impure fn seek(int offset, seek_style whence) {
         ret rdr.seek(offset, whence);
     }
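
The added read_whole_stream() reads fixed-size 2048-byte chunks and concatenates them until the underlying reader reports end of file. A sketch of the same loop in present-day Rust (where Read::read_to_end covers this in one call; the standalone function here is only for illustration):

    use std::io::Read;

    // Chunked slurp: read fixed-size chunks until a zero-length read (EOF)
    // and append them to one growing buffer.
    fn read_whole_stream(r: &mut impl Read) -> std::io::Result<Vec<u8>> {
        let mut buf = Vec::new();
        let mut chunk = [0u8; 2048];
        loop {
            let n = r.read(&mut chunk)?;
            if n == 0 {
                break;
            }
            buf.extend_from_slice(&chunk[..n]);
        }
        Ok(buf)
    }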
