
Commit 5cb8aa6

---
yaml --- r: 10720 b: refs/heads/snap-stage3 c: 4f10495 h: refs/heads/master v: v3
1 parent ccdc3d5 commit 5cb8aa6

9 files changed, +371 -30 lines changed

[refs]

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 ---
 refs/heads/master: 2898dcc5d97da9427ac367542382b6239d9c0bbf
 refs/heads/snap-stage1: e33de59e47c5076a89eadeb38f4934f58a3618a6
-refs/heads/snap-stage3: 650dfe58a3d0b41b561d8a68924f31e93f79d4bc
+refs/heads/snap-stage3: 4f104954a67ad736244ce212467290c836394fad
 refs/heads/try: 2898dcc5d97da9427ac367542382b6239d9c0bbf
 refs/tags/release-0.1: 1f5c5126e96c79d22cb7862f75304136e204f105

branches/snap-stage3/src/libsyntax/ast.rs

Lines changed: 11 additions & 0 deletions
@@ -377,6 +377,17 @@ enum token_tree {
     tt_flat(span, token::token)
 }
 
+#[auto_serialize]
+type matcher = spanned<matcher_>;
+
+#[auto_serialize]
+enum matcher_ {
+    mtc_tok(token::token),
+    /* body, separator, zero ok? : */
+    mtc_rep([matcher], option<token::token>, bool),
+    mtc_bb(ident, ident, uint)
+}
+
 #[auto_serialize]
 type mac = spanned<mac_>;
Lines changed: 243 additions & 0 deletions
@@ -0,0 +1,243 @@
// Earley-like parser for macros.
import parse::token;
import parse::token::{token, EOF, to_str, whole_nt};
import parse::lexer::{reader, tt_reader, tt_reader_as_reader};
import parse::parser::{parser,SOURCE_FILE};
import parse::common::parser_common;
import parse::parse_sess;
import dvec::{dvec, extensions};
import ast::{matcher, mtc_tok, mtc_rep, mtc_bb};

/* This is an Earley-like parser, without support for nonterminals. This
means that there are no completer or predictor rules, and therefore no need to
store one column per token: instead, there's a set of current Earley items and
a set of next ones. Instead of NTs, we have a special case for Kleene
star. The big-O, in pathological cases, is worse than traditional Earley
parsing, but it's an easier fit for Macro-by-Example-style rules, and I think
the overhead is lower. */


/* to avoid costly uniqueness checks, we require that `mtc_rep` always has a
nonempty body. */

enum matcher_pos_up { /* to break a circularity */
    matcher_pos_up(option<matcher_pos>)
}

fn is_some(&&mpu: matcher_pos_up) -> bool {
    alt mpu {
      matcher_pos_up(none) { false }
      _ { true }
    }
}

type matcher_pos = ~{
    elts: [ast::matcher], // maybe should be /& ? Need to understand regions.
    sep: option<token>,
    mut idx: uint,
    mut up: matcher_pos_up, // mutable for swapping only
    matches: [dvec<@arb_depth>]
};

fn copy_up(&& mpu: matcher_pos_up) -> matcher_pos {
    alt mpu {
      matcher_pos_up(some(mp)) { copy mp }
      _ { fail }
    }
}

fn count_names(ms: [matcher]/&) -> uint {
    vec::foldl(0u, ms, {|ct, m|
        ct + alt m.node {
          mtc_tok(_) { 0u }
          mtc_rep(more_ms, _, _) { count_names(more_ms) }
          mtc_bb(_,_,_) { 1u }
        }})
}

fn new_matcher_pos(ms: [matcher], sep: option<token>) -> matcher_pos {
    ~{elts: ms, sep: sep, mut idx: 0u, mut up: matcher_pos_up(none),
      matches: copy vec::from_fn(count_names(ms), {|_i| dvec::dvec()}) }
}

/* logically, an arb_depth should contain only one kind of nonterminal */
enum arb_depth { leaf(whole_nt), seq([@arb_depth]) }

type earley_item = matcher_pos;


fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: [matcher])
    -> [@arb_depth] {
    let mut cur_eis = [];
    vec::push(cur_eis, new_matcher_pos(ms, none));

    loop {
        let mut bb_eis = []; // black-box parsed by parser.rs
        let mut next_eis = []; // or proceed normally
        let mut eof_eis = [];

        let {tok: tok, sp: _} = rdr.peek();

        /* we append new items to this while we go */
        while cur_eis.len() > 0u { /* for each Earley Item */
            let mut ei = vec::pop(cur_eis);

            let idx = ei.idx;
            let len = ei.elts.len();

            /* at end of sequence */
            if idx >= len {
                // can't move out of `alt`s, so:
                if is_some(ei.up) {
                    // hack: a matcher sequence is repeating iff it has a
                    // parent (the top level is just a container)


                    // disregard separator, try to go up
                    // (remove this condition to make trailing seps ok)
                    if idx == len {
                        // pop from the matcher position

                        let new_pos = copy_up(ei.up);

                        // update matches (the MBE "parse tree") by appending
                        // each tree as a subtree.

                        // I bet this is a perf problem: we're preemptively
                        // doing a lot of array work that will get thrown away
                        // most of the time.
                        for ei.matches.eachi() { |idx, elt|
                            new_pos.matches[idx].push(@seq(elt.get()));
                        }

                        new_pos.idx += 1u;
                        vec::push(cur_eis, new_pos);
                    }

                    // can we go around again?

                    // the *_t vars are workarounds for the lack of unary move
                    alt copy ei.sep {
                      some(t) if idx == len { // we need a separator
                        if tok == t { //pass the separator
                            let ei_t <- ei;
                            ei_t.idx += 1u;
                            vec::push(next_eis, ei_t);
                        }
                      }
                      _ { // we don't need a separator
                        let ei_t <- ei;
                        ei_t.idx = 0u;
                        vec::push(cur_eis, ei_t);
                      }
                    }
                } else {
                    vec::push(eof_eis, ei);
                }
            } else {
                alt copy ei.elts[idx].node {
                  /* need to descend into sequence */
                  mtc_rep(matchers, sep, zero_ok) {
                    if zero_ok {
                        let new_ei = copy ei;
                        new_ei.idx += 1u;
                        vec::push(cur_eis, new_ei);
                    }

                    let matches = vec::map(ei.matches, // fresh, same size:
                                           {|_m| dvec::<@arb_depth>()});
                    let ei_t <- ei;
                    vec::push(cur_eis, ~{
                        elts: matchers, sep: sep, mut idx: 0u,
                        mut up: matcher_pos_up(some(ei_t)),
                        matches: matches
                    });
                  }
                  mtc_bb(_,_,_) { vec::push(bb_eis, ei) }
                  mtc_tok(t) {
                    let ei_t <- ei;
                    if t == tok { ei_t.idx += 1u; vec::push(next_eis, ei_t)}
                  }
                }
            }
        }

        /* error messages here could be improved with links to orig. rules */
        if tok == EOF {
            if eof_eis.len() == 1u {
                let ret_val = vec::map(eof_eis[0u].matches, {|dv| dv.pop()});
                ret ret_val; /* success */
            } else if eof_eis.len() > 1u {
                rdr.fatal("Ambiguity: multiple successful parses");
            } else {
                rdr.fatal("Unexpected end of macro invocation");
            }
        } else {
            if (bb_eis.len() > 0u && next_eis.len() > 0u)
                || bb_eis.len() > 1u {
                let nts = str::connect(vec::map(bb_eis, {|ei|
                    alt ei.elts[ei.idx].node
                    { mtc_bb(_,name,_) { *name } _ { fail; } }
                }), " or ");
                rdr.fatal(#fmt["Local ambiguity: multiple parsing options: \
                                built-in NTs %s or %u other options.",
                               nts, next_eis.len()]);
            } else if (bb_eis.len() == 0u && next_eis.len() == 0u) {
                rdr.fatal("No rules expected the token "
                          + to_str(*rdr.interner(), tok));
            } else if (next_eis.len() > 0u) {
                /* Now process the next token */
                while(next_eis.len() > 0u) {
                    vec::push(cur_eis, vec::pop(next_eis));
                }
                rdr.next_token();
            } else /* bb_eis.len() == 1 */ {
                let rust_parser = parser(sess, cfg, rdr.dup(), SOURCE_FILE);

                let ei = vec::pop(bb_eis);
                alt ei.elts[ei.idx].node {
                  mtc_bb(_, name, idx) {
                    ei.matches[idx].push(@leaf(
                        parse_nt(rust_parser, *name)));
                    ei.idx += 1u;
                  }
                  _ { fail; }
                }
                vec::push(cur_eis,ei);

                /* this would fail if zero-length tokens existed */
                while rdr.peek().sp.lo < rust_parser.span.lo {
                    rdr.next_token();
                }
            }
        }

        assert cur_eis.len() > 0u;
    }
}

fn parse_nt(p: parser, name: str) -> whole_nt {
    alt name {
      "item" { alt p.parse_item([], ast::public) {
        some(i) { token::w_item(i) }
        none { p.fatal("expected an item keyword") }
      }}
      "block" { token::w_block(p.parse_block()) }
      "stmt" { token::w_stmt(p.parse_stmt([])) }
      "pat" { token::w_pat(p.parse_pat()) }
      "expr" { token::w_expr(p.parse_expr()) }
      "ty" { token::w_ty(p.parse_ty(false /* no need to disambiguate*/)) }
      // this could be handled like a token, since it is one
      "ident" { token::w_ident(p.parse_ident()) }
      "path" { token::w_path(p.parse_path_with_tps(false)) }
      _ { p.fatal("Unsupported builtin nonterminal parser: " + name)}
    }
}

// Local Variables:
// mode: rust;
// fill-column: 78;
// indent-tabs-mode: nil
// c-basic-offset: 4
// buffer-file-coding-system: utf-8-unix
// End:
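
The heart of the new file is the cur_eis/next_eis loop: each Earley item is a position inside a matcher sequence, a finished repetition body pops back up to its parent position, and only items that accept the current token survive into the next set. The recognizer below is a deliberately stripped-down sketch of that bookkeeping in present-day Rust, with literal tokens and one repetition form only, no black-box nonterminals and no match capture; all names are hypothetical and it is not the commit's code.

// Miniature of the item-set loop: current items vs. next items, with an
// "up" pointer for repetition bodies. Hypothetical types; not the real API.
#[derive(Clone, PartialEq)]
enum Tok { Comma, Num }

#[derive(Clone)]
enum M {
    Tok(Tok),                       // like mtc_tok: one literal token
    Rep(Vec<M>, Option<Tok>, bool), // like mtc_rep: body, separator, zero ok?
}

#[derive(Clone)]
struct Item {
    elts: Vec<M>,          // the matcher sequence this item is walking
    sep: Option<Tok>,      // separator, if this is a repetition body
    idx: usize,            // position inside `elts`
    up: Option<Box<Item>>, // enclosing position (like matcher_pos_up)
}

fn item(elts: Vec<M>, sep: Option<Tok>, up: Option<Box<Item>>) -> Item {
    Item { elts, sep, idx: 0, up }
}

// True iff `toks` is matched completely. Repetition bodies are assumed to be
// nonempty, mirroring the requirement stated in the file above.
fn recognize(ms: &[M], toks: &[Tok]) -> bool {
    let mut cur = vec![item(ms.to_vec(), None, None)];
    let mut pos = 0;
    loop {
        let tok = toks.get(pos); // None plays the role of EOF
        let mut next: Vec<Item> = vec![];
        let mut done: Vec<Item> = vec![];

        while let Some(ei) = cur.pop() {
            if ei.idx >= ei.elts.len() {
                if let Some(parent) = ei.up.clone() {
                    // a finished repetition body: go up past the repetition,
                    // and possibly go around again
                    let mut up = *parent;
                    up.idx += 1;
                    cur.push(up);
                    match &ei.sep {
                        Some(s) if tok == Some(s) => {
                            let mut again = ei.clone();
                            again.idx = 0;
                            next.push(again); // consume the separator
                        }
                        None => {
                            let mut again = ei.clone();
                            again.idx = 0;
                            cur.push(again); // no separator needed
                        }
                        _ => {} // separator required but absent
                    }
                } else {
                    done.push(ei); // the top-level sequence is complete
                }
            } else {
                match ei.elts[ei.idx].clone() {
                    M::Tok(t) => {
                        if tok == Some(&t) {
                            let mut ei = ei;
                            ei.idx += 1;
                            next.push(ei); // literal token accepted
                        }
                    }
                    M::Rep(body, sep, zero_ok) => {
                        if zero_ok {
                            let mut skip = ei.clone();
                            skip.idx += 1;
                            cur.push(skip); // zero repetitions
                        }
                        // descend into the repetition body
                        cur.push(item(body, sep, Some(Box::new(ei))));
                    }
                }
            }
        }

        match tok {
            None => return !done.is_empty(), // success iff something finished
            Some(_) if next.is_empty() => return false, // nothing expected it
            Some(_) => { cur = next; pos += 1; }
        }
    }
}

fn main() {
    // roughly  $( $e:expr ),*  with Num standing in for an expression
    let pat = vec![M::Rep(vec![M::Tok(Tok::Num)], Some(Tok::Comma), true)];
    assert!(recognize(&pat, &[]));
    assert!(recognize(&pat, &[Tok::Num, Tok::Comma, Tok::Num]));
    assert!(!recognize(&pat, &[Tok::Num, Tok::Comma])); // no trailing separator
    println!("ok");
}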

branches/snap-stage3/src/libsyntax/parse/comments.rs

Lines changed: 2 additions & 1 deletion
@@ -199,8 +199,9 @@ fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler,
 
 
         let bstart = rdr.pos;
+        rdr.next_token();
         //discard, and look ahead; we're working with internal state
-        let {tok: tok, sp: sp} = rdr.next_token();
+        let {tok: tok, sp: sp} = rdr.peek();
         if token::is_lit(tok) {
             let s = get_str_from(rdr, bstart);
             vec::push(literals, {lit: s, pos: sp.lo});
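
The rewritten lines separate "advance" from "inspect": the token is consumed with next_token() and then re-read through peek(), since the surrounding code works with the reader's internal state rather than with a returned value. Below is a toy reader sketching that contract; the Reader type and its exact semantics are assumptions for illustration, not the commit's lexer::reader, whose interface may differ.

// Toy reader: next_token() advances onto the next token and returns it;
// peek() re-reads the now-current token without advancing.
#[derive(Clone, Copy, PartialEq, Debug)]
struct TokenAndSpan { tok: char, lo: usize }

struct Reader { src: Vec<char>, pos: usize }

impl Reader {
    fn peek(&self) -> TokenAndSpan {
        let tok = *self.src.get(self.pos).unwrap_or(&'\0'); // '\0' stands for EOF
        TokenAndSpan { tok, lo: self.pos }
    }
    fn next_token(&mut self) -> TokenAndSpan {
        self.pos += 1;
        self.peek()
    }
}

fn main() {
    let mut rdr = Reader { src: "ab".chars().collect(), pos: 0 };
    let t = rdr.next_token();  // advance onto the next token
    assert_eq!(rdr.peek(), t); // peek() re-reads that same, still-current token
    println!("ok");
}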

branches/snap-stage3/src/libsyntax/parse/common.rs

Lines changed: 1 addition & 1 deletion
@@ -111,8 +111,8 @@ impl parser_common for parser {
         if !self.eat_keyword(word) {
             self.fatal("expecting " + word + ", found " +
                            token_to_str(self.reader, self.token));
+        }
     }
-    }
 
     fn is_restricted_keyword(word: str) -> bool {
         self.restricted_keywords.contains_key(word)

0 commit comments
