// Earley-like parser for macros.
import parse::token;
import parse::token::{token, EOF, to_str, whole_nt};
import parse::lexer::{reader, tt_reader, tt_reader_as_reader};
import parse::parser::{parser, SOURCE_FILE};
import parse::common::parser_common;
import parse::parse_sess;
import dvec::{dvec, extensions};
import ast::{matcher, mtc_tok, mtc_rep, mtc_bb};

/* This is an Earley-like parser, without support for nonterminals. This
means that there are no completer or predictor rules, and therefore no need to
store one column per token: instead, there's a set of current Earley items and
a set of next ones. Instead of NTs, we have a special case for Kleene
star. The big-O, in pathological cases, is worse than traditional Earley
parsing, but it's an easier fit for Macro-by-Example-style rules, and I think
the overhead is lower. */
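// For example, matching a matcher consisting of one comma-separated
// repetition of an `expr` nonterminal against the tokens `1 , 2` proceeds
// roughly as follows: descend into the repetition, hand `1` off to the Rust
// parser, consume the `,` separator, go around the repetition again for `2`,
// and at EOF pop back up with both results collected into a single `seq`.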


/* to avoid costly uniqueness checks, we require that `mtc_rep` always has a
nonempty body. */

enum matcher_pos_up { /* to break a circularity */
    matcher_pos_up(option<matcher_pos>)
}

fn is_some(&&mpu: matcher_pos_up) -> bool {
    alt mpu {
      matcher_pos_up(none) { false }
      _ { true }
    }
}

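// A single Earley item: a position (`idx`) within a list of matchers
// (`elts`), a link to the enclosing position for repetitions (`up`), and the
// nonterminal matches collected so far (one growable list per named matcher).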
type matcher_pos = ~{
    elts: [ast::matcher], // maybe should be /& ? Need to understand regions.
    sep: option<token>,
    mut idx: uint,
    mut up: matcher_pos_up, // mutable for swapping only
    matches: [dvec<@arb_depth>]
};

fn copy_up(&& mpu: matcher_pos_up) -> matcher_pos {
    alt mpu {
      matcher_pos_up(some(mp)) { copy mp }
      _ { fail }
    }
}

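// Count the black-box (named) matchers, recursing into repetitions; this is
// how many match-collection slots a matcher_pos needs.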
fn count_names(ms: [matcher]/&) -> uint {
    vec::foldl(0u, ms, {|ct, m|
        ct + alt m.node {
          mtc_tok(_) { 0u }
          mtc_rep(more_ms, _, _) { count_names(more_ms) }
          mtc_bb(_,_,_) { 1u }
        }})
}

fn new_matcher_pos(ms: [matcher], sep: option<token>) -> matcher_pos {
    ~{elts: ms, sep: sep, mut idx: 0u, mut up: matcher_pos_up(none),
      matches: copy vec::from_fn(count_names(ms), {|_i| dvec::dvec()}) }
}

/* logically, an arb_depth should contain only one kind of nonterminal */
enum arb_depth { leaf(whole_nt), seq([@arb_depth]) }
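// Informally: a `leaf` is one parsed nonterminal, and each level of `seq`
// corresponds to one enclosing repetition, so matching `1 , 2` against a
// comma-separated repetition of an `expr` named `e` produces, for `e`,
// something like `seq([@leaf(w_expr(1)), @leaf(w_expr(2))])`.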

type earley_item = matcher_pos;


fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: [matcher])
    -> [@arb_depth] {
    let mut cur_eis = [];
    vec::push(cur_eis, new_matcher_pos(ms, none));

    loop {
        let mut bb_eis = []; // black-box parsed by parser.rs
        let mut next_eis = []; // or proceed normally
        let mut eof_eis = [];
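        // Each pass looks at one token and sorts the live items into three
        // buckets: items whose next matcher is a black-box nonterminal
        // (bb_eis), items that consumed the current token (next_eis), and
        // items that matched the entire top-level matcher list (eof_eis).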

        let {tok: tok, sp: _} = rdr.peek();

        /* we append new items to this while we go */
        while cur_eis.len() > 0u { /* for each Earley Item */
            let mut ei = vec::pop(cur_eis);

            let idx = ei.idx;
            let len = ei.elts.len();

            /* at end of sequence */
            if idx >= len {
                // can't move out of `alt`s, so:
                if is_some(ei.up) {
                    // hack: a matcher sequence is repeating iff it has a
                    // parent (the top level is just a container)


                    // disregard separator, try to go up
                    // (remove this condition to make trailing seps ok)
                    if idx == len {
                        // pop from the matcher position

                        let new_pos = copy_up(ei.up);

                        // update matches (the MBE "parse tree") by appending
                        // each tree as a subtree.

                        // I bet this is a perf problem: we're preemptively
                        // doing a lot of array work that will get thrown away
                        // most of the time.
                        for ei.matches.eachi() { |idx, elt|
                            new_pos.matches[idx].push(@seq(elt.get()));
                        }

                        new_pos.idx += 1u;
                        vec::push(cur_eis, new_pos);
                    }

                    // can we go around again?

                    // the *_t vars are workarounds for the lack of unary move
                    alt copy ei.sep {
                      some(t) if idx == len { // we need a separator
                        if tok == t { //pass the separator
                            let ei_t <- ei;
                            ei_t.idx += 1u;
                            vec::push(next_eis, ei_t);
                        }
                      }
                      _ { // we don't need a separator
                        let ei_t <- ei;
                        ei_t.idx = 0u;
                        vec::push(cur_eis, ei_t);
                      }
                    }
                } else {
                    vec::push(eof_eis, ei);
                }
            } else {
                alt copy ei.elts[idx].node {
                  /* need to descend into sequence */
                  mtc_rep(matchers, sep, zero_ok) {
                    // zero occurrences of the sequence are allowed:
                    // also try just skipping over it
                    if zero_ok {
                        let new_ei = copy ei;
                        new_ei.idx += 1u;
                        vec::push(cur_eis, new_ei);
                    }

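                    // descend: restart at the beginning of the repetition's
                    // body, remembering the current position as `up` so we
                    // can return to it once the body has been matched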
                    let matches = vec::map(ei.matches, // fresh, same size:
                                           {|_m| dvec::<@arb_depth>()});
                    let ei_t <- ei;
                    vec::push(cur_eis, ~{
                        elts: matchers, sep: sep, mut idx: 0u,
                        mut up: matcher_pos_up(some(ei_t)),
                        matches: matches
                    });
                  }
                  mtc_bb(_,_,_) { vec::push(bb_eis, ei) }
                  mtc_tok(t) {
                    let ei_t <- ei;
                    if t == tok { ei_t.idx += 1u; vec::push(next_eis, ei_t) }
                  }
                }
            }
        }

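        // All items have now been processed for this token. Decide how to
        // proceed based on which buckets are (non)empty: finish at EOF,
        // report an ambiguity, shift the next token, or run the Rust parser
        // for a single black-box nonterminal.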
        /* error messages here could be improved with links to orig. rules */
        if tok == EOF {
            if eof_eis.len() == 1u {
                let ret_val = vec::map(eof_eis[0u].matches, {|dv| dv.pop()});
                ret ret_val; /* success */
            } else if eof_eis.len() > 1u {
                rdr.fatal("Ambiguity: multiple successful parses");
            } else {
                rdr.fatal("Unexpected end of macro invocation");
            }
        } else {
            if (bb_eis.len() > 0u && next_eis.len() > 0u)
                || bb_eis.len() > 1u {
                let nts = str::connect(vec::map(bb_eis, {|ei|
                    alt ei.elts[ei.idx].node
                      { mtc_bb(_,name,_) { *name } _ { fail; } }
                }), " or ");
                rdr.fatal(#fmt["Local ambiguity: multiple parsing options: \
                                built-in NTs %s or %u other options.",
                               nts, next_eis.len()]);
            } else if (bb_eis.len() == 0u && next_eis.len() == 0u) {
                rdr.fatal("No rules expected the token "
                          + to_str(*rdr.interner(), tok));
            } else if (next_eis.len() > 0u) {
                /* Now process the next token */
                while (next_eis.len() > 0u) {
                    vec::push(cur_eis, vec::pop(next_eis));
                }
                rdr.next_token();
            } else /* bb_eis.len() == 1 */ {
                let rust_parser = parser(sess, cfg, rdr.dup(), SOURCE_FILE);

                let ei = vec::pop(bb_eis);
                alt ei.elts[ei.idx].node {
                  mtc_bb(_, name, idx) {
                    ei.matches[idx].push(@leaf(
                        parse_nt(rust_parser, *name)));
                    ei.idx += 1u;
                  }
                  _ { fail; }
                }
                vec::push(cur_eis, ei);

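                // the black-box parser read from a duplicate of our reader;
                // catch our own reader up to wherever it stopped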
                /* this would fail if zero-length tokens existed */
                while rdr.peek().sp.lo < rust_parser.span.lo {
                    rdr.next_token();
                }
            }
        }

        assert cur_eis.len() > 0u;
    }
}

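// Parse one black-box ("built-in") nonterminal by handing control to the
// normal Rust parser; the result comes back wrapped in the corresponding
// whole_nt variant.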
fn parse_nt(p: parser, name: str) -> whole_nt {
    alt name {
      "item" { alt p.parse_item([], ast::public) {
        some(i) { token::w_item(i) }
        none { p.fatal("expected an item keyword") }
      }}
      "block" { token::w_block(p.parse_block()) }
      "stmt" { token::w_stmt(p.parse_stmt([])) }
      "pat" { token::w_pat(p.parse_pat()) }
      "expr" { token::w_expr(p.parse_expr()) }
      "ty" { token::w_ty(p.parse_ty(false /* no need to disambiguate */)) }
      // this could be handled like a token, since it is one
      "ident" { token::w_ident(p.parse_ident()) }
      "path" { token::w_path(p.parse_path_with_tps(false)) }
      _ { p.fatal("Unsupported builtin nonterminal parser: " + name) }
    }
}

// Local Variables:
// mode: rust;
// fill-column: 78;
// indent-tabs-mode: nil
// c-basic-offset: 4
// buffer-file-coding-system: utf-8-unix
// End: