Skip to content

Commit db2a708

Browse files
bors[bot] and matklad
authored
Merge #10995
10995: internal: switch from trait-based TokenSource to simple struct of arrays r=matklad a=matklad cc #10765 The idea here is to try to simplify the interface as best as we can. The original trait-based approach is a bit over-engineered and hard to debug. Here, we replace callback with just data. The next PR in series will replace the output `TreeSink` trait with data as well. The biggest drawback here is that we now require to materialize all parser's input up-front. This is a bad fit for macro by example: when you parse `$e:expr`, you might consume only part of the input. However, today's trait-based solution doesn't really help -- we were already materializing the whole thing! So, let's keep it simple! Co-authored-by: Aleksey Kladov <[email protected]>
2 parents fc628cf + 3b5b988 commit db2a708

File tree

17 files changed

+310
-362
lines changed

17 files changed

+310
-362
lines changed

crates/mbe/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ mod parser;
1010
mod expander;
1111
mod syntax_bridge;
1212
mod tt_iter;
13-
mod subtree_source;
13+
mod to_parser_tokens;
1414

1515
#[cfg(test)]
1616
mod benchmark;

crates/mbe/src/subtree_source.rs

Lines changed: 0 additions & 174 deletions
This file was deleted.

crates/mbe/src/syntax_bridge.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use syntax::{
1212
use tt::buffer::{Cursor, TokenBuffer};
1313

1414
use crate::{
15-
subtree_source::SubtreeTokenSource, tt_iter::TtIter, ExpandError, ParserEntryPoint, TokenMap,
15+
to_parser_tokens::to_parser_tokens, tt_iter::TtIter, ExpandError, ParserEntryPoint, TokenMap,
1616
};
1717

1818
/// Convert the syntax node to a `TokenTree` (what macro
@@ -56,9 +56,9 @@ pub fn token_tree_to_syntax_node(
5656
}
5757
_ => TokenBuffer::from_subtree(tt),
5858
};
59-
let mut token_source = SubtreeTokenSource::new(&buffer);
59+
let parser_tokens = to_parser_tokens(&buffer);
6060
let mut tree_sink = TtTreeSink::new(buffer.begin());
61-
parser::parse(&mut token_source, &mut tree_sink, entry_point);
61+
parser::parse(&parser_tokens, &mut tree_sink, entry_point);
6262
if tree_sink.roots.len() != 1 {
6363
return Err(ExpandError::ConversionError);
6464
}

crates/mbe/src/to_parser_tokens.rs

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
//! Convert macro-by-example tokens which are specific to macro expansion into a
//! format that works for our parser.
2+
3+
4+
use syntax::{lex_single_syntax_kind, SyntaxKind, SyntaxKind::*, T};
5+
use tt::buffer::TokenBuffer;
6+
7+
/// Flatten a `TokenBuffer` of macro token trees into the parser's plain
/// `Tokens` input, materializing the whole input up front: one `SyntaxKind`
/// is pushed per leaf token, plus open/close delimiter kinds when entering
/// and leaving subtrees.
pub(crate) fn to_parser_tokens(buffer: &TokenBuffer) -> parser::Tokens {
8+
let mut res = parser::Tokens::default();
9+
10+
let mut current = buffer.begin();
11+
12+
while !current.eof() {
13+
let cursor = current;
14+
let tt = cursor.token_tree();
15+
16+
// Check if it is a lifetime: a `'` punct directly followed by an ident is
// collapsed into a single LIFETIME_IDENT token.
17+
if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(punct), _)) = tt {
18+
if punct.char == '\'' {
19+
let next = cursor.bump();
20+
match next.token_tree() {
21+
Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Ident(_ident), _)) => {
22+
res.push(LIFETIME_IDENT);
23+
// Skip past both the `'` punct and the ident.
current = next.bump();
24+
continue;
25+
}
26+
// A lone `'` with no following ident is malformed input.
_ => panic!("Next token must be ident : {:#?}", next.token_tree()),
27+
}
28+
}
29+
}
30+
31+
current = match tt {
32+
Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => {
33+
match leaf {
34+
// Re-lex the literal's text to recover its concrete kind; a
// leading `-` is only accepted on numeric literals.
tt::Leaf::Literal(lit) => {
35+
let is_negated = lit.text.starts_with('-');
36+
let inner_text = &lit.text[if is_negated { 1 } else { 0 }..];
37+
38+
let kind = lex_single_syntax_kind(inner_text)
39+
.map(|(kind, _error)| kind)
40+
.filter(|kind| {
41+
kind.is_literal()
42+
&& (!is_negated || matches!(kind, FLOAT_NUMBER | INT_NUMBER))
43+
})
44+
.unwrap_or_else(|| panic!("Fail to convert given literal {:#?}", &lit));
45+
46+
res.push(kind);
47+
}
48+
// Keywords get their own kind; everything else is pushed as an
// ident, with its contextual-keyword kind (if any) recorded via
// `push_ident`.
tt::Leaf::Ident(ident) => match ident.text.as_ref() {
49+
"_" => res.push(T![_]),
50+
i if i.starts_with('\'') => res.push(LIFETIME_IDENT),
51+
_ => match SyntaxKind::from_keyword(&ident.text) {
52+
Some(kind) => res.push(kind),
53+
None => {
54+
let contextual_keyword =
55+
SyntaxKind::from_contextual_keyword(&ident.text)
56+
.unwrap_or(SyntaxKind::IDENT);
57+
res.push_ident(contextual_keyword);
58+
}
59+
},
60+
},
61+
tt::Leaf::Punct(punct) => {
62+
let kind = SyntaxKind::from_char(punct.char)
63+
.unwrap_or_else(|| panic!("{:#?} is not a valid punct", punct));
64+
res.push(kind);
65+
// Record `Joint` spacing for the parser.
if punct.spacing == tt::Spacing::Joint {
66+
res.was_joint();
67+
}
68+
}
69+
}
70+
cursor.bump()
71+
}
72+
// Entering a subtree: push its opening delimiter, then descend into it.
Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => {
73+
if let Some(d) = subtree.delimiter_kind() {
74+
res.push(match d {
75+
tt::DelimiterKind::Parenthesis => T!['('],
76+
tt::DelimiterKind::Brace => T!['{'],
77+
tt::DelimiterKind::Bracket => T!['['],
78+
});
79+
}
80+
cursor.subtree().unwrap()
81+
}
82+
// End of a subtree: push the closing delimiter and step past it.
None => match cursor.end() {
83+
Some(subtree) => {
84+
if let Some(d) = subtree.delimiter_kind() {
85+
res.push(match d {
86+
tt::DelimiterKind::Parenthesis => T![')'],
87+
tt::DelimiterKind::Brace => T!['}'],
88+
tt::DelimiterKind::Bracket => T![']'],
89+
})
90+
}
91+
cursor.bump()
92+
}
93+
None => continue,
94+
},
95+
};
96+
}
97+
98+
res
99+
}

crates/mbe/src/tt_iter.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! A "Parser" structure for token trees. We use this when parsing a declarative
22
//! macro definition into a list of patterns and templates.
33
4-
use crate::{subtree_source::SubtreeTokenSource, ExpandError, ExpandResult, ParserEntryPoint};
4+
use crate::{to_parser_tokens::to_parser_tokens, ExpandError, ExpandResult, ParserEntryPoint};
55

66
use parser::TreeSink;
77
use syntax::SyntaxKind;
@@ -116,10 +116,10 @@ impl<'a> TtIter<'a> {
116116
}
117117

118118
let buffer = TokenBuffer::from_tokens(self.inner.as_slice());
119-
let mut src = SubtreeTokenSource::new(&buffer);
119+
let parser_tokens = to_parser_tokens(&buffer);
120120
let mut sink = OffsetTokenSink { cursor: buffer.begin(), error: false };
121121

122-
parser::parse(&mut src, &mut sink, entry_point);
122+
parser::parse(&parser_tokens, &mut sink, entry_point);
123123

124124
let mut err = if !sink.cursor.is_root() || sink.error {
125125
Some(err!("expected {:?}", entry_point))

crates/parser/src/grammar/expressions.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -296,10 +296,7 @@ fn lhs(p: &mut Parser, r: Restrictions) -> Option<(CompletedMarker, BlockLike)>
296296
T![&] => {
297297
m = p.start();
298298
p.bump(T![&]);
299-
if p.at(IDENT)
300-
&& p.at_contextual_kw("raw")
301-
&& (p.nth_at(1, T![mut]) || p.nth_at(1, T![const]))
302-
{
299+
if p.at_contextual_kw(T![raw]) && (p.nth_at(1, T![mut]) || p.nth_at(1, T![const])) {
303300
p.bump_remap(T![raw]);
304301
p.bump_any();
305302
} else {

crates/parser/src/grammar/items.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -122,14 +122,14 @@ pub(super) fn opt_item(p: &mut Parser, m: Marker) -> Result<(), Marker> {
122122
has_mods = true;
123123
abi(p);
124124
}
125-
if p.at(IDENT) && p.at_contextual_kw("auto") && p.nth(1) == T![trait] {
125+
if p.at_contextual_kw(T![auto]) && p.nth(1) == T![trait] {
126126
p.bump_remap(T![auto]);
127127
has_mods = true;
128128
}
129129

130130
// test default_item
131131
// default impl T for Foo {}
132-
if p.at(IDENT) && p.at_contextual_kw("default") {
132+
if p.at_contextual_kw(T![default]) {
133133
match p.nth(1) {
134134
T![fn] | T![type] | T![const] | T![impl] => {
135135
p.bump_remap(T![default]);
@@ -176,7 +176,7 @@ pub(super) fn opt_item(p: &mut Parser, m: Marker) -> Result<(), Marker> {
176176

177177
// test existential_type
178178
// existential type Foo: Fn() -> usize;
179-
if p.at(IDENT) && p.at_contextual_kw("existential") && p.nth(1) == T![type] {
179+
if p.at_contextual_kw(T![existential]) && p.nth(1) == T![type] {
180180
p.bump_remap(T![existential]);
181181
has_mods = true;
182182
}
@@ -224,10 +224,10 @@ fn opt_item_without_modifiers(p: &mut Parser, m: Marker) -> Result<(), Marker> {
224224
T![type] => type_alias(p, m),
225225
T![struct] => adt::strukt(p, m),
226226
T![enum] => adt::enum_(p, m),
227-
IDENT if p.at_contextual_kw("union") && p.nth(1) == IDENT => adt::union(p, m),
227+
IDENT if p.at_contextual_kw(T![union]) && p.nth(1) == IDENT => adt::union(p, m),
228228

229229
T![macro] => macro_def(p, m),
230-
IDENT if p.at_contextual_kw("macro_rules") && p.nth(1) == BANG => macro_rules(p, m),
230+
IDENT if p.at_contextual_kw(T![macro_rules]) && p.nth(1) == BANG => macro_rules(p, m),
231231

232232
T![const] if (la == IDENT || la == T![_] || la == T![mut]) => consts::konst(p, m),
233233
T![static] => consts::static_(p, m),
@@ -319,7 +319,7 @@ pub(crate) fn extern_item_list(p: &mut Parser) {
319319
}
320320

321321
fn macro_rules(p: &mut Parser, m: Marker) {
322-
assert!(p.at_contextual_kw("macro_rules"));
322+
assert!(p.at_contextual_kw(T![macro_rules]));
323323
p.bump_remap(T![macro_rules]);
324324
p.expect(T![!]);
325325

crates/parser/src/grammar/items/adt.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ pub(super) fn strukt(p: &mut Parser, m: Marker) {
1010
// test union_item
1111
// struct U { i: i32, f: f32 }
1212
pub(super) fn union(p: &mut Parser, m: Marker) {
13-
assert!(p.at_contextual_kw("union"));
13+
assert!(p.at_contextual_kw(T![union]));
1414
p.bump_remap(T![union]);
1515
struct_or_union(p, m, false);
1616
}

0 commit comments

Comments (0)