Skip to content

Commit 6fa6efe

Browse files
committed
fix: Fix parsing of nested tuple field accesses in a cursed way
1 parent dab685d commit 6fa6efe

File tree

13 files changed

+294
-35
lines changed

13 files changed

+294
-35
lines changed

crates/parser/src/event.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,12 @@ pub(crate) enum Event {
7272
/// `n_raw_tokens = 2` is used to produce a single `>>`.
7373
Token {
7474
kind: SyntaxKind,
75+
// Consider custom enum here?
7576
n_raw_tokens: u8,
7677
},
77-
78+
FloatSplitHack {
79+
has_pseudo_dot: bool,
80+
},
7881
Error {
7982
msg: String,
8083
},
@@ -125,6 +128,11 @@ pub(super) fn process(mut events: Vec<Event>) -> Output {
125128
Event::Token { kind, n_raw_tokens } => {
126129
res.token(kind, n_raw_tokens);
127130
}
131+
Event::FloatSplitHack { has_pseudo_dot } => {
132+
res.float_split_hack(has_pseudo_dot);
133+
let ev = mem::replace(&mut events[i + 1], Event::tombstone());
134+
assert!(matches!(ev, Event::Finish), "{ev:?}");
135+
}
128136
Event::Error { msg } => res.error(msg),
129137
}
130138
}

crates/parser/src/grammar/expressions.rs

Lines changed: 66 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,7 @@ fn postfix_expr(
379379
// }
380380
T!['('] if allow_calls => call_expr(p, lhs),
381381
T!['['] if allow_calls => index_expr(p, lhs),
382-
T![.] => match postfix_dot_expr(p, lhs) {
382+
T![.] => match postfix_dot_expr::<false>(p, lhs) {
383383
Ok(it) => it,
384384
Err(it) => {
385385
lhs = it;
@@ -393,35 +393,44 @@ fn postfix_expr(
393393
block_like = BlockLike::NotBlock;
394394
}
395395
return (lhs, block_like);
396+
}
396397

397-
fn postfix_dot_expr(
398-
p: &mut Parser<'_>,
399-
lhs: CompletedMarker,
400-
) -> Result<CompletedMarker, CompletedMarker> {
398+
fn postfix_dot_expr<const FLOAT_RECOVERY: bool>(
399+
p: &mut Parser<'_>,
400+
lhs: CompletedMarker,
401+
) -> Result<CompletedMarker, CompletedMarker> {
402+
if !FLOAT_RECOVERY {
401403
assert!(p.at(T![.]));
402-
if p.nth(1) == IDENT && (p.nth(2) == T!['('] || p.nth_at(2, T![::])) {
403-
return Ok(method_call_expr(p, lhs));
404-
}
404+
}
405+
let nth1 = if FLOAT_RECOVERY { 0 } else { 1 };
406+
let nth2 = if FLOAT_RECOVERY { 1 } else { 2 };
405407

406-
// test await_expr
407-
// fn foo() {
408-
// x.await;
409-
// x.0.await;
410-
// x.0().await?.hello();
411-
// }
412-
if p.nth(1) == T![await] {
413-
let m = lhs.precede(p);
414-
p.bump(T![.]);
415-
p.bump(T![await]);
416-
return Ok(m.complete(p, AWAIT_EXPR));
417-
}
408+
if p.nth(nth1) == IDENT && (p.nth(nth2) == T!['('] || p.nth_at(nth2, T![::])) {
409+
return Ok(method_call_expr::<FLOAT_RECOVERY>(p, lhs));
410+
}
418411

419-
if p.at(T![..=]) || p.at(T![..]) {
420-
return Err(lhs);
412+
// test await_expr
413+
// fn foo() {
414+
// x.await;
415+
// x.0.await;
416+
// x.0().await?.hello();
417+
// x.0.0.await;
418+
// x.0. await;
419+
// }
420+
if p.nth(nth1) == T![await] {
421+
let m = lhs.precede(p);
422+
if !FLOAT_RECOVERY {
423+
p.bump(T![.]);
421424
}
425+
p.bump(T![await]);
426+
return Ok(m.complete(p, AWAIT_EXPR));
427+
}
422428

423-
Ok(field_expr(p, lhs))
429+
if p.at(T![..=]) || p.at(T![..]) {
430+
return Err(lhs);
424431
}
432+
433+
field_expr::<FLOAT_RECOVERY>(p, lhs)
425434
}
426435

427436
// test call_expr
@@ -455,11 +464,22 @@ fn index_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker {
455464
// fn foo() {
456465
// x.foo();
457466
// y.bar::<T>(1, 2,);
467+
// x.0.0.call();
468+
// x.0. call();
458469
// }
459-
fn method_call_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker {
460-
assert!(p.at(T![.]) && p.nth(1) == IDENT && (p.nth(2) == T!['('] || p.nth_at(2, T![::])));
470+
fn method_call_expr<const FLOAT_RECOVERY: bool>(
471+
p: &mut Parser<'_>,
472+
lhs: CompletedMarker,
473+
) -> CompletedMarker {
474+
if FLOAT_RECOVERY {
475+
assert!(p.nth(0) == IDENT && (p.nth(1) == T!['('] || p.nth_at(1, T![::])));
476+
} else {
477+
assert!(p.at(T![.]) && p.nth(1) == IDENT && (p.nth(2) == T!['('] || p.nth_at(2, T![::])));
478+
}
461479
let m = lhs.precede(p);
462-
p.bump_any();
480+
if !FLOAT_RECOVERY {
481+
p.bump(T![.]);
482+
}
463483
name_ref(p);
464484
generic_args::opt_generic_arg_list(p, true);
465485
if p.at(T!['(']) {
@@ -472,21 +492,35 @@ fn method_call_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker
472492
// fn foo() {
473493
// x.foo;
474494
// x.0.bar;
495+
// x.0.1;
496+
// x.0. bar;
475497
// x.0();
476498
// }
477-
fn field_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker {
478-
assert!(p.at(T![.]));
499+
fn field_expr<const FLOAT_RECOVERY: bool>(
500+
p: &mut Parser<'_>,
501+
lhs: CompletedMarker,
502+
) -> Result<CompletedMarker, CompletedMarker> {
503+
if !FLOAT_RECOVERY {
504+
assert!(p.at(T![.]));
505+
}
479506
let m = lhs.precede(p);
480-
p.bump(T![.]);
507+
if !FLOAT_RECOVERY {
508+
p.bump(T![.]);
509+
}
481510
if p.at(IDENT) || p.at(INT_NUMBER) {
482511
name_ref_or_index(p);
483512
} else if p.at(FLOAT_NUMBER) {
484-
// FIXME: How to recover and instead parse INT + T![.]?
485-
p.bump_any();
513+
return match p.split_float(m) {
514+
(true, m) => {
515+
let lhs = m.complete(p, FIELD_EXPR);
516+
postfix_dot_expr::<true>(p, lhs)
517+
}
518+
(false, m) => Ok(m.complete(p, FIELD_EXPR)),
519+
};
486520
} else {
487521
p.error("expected field name or number");
488522
}
489-
m.complete(p, FIELD_EXPR)
523+
Ok(m.complete(p, FIELD_EXPR))
490524
}
491525

492526
// test try_expr

crates/parser/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ impl TopEntryPoint {
102102
match step {
103103
Step::Enter { .. } => depth += 1,
104104
Step::Exit => depth -= 1,
105-
Step::Token { .. } | Step::Error { .. } => (),
105+
Step::FloatSplit { .. } | Step::Token { .. } | Step::Error { .. } => (),
106106
}
107107
}
108108
assert!(!first, "no tree at all");

crates/parser/src/output.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ pub struct Output {
2525
#[derive(Debug)]
2626
pub enum Step<'a> {
2727
Token { kind: SyntaxKind, n_input_tokens: u8 },
28+
FloatSplit { has_pseudo_dot: bool },
2829
Enter { kind: SyntaxKind },
2930
Exit,
3031
Error { msg: &'a str },
@@ -44,6 +45,7 @@ impl Output {
4445
const TOKEN_EVENT: u8 = 0;
4546
const ENTER_EVENT: u8 = 1;
4647
const EXIT_EVENT: u8 = 2;
48+
const SPLIT_EVENT: u8 = 3;
4749

4850
pub fn iter(&self) -> impl Iterator<Item = Step<'_>> {
4951
self.event.iter().map(|&event| {
@@ -67,6 +69,9 @@ impl Output {
6769
Step::Enter { kind }
6870
}
6971
Self::EXIT_EVENT => Step::Exit,
72+
Self::SPLIT_EVENT => {
73+
Step::FloatSplit { has_pseudo_dot: event & Self::N_INPUT_TOKEN_MASK != 0 }
74+
}
7075
_ => unreachable!(),
7176
}
7277
})
@@ -79,6 +84,13 @@ impl Output {
7984
self.event.push(e)
8085
}
8186

87+
pub(crate) fn float_split_hack(&mut self, has_pseudo_dot: bool) {
88+
let e = (Self::SPLIT_EVENT as u32) << Self::TAG_SHIFT
89+
| ((has_pseudo_dot as u32) << Self::N_INPUT_TOKEN_SHIFT)
90+
| Self::EVENT_MASK;
91+
self.event.push(e);
92+
}
93+
8294
pub(crate) fn enter_node(&mut self, kind: SyntaxKind) {
8395
let e = ((kind as u16 as u32) << Self::KIND_SHIFT)
8496
| ((Self::ENTER_EVENT as u32) << Self::TAG_SHIFT)

crates/parser/src/parser.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,38 @@ impl<'t> Parser<'t> {
181181
self.do_bump(kind, 1);
182182
}
183183

184+
/// Advances the parser by one token
185+
pub(crate) fn split_float(&mut self, marker: Marker) -> (bool, Marker) {
186+
assert!(self.at(SyntaxKind::FLOAT_NUMBER));
187+
// we have parse `<something>.`
188+
// `<something>`.0.1
189+
// here we need to insert an extra event
190+
//
191+
// `<something>`. 0. 1;
192+
// here we need to change the follow up parse, the return value will cause us to emulate a dot
193+
// the actual splitting happens later
194+
let has_pseudo_dot = !self.inp.is_joint(self.pos);
195+
let marker = if !has_pseudo_dot {
196+
let new_pos = self.start();
197+
let idx = marker.pos as usize;
198+
match &mut self.events[idx] {
199+
Event::Start { forward_parent, kind } => {
200+
*kind = SyntaxKind::FIELD_EXPR;
201+
*forward_parent = Some(new_pos.pos - marker.pos);
202+
}
203+
_ => unreachable!(),
204+
}
205+
// NOTE: This brings the start / finish pairs out of balance!
206+
std::mem::forget(marker);
207+
new_pos
208+
} else {
209+
marker
210+
};
211+
self.pos += 1 as usize;
212+
self.push_event(Event::FloatSplitHack { has_pseudo_dot });
213+
(has_pseudo_dot, marker)
214+
}
215+
184216
/// Advances the parser by one token, remapping its kind.
185217
/// This is useful to create contextual keywords from
186218
/// identifiers. For example, the lexer creates a `union`

crates/parser/src/shortcuts.rs

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,17 @@ impl<'a> LexedStr<'a> {
4444
}
4545
res.push(kind);
4646
}
47-
was_joint = true;
47+
if kind == SyntaxKind::FLOAT_NUMBER {
48+
// we set jointness for floating point numbers as a hack to inform the
49+
// parser about whether we have a `0.` or `0.1` style float
50+
if self.text(i).split_once('.').map_or(false, |(_, it)| it.is_empty()) {
51+
was_joint = false;
52+
} else {
53+
was_joint = true;
54+
}
55+
} else {
56+
was_joint = true;
57+
}
4858
}
4959
}
5060
res
@@ -63,6 +73,7 @@ impl<'a> LexedStr<'a> {
6373
Step::Token { kind, n_input_tokens: n_raw_tokens } => {
6474
builder.token(kind, n_raw_tokens)
6575
}
76+
Step::FloatSplit { has_pseudo_dot } => builder.float_split(has_pseudo_dot),
6677
Step::Enter { kind } => builder.enter(kind),
6778
Step::Exit => builder.exit(),
6879
Step::Error { msg } => {
@@ -109,6 +120,16 @@ impl Builder<'_, '_> {
109120
self.do_token(kind, n_tokens as usize);
110121
}
111122

123+
fn float_split(&mut self, has_pseudo_dot: bool) {
124+
match mem::replace(&mut self.state, State::Normal) {
125+
State::PendingEnter => unreachable!(),
126+
State::PendingExit => (self.sink)(StrStep::Exit),
127+
State::Normal => (),
128+
}
129+
self.eat_trivias();
130+
self.do_float_split(has_pseudo_dot);
131+
}
132+
112133
fn enter(&mut self, kind: SyntaxKind) {
113134
match mem::replace(&mut self.state, State::Normal) {
114135
State::PendingEnter => {
@@ -164,6 +185,37 @@ impl Builder<'_, '_> {
164185
self.pos += n_tokens;
165186
(self.sink)(StrStep::Token { kind, text });
166187
}
188+
189+
fn do_float_split(&mut self, has_pseudo_dot: bool) {
190+
let text = &self.lexed.range_text(self.pos..self.pos + 1);
191+
self.pos += 1;
192+
match text.split_once('.') {
193+
Some((left, right)) => {
194+
assert!(!left.is_empty());
195+
(self.sink)(StrStep::Enter { kind: SyntaxKind::NAME_REF });
196+
(self.sink)(StrStep::Token { kind: SyntaxKind::INT_NUMBER, text: left });
197+
(self.sink)(StrStep::Exit);
198+
199+
// here we move the exit up, the original exit has been deleted in process
200+
(self.sink)(StrStep::Exit);
201+
202+
(self.sink)(StrStep::Token { kind: SyntaxKind::DOT, text: "." });
203+
204+
if has_pseudo_dot {
205+
assert!(right.is_empty());
206+
self.state = State::Normal;
207+
} else {
208+
(self.sink)(StrStep::Enter { kind: SyntaxKind::NAME_REF });
209+
(self.sink)(StrStep::Token { kind: SyntaxKind::INT_NUMBER, text: right });
210+
(self.sink)(StrStep::Exit);
211+
212+
// the parser creates an unbalanced start node, we are required to close it here
213+
self.state = State::PendingExit;
214+
}
215+
}
216+
None => unreachable!(),
217+
}
218+
}
167219
}
168220

169221
fn n_attached_trivias<'a>(

crates/parser/src/tests/prefix_entries.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ fn expr() {
5151
check(PrefixEntryPoint::Expr, "-1", "-1");
5252
check(PrefixEntryPoint::Expr, "fn foo() {}", "fn");
5353
check(PrefixEntryPoint::Expr, "#[attr] ()", "#[attr] ()");
54+
check(PrefixEntryPoint::Expr, "foo.0", "foo.0");
55+
check(PrefixEntryPoint::Expr, "foo.0.1", "foo.0.1");
56+
check(PrefixEntryPoint::Expr, "foo.0. foo", "foo.0. foo");
5457
}
5558

5659
#[test]
@@ -88,6 +91,7 @@ fn check(entry: PrefixEntryPoint, input: &str, prefix: &str) {
8891
for step in entry.parse(&input).iter() {
8992
match step {
9093
Step::Token { n_input_tokens, .. } => n_tokens += n_input_tokens as usize,
94+
Step::FloatSplit { .. } => n_tokens += 1,
9195
Step::Enter { .. } | Step::Exit | Step::Error { .. } => (),
9296
}
9397
}

crates/parser/test_data/parser/inline/ok/0011_field_expr.rast

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,39 @@ SOURCE_FILE
4040
IDENT "bar"
4141
SEMICOLON ";"
4242
WHITESPACE "\n "
43+
EXPR_STMT
44+
FIELD_EXPR
45+
FIELD_EXPR
46+
PATH_EXPR
47+
PATH
48+
PATH_SEGMENT
49+
NAME_REF
50+
IDENT "x"
51+
DOT "."
52+
NAME_REF
53+
INT_NUMBER "0"
54+
DOT "."
55+
NAME_REF
56+
INT_NUMBER "1"
57+
SEMICOLON ";"
58+
WHITESPACE "\n "
59+
EXPR_STMT
60+
FIELD_EXPR
61+
FIELD_EXPR
62+
PATH_EXPR
63+
PATH
64+
PATH_SEGMENT
65+
NAME_REF
66+
IDENT "x"
67+
DOT "."
68+
NAME_REF
69+
INT_NUMBER "0"
70+
DOT "."
71+
WHITESPACE " "
72+
NAME_REF
73+
IDENT "bar"
74+
SEMICOLON ";"
75+
WHITESPACE "\n "
4376
EXPR_STMT
4477
CALL_EXPR
4578
FIELD_EXPR

0 commit comments

Comments
 (0)