Skip to content

Commit 6d8d7c4

Browse files
committed
handle arbitrary length _* in e _* suffix
1 parent a9af5ee commit 6d8d7c4

File tree

1 file changed

+70
-28
lines changed
  • compiler/rustc_lexer/src

1 file changed

+70
-28
lines changed

compiler/rustc_lexer/src/lib.rs

Lines changed: 70 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -356,17 +356,6 @@ pub fn is_ident(string: &str) -> bool {
356356
}
357357
}
358358

359-
/// Is the character after the 'e' in a number valid for an exponent?
360-
///
361-
/// If not, the number will be passed to the parser with a suffix beginning with 'e' rather
362-
/// than an exponent (and will be rejected there).
363-
///
364-
/// The way this function is written means that `1e_` is considered an invalid exponent
365-
/// rather than a number with suffix.
366-
fn is_exponent_second(ch: char) -> bool {
367-
matches!(ch, '0'..='9' | '_' | '+' | '-')
368-
}
369-
370359
impl Cursor<'_> {
371360
/// Parses a token from the input string.
372361
pub fn advance_token(&mut self) -> Token {
@@ -420,9 +409,7 @@ impl Cursor<'_> {
420409

421410
// Numeric literal.
422411
c @ '0'..='9' => {
423-
let literal_kind = self.number(c);
424-
let suffix_start = self.pos_within_token();
425-
self.eat_literal_suffix();
412+
let (literal_kind, suffix_start) = self.number(c);
426413
TokenKind::Literal { kind: literal_kind, suffix_start }
427414
}
428415

@@ -617,7 +604,7 @@ impl Cursor<'_> {
617604
}
618605
}
619606

620-
fn number(&mut self, first_digit: char) -> LiteralKind {
607+
fn number(&mut self, first_digit: char) -> (LiteralKind, u32) {
621608
debug_assert!('0' <= self.prev() && self.prev() <= '9');
622609
let mut base = Base::Decimal;
623610
if first_digit == '0' {
@@ -627,21 +614,27 @@ impl Cursor<'_> {
627614
base = Base::Binary;
628615
self.bump();
629616
if !self.eat_decimal_digits() {
630-
return Int { base, empty_int: true };
617+
let suffix_start = self.pos_within_token();
618+
self.eat_literal_suffix();
619+
return (Int { base, empty_int: true }, suffix_start);
631620
}
632621
}
633622
'o' => {
634623
base = Base::Octal;
635624
self.bump();
636625
if !self.eat_decimal_digits() {
637-
return Int { base, empty_int: true };
626+
let suffix_start = self.pos_within_token();
627+
self.eat_literal_suffix();
628+
return (Int { base, empty_int: true }, suffix_start);
638629
}
639630
}
640631
'x' => {
641632
base = Base::Hexadecimal;
642633
self.bump();
643634
if !self.eat_hexadecimal_digits() {
644-
return Int { base, empty_int: true };
635+
let suffix_start = self.pos_within_token();
636+
self.eat_literal_suffix();
637+
return (Int { base, empty_int: true }, suffix_start);
645638
}
646639
}
647640
// Not a base prefix; consume additional digits.
@@ -653,40 +646,88 @@ impl Cursor<'_> {
653646
'.' | 'e' | 'E' => {}
654647

655648
// Just a 0.
656-
_ => return Int { base, empty_int: false },
649+
_ => {
650+
let suffix_start = self.pos_within_token();
651+
self.eat_literal_suffix();
652+
return (Int { base, empty_int: false }, suffix_start);
653+
}
657654
}
658655
} else {
659656
// No base prefix, parse number in the usual way.
660657
self.eat_decimal_digits();
661658
};
662659

663-
match self.first() {
660+
match (self.first(), self.second()) {
664661
// Don't be greedy if this is actually an
665662
// integer literal followed by field/method access or a range pattern
666663
// (`0..2` and `12.foo()`)
667-
'.' if self.second() != '.' && !is_id_start(self.second()) => {
664+
('.', second) if second != '.' && !is_id_start(second) => {
668665
// might have stuff after the ., and if it does, it needs to start
669666
// with a number
670667
self.bump();
671668
let mut empty_exponent = false;
669+
let mut suffix_start = self.pos_within_token();
672670
if self.first().is_ascii_digit() {
673671
self.eat_decimal_digits();
674-
match self.first() {
675-
'e' | 'E' if is_exponent_second(self.second()) => {
672+
// This will be the start of the suffix if there is no exponent
673+
suffix_start = self.pos_within_token();
674+
match (self.first(), self.second()) {
675+
('e' | 'E', '_') => {
676+
// Check whether the run of `_` is followed by a digit. If so,
677+
// include it in the number as the exponent. If not, include
678+
// it in the suffix.
679+
self.bump();
680+
while matches!(self.first(), '_') {
681+
self.bump();
682+
}
683+
if self.first().is_ascii_digit() {
684+
self.eat_decimal_digits();
685+
suffix_start = self.pos_within_token();
686+
}
687+
}
688+
('e' | 'E', '0'..'9' | '+' | '-') => {
689+
// definitely an exponent
676690
self.bump();
677691
empty_exponent = !self.eat_float_exponent();
692+
suffix_start = self.pos_within_token();
678693
}
679694
_ => (),
680695
}
681696
}
682-
Float { base, empty_exponent }
697+
self.eat_literal_suffix();
698+
(Float { base, empty_exponent }, suffix_start)
699+
}
700+
('e' | 'E', '_') => {
701+
// see above block for similar approach
702+
let non_exponent_suffix_start = self.pos_within_token();
703+
self.bump();
704+
while matches!(self.first(), '_') {
705+
self.bump();
706+
}
707+
if self.first().is_ascii_digit() {
708+
self.eat_decimal_digits();
709+
let suffix_start = self.pos_within_token();
710+
self.eat_literal_suffix();
711+
(Float { base, empty_exponent: false }, suffix_start)
712+
} else {
713+
// No digit means suffix, and therefore int
714+
self.eat_literal_suffix();
715+
(Int { base, empty_int: false }, non_exponent_suffix_start)
716+
}
683717
}
684-
'e' | 'E' if is_exponent_second(self.second()) => {
718+
('e' | 'E', '0'..='9' | '+' | '-') => {
719+
// definitely an exponent
685720
self.bump();
686721
let empty_exponent = !self.eat_float_exponent();
687-
Float { base, empty_exponent }
722+
let suffix_start = self.pos_within_token();
723+
self.eat_literal_suffix();
724+
(Float { base, empty_exponent }, suffix_start)
725+
}
726+
_ => {
727+
let suffix_start = self.pos_within_token();
728+
self.eat_literal_suffix();
729+
(Int { base, empty_int: false }, suffix_start)
688730
}
689-
_ => Int { base, empty_int: false },
690731
}
691732
}
692733

@@ -935,6 +976,7 @@ impl Cursor<'_> {
935976
}
936977
}
937978

979+
/// Returns `true` if a digit was consumed (rather than just '_')
938980
fn eat_decimal_digits(&mut self) -> bool {
939981
let mut has_digits = false;
940982
loop {
@@ -972,7 +1014,7 @@ impl Cursor<'_> {
9721014
/// Eats the float exponent. Returns true if at least one digit was met,
9731015
/// and returns false otherwise.
9741016
fn eat_float_exponent(&mut self) -> bool {
975-
debug_assert!(self.prev() == 'e' || self.prev() == 'E');
1017+
debug_assert!(matches!(self.prev(), 'e' | 'E'));
9761018
if self.first() == '-' || self.first() == '+' {
9771019
self.bump();
9781020
}

0 commit comments

Comments
 (0)