Commit 9f1a3ae

bors[bot] and matklad authored
Merge #11046

11046: internal: move all the lexing to the parser crate r=matklad a=matklad

bors r+ 🤖

Co-authored-by: Aleksey Kladov <[email protected]>

2 parents 7f28b49 + 92dad47 · commit 9f1a3ae
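The diff below replaces the syntax crate's standalone lexer entry points (`syntax::tokenize`, `syntax::lex_single_syntax_kind`) with `parser::LexedStr` throughout the workspace. As a minimal sketch of how callers drive the new type, with method signatures inferred from the call sites in this diff rather than copied from the crate:

// Sketch only: the shapes of `new`, `errors`, `len`, `kind`, and `text`
// are inferred from how this diff uses parser::LexedStr.
fn dump_tokens(source: &str) {
    let lexed = parser::LexedStr::new(source);
    // Lexing does not fail outright; errors are reported per token
    // as (token index, message) pairs.
    if let Some((token_index, message)) = lexed.errors().next() {
        eprintln!("lex error at token {}: {}", token_index, message);
    }
    // Tokens are addressed by index instead of being handed back as a Vec.
    for i in 0..lexed.len() {
        println!("{:?} {:?}", lexed.kind(i), lexed.text(i));
    }
}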

233 files changed, +971 -1019 lines

Cargo.lock

Lines changed: 5 additions & 0 deletions
(Generated file; diff not rendered by default.)

crates/ide_assists/Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@ itertools = "0.10.0"
 either = "1.6.1"
 
 stdx = { path = "../stdx", version = "0.0.0" }
+parser = { path = "../parser", version = "0.0.0" }
 syntax = { path = "../syntax", version = "0.0.0" }
 text_edit = { path = "../text_edit", version = "0.0.0" }
 profile = { path = "../profile", version = "0.0.0" }

crates/ide_assists/src/utils/suggest_name.rs

Lines changed: 1 addition & 1 deletion
@@ -135,7 +135,7 @@ fn normalize(name: &str) -> Option<String> {
 }
 
 fn is_valid_name(name: &str) -> bool {
-    match syntax::lex_single_syntax_kind(name) {
+    match parser::LexedStr::single_token(name) {
         Some((syntax::SyntaxKind::IDENT, _error)) => true,
         _ => false,
     }
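Callers of `lex_single_syntax_kind` migrate mechanically to `LexedStr::single_token`, which still yields the token kind plus an optional error. A hedged sketch of the same check as a free-standing helper (the `matches!` condensation is mine, not the diff's):

// Sketch: the is_valid_name logic above, condensed with matches!.
fn is_ident(name: &str) -> bool {
    matches!(
        parser::LexedStr::single_token(name),
        Some((syntax::SyntaxKind::IDENT, _))
    )
}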

crates/ide_db/Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -22,6 +22,7 @@ arrayvec = "0.7"
 indexmap = "1.7"
 
 stdx = { path = "../stdx", version = "0.0.0" }
+parser = { path = "../parser", version = "0.0.0" }
 syntax = { path = "../syntax", version = "0.0.0" }
 text_edit = { path = "../text_edit", version = "0.0.0" }
 base_db = { path = "../base_db", version = "0.0.0" }

crates/ide_db/src/rename.rs

Lines changed: 2 additions & 2 deletions
@@ -28,7 +28,7 @@ use hir::{AsAssocItem, FieldSource, HasSource, InFile, ModuleSource, Semantics};
 use stdx::never;
 use syntax::{
     ast::{self, HasName},
-    lex_single_syntax_kind, AstNode, SyntaxKind, TextRange, T,
+    AstNode, SyntaxKind, TextRange, T,
 };
 use text_edit::{TextEdit, TextEditBuilder};
 
@@ -490,7 +490,7 @@ pub enum IdentifierKind {
 
 impl IdentifierKind {
     pub fn classify(new_name: &str) -> Result<IdentifierKind> {
-        match lex_single_syntax_kind(new_name) {
+        match parser::LexedStr::single_token(new_name) {
            Some(res) => match res {
                (SyntaxKind::IDENT, _) => Ok(IdentifierKind::Ident),
                (T![_], _) => Ok(IdentifierKind::Underscore),
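The same one-token entry point drives rename validation; only the match scrutinee changes. An illustrative sketch of the classification shape, with the `Result` plumbing elided (the identifiers are the diff's, the simplified return type is mine):

// Sketch: classify a proposed rename target, returning None where the
// real code would produce a descriptive error.
fn classify_kind(new_name: &str) -> Option<&'static str> {
    use syntax::{SyntaxKind, T};
    match parser::LexedStr::single_token(new_name)? {
        (SyntaxKind::IDENT, _) => Some("ident"),
        (T![_], _) => Some("underscore"),
        _ => None,
    }
}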

crates/ide_ssr/Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@ rustc-hash = "1.1.0"
 itertools = "0.10.0"
 
 text_edit = { path = "../text_edit", version = "0.0.0" }
+parser = { path = "../parser", version = "0.0.0" }
 syntax = { path = "../syntax", version = "0.0.0" }
 ide_db = { path = "../ide_db", version = "0.0.0" }
 hir = { path = "../hir", version = "0.0.0" }

crates/ide_ssr/src/parsing.rs

Lines changed: 4 additions & 10 deletions
@@ -256,19 +256,13 @@ fn validate_rule(rule: &SsrRule) -> Result<(), SsrError> {
 }
 
 fn tokenize(source: &str) -> Result<Vec<Token>, SsrError> {
-    let mut start = 0;
-    let (raw_tokens, errors) = syntax::tokenize(source);
-    if let Some(first_error) = errors.first() {
+    let lexed = parser::LexedStr::new(source);
+    if let Some((_, first_error)) = lexed.errors().next() {
         bail!("Failed to parse pattern: {}", first_error);
     }
     let mut tokens: Vec<Token> = Vec::new();
-    for raw_token in raw_tokens {
-        let token_len = usize::from(raw_token.len);
-        tokens.push(Token {
-            kind: raw_token.kind,
-            text: SmolStr::new(&source[start..start + token_len]),
-        });
-        start += token_len;
+    for i in 0..lexed.len() {
+        tokens.push(Token { kind: lexed.kind(i), text: lexed.text(i).into() });
     }
     Ok(tokens)
 }
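Note how the rewrite drops the hand-maintained `start` offset: `LexedStr` keeps the source string, so each token's text is a slice lookup by index. A sketch of the invariant this relies on (assumed from the diff, not stated by the crate):

// Sketch: concatenating every token's text should reproduce the input,
// since LexedStr slices the original source (assumed behavior).
fn roundtrip(source: &str) -> String {
    let lexed = parser::LexedStr::new(source);
    (0..lexed.len()).map(|i| lexed.text(i)).collect()
}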

crates/mbe/src/syntax_bridge.rs

Lines changed: 50 additions & 50 deletions
@@ -4,10 +4,9 @@ use parser::{ParseError, TreeSink};
 use rustc_hash::{FxHashMap, FxHashSet};
 use syntax::{
     ast::{self, make::tokens::doc_comment},
-    tokenize, AstToken, Parse, PreorderWithTokens, SmolStr, SyntaxElement, SyntaxKind,
+    AstToken, Parse, PreorderWithTokens, SmolStr, SyntaxElement, SyntaxKind,
     SyntaxKind::*,
-    SyntaxNode, SyntaxToken, SyntaxTreeBuilder, TextRange, TextSize, Token as RawToken, WalkEvent,
-    T,
+    SyntaxNode, SyntaxToken, SyntaxTreeBuilder, TextRange, TextSize, WalkEvent, T,
 };
 use tt::buffer::{Cursor, TokenBuffer};
 
@@ -69,15 +68,14 @@ pub fn token_tree_to_syntax_node(
 
 /// Convert a string to a `TokenTree`
 pub fn parse_to_token_tree(text: &str) -> Option<(tt::Subtree, TokenMap)> {
-    let (tokens, errors) = tokenize(text);
-    if !errors.is_empty() {
+    let lexed = parser::LexedStr::new(text);
+    if lexed.errors().next().is_some() {
        return None;
    }
 
    let mut conv = RawConvertor {
-        text,
-        offset: TextSize::default(),
-        inner: tokens.iter(),
+        lexed: lexed,
+        pos: 0,
        id_alloc: TokenIdAlloc {
            map: Default::default(),
            global_offset: TextSize::default(),
@@ -146,7 +144,7 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
            Some(it) => it,
        };
 
-        let k: SyntaxKind = token.kind();
+        let k: SyntaxKind = token.kind(&conv);
        if k == COMMENT {
            if let Some(tokens) = conv.convert_doc_comment(&token) {
                // FIXME: There has to be a better way to do this
@@ -199,19 +197,19 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
        } else {
            let spacing = match conv.peek() {
                Some(next)
-                    if next.kind().is_trivia()
-                        || next.kind() == T!['[']
-                        || next.kind() == T!['{']
-                        || next.kind() == T!['('] =>
+                    if next.kind(&conv).is_trivia()
+                        || next.kind(&conv) == T!['[']
+                        || next.kind(&conv) == T!['{']
+                        || next.kind(&conv) == T!['('] =>
                {
                    tt::Spacing::Alone
                }
-                Some(next) if next.kind().is_punct() && next.kind() != UNDERSCORE => {
+                Some(next) if next.kind(&conv).is_punct() && next.kind(&conv) != UNDERSCORE => {
                    tt::Spacing::Joint
                }
                _ => tt::Spacing::Alone,
            };
-            let char = match token.to_char() {
+            let char = match token.to_char(&conv) {
                Some(c) => c,
                None => {
                    panic!("Token from lexer must be single char: token = {:#?}", token);
@@ -222,7 +220,7 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
        } else {
            macro_rules! make_leaf {
                ($i:ident) => {
-                    tt::$i { id: conv.id_alloc().alloc(range), text: token.to_text() }.into()
+                    tt::$i { id: conv.id_alloc().alloc(range), text: token.to_text(conv) }.into()
                };
            }
            let leaf: tt::Leaf = match k {
@@ -243,7 +241,7 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
 
                    let r = TextRange::at(range.start() + char_unit, range.len() - char_unit);
                    let ident = tt::Leaf::from(tt::Ident {
-                        text: SmolStr::new(&token.to_text()[1..]),
+                        text: SmolStr::new(&token.to_text(conv)[1..]),
                        id: conv.id_alloc().alloc(r),
                    });
                    result.push(ident.into());
@@ -392,22 +390,21 @@ impl TokenIdAlloc {
 
 /// A Raw Token (straightly from lexer) convertor
 struct RawConvertor<'a> {
-    text: &'a str,
-    offset: TextSize,
+    lexed: parser::LexedStr<'a>,
+    pos: usize,
    id_alloc: TokenIdAlloc,
-    inner: std::slice::Iter<'a, RawToken>,
 }
 
-trait SrcToken: std::fmt::Debug {
-    fn kind(&self) -> SyntaxKind;
+trait SrcToken<Ctx>: std::fmt::Debug {
+    fn kind(&self, ctx: &Ctx) -> SyntaxKind;
 
-    fn to_char(&self) -> Option<char>;
+    fn to_char(&self, ctx: &Ctx) -> Option<char>;
 
-    fn to_text(&self) -> SmolStr;
+    fn to_text(&self, ctx: &Ctx) -> SmolStr;
 }
 
-trait TokenConvertor {
-    type Token: SrcToken;
+trait TokenConvertor: Sized {
+    type Token: SrcToken<Self>;
 
    fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>>;
 
@@ -418,42 +415,45 @@ trait TokenConvertor {
    fn id_alloc(&mut self) -> &mut TokenIdAlloc;
 }
 
-impl<'a> SrcToken for (&'a RawToken, &'a str) {
-    fn kind(&self) -> SyntaxKind {
-        self.0.kind
+impl<'a> SrcToken<RawConvertor<'a>> for usize {
+    fn kind(&self, ctx: &RawConvertor<'a>) -> SyntaxKind {
+        ctx.lexed.kind(*self)
    }
 
-    fn to_char(&self) -> Option<char> {
-        self.1.chars().next()
+    fn to_char(&self, ctx: &RawConvertor<'a>) -> Option<char> {
+        ctx.lexed.text(*self).chars().next()
    }
 
-    fn to_text(&self) -> SmolStr {
-        self.1.into()
+    fn to_text(&self, ctx: &RawConvertor<'_>) -> SmolStr {
+        ctx.lexed.text(*self).into()
    }
 }
 
 impl<'a> TokenConvertor for RawConvertor<'a> {
-    type Token = (&'a RawToken, &'a str);
+    type Token = usize;
 
-    fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>> {
-        convert_doc_comment(&doc_comment(token.1))
+    fn convert_doc_comment(&self, token: &usize) -> Option<Vec<tt::TokenTree>> {
+        let text = self.lexed.text(*token);
+        convert_doc_comment(&doc_comment(text))
    }
 
    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
-        let token = self.inner.next()?;
-        let range = TextRange::at(self.offset, token.len);
-        self.offset += token.len;
+        if self.pos == self.lexed.len() {
+            return None;
+        }
+        let token = self.pos;
+        self.pos += 1;
+        let range = self.lexed.text_range(token);
+        let range = TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap());
 
-        Some(((token, &self.text[range]), range))
+        Some((token, range))
    }
 
    fn peek(&self) -> Option<Self::Token> {
-        let token = self.inner.as_slice().get(0);
-
-        token.map(|it| {
-            let range = TextRange::at(self.offset, it.len);
-            (it, &self.text[range])
-        })
+        if self.pos == self.lexed.len() {
+            return None;
+        }
+        Some(self.pos)
    }
 
    fn id_alloc(&mut self) -> &mut TokenIdAlloc {
@@ -523,17 +523,17 @@ impl SynToken {
    }
 }
 
-impl SrcToken for SynToken {
-    fn kind(&self) -> SyntaxKind {
+impl<'a> SrcToken<Convertor<'a>> for SynToken {
+    fn kind(&self, _ctx: &Convertor<'a>) -> SyntaxKind {
        self.token().kind()
    }
-    fn to_char(&self) -> Option<char> {
+    fn to_char(&self, _ctx: &Convertor<'a>) -> Option<char> {
        match self {
            SynToken::Ordinary(_) => None,
            SynToken::Punch(it, i) => it.text().chars().nth((*i).into()),
        }
    }
-    fn to_text(&self) -> SmolStr {
+    fn to_text(&self, _ctx: &Convertor<'a>) -> SmolStr {
        self.token().text().into()
    }
 }
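The structural change in this file is that `SrcToken` gains a context type parameter: a raw token can now be a bare `usize` index into the `LexedStr`, because every accessor receives the convertor as `ctx`. This is also why `TokenConvertor` picks up a `Sized` bound: `Self` now appears as a type argument in `SrcToken<Self>`, and type parameters carry an implicit `Sized` bound. A self-contained sketch of the pattern with illustrative names (not the crate's types):

// Sketch of the context-passing trait pattern: the token is just an index,
// and all token data lives in the context it is resolved against.
trait Tok<Ctx> {
    fn kind(&self, ctx: &Ctx) -> u8;
}

struct Lexed {
    kinds: Vec<u8>,
}

impl Tok<Lexed> for usize {
    fn kind(&self, ctx: &Lexed) -> u8 {
        ctx.kinds[*self]
    }
}

fn main() {
    let lexed = Lexed { kinds: vec![10, 20, 30] };
    let token: usize = 1; // the second token
    assert_eq!(token.kind(&lexed), 20);
}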

crates/mbe/src/to_parser_tokens.rs

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 //! Convert macro-by-example tokens which are specific to macro expansion into a
 //! format that works for our parser.
 
-use syntax::{lex_single_syntax_kind, SyntaxKind, SyntaxKind::*, T};
+use syntax::{SyntaxKind, SyntaxKind::*, T};
 use tt::buffer::TokenBuffer;
 
 pub(crate) fn to_parser_tokens(buffer: &TokenBuffer) -> parser::Tokens {
 
@@ -35,7 +35,7 @@ pub(crate) fn to_parser_tokens(buffer: &TokenBuffer) -> parser::Tokens {
                let is_negated = lit.text.starts_with('-');
                let inner_text = &lit.text[if is_negated { 1 } else { 0 }..];
 
-                let kind = lex_single_syntax_kind(inner_text)
+                let kind = parser::LexedStr::single_token(inner_text)
                    .map(|(kind, _error)| kind)
                    .filter(|kind| {
                        kind.is_literal()
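As elsewhere, the literal-kind check just swaps the lexer entry point. A hedged sketch of the negated-literal handling as a standalone helper (a hypothetical function mirroring the diff's logic, not part of the crate):

// Sketch: determine the literal kind of a token text, allowing a leading '-'.
fn literal_kind(text: &str) -> Option<syntax::SyntaxKind> {
    let inner = text.strip_prefix('-').unwrap_or(text);
    parser::LexedStr::single_token(inner)
        .map(|(kind, _error)| kind)
        .filter(|kind| kind.is_literal())
}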

crates/parser/Cargo.toml

Lines changed: 4 additions & 1 deletion
@@ -11,5 +11,8 @@ doctest = false
 
 [dependencies]
 drop_bomb = "0.1.4"
-
+rustc_lexer = { version = "725.0.0", package = "rustc-ap-rustc_lexer" }
 limit = { path = "../limit", version = "0.0.0" }
+
+[dev-dependencies]
+expect-test = "1.2"
