4
4
//! on tokens which originated from text. Macros, e.g., can synthesize tokens out
5
5
//! of thin air. So, ideally, the lexer should be an orthogonal crate. It is, however,
6
6
//! convenient to include a text-based lexer here!
7
+ //!
8
+ //! Note that these tokens, unlike the tokens we feed into the parser, do
9
+ //! include info about comments and whitespace.
7
10
8
11
use crate :: {
9
12
SyntaxKind :: { self , * } ,
10
13
T ,
11
14
} ;
12
15
13
- # [ derive ( Debug , Clone , PartialEq , Eq , PartialOrd , Ord , Hash ) ]
14
- pub struct LexerToken {
15
- pub kind : SyntaxKind ,
16
- pub len : usize ,
17
- pub error : Option < String > ,
16
+ pub struct LexedStr < ' a > {
17
+ text : & ' a str ,
18
+ kind : Vec < SyntaxKind > ,
19
+ start : Vec < u32 > ,
20
+ error : Vec < LexError > ,
18
21
}
19
22
20
- impl LexerToken {
21
- pub fn new ( kind : SyntaxKind , len : usize ) -> Self {
22
- Self { kind , len , error : None }
23
- }
23
+ struct LexError {
24
+ msg : String ,
25
+ token : u32 ,
26
+ }
24
27
25
- /// Lexes text as a sequence of tokens.
26
- pub fn tokenize ( text : & str ) -> Vec < LexerToken > {
27
- let mut res = Vec :: new ( ) ;
28
- let mut offset = 0 ;
28
+ impl < ' a > LexedStr < ' a > {
29
+ pub fn new ( text : & ' a str ) -> LexedStr < ' a > {
30
+ let mut res = LexedStr { text, kind : Vec :: new ( ) , start : Vec :: new ( ) , error : Vec :: new ( ) } ;
29
31
32
+ let mut offset = 0 ;
30
33
if let Some ( shebang_len) = rustc_lexer:: strip_shebang ( text) {
31
- res. push ( LexerToken :: new ( SHEBANG , shebang_len ) ) ;
34
+ res. push ( SHEBANG , offset ) ;
32
35
offset = shebang_len
33
36
} ;
34
-
35
37
for token in rustc_lexer:: tokenize ( & text[ offset..] ) {
36
38
let token_text = & text[ offset..] [ ..token. len ] ;
37
- offset += token. len ;
38
39
39
40
let ( kind, err) = from_rustc ( & token. kind , token_text) ;
40
- let mut token = LexerToken :: new ( kind, token. len ) ;
41
- token. error = err. map ( |it| it. to_string ( ) ) ;
42
- res. push ( token) ;
41
+ res. push ( kind, offset) ;
42
+ offset += token. len ;
43
+
44
+ if let Some ( err) = err {
45
+ let token = res. len ( ) as u32 ;
46
+ let msg = err. to_string ( ) ;
47
+ res. error . push ( LexError { msg, token } ) ;
48
+ }
43
49
}
50
+ res. push ( EOF , offset) ;
44
51
45
52
res
46
53
}
47
- /// Lexes text as a single token. Returns `None` if there's leftover text.
48
- pub fn from_str ( text : & str ) -> Option < LexerToken > {
54
+
55
+ pub fn single_token ( text : & ' a str ) -> Option < SyntaxKind > {
49
56
if text. is_empty ( ) {
50
57
return None ;
51
58
}
@@ -56,10 +63,40 @@ impl LexerToken {
56
63
}
57
64
58
65
let ( kind, err) = from_rustc ( & token. kind , text) ;
66
+ if err. is_some ( ) {
67
+ return None ;
68
+ }
69
+
70
+ Some ( kind)
71
+ }
72
+
73
+ pub fn as_str ( & self ) -> & str {
74
+ self . text
75
+ }
76
+
77
+ pub fn len ( & self ) -> usize {
78
+ self . kind . len ( ) - 1
79
+ }
80
+
81
+ pub fn kind ( & self , i : usize ) -> SyntaxKind {
82
+ assert ! ( i < self . len( ) ) ;
83
+ self . kind [ i]
84
+ }
85
+ pub fn text ( & self , i : usize ) -> & str {
86
+ assert ! ( i < self . len( ) ) ;
87
+ let lo = self . start [ i] as usize ;
88
+ let hi = self . start [ i + 1 ] as usize ;
89
+ & self . text [ lo..hi]
90
+ }
91
+ pub fn error ( & self , i : usize ) -> Option < & str > {
92
+ assert ! ( i < self . len( ) ) ;
93
+ let err = self . error . binary_search_by_key ( & ( i as u32 ) , |i| i. token ) . ok ( ) ?;
94
+ Some ( self . error [ err] . msg . as_str ( ) )
95
+ }
59
96
60
- let mut token = LexerToken :: new ( kind, token . len ) ;
61
- token . error = err . map ( |it| it . to_string ( ) ) ;
62
- Some ( token )
97
+ fn push ( & mut self , kind : SyntaxKind , offset : usize ) {
98
+ self . kind . push ( kind ) ;
99
+ self . start . push ( offset as u32 ) ;
63
100
}
64
101
}
65
102
0 commit comments