Skip to content

Commit 5161dff

Browse files
committed
Merge pull request #197 from vosen/incremental-lexer
Fix lexing
2 parents ac05ada + 70f4a94 commit 5161dff

18 files changed

+3474
-191
lines changed

RustLexer/RustLexer.g4

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,21 @@
11
lexer grammar RustLexer;
22

3+
@lexer::members {
4+
public bool is_at(int pos) {
5+
return _input.Index == pos;
6+
}
7+
}
8+
9+
310
tokens {
4-
EQ, LT, LE, EQEQ, NE, GE, GT, ANDAND, OROR, NOT, TILDE,
11+
EQ, LT, LE, EQEQ, NE, GE, GT, ANDAND, OROR, NOT, TILDE, PLUS,
512
MINUS, STAR, SLASH, PERCENT, CARET, AND, OR, SHL, SHR, BINOP,
6-
BINOPEQ, AT, DOT, DOTDOT, DOTDOTDOT, COMMA, SEMI, COLON,
13+
BINOPEQ, LARROW, AT, DOT, DOTDOT, DOTDOTDOT, COMMA, SEMI, COLON,
714
MOD_SEP, RARROW, FAT_ARROW, LPAREN, RPAREN, LBRACKET, RBRACKET,
8-
LBRACE, RBRACE, POUND, DOLLAR, UNDERSCORE, LIT_CHAR,
9-
LIT_INTEGER, LIT_FLOAT, LIT_STR, LIT_STR_RAW, LIT_BINARY,
10-
LIT_BINARY_RAW, IDENT, LIFETIME, WHITESPACE, DOC_COMMENT,
11-
COMMENT, SHEBANG, DOC_BLOCK_COMMENT, BLOCK_COMMENT
15+
LBRACE, RBRACE, POUND, DOLLAR, UNDERSCORE, LIT_CHAR, LIT_BYTE,
16+
LIT_INTEGER, LIT_FLOAT, LIT_STR, LIT_STR_RAW, LIT_BYTE_STR,
17+
LIT_BYTE_STR_RAW, QUESTION, IDENT, LIFETIME, WHITESPACE, DOC_COMMENT,
18+
COMMENT, SHEBANG, UTF8_BOM
1219
}
1320

1421
import xidstart , xidcontinue;
@@ -37,6 +44,7 @@ AND : '&' ;
3744
OR : '|' ;
3845
SHL : '<<' ;
3946
SHR : '>>' ;
47+
LARROW : '<-' ;
4048

4149
BINOP
4250
: PLUS
@@ -49,6 +57,7 @@ BINOP
4957
| OR
5058
| SHL
5159
| SHR
60+
| LARROW
5261
;
5362

5463
BINOPEQ : BINOP EQ ;
@@ -107,15 +116,15 @@ LIT_CHAR
107116
| ~[\\'\n\t\r]
108117
| '\ud800' .. '\udbff' '\udc00' .. '\udfff'
109118
)
110-
('\'' SUFFIX? | '\n'| '\r\n' | EOF)
119+
'\'' SUFFIX?
111120
;
112121

113122
LIT_BYTE
114123
: 'b\'' ( '\\' ( [xX] HEXIT HEXIT
115124
| [nrt\\'"0] )
116125
| ~[\\'\n\t\r] '\udc00'..'\udfff'?
117126
)
118-
('\'' SUFFIX? | '\n'| '\r\n' | EOF)
127+
'\'' SUFFIX?
119128
;
120129

121130
LIT_INTEGER
@@ -138,21 +147,21 @@ LIT_FLOAT
138147
;
139148

140149
LIT_STR
141-
: '"' ('\\\n' | '\\\r\n' | '\\' CHAR_ESCAPE | .)*? ('"' SUFFIX? | '\n'| '\r\n' | EOF)
150+
: '"' ('\\\n' | '\\\r\n' | '\\' CHAR_ESCAPE | .)*? ('"' SUFFIX? | EOF)
142151
;
143152

144-
LIT_BINARY : 'b' LIT_STR ;
145-
LIT_BINARY_RAW : 'b' LIT_STR_RAW ;
153+
LIT_BYTE_STR : 'b' LIT_STR ;
154+
LIT_BYTE_STR_RAW : 'b' LIT_STR_RAW ;
146155

147156
/* this is a bit messy */
148157

149158
fragment LIT_STR_RAW_INNER
150-
: '"' .*? '"'
159+
: '"' .*? ('"'| EOF)
151160
| LIT_STR_RAW_INNER2
152161
;
153162

154163
fragment LIT_STR_RAW_INNER2
155-
: POUND LIT_STR_RAW_INNER POUND
164+
: POUND LIT_STR_RAW_INNER (POUND | EOF)
156165
;
157166

158167
LIT_STR_RAW
@@ -166,7 +175,7 @@ IDENT : XID_Start XID_Continue* ;
166175

167176
fragment QUESTION_IDENTIFIER : QUESTION? IDENT;
168177

169-
LIFETIME : '\'' IDENT ;
178+
LIFETIME : '\'' IDENT? ;
170179

171180
WHITESPACE : [ \r\n\t]+ ;
172181

@@ -176,13 +185,13 @@ OUTER_DOC_COMMENT : '//!' ~[\r\n]* -> type(DOC_COMMENT) ;
176185
LINE_COMMENT : '//' ( ~[/\n] ~[\n]* )? -> type(COMMENT) ;
177186

178187
DOC_BLOCK_COMMENT
179-
: ('/**' ~[*] | '/*!') (DOC_BLOCK_COMMENT | .)*? ('*/' | '\n'| '\r\n' | EOF) -> type(DOC_BLOCK_COMMENT)
188+
: ('/**' ~[*] | '/*!') (DOC_BLOCK_COMMENT | .)*? ('*/' | EOF) -> type(DOC_COMMENT)
180189
;
181190

182-
BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? ('*/' | '\n'| '\r\n' | EOF) -> type(BLOCK_COMMENT) ;
191+
BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? ('*/' | EOF) -> type(COMMENT) ;
183192

184193
/* these appear at the beginning of a file */
185194

186-
SHEBANG : '#!' { isAt(2) && _input.La(1) != '[' }? ~[\r\n]* -> type(SHEBANG) ;
195+
SHEBANG : '#!' { is_at(2) && _input.La(1) != '[' }? ~[\r\n]* -> type(SHEBANG) ;
187196

188-
UTF8_BOM : '\ufeff' { isAt(1) }? -> skip ;
197+
UTF8_BOM : '\ufeff' { is_at(1) }? -> skip ;

VisualRust/RustClassifier.cs

Lines changed: 0 additions & 155 deletions
This file was deleted.

VisualRust/Text/Antlr/Lexer.cs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
using Antlr4.Runtime;
2+
using System;
3+
using System.Collections.Generic;
4+
using System.Linq;
5+
using System.Text;
6+
using System.Threading.Tasks;
7+
using Antlr4.Runtime.Misc;
8+
using Microsoft.VisualStudio.Text;
9+
using System.ComponentModel.Composition;
10+
11+
namespace VisualRust.Text.Antlr
12+
{
13+
/// <summary>
/// <see cref="IRustLexer"/> implementation backed by the ANTLR-generated
/// <c>RustLexer</c>. Exported so it can be obtained through MEF composition.
/// </summary>
[Export(typeof(IRustLexer))]
public class Lexer : IRustLexer
{
    /// <summary>
    /// Lazily lexes the concatenation of <paramref name="segments"/> and yields one
    /// <see cref="SpannedToken"/> per token produced by the lexer, stopping at EOF
    /// (the EOF token itself is not yielded).
    /// </summary>
    /// <param name="segments">Consecutive pieces of text that are lexed as one continuous character stream.</param>
    /// <param name="offset">Value added to every token's start index, so spans can be reported
    /// relative to a larger buffer when lexing starts in the middle of it.</param>
    /// <returns>A deferred sequence of tokens; lexing happens as the sequence is enumerated.</returns>
    public IEnumerable<SpannedToken> Run(IEnumerable<string> segments, int offset)
    {
        // The reader owns an enumerator over the segments. Inside an iterator block the
        // using-statement cleanup runs both when enumeration finishes and when the caller
        // abandons it early (foreach disposes the iterator), so the enumerator is not leaked.
        using (var reader = new TextSegmentsCharStream(segments))
        {
            var lexer = new RustLexer.RustLexer(new UnbufferedCharStream(reader));
            for (IToken current = lexer.NextToken();
                 current.Type != RustLexer.RustLexer.Eof;
                 current = lexer.NextToken())
            {
                // StartIndex/StopIndex are both inclusive, hence the +1 for the length.
                int length = current.StopIndex - current.StartIndex + 1;
                yield return new SpannedToken(current.Type, new Span(current.StartIndex + offset, length));
            }
        }
    }
}
28+
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
using Antlr4.Runtime;
2+
using Antlr4.Runtime.Misc;
3+
using System;
4+
using System.Collections.Generic;
5+
using System.IO;
6+
using System.Linq;
7+
using System.Text;
8+
using System.Threading.Tasks;
9+
10+
namespace VisualRust.Text.Antlr
11+
{
12+
/// <summary>
/// A forward-only <see cref="TextReader"/> that presents a sequence of strings as a
/// single continuous character stream, pulling segments from the source lazily.
/// </summary>
class TextSegmentsCharStream : TextReader
{
    private IEnumerator<string> segments;
    int index;
    bool finished;

    /// <summary>
    /// Creates a reader over <paramref name="segments"/>. The first segment is fetched eagerly.
    /// </summary>
    /// <param name="segments">Text pieces to read, in order. Empty (or null) pieces are skipped.</param>
    /// <exception cref="ArgumentNullException"><paramref name="segments"/> is null.</exception>
    public TextSegmentsCharStream(IEnumerable<string> segments)
    {
        if (segments == null)
            throw new ArgumentNullException("segments");
        this.segments = segments.GetEnumerator();
        // An empty sequence has no valid Current; mark the stream as exhausted right away
        // instead of faulting on the first Read/Peek.
        finished = !this.segments.MoveNext();
    }

    /// <summary>
    /// Reads the next character and advances past it.
    /// </summary>
    /// <returns>The character as an integer, or -1 when all segments are exhausted.</returns>
    public override int Read()
    {
        if (!MoveToAvailableChar())
            return -1;
        return segments.Current[index++];
    }

    /// <summary>
    /// Returns the next character without consuming it.
    /// </summary>
    /// <returns>The character as an integer, or -1 when all segments are exhausted.</returns>
    public override int Peek()
    {
        // Stepping over a segment boundary here does not consume any character, so it is
        // safe for Peek; without it, peeking at the end of a segment would index past its end.
        if (!MoveToAvailableChar())
            return -1;
        return segments.Current[index];
    }

    /// <summary>
    /// Releases the underlying segment enumerator. Subsequent reads return -1.
    /// </summary>
    protected override void Dispose(bool disposing)
    {
        if (disposing && segments != null)
        {
            finished = true;
            segments.Dispose();
            segments = null;
        }
        base.Dispose(disposing);
    }

    /// <summary>
    /// Positions the reader on a readable character, moving to later segments as needed.
    /// </summary>
    /// <returns>true if <c>segments.Current[index]</c> is valid; false if the stream is exhausted.</returns>
    private bool MoveToAvailableChar()
    {
        if (finished)
            return false;
        // Loop rather than a single step: consecutive empty segments must all be skipped.
        while (segments.Current == null || index >= segments.Current.Length)
        {
            if (!segments.MoveNext())
            {
                finished = true;
                return false;
            }
            index = 0;
        }
        return true;
    }
}
47+
}

0 commit comments

Comments
 (0)