Skip to content

Fix lexing #197

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 21, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 27 additions & 18 deletions RustLexer/RustLexer.g4
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
lexer grammar RustLexer;

@lexer::members {
// Helper for semantic predicates in this grammar (SHEBANG and UTF8_BOM call it).
// Returns true when the current index of the lexer input stream equals pos.
// The callers invoke it right after matching a fixed-length prefix, passing
// that prefix length as pos.
public bool is_at(int pos) {
return _input.Index == pos;
}
}


tokens {
EQ, LT, LE, EQEQ, NE, GE, GT, ANDAND, OROR, NOT, TILDE,
EQ, LT, LE, EQEQ, NE, GE, GT, ANDAND, OROR, NOT, TILDE, PLUS,
MINUS, STAR, SLASH, PERCENT, CARET, AND, OR, SHL, SHR, BINOP,
BINOPEQ, AT, DOT, DOTDOT, DOTDOTDOT, COMMA, SEMI, COLON,
BINOPEQ, LARROW, AT, DOT, DOTDOT, DOTDOTDOT, COMMA, SEMI, COLON,
MOD_SEP, RARROW, FAT_ARROW, LPAREN, RPAREN, LBRACKET, RBRACKET,
LBRACE, RBRACE, POUND, DOLLAR, UNDERSCORE, LIT_CHAR,
LIT_INTEGER, LIT_FLOAT, LIT_STR, LIT_STR_RAW, LIT_BINARY,
LIT_BINARY_RAW, IDENT, LIFETIME, WHITESPACE, DOC_COMMENT,
COMMENT, SHEBANG, DOC_BLOCK_COMMENT, BLOCK_COMMENT
LBRACE, RBRACE, POUND, DOLLAR, UNDERSCORE, LIT_CHAR, LIT_BYTE,
LIT_INTEGER, LIT_FLOAT, LIT_STR, LIT_STR_RAW, LIT_BYTE_STR,
LIT_BYTE_STR_RAW, QUESTION, IDENT, LIFETIME, WHITESPACE, DOC_COMMENT,
COMMENT, SHEBANG, UTF8_BOM
}

import xidstart , xidcontinue;
Expand Down Expand Up @@ -37,6 +44,7 @@ AND : '&' ;
OR : '|' ;
SHL : '<<' ;
SHR : '>>' ;
LARROW : '<-' ;

BINOP
: PLUS
Expand All @@ -49,6 +57,7 @@ BINOP
| OR
| SHL
| SHR
| LARROW
;

BINOPEQ : BINOP EQ ;
Expand Down Expand Up @@ -107,15 +116,15 @@ LIT_CHAR
| ~[\\'\n\t\r]
| '\ud800' .. '\udbff' '\udc00' .. '\udfff'
)
('\'' SUFFIX? | '\n'| '\r\n' | EOF)
'\'' SUFFIX?
;

LIT_BYTE
: 'b\'' ( '\\' ( [xX] HEXIT HEXIT
| [nrt\\'"0] )
| ~[\\'\n\t\r] '\udc00'..'\udfff'?
)
('\'' SUFFIX? | '\n'| '\r\n' | EOF)
'\'' SUFFIX?
;

LIT_INTEGER
Expand All @@ -138,21 +147,21 @@ LIT_FLOAT
;

LIT_STR
: '"' ('\\\n' | '\\\r\n' | '\\' CHAR_ESCAPE | .)*? ('"' SUFFIX? | '\n'| '\r\n' | EOF)
: '"' ('\\\n' | '\\\r\n' | '\\' CHAR_ESCAPE | .)*? ('"' SUFFIX? | EOF)
;

LIT_BINARY : 'b' LIT_STR ;
LIT_BINARY_RAW : 'b' LIT_STR_RAW ;
LIT_BYTE_STR : 'b' LIT_STR ;
LIT_BYTE_STR_RAW : 'b' LIT_STR_RAW ;

/* this is a bit messy */

fragment LIT_STR_RAW_INNER
: '"' .*? '"'
: '"' .*? ('"'| EOF)
| LIT_STR_RAW_INNER2
;

fragment LIT_STR_RAW_INNER2
: POUND LIT_STR_RAW_INNER POUND
: POUND LIT_STR_RAW_INNER (POUND | EOF)
;

LIT_STR_RAW
Expand All @@ -166,7 +175,7 @@ IDENT : XID_Start XID_Continue* ;

fragment QUESTION_IDENTIFIER : QUESTION? IDENT;

LIFETIME : '\'' IDENT ;
LIFETIME : '\'' IDENT? ;

WHITESPACE : [ \r\n\t]+ ;

Expand All @@ -176,13 +185,13 @@ OUTER_DOC_COMMENT : '//!' ~[\r\n]* -> type(DOC_COMMENT) ;
LINE_COMMENT : '//' ( ~[/\n] ~[\n]* )? -> type(COMMENT) ;

DOC_BLOCK_COMMENT
: ('/**' ~[*] | '/*!') (DOC_BLOCK_COMMENT | .)*? ('*/' | '\n'| '\r\n' | EOF) -> type(DOC_BLOCK_COMMENT)
: ('/**' ~[*] | '/*!') (DOC_BLOCK_COMMENT | .)*? ('*/' | EOF) -> type(DOC_COMMENT)
;

BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? ('*/' | '\n'| '\r\n' | EOF) -> type(BLOCK_COMMENT) ;
BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? ('*/' | EOF) -> type(COMMENT) ;

/* these appear at the beginning of a file */

SHEBANG : '#!' { isAt(2) && _input.La(1) != '[' }? ~[\r\n]* -> type(SHEBANG) ;
SHEBANG : '#!' { is_at(2) && _input.La(1) != '[' }? ~[\r\n]* -> type(SHEBANG) ;

UTF8_BOM : '\ufeff' { isAt(1) }? -> skip ;
UTF8_BOM : '\ufeff' { is_at(1) }? -> skip ;
155 changes: 0 additions & 155 deletions VisualRust/RustClassifier.cs

This file was deleted.

28 changes: 28 additions & 0 deletions VisualRust/Text/Antlr/Lexer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
using Antlr4.Runtime;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Antlr4.Runtime.Misc;
using Microsoft.VisualStudio.Text;
using System.ComponentModel.Composition;

namespace VisualRust.Text.Antlr
{
    /// <summary>
    /// <see cref="IRustLexer"/> implementation backed by the ANTLR-generated
    /// <c>RustLexer</c>. Exported through MEF via the <c>Export</c> attribute.
    /// </summary>
    [Export(typeof(IRustLexer))]
    public class Lexer : IRustLexer
    {
        /// <summary>
        /// Lazily tokenizes the concatenation of <paramref name="segments"/>.
        /// </summary>
        /// <param name="segments">Consecutive pieces of the text to lex.</param>
        /// <param name="offset">
        /// Value added to every token start index when building its span.
        /// </param>
        /// <returns>
        /// One <see cref="SpannedToken"/> per token produced by the lexer, in
        /// order; enumeration ends when the lexer reports its Eof token, which
        /// is not yielded.
        /// </returns>
        public IEnumerable<SpannedToken> Run(IEnumerable<string> segments, int offset)
        {
            var reader = new TextSegmentsCharStream(segments);
            var charStream = new UnbufferedCharStream(reader);
            var rustLexer = new RustLexer.RustLexer(charStream);

            for (IToken token = rustLexer.NextToken();
                 token.Type != RustLexer.RustLexer.Eof;
                 token = rustLexer.NextToken())
            {
                // StopIndex is inclusive, hence the +1 when computing the length.
                int start = token.StartIndex;
                int length = token.StopIndex - start + 1;
                yield return new SpannedToken(token.Type, new Span(start + offset, length));
            }
        }
    }
}
47 changes: 47 additions & 0 deletions VisualRust/Text/Antlr/TextSegmentsCharStream.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
using Antlr4.Runtime;
using Antlr4.Runtime.Misc;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace VisualRust.Text.Antlr
{
    /// <summary>
    /// A <see cref="TextReader"/> that exposes a sequence of string segments as
    /// one continuous character stream, without concatenating the segments.
    /// </summary>
    /// <remarks>
    /// Empty (or null) segments are skipped. An empty sequence behaves like an
    /// empty stream: <see cref="Read()"/> and <see cref="Peek"/> return -1.
    /// </remarks>
    class TextSegmentsCharStream : TextReader
    {
        private readonly IEnumerator<string> segments;
        // Position of the next unread character inside segments.Current.
        int index;
        // True once the enumerator has run out of segments (or was disposed).
        bool finished;

        /// <summary>Creates a reader over <paramref name="segments"/>.</summary>
        /// <param name="segments">Consecutive pieces of text, read in order.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="segments"/> is null.
        /// </exception>
        public TextSegmentsCharStream(IEnumerable<string> segments)
        {
            if(segments == null)
                throw new ArgumentNullException("segments");
            this.segments = segments.GetEnumerator();
            // An empty sequence means there is nothing to read at all; record it
            // instead of touching an undefined Current later.
            finished = !this.segments.MoveNext();
        }

        /// <summary>Reads and consumes the next character.</summary>
        /// <returns>The character, or -1 when all segments are exhausted.</returns>
        public override int Read()
        {
            if(!EnsureCharacterAvailable())
                return -1;
            return segments.Current[index++];
        }

        /// <summary>Returns the next character without consuming it.</summary>
        /// <returns>The character, or -1 when all segments are exhausted.</returns>
        public override int Peek()
        {
            if(!EnsureCharacterAvailable())
                return -1;
            return segments.Current[index];
        }

        /// <summary>
        /// Moves past exhausted, empty, or null segments until an unread
        /// character is available at <c>segments.Current[index]</c>.
        /// No character is consumed, so it is safe to call from Peek.
        /// </summary>
        /// <returns>False when no characters remain.</returns>
        private bool EnsureCharacterAvailable()
        {
            while(!finished)
            {
                string current = segments.Current;
                if(current != null && index < current.Length)
                    return true;
                index = 0;
                finished = !segments.MoveNext();
            }
            return false;
        }

        /// <summary>Releases the underlying segment enumerator.</summary>
        protected override void Dispose(bool disposing)
        {
            if(disposing)
            {
                // Mark as finished so later Read/Peek calls return -1 rather
                // than touching a disposed enumerator.
                finished = true;
                segments.Dispose();
            }
            base.Dispose(disposing);
        }
    }
}
Loading