1
1
lexer grammar RustLexer;
2
2
3
- @lexer::members {
4
- public boolean is_at(int pos) {
5
- return _input.index() == pos;
6
- }
7
- }
8
-
9
-
10
3
tokens {
11
4
EQ, LT, LE, EQEQ, NE, GE, GT, ANDAND, OROR, NOT, TILDE, PLUT,
12
5
MINUS, STAR, SLASH, PERCENT, CARET, AND, OR, SHL, SHR, BINOP,
@@ -15,10 +8,14 @@ tokens {
15
8
LBRACE, RBRACE, POUND, DOLLAR, UNDERSCORE, LIT_CHAR,
16
9
LIT_INTEGER, LIT_FLOAT, LIT_STR, LIT_STR_RAW, LIT_BINARY,
17
10
LIT_BINARY_RAW, IDENT, LIFETIME, WHITESPACE, DOC_COMMENT,
18
- COMMENT, SHEBANG
11
+ COMMENT
19
12
}
20
13
21
- import xidstart , xidcontinue;
14
+ /* Note: due to antlr limitations, we can't represent XID_start and
15
+ * XID_continue properly. ASCII-only substitute. */
16
+
17
+ fragment XID_start : [_a-zA-Z] ; /* ASCII-only stand-in: underscore or a Latin letter; no whitespace in the set */
18
+ fragment XID_continue : [_a-zA-Z0-9] ; /* ASCII-only stand-in: underscore, Latin letter or decimal digit */
22
19
23
20
24
21
/* Expression-operator symbols */
@@ -93,63 +90,94 @@ fragment CHAR_ESCAPE
93
90
| [xX] HEXIT HEXIT
94
91
| ' u' HEXIT HEXIT HEXIT HEXIT
95
92
| ' U ' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT
96
- | ' u{' HEXIT ' } '
97
- | ' u{' HEXIT HEXIT ' } '
98
- | ' u{' HEXIT HEXIT HEXIT ' } '
99
- | ' u{' HEXIT HEXIT HEXIT HEXIT ' } '
100
- | ' u{' HEXIT HEXIT HEXIT HEXIT HEXIT ' } '
101
- | ' u{' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT ' } '
102
93
;
103
94
104
95
fragment SUFFIX
105
96
: IDENT
106
97
;
107
98
108
- fragment INTEGER_SUFFIX
109
- : { _input.LA(1) != ' e' && _input.LA(1) != ' E ' }? SUFFIX
110
- ;
111
-
112
99
LIT_CHAR
113
- : ' \' ' ( ' \\ ' CHAR_ESCAPE
114
- | ~[\\' \n\t\r ]
115
- | ' \ud800' .. ' \udbff' ' \udc00' .. ' \udfff'
116
- )
117
- ' \' ' SUFFIX ?
100
+ : ' \' ' ( ' \\ ' CHAR_ESCAPE | ~[\\' \n\t\r ] ) ' \' ' SUFFIX ?
118
101
;
119
102
120
103
LIT_BYTE
121
- : ' b\' ' ( ' \\ ' ( [xX] HEXIT HEXIT
122
- | [nrt\\' "0] )
123
- | ~[\\ ' \n\t\r] ' \udc00 ' ..' \udfff ' ?
124
- )
125
- ' \' ' SUFFIX ?
104
+ : ' b\' ' ( ' \\ ' ( [xX] HEXIT HEXIT | [nrt\\' "0] ) | ~[\\ ' \n\t\r] ) ' \' ' SUFFIX ?
126
105
;
127
106
128
107
LIT_INTEGER
129
-
130
- : [0-9][0-9_]* INTEGER_SUFFIX ?
131
- | ' 0b' [01_]+ INTEGER_SUFFIX ?
132
- | ' 0o' [0-7_]+ INTEGER_SUFFIX ?
133
- | ' 0x' [0-9a-fA-F_ ]+ INTEGER_SUFFIX ?
108
+ : [0-9][0-9_]* SUFFIX ?
109
+ | ' 0b' [01][01_]* SUFFIX ?
110
+ | ' 0o' [0-7][0-7_]* SUFFIX ?
111
+ | ' 0x' [0-9a-fA-F ][0-9a-fA-F_ ]* SUFFIX ?
134
112
;
135
113
136
114
LIT_FLOAT
137
115
: [0-9][0-9_]* (' .' {
138
- /* dot followed by another dot is a range, not a float */
116
+ /* dot followed by another dot is a range, not a float */
139
117
_input.LA(1 ) != ' .' &&
140
- /* dot followed by an identifier is an integer with a function call, not a float */
118
+ /* dot followed by an identifier is an integer with a function call, not a float */
141
119
_input.LA(1 ) != ' _' &&
142
- !(_input.LA(1 ) >= ' a' && _input.LA(1 ) <= ' z' ) &&
143
- !(_input.LA(1 ) >= ' A' && _input.LA(1 ) <= ' Z' )
120
+ _input.LA(1 ) != ' a' &&
121
+ _input.LA(1 ) != ' b' &&
122
+ _input.LA(1 ) != ' c' &&
123
+ _input.LA(1 ) != ' d' &&
124
+ _input.LA(1 ) != ' e' &&
125
+ _input.LA(1 ) != ' f' &&
126
+ _input.LA(1 ) != ' g' &&
127
+ _input.LA(1 ) != ' h' &&
128
+ _input.LA(1 ) != ' i' &&
129
+ _input.LA(1 ) != ' j' &&
130
+ _input.LA(1 ) != ' k' &&
131
+ _input.LA(1 ) != ' l' &&
132
+ _input.LA(1 ) != ' m' &&
133
+ _input.LA(1 ) != ' n' &&
134
+ _input.LA(1 ) != ' o' &&
135
+ _input.LA(1 ) != ' p' &&
136
+ _input.LA(1 ) != ' q' &&
137
+ _input.LA(1 ) != ' r' &&
138
+ _input.LA(1 ) != ' s' &&
139
+ _input.LA(1 ) != ' t' &&
140
+ _input.LA(1 ) != ' u' &&
141
+ _input.LA(1 ) != ' v' &&
142
+ _input.LA(1 ) != ' w' &&
143
+ _input.LA(1 ) != ' x' &&
144
+ _input.LA(1 ) != ' y' &&
145
+ _input.LA(1 ) != ' z' &&
146
+ _input.LA(1 ) != ' A' &&
147
+ _input.LA(1 ) != ' B' &&
148
+ _input.LA(1 ) != ' C' &&
149
+ _input.LA(1 ) != ' D' &&
150
+ _input.LA(1 ) != ' E' &&
151
+ _input.LA(1 ) != ' F' &&
152
+ _input.LA(1 ) != ' G' &&
153
+ _input.LA(1 ) != ' H' &&
154
+ _input.LA(1 ) != ' I' &&
155
+ _input.LA(1 ) != ' J' &&
156
+ _input.LA(1 ) != ' K' &&
157
+ _input.LA(1 ) != ' L' &&
158
+ _input.LA(1 ) != ' M' &&
159
+ _input.LA(1 ) != ' N' &&
160
+ _input.LA(1 ) != ' O' &&
161
+ _input.LA(1 ) != ' P' &&
162
+ _input.LA(1 ) != ' Q' &&
163
+ _input.LA(1 ) != ' R' &&
164
+ _input.LA(1 ) != ' S' &&
165
+ _input.LA(1 ) != ' T' &&
166
+ _input.LA(1 ) != ' U' &&
167
+ _input.LA(1 ) != ' V' &&
168
+ _input.LA(1 ) != ' W' &&
169
+ _input.LA(1 ) != ' X' &&
170
+ _input.LA(1 ) != ' Y' &&
171
+ _input.LA(1 ) != ' Z'
144
172
} ? | (' .' [0-9][0-9_]*)? ([eE] [-+]? [0-9][0-9_]*)? SUFFIX ?)
145
173
;
146
174
147
175
LIT_STR
148
176
: ' "' (' \\\n ' | ' \\\r\n ' | ' \\ ' CHAR_ESCAPE | .)*? ' "' SUFFIX ?
149
177
;
150
178
151
- LIT_BINARY : ' b' LIT_STR ;
152
- LIT_BINARY_RAW : ' b ' LIT_STR_RAW ;
179
+ LIT_BINARY : 'b' LIT_STR SUFFIX? ; /* byte string: the letter b immediately followed by a string literal */
180
+ LIT_BINARY_RAW : 'rb' LIT_STR_RAW SUFFIX? ; /* raw byte string: prefix immediately followed by a raw string literal. NOTE(review): Rust writes raw byte strings with the prefix br; if LIT_STR_RAW itself begins with r, this prefix should be just b. Confirm against LIT_STR_RAW. */
153
181
154
182
/* this is a bit messy */
155
183
@@ -169,27 +197,21 @@ LIT_STR_RAW
169
197
170
198
QUESTION : '?' ; /* a single question mark, with no surrounding whitespace */
171
199
172
- IDENT : XID_Start XID_Continue * ;
200
+ IDENT : XID_start XID_continue * ;
173
201
174
202
fragment QUESTION_IDENTIFIER : QUESTION ? IDENT ;
175
203
176
204
LIFETIME : '\'' IDENT ; /* an apostrophe immediately followed by an identifier */
177
205
178
206
WHITESPACE : [ \r\n\t]+ ;
179
207
180
- UNDOC_COMMENT : ' ////' ~[\n]* -> type(COMMENT ) ;
208
+ UNDOC_COMMENT : '////' ~[\r\n]* -> type(COMMENT) ; /* four slashes, then everything up to the line terminator; emitted as COMMENT */
181
209
YESDOC_COMMENT : '///' ~[\r\n]* -> type(DOC_COMMENT) ; /* three slashes, then everything up to the line terminator; emitted as DOC_COMMENT */
182
210
OUTER_DOC_COMMENT : '//!' ~[\r\n]* -> type(DOC_COMMENT) ; /* two slashes and a bang, then everything up to the line terminator; emitted as DOC_COMMENT */
183
- LINE_COMMENT : ' //' ( ~[/\n] ~[ \n]* )? -> type(COMMENT ) ;
211
+ LINE_COMMENT : '//' ~[\r\n]* -> type(COMMENT) ; /* two slashes, then everything (spaces included) up to the line terminator; emitted as COMMENT */
184
212
185
213
DOC_BLOCK_COMMENT
186
214
: (' /**' ~[*] | ' /*!' ) (DOC_BLOCK_COMMENT | .)*? ' */' -> type(DOC_COMMENT )
187
215
;
188
216
189
217
BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? '*/' -> type(COMMENT) ; // recursive, so block comments may nest; emitted as COMMENT
190
-
191
- /* these appear at the beginning of a file */
192
-
193
- SHEBANG : ' #!' { is_at(2 ) && _input.LA(1 ) != ' [' } ? ~[\r\n]* -> type(SHEBANG ) ;
194
-
195
- UTF8_BOM : ' \ufeff ' { is_at(1 ) } ? -> skip ;
0 commit comments