15
15
package com .google .googlejavaformat .java ;
16
16
17
17
import static com .google .common .base .Preconditions .checkArgument ;
18
+ import static com .google .common .base .Preconditions .checkElementIndex ;
18
19
import static java .util .Arrays .stream ;
19
20
20
21
import com .google .common .collect .ImmutableList ;
28
29
import com .sun .tools .javac .parser .Tokens .TokenKind ;
29
30
import com .sun .tools .javac .parser .UnicodeReader ;
30
31
import com .sun .tools .javac .util .Context ;
32
+ import java .lang .reflect .Method ;
31
33
import java .util .ArrayList ;
32
34
import java .util .Collections ;
33
35
import java .util .Comparator ;
34
36
import java .util .HashSet ;
35
37
import java .util .List ;
36
38
import java .util .Objects ;
37
39
import java .util .Set ;
40
+ import org .checkerframework .checker .nullness .qual .Nullable ;
38
41
39
42
/** A wrapper around javac's lexer. */
40
- class JavacTokens {
43
+ final class JavacTokens {
41
44
42
45
/** The lexer eats terminal comments, so feed it one we don't care about. */
43
46
// TODO(b/33103797): fix javac and remove the work-around
@@ -51,6 +54,8 @@ static class RawTok {
51
54
private final int endPos ;
52
55
53
56
RawTok (String stringVal , TokenKind kind , int pos , int endPos ) {
57
+ checkElementIndex (pos , endPos , "pos" );
58
+ checkArgument (pos < endPos , "expected pos (%s) < endPos (%s)" , pos , endPos );
54
59
this .stringVal = stringVal ;
55
60
this .kind = kind ;
56
61
this .pos = pos ;
@@ -136,13 +141,30 @@ public static ImmutableList<RawTok> getTokens(
136
141
int last = 0 ;
137
142
for (Token t : javacTokens ) {
138
143
if (t .comments != null ) {
144
+ // javac accumulates comments in reverse order
139
145
for (Comment c : Lists .reverse (t .comments )) {
140
- if (last < c .getSourcePos (0 )) {
141
- tokens .add (new RawTok (null , null , last , c .getSourcePos (0 )));
146
+ int pos = c .getSourcePos (0 );
147
+ int length ;
148
+ if (pos == -1 ) {
149
+ // We've found a comment whose position hasn't been recorded. Deduce its position as the
150
+ // first `/` character after the end of the previous token.
151
+ //
152
+ // javac creates a new JavaTokenizer to process string template arguments, so
153
+ // CommentSavingTokenizer doesn't get a chance to preprocess those comments and save
154
+ // their text and positions.
155
+ //
156
+ // TODO: consider always using this approach once the minimum supported JDK is 16 and
157
+ // we can assume BasicComment#getRawCharacters is always available.
158
+ pos = source .indexOf ('/' , last );
159
+ length = CommentSavingTokenizer .commentLength (c );
160
+ } else {
161
+ length = c .getText ().length ();
142
162
}
143
- tokens .add (
144
- new RawTok (null , null , c .getSourcePos (0 ), c .getSourcePos (0 ) + c .getText ().length ()));
145
- last = c .getSourcePos (0 ) + c .getText ().length ();
163
+ if (last < pos ) {
164
+ tokens .add (new RawTok (null , null , last , pos ));
165
+ }
166
+ tokens .add (new RawTok (null , null , pos , pos + length ));
167
+ last = pos + length ;
146
168
}
147
169
}
148
170
if (stopTokens .contains (t .kind )) {
@@ -181,6 +203,32 @@ public static ImmutableList<RawTok> getTokens(
181
203
182
204
/** A {@link JavaTokenizer} that saves comments. */
183
205
static class CommentSavingTokenizer extends JavaTokenizer {
206
+
207
+ private static final Method GET_RAW_CHARACTERS_METHOD = getRawCharactersMethod ();
208
+
209
+ private static @ Nullable Method getRawCharactersMethod () {
210
+ try {
211
+ // This is a method in PositionTrackingReader, but that class is not public.
212
+ return BasicComment .class .getMethod ("getRawCharacters" );
213
+ } catch (NoSuchMethodException e ) {
214
+ return null ;
215
+ }
216
+ }
217
+
218
+ static int commentLength (Comment comment ) {
219
+ if (comment instanceof BasicComment && GET_RAW_CHARACTERS_METHOD != null ) {
220
+ // If we've seen a BasicComment instead of a CommentWithTextAndPosition, getText() will
221
+ // be null, so we deduce the length using getRawCharacters. See also the comment at the
222
+ // usage of this method in getTokens.
223
+ try {
224
+ return ((char []) GET_RAW_CHARACTERS_METHOD .invoke (((BasicComment ) comment ))).length ;
225
+ } catch (ReflectiveOperationException e ) {
226
+ throw new LinkageError (e .getMessage (), e );
227
+ }
228
+ }
229
+ return comment .getText ().length ();
230
+ }
231
+
184
232
/** Creates the tokenizer by passing the factory, buffer, and length to the superclass. */
CommentSavingTokenizer (ScannerFactory fac , char [] buffer , int length ) {
185
233
super (fac , buffer , length );
186
234
}
@@ -288,4 +336,6 @@ protected AccessibleReader(ScannerFactory fac, char[] buffer, int length) {
288
336
super (fac , buffer , length );
289
337
}
290
338
}
339
+
340
// Private constructor: this class cannot be instantiated from outside.
+ private JavacTokens () {}
291
341
}
0 commit comments