28
28
import com .sun .tools .javac .parser .Tokens .TokenKind ;
29
29
import com .sun .tools .javac .parser .UnicodeReader ;
30
30
import com .sun .tools .javac .util .Context ;
31
+ import java .util .ArrayList ;
32
+ import java .util .Collections ;
33
+ import java .util .Comparator ;
34
+ import java .util .HashSet ;
35
+ import java .util .List ;
31
36
import java .util .Objects ;
32
37
import java .util .Set ;
33
38
@@ -83,22 +88,53 @@ static boolean isStringFragment(TokenKind kind) {
83
88
return STRINGFRAGMENT != null && Objects .equals (kind , STRINGFRAGMENT );
84
89
}
85
90
86
- /** Lex the input and return a list of {@link RawTok}s. */
87
- public static ImmutableList <RawTok > getTokens (
88
- String source , Context context , Set <TokenKind > stopTokens ) {
91
+ private static ImmutableList <Token > readAllTokens (
92
+ String source , Context context , Set <Integer > nonTerminalStringFragments ) {
89
93
if (source == null ) {
90
94
return ImmutableList .of ();
91
95
}
92
96
ScannerFactory fac = ScannerFactory .instance (context );
93
97
char [] buffer = (source + EOF_COMMENT ).toCharArray ();
94
98
Scanner scanner =
95
99
new AccessibleScanner (fac , new CommentSavingTokenizer (fac , buffer , buffer .length ));
100
+ List <Token > tokens = new ArrayList <>();
101
+ do {
102
+ scanner .nextToken ();
103
+ tokens .add (scanner .token ());
104
+ } while (scanner .token ().kind != TokenKind .EOF );
105
+ for (int i = 0 ; i < tokens .size (); i ++) {
106
+ if (isStringFragment (tokens .get (i ).kind )) {
107
+ int start = i ;
108
+ while (isStringFragment (tokens .get (i ).kind )) {
109
+ i ++;
110
+ }
111
+ for (int j = start ; j < i - 1 ; j ++) {
112
+ nonTerminalStringFragments .add (tokens .get (j ).pos );
113
+ }
114
+ }
115
+ }
116
+ // A string template is tokenized as a series of STRINGFRAGMENT tokens containing the string
117
+ // literal values, followed by the tokens for the template arguments. For the formatter, we
118
+ // want the stream of tokens to appear in order by their start position.
119
+ if (Runtime .version ().feature () >= 21 ) {
120
+ Collections .sort (tokens , Comparator .comparingInt (t -> t .pos ));
121
+ }
122
+ return ImmutableList .copyOf (tokens );
123
+ }
124
+
125
+ /** Lex the input and return a list of {@link RawTok}s. */
126
+ public static ImmutableList <RawTok > getTokens (
127
+ String source , Context context , Set <TokenKind > stopTokens ) {
128
+ if (source == null ) {
129
+ return ImmutableList .of ();
130
+ }
131
+ Set <Integer > nonTerminalStringFragments = new HashSet <>();
132
+ ImmutableList <Token > javacTokens = readAllTokens (source , context , nonTerminalStringFragments );
133
+
96
134
ImmutableList .Builder <RawTok > tokens = ImmutableList .builder ();
97
135
int end = source .length ();
98
136
int last = 0 ;
99
- do {
100
- scanner .nextToken ();
101
- Token t = scanner .token ();
137
+ for (Token t : javacTokens ) {
102
138
if (t .comments != null ) {
103
139
for (Comment c : Lists .reverse (t .comments )) {
104
140
if (last < c .getSourcePos (0 )) {
@@ -118,27 +154,12 @@ public static ImmutableList<RawTok> getTokens(
118
154
if (last < t .pos ) {
119
155
tokens .add (new RawTok (null , null , last , t .pos ));
120
156
}
121
- int pos = t .pos ;
122
- int endPos = t .endPos ;
123
157
if (isStringFragment (t .kind )) {
124
- // A string template is tokenized as a series of STRINGFRAGMENT tokens containing the string
125
- // literal values, followed by the tokens for the template arguments. For the formatter, we
126
- // want the stream of tokens to appear in order by their start position, and also to have
127
- // all the content from the original source text (including leading and trailing ", and the
128
- // \ escapes from template arguments). This logic processes the token stream from javac to
129
- // meet those requirements.
130
- while (isStringFragment (t .kind )) {
131
- endPos = t .endPos ;
132
- scanner .nextToken ();
133
- t = scanner .token ();
134
- }
135
- // Read tokens for the string template arguments, until we read the end of the string
136
- // template. The last token in a string template is always a trailing string fragment. Use
137
- // lookahead to defer reading the token after the template until the next iteration of the
138
- // outer loop.
139
- while (scanner .token (/* lookahead= */ 1 ).endPos < endPos ) {
140
- scanner .nextToken ();
141
- t = scanner .token ();
158
+ int endPos = t .endPos ;
159
+ int pos = t .pos ;
160
+ if (nonTerminalStringFragments .contains (t .pos )) {
161
+ // Include the \ escape from \{...} in the preceding string fragment
162
+ endPos ++;
142
163
}
143
164
tokens .add (new RawTok (source .substring (pos , endPos ), t .kind , pos , endPos ));
144
165
last = endPos ;
@@ -151,7 +172,7 @@ public static ImmutableList<RawTok> getTokens(
151
172
t .endPos ));
152
173
last = t .endPos ;
153
174
}
154
- } while ( scanner . token (). kind != TokenKind . EOF );
175
+ }
155
176
if (last < end ) {
156
177
tokens .add (new RawTok (null , null , last , end ));
157
178
}
0 commit comments