@@ -78,7 +78,7 @@ class TextTokenRetokenizer {
78
78
char peekNext (unsigned offset) const {
79
79
assert (!isEnd ());
80
80
assert (Pos.BufferPtr != Pos.BufferEnd );
81
- if (Pos.BufferPtr + offset <= Pos.BufferEnd ) {
81
+ if (Pos.BufferPtr + offset < Pos.BufferEnd ) {
82
82
return *(Pos.BufferPtr + offset);
83
83
} else {
84
84
return ' \0 ' ;
@@ -108,7 +108,7 @@ class TextTokenRetokenizer {
108
108
}
109
109
}
110
110
111
- bool continueInt (SmallString<32 > &NextToken) {
111
+ bool shouldContinueLexingIntegralType (SmallString<32 > &NextToken) {
112
112
return NextToken.ends_with (StringRef (" char" )) ||
113
113
NextToken.ends_with (StringRef (" int" )) ||
114
114
NextToken.ends_with (StringRef (" char*" )) ||
@@ -117,39 +117,44 @@ class TextTokenRetokenizer {
117
117
NextToken.ends_with (StringRef (" int&" ));
118
118
}
119
119
120
- bool lexInt (SmallString<32 > &WordText, SmallString<32 > &NextToken) {
120
+ /// Lex an integral type, such as unsigned long long, etc.
121
+ bool lexIntegral (SmallString<32 > &WordText, SmallString<32 > &NextToken) {
121
122
unsigned LongCounter = (WordText.ends_with (StringRef (" long" ))) ? 1 : 0 ;
122
- bool complete = false ;
123
+ bool IsLexingComplete = false ;
123
124
124
125
while (!isEnd ()) {
125
126
const char C = peek ();
126
127
if (!isWhitespace (C)) {
127
128
WordText.push_back (C);
128
129
consumeChar ();
129
130
} else {
130
-
131
131
NextToken.clear ();
132
132
peekNextToken (NextToken);
133
133
134
134
if (WordText.ends_with (StringRef (" long" ))) {
135
135
LongCounter++;
136
- if (continueInt (NextToken)) {
136
+ // Use the next token to determine if we should continue parsing
137
+ if (shouldContinueLexingIntegralType (NextToken)) {
137
138
WordText.push_back (C);
138
139
consumeChar ();
139
- complete = true ;
140
+ IsLexingComplete = true ;
140
141
continue ;
141
- } else {
142
- if (LongCounter == 2 ) {
143
- return true ;
144
- }
145
142
}
146
- } else {
143
+ // Maximum number of consecutive "long" is 2, so we can return if
144
+ // we've hit that.
145
+ if (LongCounter == 2 ) {
146
+ return true ;
147
+ }
148
+ }
147
149
148
- if (complete || continueInt (WordText)) {
150
+ // If current word doesn't end with long, check if we should exit early
151
+ else {
152
+ if (IsLexingComplete || shouldContinueLexingIntegralType (WordText)) {
149
153
return true ;
150
154
}
151
155
}
152
156
157
+ // If next token ends with long then we consume it and continue parsing
153
158
if (NextToken.ends_with (StringRef (" long" ))) {
154
159
WordText.push_back (C);
155
160
consumeChar ();
@@ -206,7 +211,7 @@ class TextTokenRetokenizer {
206
211
return WordText.ends_with (StringRef (" ::" ));
207
212
}
208
213
209
- bool isInt (SmallString<32 > &WordText) {
214
+ bool isIntegral (SmallString<32 > &WordText) {
210
215
return WordText.ends_with (StringRef (" unsigned" )) ||
211
216
WordText.ends_with (StringRef (" long" )) ||
212
217
WordText.ends_with (StringRef (" signed" ));
@@ -280,7 +285,12 @@ class TextTokenRetokenizer {
280
285
bool lexType (Token &Tok) {
281
286
if (isEnd ())
282
287
return false ;
288
+
289
+ // Save current position in case we need to roll back because the type is
290
+ // empty.
283
291
Position SavedPos = Pos;
292
+
293
+ // Consume any leading whitespace.
284
294
consumeWhitespace ();
285
295
SmallString<32 > NextToken;
286
296
SmallString<32 > WordText;
@@ -289,10 +299,12 @@ class TextTokenRetokenizer {
289
299
StringRef ConstVal = StringRef (" const" );
290
300
StringRef PointerVal = StringRef (" *" );
291
301
StringRef ReferenceVal = StringRef (" &" );
292
- bool ConstPointer = false ;
302
+ bool IsTypeConstPointerOrRef = false ;
293
303
294
304
while (!isEnd ()) {
295
305
const char C = peek ();
306
+ // For non-whitespace characters we check if it's a template or otherwise
307
+ // continue reading the text into a word.
296
308
if (!isWhitespace (C)) {
297
309
if (C == ' <' ) {
298
310
if (!lexTemplate (WordText))
@@ -301,47 +313,59 @@ class TextTokenRetokenizer {
301
313
WordText.push_back (C);
302
314
consumeChar ();
303
315
}
304
- } else {
305
- if (ConstPointer) {
316
+ }
317
+ // For whitespace, we start inspecting the constructed word
318
+ else {
319
+ // If the word is a pointer/reference followed by a trailing const, that
320
+ // const has now been read, so the type is complete and we can stop lexing.
321
+ if (IsTypeConstPointerOrRef) {
306
322
consumeChar ();
307
323
break ;
308
- } else {
309
- if (isInt (WordText)) {
310
- WordText.push_back (C);
311
- consumeChar ();
312
- if (!lexInt (WordText, NextToken))
313
- return false ;
314
- }
315
- if (continueParsing (WordText)) {
316
- WordText.push_back (C);
317
- consumeChar ();
324
+ }
325
+ // Parse out integral types
326
+ if (isIntegral (WordText)) {
327
+ WordText.push_back (C);
328
+ consumeChar ();
329
+ if (!lexIntegral (WordText, NextToken))
330
+ return false ;
331
+ }
332
+ // Certain types, like qualified names or types with CVR to name a few,
333
+ // may have whitespace inside of the typename, so we need to check and
334
+ // continue parsing if that's the case
335
+ if (continueParsing (WordText)) {
336
+ WordText.push_back (C);
337
+ consumeChar ();
338
+ }
339
+ // Handles cases without qualified names or type qualifiers
340
+ else {
341
+ NextToken.clear ();
342
+ peekNextToken (NextToken);
343
+ // Check for pointer/ref vals, and mark the type as a pointer/ref for
344
+ // the rest of the lex
345
+ if (WordText.ends_with (PointerVal) ||
346
+ WordText.ends_with (ReferenceVal)) {
347
+ if (NextToken.equals (ConstVal)) {
348
+ IsTypeConstPointerOrRef = true ;
349
+ WordText.push_back (C);
350
+ consumeChar ();
351
+ } else {
352
+ consumeChar ();
353
+ break ;
354
+ }
318
355
} else {
319
- NextToken.clear ();
320
- peekNextToken (NextToken);
321
- if (WordText.ends_with (PointerVal) ||
322
- WordText.ends_with (ReferenceVal)) {
323
- if (NextToken.equals (ConstVal)) {
324
- ConstPointer = true ;
356
+ // Check if the next token is a pointer/ref
357
+ if ((NextToken.ends_with (PointerVal) ||
358
+ NextToken.ends_with (ReferenceVal))) {
359
+ WordText.push_back (C);
360
+ consumeChar ();
361
+ } else {
362
+ if (continueParsing (NextToken)) {
325
363
WordText.push_back (C);
326
364
consumeChar ();
327
365
} else {
328
366
consumeChar ();
329
367
break ;
330
368
}
331
- } else {
332
- if ((NextToken.ends_with (PointerVal) ||
333
- NextToken.ends_with (ReferenceVal))) {
334
- WordText.push_back (C);
335
- consumeChar ();
336
- } else {
337
- if (continueParsing (NextToken)) {
338
- WordText.push_back (C);
339
- consumeChar ();
340
- } else {
341
- consumeChar ();
342
- break ;
343
- }
344
- }
345
369
}
346
370
}
347
371
}
0 commit comments