12
12
13
13
#include " TGLexer.h"
14
14
#include " llvm/ADT/ArrayRef.h"
15
+ #include " llvm/ADT/StringExtras.h"
15
16
#include " llvm/ADT/StringSwitch.h"
16
17
#include " llvm/ADT/Twine.h"
17
18
#include " llvm/Config/config.h" // for strtoull()/strtoll() define
20
21
#include " llvm/Support/SourceMgr.h"
21
22
#include " llvm/TableGen/Error.h"
22
23
#include < algorithm>
23
- #include < cctype>
24
24
#include < cerrno>
25
25
#include < cstdint>
26
26
#include < cstdio>
@@ -38,6 +38,17 @@ struct PreprocessorDir {
38
38
};
39
39
} // end anonymous namespace
40
40
41
+ // / Returns true if `C` is a valid character in an identifier. If `First` is
42
+ // / true, returns true if `C` is a valid first character of an identifier,
43
+ // / else returns true if `C` is a valid non-first character of an identifier.
44
+ // / Identifiers match the following regular expression:
45
+ // / [a-zA-Z_][0-9a-zA-Z_]*
46
+ static bool isValidIDChar (char C, bool First) {
47
+ if (C == ' _' || isAlpha (C))
48
+ return true ;
49
+ return !First && isDigit (C);
50
+ }
51
+
41
52
constexpr PreprocessorDir PreprocessorDirs[] = {{tgtok::Ifdef, " ifdef" },
42
53
{tgtok::Ifndef, " ifndef" },
43
54
{tgtok::Else, " else" },
@@ -51,14 +62,14 @@ static const char *lexMacroName(StringRef Str) {
51
62
52
63
// Macro names start with [a-zA-Z_].
53
64
const char *Next = Str.begin ();
54
- if (*Next != ' _ ' && ! isalpha (*Next ))
65
+ if (! isValidIDChar ( *Next, /* First= */ true ))
55
66
return Next;
56
67
// Eat the first character of the name.
57
68
++Next;
58
69
59
70
// Match the rest of the identifier regex: [0-9a-zA-Z_]*
60
71
const char *End = Str.end ();
61
- while (Next != End && ( isalpha ( *Next) || isdigit (*Next) || *Next == ' _ ' ))
72
+ while (Next != End && isValidIDChar ( *Next, /* First= */ false ))
62
73
++Next;
63
74
return Next;
64
75
}
@@ -173,7 +184,7 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
173
184
switch (CurChar) {
174
185
default :
175
186
// Handle letters: [a-zA-Z_]
176
- if (isalpha (CurChar) || CurChar == ' _ ' )
187
+ if (isValidIDChar (CurChar, /* First= */ true ) )
177
188
return LexIdentifier ();
178
189
179
190
// Unknown character, emit an error.
@@ -250,14 +261,14 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
250
261
case ' 0' : case ' 1' : case ' 2' : case ' 3' : case ' 4' : case ' 5' : case ' 6' :
251
262
case ' 7' : case ' 8' : case ' 9' : {
252
263
int NextChar = 0 ;
253
- if (isdigit (CurChar)) {
264
+ if (isDigit (CurChar)) {
254
265
// Allow identifiers to start with a number if it is followed by
255
266
// an identifier. This can happen with paste operations like
256
267
// foo#8i.
257
268
int i = 0 ;
258
269
do {
259
270
NextChar = peekNextChar (i++);
260
- } while (isdigit (NextChar));
271
+ } while (isDigit (NextChar));
261
272
262
273
if (NextChar == ' x' || NextChar == ' b' ) {
263
274
// If this is [0-9]b[01] or [0-9]x[0-9A-Fa-f] this is most
@@ -281,7 +292,7 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
281
292
}
282
293
}
283
294
284
- if (isalpha (NextChar) || NextChar == ' _ ' )
295
+ if (isValidIDChar (NextChar, /* First= */ true ) )
285
296
return LexIdentifier ();
286
297
287
298
return LexNumber ();
@@ -347,13 +358,13 @@ tgtok::TokKind TGLexer::LexString() {
347
358
}
348
359
349
360
tgtok::TokKind TGLexer::LexVarName () {
350
- if (!isalpha (CurPtr[0 ]) && CurPtr[ 0 ] != ' _ ' )
361
+ if (!isValidIDChar (CurPtr[0 ], /* First= */ true ) )
351
362
return ReturnError (TokStart, " Invalid variable name" );
352
363
353
364
// Otherwise, we're ok, consume the rest of the characters.
354
365
const char *VarNameStart = CurPtr++;
355
366
356
- while (isalpha (*CurPtr) || isdigit (*CurPtr) || *CurPtr == ' _ ' )
367
+ while (isValidIDChar (*CurPtr, /* First= */ false ) )
357
368
++CurPtr;
358
369
359
370
CurStrVal.assign (VarNameStart, CurPtr);
@@ -365,7 +376,7 @@ tgtok::TokKind TGLexer::LexIdentifier() {
365
376
const char *IdentStart = TokStart;
366
377
367
378
// Match the rest of the identifier regex: [0-9a-zA-Z_]*
368
- while (isalpha (*CurPtr) || isdigit (*CurPtr) || *CurPtr == ' _ ' )
379
+ while (isValidIDChar (*CurPtr, /* First= */ false ) )
369
380
++CurPtr;
370
381
371
382
// Check to see if this identifier is a reserved keyword.
@@ -500,7 +511,7 @@ tgtok::TokKind TGLexer::LexNumber() {
500
511
Base = 16 ;
501
512
do
502
513
++CurPtr;
503
- while (isxdigit (CurPtr[0 ]));
514
+ while (isHexDigit (CurPtr[0 ]));
504
515
} else if (CurPtr[0 ] == ' b' ) {
505
516
Base = 2 ;
506
517
do
@@ -515,7 +526,7 @@ tgtok::TokKind TGLexer::LexNumber() {
515
526
// Check if it's a decimal value.
516
527
if (Base == 0 ) {
517
528
// Check for a sign without a digit.
518
- if (!isdigit (CurPtr[0 ])) {
529
+ if (!isDigit (CurPtr[0 ])) {
519
530
if (CurPtr[-1 ] == ' -' )
520
531
return tgtok::minus;
521
532
else if (CurPtr[-1 ] == ' +' )
@@ -526,7 +537,7 @@ tgtok::TokKind TGLexer::LexNumber() {
526
537
NumStart = TokStart;
527
538
IsMinus = CurPtr[-1 ] == ' -' ;
528
539
529
- while (isdigit (CurPtr[0 ]))
540
+ while (isDigit (CurPtr[0 ]))
530
541
++CurPtr;
531
542
}
532
543
@@ -574,11 +585,11 @@ tgtok::TokKind TGLexer::LexBracket() {
574
585
575
586
/// LexExclaim - Lex '!' and '![a-zA-Z]+'.
576
587
tgtok::TokKind TGLexer::LexExclaim () {
577
- if (!isalpha (*CurPtr))
588
+ if (!isAlpha (*CurPtr))
578
589
return ReturnError (CurPtr - 1 , " Invalid \" !operator\" " );
579
590
580
591
const char *Start = CurPtr++;
581
- while (isalpha (*CurPtr))
592
+ while (isAlpha (*CurPtr))
582
593
++CurPtr;
583
594
584
595
// Check to see which operator this is.
0 commit comments