Skip to content

Commit e6751bf

Browse files
authored
[NFC][TableGen] Eliminate use of isalpha/isdigit from TGLexer (#104837)
- Replace uses of std::isalpha, std::isdigit, and std::isxdigit with LLVM's StringExtras versions, to avoid possibly locale-dependent behavior (e.g. glibc).
- Create a helper function for the common checks for valid identifier characters.
1 parent 6a38e19 commit e6751bf

File tree

1 file changed

+26
-15
lines changed

1 file changed

+26
-15
lines changed

llvm/lib/TableGen/TGLexer.cpp

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include "TGLexer.h"
1414
#include "llvm/ADT/ArrayRef.h"
15+
#include "llvm/ADT/StringExtras.h"
1516
#include "llvm/ADT/StringSwitch.h"
1617
#include "llvm/ADT/Twine.h"
1718
#include "llvm/Config/config.h" // for strtoull()/strtoll() define
@@ -20,7 +21,6 @@
2021
#include "llvm/Support/SourceMgr.h"
2122
#include "llvm/TableGen/Error.h"
2223
#include <algorithm>
23-
#include <cctype>
2424
#include <cerrno>
2525
#include <cstdint>
2626
#include <cstdio>
@@ -38,6 +38,17 @@ struct PreprocessorDir {
3838
};
3939
} // end anonymous namespace
4040

41+
/// Returns true if `C` is a valid character in an identifier. If `First` is
42+
/// true, returns true if `C` is a valid first character of an identifier,
43+
/// else returns true if `C` is a valid non-first character of an identifier.
44+
/// Identifiers match the following regular expression:
45+
/// [a-zA-Z_][0-9a-zA-Z_]*
46+
static bool isValidIDChar(char C, bool First) {
47+
if (C == '_' || isAlpha(C))
48+
return true;
49+
return !First && isDigit(C);
50+
}
51+
4152
constexpr PreprocessorDir PreprocessorDirs[] = {{tgtok::Ifdef, "ifdef"},
4253
{tgtok::Ifndef, "ifndef"},
4354
{tgtok::Else, "else"},
@@ -51,14 +62,14 @@ static const char *lexMacroName(StringRef Str) {
5162

5263
// Macro names start with [a-zA-Z_].
5364
const char *Next = Str.begin();
54-
if (*Next != '_' && !isalpha(*Next))
65+
if (!isValidIDChar(*Next, /*First=*/true))
5566
return Next;
5667
// Eat the first character of the name.
5768
++Next;
5869

5970
// Match the rest of the identifier regex: [0-9a-zA-Z_]*
6071
const char *End = Str.end();
61-
while (Next != End && (isalpha(*Next) || isdigit(*Next) || *Next == '_'))
72+
while (Next != End && isValidIDChar(*Next, /*First=*/false))
6273
++Next;
6374
return Next;
6475
}
@@ -173,7 +184,7 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
173184
switch (CurChar) {
174185
default:
175186
// Handle letters: [a-zA-Z_]
176-
if (isalpha(CurChar) || CurChar == '_')
187+
if (isValidIDChar(CurChar, /*First=*/true))
177188
return LexIdentifier();
178189

179190
// Unknown character, emit an error.
@@ -250,14 +261,14 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
250261
case '0': case '1': case '2': case '3': case '4': case '5': case '6':
251262
case '7': case '8': case '9': {
252263
int NextChar = 0;
253-
if (isdigit(CurChar)) {
264+
if (isDigit(CurChar)) {
254265
// Allow identifiers to start with a number if it is followed by
255266
// an identifier. This can happen with paste operations like
256267
// foo#8i.
257268
int i = 0;
258269
do {
259270
NextChar = peekNextChar(i++);
260-
} while (isdigit(NextChar));
271+
} while (isDigit(NextChar));
261272

262273
if (NextChar == 'x' || NextChar == 'b') {
263274
// If this is [0-9]b[01] or [0-9]x[0-9A-Fa-f] this is most
@@ -281,7 +292,7 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
281292
}
282293
}
283294

284-
if (isalpha(NextChar) || NextChar == '_')
295+
if (isValidIDChar(NextChar, /*First=*/true))
285296
return LexIdentifier();
286297

287298
return LexNumber();
@@ -347,13 +358,13 @@ tgtok::TokKind TGLexer::LexString() {
347358
}
348359

349360
tgtok::TokKind TGLexer::LexVarName() {
350-
if (!isalpha(CurPtr[0]) && CurPtr[0] != '_')
361+
if (!isValidIDChar(CurPtr[0], /*First=*/true))
351362
return ReturnError(TokStart, "Invalid variable name");
352363

353364
// Otherwise, we're ok, consume the rest of the characters.
354365
const char *VarNameStart = CurPtr++;
355366

356-
while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
367+
while (isValidIDChar(*CurPtr, /*First=*/false))
357368
++CurPtr;
358369

359370
CurStrVal.assign(VarNameStart, CurPtr);
@@ -365,7 +376,7 @@ tgtok::TokKind TGLexer::LexIdentifier() {
365376
const char *IdentStart = TokStart;
366377

367378
// Match the rest of the identifier regex: [0-9a-zA-Z_]*
368-
while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
379+
while (isValidIDChar(*CurPtr, /*First=*/false))
369380
++CurPtr;
370381

371382
// Check to see if this identifier is a reserved keyword.
@@ -500,7 +511,7 @@ tgtok::TokKind TGLexer::LexNumber() {
500511
Base = 16;
501512
do
502513
++CurPtr;
503-
while (isxdigit(CurPtr[0]));
514+
while (isHexDigit(CurPtr[0]));
504515
} else if (CurPtr[0] == 'b') {
505516
Base = 2;
506517
do
@@ -515,7 +526,7 @@ tgtok::TokKind TGLexer::LexNumber() {
515526
// Check if it's a decimal value.
516527
if (Base == 0) {
517528
// Check for a sign without a digit.
518-
if (!isdigit(CurPtr[0])) {
529+
if (!isDigit(CurPtr[0])) {
519530
if (CurPtr[-1] == '-')
520531
return tgtok::minus;
521532
else if (CurPtr[-1] == '+')
@@ -526,7 +537,7 @@ tgtok::TokKind TGLexer::LexNumber() {
526537
NumStart = TokStart;
527538
IsMinus = CurPtr[-1] == '-';
528539

529-
while (isdigit(CurPtr[0]))
540+
while (isDigit(CurPtr[0]))
530541
++CurPtr;
531542
}
532543

@@ -574,11 +585,11 @@ tgtok::TokKind TGLexer::LexBracket() {
574585

575586
/// LexExclaim - Lex '!' and '![a-zA-Z]+'.
576587
tgtok::TokKind TGLexer::LexExclaim() {
577-
if (!isalpha(*CurPtr))
588+
if (!isAlpha(*CurPtr))
578589
return ReturnError(CurPtr - 1, "Invalid \"!operator\"");
579590

580591
const char *Start = CurPtr++;
581-
while (isalpha(*CurPtr))
592+
while (isAlpha(*CurPtr))
582593
++CurPtr;
583594

584595
// Check to see which operator this is.

0 commit comments

Comments
 (0)