Skip to content

Commit 3bace7e

Browse files
authored
[LLVM][AsmParser] Make error reporting of lexer errors more precise (#111077)
When the lexer hits an error during assembly parsing, it just logs the error in the `ErrorMsg` object, and it's possible that error gets overwritten later in by the parser with a more generic error message. This makes some errors reported by the LLVM's asm parser less precise. Address this by not having the parser overwrite the message logged by the lexer by assigning error messages generated by the lexer higher "priority" than those generated by parser and overwriting the error message only if its same or higher priority. Update several Assembler unit test to now check the more precise error messaged reported by the LLVM's AsmParser.
1 parent d2457e6 commit 3bace7e

File tree

9 files changed

+75
-37
lines changed

9 files changed

+75
-37
lines changed

llvm/include/llvm/AsmParser/LLLexer.h

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,20 @@ namespace llvm {
2828
class LLLexer {
2929
const char *CurPtr;
3030
StringRef CurBuf;
31-
SMDiagnostic &ErrorInfo;
31+
32+
enum class ErrorPriority {
33+
None, // No error message present.
34+
Parser, // Errors issued by parser.
35+
Lexer, // Errors issued by lexer.
36+
};
37+
38+
struct ErrorInfo {
39+
ErrorPriority Priority = ErrorPriority::None;
40+
SMDiagnostic &Error;
41+
42+
explicit ErrorInfo(SMDiagnostic &Error) : Error(Error) {}
43+
} ErrorInfo;
44+
3245
SourceMgr &SM;
3346
LLVMContext &Context;
3447

@@ -66,8 +79,13 @@ namespace llvm {
6679
IgnoreColonInIdentifiers = val;
6780
}
6881

69-
bool Error(LocTy ErrorLoc, const Twine &Msg) const;
70-
bool Error(const Twine &Msg) const { return Error(getLoc(), Msg); }
82+
// This returns true as a convenience for the parser functions that return
83+
// true on error.
84+
bool ParseError(LocTy ErrorLoc, const Twine &Msg) {
85+
Error(ErrorLoc, Msg, ErrorPriority::Parser);
86+
return true;
87+
}
88+
bool ParseError(const Twine &Msg) { return ParseError(getLoc(), Msg); }
7189

7290
void Warning(LocTy WarningLoc, const Twine &Msg) const;
7391
void Warning(const Twine &Msg) const { return Warning(getLoc(), Msg); }
@@ -97,7 +115,15 @@ namespace llvm {
97115
uint64_t atoull(const char *Buffer, const char *End);
98116
uint64_t HexIntToVal(const char *Buffer, const char *End);
99117
void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]);
100-
void FP80HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]);
118+
void FP80HexToIntPair(const char *Buffer, const char *End,
119+
uint64_t Pair[2]);
120+
121+
void Error(LocTy ErrorLoc, const Twine &Msg, ErrorPriority Origin);
122+
123+
void LexError(LocTy ErrorLoc, const Twine &Msg) {
124+
Error(ErrorLoc, Msg, ErrorPriority::Lexer);
125+
}
126+
void LexError(const Twine &Msg) { LexError(getLoc(), Msg); }
101127
};
102128
} // end namespace llvm
103129

llvm/include/llvm/AsmParser/LLParser.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -207,11 +207,11 @@ namespace llvm {
207207
LLVMContext &getContext() { return Context; }
208208

209209
private:
210-
bool error(LocTy L, const Twine &Msg) const { return Lex.Error(L, Msg); }
211-
bool tokError(const Twine &Msg) const { return error(Lex.getLoc(), Msg); }
210+
bool error(LocTy L, const Twine &Msg) { return Lex.ParseError(L, Msg); }
211+
bool tokError(const Twine &Msg) { return error(Lex.getLoc(), Msg); }
212212

213213
bool checkValueID(LocTy L, StringRef Kind, StringRef Prefix,
214-
unsigned NextID, unsigned ID) const;
214+
unsigned NextID, unsigned ID);
215215

216216
/// Restore the internal name and slot mappings using the mappings that
217217
/// were created at an earlier parsing stage.

llvm/lib/AsmParser/LLLexer.cpp

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,21 @@
2525

2626
using namespace llvm;
2727

28-
bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
29-
ErrorInfo = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
30-
return true;
28+
// Both the lexer and parser can issue error messages. If the lexer issues a
29+
// lexer error, since we do not terminate execution immediately, usually that
30+
// is followed by the parser issuing a parser error. However, the error issued
31+
// by the lexer is more relevant in that case as opposed to potentially more
32+
// generic parser error. So instead of always recording the last error message
33+
// use the `Priority` to establish a priority, with Lexer > Parser > None. We
34+
// record the issued message only if the message has same or higher priority
35+
// than the existing one. This prevents lexer errors from being overwritten by
36+
// parser errors.
37+
void LLLexer::Error(LocTy ErrorLoc, const Twine &Msg,
38+
LLLexer::ErrorPriority Priority) {
39+
if (Priority < ErrorInfo.Priority)
40+
return;
41+
ErrorInfo.Error = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
42+
ErrorInfo.Priority = Priority;
3143
}
3244

3345
void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const {
@@ -49,7 +61,7 @@ uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
4961
Result *= 10;
5062
Result += *Buffer-'0';
5163
if (Result < OldRes) { // Uh, oh, overflow detected!!!
52-
Error("constant bigger than 64 bits detected!");
64+
LexError("constant bigger than 64 bits detected!");
5365
return 0;
5466
}
5567
}
@@ -64,7 +76,7 @@ uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
6476
Result += hexDigitValue(*Buffer);
6577

6678
if (Result < OldRes) { // Uh, oh, overflow detected!!!
67-
Error("constant bigger than 64 bits detected!");
79+
LexError("constant bigger than 64 bits detected!");
6880
return 0;
6981
}
7082
}
@@ -87,7 +99,7 @@ void LLLexer::HexToIntPair(const char *Buffer, const char *End,
8799
Pair[1] += hexDigitValue(*Buffer);
88100
}
89101
if (Buffer != End)
90-
Error("constant bigger than 128 bits detected!");
102+
LexError("constant bigger than 128 bits detected!");
91103
}
92104

93105
/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
@@ -106,7 +118,7 @@ void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
106118
Pair[0] += hexDigitValue(*Buffer);
107119
}
108120
if (Buffer != End)
109-
Error("constant bigger than 128 bits detected!");
121+
LexError("constant bigger than 128 bits detected!");
110122
}
111123

112124
// UnEscapeLexed - Run through the specified buffer and change \xx codes to the
@@ -273,14 +285,14 @@ lltok::Kind LLLexer::LexDollar() {
273285
int CurChar = getNextChar();
274286

275287
if (CurChar == EOF) {
276-
Error("end of file in COMDAT variable name");
288+
LexError("end of file in COMDAT variable name");
277289
return lltok::Error;
278290
}
279291
if (CurChar == '"') {
280292
StrVal.assign(TokStart + 2, CurPtr - 1);
281293
UnEscapeLexed(StrVal);
282294
if (StringRef(StrVal).contains(0)) {
283-
Error("Null bytes are not allowed in names");
295+
LexError("Null bytes are not allowed in names");
284296
return lltok::Error;
285297
}
286298
return lltok::ComdatVar;
@@ -302,7 +314,7 @@ lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
302314
int CurChar = getNextChar();
303315

304316
if (CurChar == EOF) {
305-
Error("end of file in string constant");
317+
LexError("end of file in string constant");
306318
return lltok::Error;
307319
}
308320
if (CurChar == '"') {
@@ -342,7 +354,7 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) {
342354

343355
uint64_t Val = atoull(TokStart + 1, CurPtr);
344356
if ((unsigned)Val != Val)
345-
Error("invalid value number (too large)!");
357+
LexError("invalid value number (too large)!");
346358
UIntVal = unsigned(Val);
347359
return Token;
348360
}
@@ -356,14 +368,14 @@ lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
356368
int CurChar = getNextChar();
357369

358370
if (CurChar == EOF) {
359-
Error("end of file in global variable name");
371+
LexError("end of file in global variable name");
360372
return lltok::Error;
361373
}
362374
if (CurChar == '"') {
363375
StrVal.assign(TokStart+2, CurPtr-1);
364376
UnEscapeLexed(StrVal);
365377
if (StringRef(StrVal).contains(0)) {
366-
Error("Null bytes are not allowed in names");
378+
LexError("Null bytes are not allowed in names");
367379
return lltok::Error;
368380
}
369381
return Var;
@@ -398,7 +410,7 @@ lltok::Kind LLLexer::LexQuote() {
398410
if (CurPtr[0] == ':') {
399411
++CurPtr;
400412
if (StringRef(StrVal).contains(0)) {
401-
Error("Null bytes are not allowed in names");
413+
LexError("Null bytes are not allowed in names");
402414
kind = lltok::Error;
403415
} else {
404416
kind = lltok::LabelStr;
@@ -480,7 +492,7 @@ lltok::Kind LLLexer::LexIdentifier() {
480492
uint64_t NumBits = atoull(StartChar, CurPtr);
481493
if (NumBits < IntegerType::MIN_INT_BITS ||
482494
NumBits > IntegerType::MAX_INT_BITS) {
483-
Error("bitwidth for integer type out of range!");
495+
LexError("bitwidth for integer type out of range!");
484496
return lltok::Error;
485497
}
486498
TyVal = IntegerType::get(Context, NumBits);
@@ -1109,7 +1121,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
11091121
uint64_t Val = atoull(TokStart, CurPtr);
11101122
++CurPtr; // Skip the colon.
11111123
if ((unsigned)Val != Val)
1112-
Error("invalid value number (too large)!");
1124+
LexError("invalid value number (too large)!");
11131125
UIntVal = unsigned(Val);
11141126
return lltok::LabelID;
11151127
}

llvm/lib/AsmParser/LLParser.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3220,7 +3220,7 @@ bool LLParser::parseOptionalOperandBundles(
32203220
}
32213221

32223222
bool LLParser::checkValueID(LocTy Loc, StringRef Kind, StringRef Prefix,
3223-
unsigned NextID, unsigned ID) const {
3223+
unsigned NextID, unsigned ID) {
32243224
if (ID < NextID)
32253225
return error(Loc, Kind + " expected to be numbered '" + Prefix +
32263226
Twine(NextID) + "' or greater");
@@ -5192,7 +5192,7 @@ bool LLParser::parseDILocation(MDNode *&Result, bool IsDistinct) {
51925192
/// ::= distinct !DIAssignID()
51935193
bool LLParser::parseDIAssignID(MDNode *&Result, bool IsDistinct) {
51945194
if (!IsDistinct)
5195-
return Lex.Error("missing 'distinct', required for !DIAssignID()");
5195+
return tokError("missing 'distinct', required for !DIAssignID()");
51965196

51975197
Lex.Lex();
51985198

@@ -5500,7 +5500,7 @@ bool LLParser::parseDIFile(MDNode *&Result, bool IsDistinct) {
55005500
if (checksumkind.Seen && checksum.Seen)
55015501
OptChecksum.emplace(checksumkind.Val, checksum.Val);
55025502
else if (checksumkind.Seen || checksum.Seen)
5503-
return Lex.Error("'checksumkind' and 'checksum' must be provided together");
5503+
return tokError("'checksumkind' and 'checksum' must be provided together");
55045504

55055505
MDString *Source = nullptr;
55065506
if (source.Seen)
@@ -5519,7 +5519,7 @@ bool LLParser::parseDIFile(MDNode *&Result, bool IsDistinct) {
55195519
/// sysroot: "/", sdk: "MacOSX.sdk")
55205520
bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
55215521
if (!IsDistinct)
5522-
return Lex.Error("missing 'distinct', required for !DICompileUnit");
5522+
return tokError("missing 'distinct', required for !DICompileUnit");
55235523

55245524
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
55255525
REQUIRED(language, DwarfLangField, ); \
@@ -5599,7 +5599,7 @@ bool LLParser::parseDISubprogram(MDNode *&Result, bool IsDistinct) {
55995599
: DISubprogram::toSPFlags(isLocal.Val, isDefinition.Val,
56005600
isOptimized.Val, virtuality.Val);
56015601
if ((SPFlags & DISubprogram::SPFlagDefinition) && !IsDistinct)
5602-
return Lex.Error(
5602+
return error(
56035603
Loc,
56045604
"missing 'distinct', required for !DISubprogram that is a Definition");
56055605
Result = GET_OR_DISTINCT(
@@ -7952,10 +7952,10 @@ bool LLParser::parseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
79527952
// array constant.
79537953
if (CT == LandingPadInst::Catch) {
79547954
if (isa<ArrayType>(V->getType()))
7955-
error(VLoc, "'catch' clause has an invalid type");
7955+
return error(VLoc, "'catch' clause has an invalid type");
79567956
} else {
79577957
if (!isa<ArrayType>(V->getType()))
7958-
error(VLoc, "'filter' clause has an invalid type");
7958+
return error(VLoc, "'filter' clause has an invalid type");
79597959
}
79607960

79617961
Constant *CV = dyn_cast<Constant>(V);
@@ -8639,7 +8639,7 @@ bool LLParser::parseUseListOrderIndexes(SmallVectorImpl<unsigned> &Indexes) {
86398639
if (parseToken(lltok::lbrace, "expected '{' here"))
86408640
return true;
86418641
if (Lex.getKind() == lltok::rbrace)
8642-
return Lex.Error("expected non-empty list of uselistorder indexes");
8642+
return tokError("expected non-empty list of uselistorder indexes");
86438643

86448644
// Use Offset, Max, and IsOrdered to check consistency of indexes. The
86458645
// indexes should be distinct numbers in the range [0, size-1], and should
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
1+
; RUN: not llvm-as --disable-output %s 2>&1 | FileCheck -DFILE=%s %s
22

3+
; CHECK: [[FILE]]:[[@LINE+1]]:30: error: expected allockind value
34
declare void @f0() allockind()
4-
; CHECK: :[[#@LINE-1]]:30: error: expected allockind value
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
; RUN: not llvm-as < %s 2>&1 | FileCheck %s
1+
; RUN: not llvm-as --disable-output %s 2>&1 | FileCheck -DFILE=%s %s
22

33
; i8388609 is the smallest integer type that can't be represented in LLVM IR
4+
; CHECK: [[FILE]]:[[@LINE+1]]:21: error: bitwidth for integer type out of range!
45
@i2 = common global i8388609 0, align 4
5-
; CHECK: expected type
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
; RUN: not llvm-as < %s 2>&1 | FileCheck %s
1+
; RUN: not llvm-as --disable-output %s 2>&1 | FileCheck -DFILE=%s %s
22

3-
; CHECK: clause argument must be a constant
43

54
define void @test(i32 %in) personality ptr null {
5+
; CHECK: [[FILE]]:[[@LINE+1]]:24: error: 'filter' clause has an invalid type
66
landingpad {} filter i32 %in
77
}

llvm/test/Assembler/invalid-name.ll

65 Bytes
Binary file not shown.

llvm/test/Assembler/invalid-name2.ll

65 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)