Skip to content

Commit 5dd1b6d

Browse files
committed
[ms] [llvm-ml] Add support for .radix directive, and accept all radix specifiers
Add support for .radix directive, and radix specifiers [yY] (binary), [oOqQ] (octal), and [tT] (decimal). Also, when lexing MASM integers, require radix specifier; MASM requires that all literals without a radix specifier be treated as in the default radix. (e.g., 0100 = 100) Reviewed By: thakis Differential Revision: https://reviews.llvm.org/D87400
1 parent b393118 commit 5dd1b6d

File tree

7 files changed

+288
-22
lines changed

7 files changed

+288
-22
lines changed

llvm/include/llvm/MC/MCParser/MCAsmLexer.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ class MCAsmLexer {
5050
bool AllowAtInIdentifier;
5151
bool IsAtStartOfStatement = true;
5252
bool LexMasmIntegers = false;
53+
unsigned DefaultRadix = 10;
5354
AsmCommentConsumer *CommentConsumer = nullptr;
5455

5556
MCAsmLexer();
@@ -143,6 +144,9 @@ class MCAsmLexer {
143144
bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
144145
/// Overwrites the AllowAtInIdentifier flag with \p v.
void setAllowAtInIdentifier(bool v) {
  AllowAtInIdentifier = v;
}
145146

147+
/// Returns the radix currently stored in DefaultRadix (initialised to 10).
unsigned getDefaultRadix() const {
  return DefaultRadix;
}
148+
void setDefaultRadix(unsigned Radix) { DefaultRadix = Radix; }
149+
146150
/// Stores \p CommentConsumer in the lexer's CommentConsumer member.
/// The parameter shadows the member, hence the explicit `this->`.
void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
  this->CommentConsumer = CommentConsumer;
}

llvm/lib/MC/MCParser/AsmLexer.cpp

Lines changed: 101 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "llvm/MC/MCParser/AsmLexer.h"
14+
#include "llvm/Support/Compiler.h"
1415
#include "llvm/ADT/APInt.h"
1516
#include "llvm/ADT/ArrayRef.h"
1617
#include "llvm/ADT/StringExtras.h"
@@ -271,13 +272,35 @@ static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,
271272
return DefaultRadix;
272273
}
273274

275+
static const char *findLastDigit(const char *CurPtr, unsigned DefaultRadix) {
276+
while (hexDigitValue(*CurPtr) < DefaultRadix) {
277+
++CurPtr;
278+
}
279+
return CurPtr;
280+
}
281+
274282
/// Wraps an already-parsed integer literal in an AsmToken.
///
/// \param Ref   the source text of the literal.
/// \param Value the parsed value.
/// \returns an AsmToken::Integer token when Value.isIntN(64) holds, and an
///          AsmToken::BigNum token otherwise.
static AsmToken intToken(StringRef Ref, APInt &Value) {
  const auto Kind = Value.isIntN(64) ? AsmToken::Integer : AsmToken::BigNum;
  return AsmToken(Kind, Ref, Value);
}
280288

289+
/// Produces a human-readable name for \p Radix, used when building
/// "invalid <name> number" diagnostics.
///
/// The four conventional radices get their usual names; any other value is
/// rendered generically as "base-N".
static std::string radixName(unsigned Radix) {
  if (Radix == 2)
    return "binary";
  if (Radix == 8)
    return "octal";
  if (Radix == 10)
    return "decimal";
  if (Radix == 16)
    return "hexadecimal";
  // No conventional name for this radix.
  return "base-" + std::to_string(Radix);
}
303+
281304
/// LexDigit: First character is [0-9].
282305
/// Local Label: [0-9][:]
283306
/// Forward/Backward Label: [0-9][fb]
@@ -286,45 +309,108 @@ static AsmToken intToken(StringRef Ref, APInt &Value)
286309
/// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
287310
/// Decimal integer: [1-9][0-9]*
288311
AsmToken AsmLexer::LexDigit() {
289-
// MASM-flavor binary integer: [01]+[bB]
312+
// MASM-flavor binary integer: [01]+[yY] (if DefaultRadix < 16, [bByY])
313+
// MASM-flavor octal integer: [0-7]+[oOqQ]
314+
// MASM-flavor decimal integer: [0-9]+[tT] (if DefaultRadix < 16, [dDtT])
290315
// MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
291316
if (LexMasmIntegers && isdigit(CurPtr[-1])) {
292-
const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ?
293-
CurPtr - 1 : nullptr;
317+
const char *FirstNonBinary =
318+
(CurPtr[-1] != '0' && CurPtr[-1] != '1') ? CurPtr - 1 : nullptr;
319+
const char *FirstNonDecimal =
320+
(CurPtr[-1] < '0' || CurPtr[-1] > '9') ? CurPtr - 1 : nullptr;
294321
const char *OldCurPtr = CurPtr;
295322
while (isHexDigit(*CurPtr)) {
296-
if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary)
297-
FirstNonBinary = CurPtr;
323+
switch (*CurPtr) {
324+
default:
325+
if (!FirstNonDecimal) {
326+
FirstNonDecimal = CurPtr;
327+
}
328+
LLVM_FALLTHROUGH;
329+
case '9':
330+
case '8':
331+
case '7':
332+
case '6':
333+
case '5':
334+
case '4':
335+
case '3':
336+
case '2':
337+
if (!FirstNonBinary) {
338+
FirstNonBinary = CurPtr;
339+
}
340+
break;
341+
case '1':
342+
case '0':
343+
break;
344+
}
345+
++CurPtr;
346+
}
347+
if (*CurPtr == '.') {
348+
// MASM float literals (other than hex floats) always contain a ".", and
349+
// are always written in decimal.
298350
++CurPtr;
351+
return LexFloatLiteral();
299352
}
300353

301354
unsigned Radix = 0;
302355
if (*CurPtr == 'h' || *CurPtr == 'H') {
303356
// hexadecimal number
304357
++CurPtr;
305358
Radix = 16;
359+
} else if (*CurPtr == 't' || *CurPtr == 'T') {
360+
// decimal number
361+
++CurPtr;
362+
Radix = 10;
363+
} else if (*CurPtr == 'o' || *CurPtr == 'O' || *CurPtr == 'q' ||
364+
*CurPtr == 'Q') {
365+
// octal number
366+
++CurPtr;
367+
Radix = 8;
368+
} else if (*CurPtr == 'y' || *CurPtr == 'Y') {
369+
// binary number
370+
++CurPtr;
371+
Radix = 2;
372+
} else if (FirstNonDecimal && FirstNonDecimal + 1 == CurPtr &&
373+
DefaultRadix < 14 &&
374+
(*FirstNonDecimal == 'd' || *FirstNonDecimal == 'D')) {
375+
Radix = 10;
306376
} else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
307-
(*FirstNonBinary == 'b' || *FirstNonBinary == 'B'))
377+
DefaultRadix < 12 &&
378+
(*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) {
308379
Radix = 2;
380+
}
309381

310-
if (Radix == 2 || Radix == 16) {
382+
if (Radix) {
311383
StringRef Result(TokStart, CurPtr - TokStart);
312384
APInt Value(128, 0, true);
313385

314386
if (Result.drop_back().getAsInteger(Radix, Value))
315-
return ReturnError(TokStart, Radix == 2 ? "invalid binary number" :
316-
"invalid hexdecimal number");
387+
return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
317388

318389
// MSVC accepts and ignores type suffixes on integer literals.
319390
SkipIgnoredIntegerSuffix(CurPtr);
320391

321392
return intToken(Result, Value);
322-
}
393+
}
323394

324-
// octal/decimal integers, or floating point numbers, fall through
395+
// default-radix integers, or floating point numbers, fall through
325396
CurPtr = OldCurPtr;
326397
}
327398

399+
// MASM default-radix integers: [0-9a-fA-F]+
400+
// (All other integer literals have a radix specifier.)
401+
if (LexMasmIntegers) {
402+
CurPtr = findLastDigit(CurPtr, 16);
403+
StringRef Result(TokStart, CurPtr - TokStart);
404+
405+
APInt Value(128, 0, true);
406+
if (Result.getAsInteger(DefaultRadix, Value)) {
407+
return ReturnError(TokStart,
408+
"invalid " + radixName(DefaultRadix) + " number");
409+
}
410+
411+
return intToken(Result, Value);
412+
}
413+
328414
// Decimal integer: [1-9][0-9]*
329415
if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
330416
unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
@@ -339,13 +425,9 @@ AsmToken AsmLexer::LexDigit() {
339425
StringRef Result(TokStart, CurPtr - TokStart);
340426

341427
APInt Value(128, 0, true);
342-
if (Result.getAsInteger(Radix, Value))
343-
return ReturnError(TokStart, !isHex ? "invalid decimal number" :
344-
"invalid hexdecimal number");
345-
346-
// Consume the [hH].
347-
if (LexMasmIntegers && Radix == 16)
348-
++CurPtr;
428+
if (Result.getAsInteger(Radix, Value)) {
429+
return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
430+
}
349431

350432
// The darwin/x86 (and x86-64) assembler accepts and ignores type
351433
// suffixes on integer literals.
@@ -416,11 +498,9 @@ AsmToken AsmLexer::LexDigit() {
416498
// Either octal or hexadecimal.
417499
APInt Value(128, 0, true);
418500
unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers);
419-
bool isHex = Radix == 16;
420501
StringRef Result(TokStart, CurPtr - TokStart);
421502
if (Result.getAsInteger(Radix, Value))
422-
return ReturnError(TokStart, !isHex ? "invalid octal number" :
423-
"invalid hexdecimal number");
503+
return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
424504

425505
// Consume the [hH].
426506
if (Radix == 16)

llvm/lib/MC/MCParser/COFFMasmParser.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,6 @@ class COFFMasmParser : public MCAsmParserExtension {
132132
// option
133133
// popcontext
134134
// pushcontext
135-
// .radix
136135
// .safeseh
137136

138137
// Procedure directives

llvm/lib/MC/MCParser/MasmParser.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,7 @@ class MasmParser : public MCAsmParser {
732732
DK_SAVEREG,
733733
DK_SAVEXMM128,
734734
DK_SETFRAME,
735+
DK_RADIX,
735736
};
736737

737738
/// Maps directive name --> DirectiveKind enum, for directives parsed by this
@@ -964,6 +965,9 @@ class MasmParser : public MCAsmParser {
964965
// ".erre" or ".errnz", depending on ExpectZero.
965966
bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);
966967

968+
// ".radix"
969+
bool parseDirectiveRadix(SMLoc DirectiveLoc);
970+
967971
// "echo"
968972
bool parseDirectiveEcho();
969973

@@ -2284,6 +2288,8 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
22842288
return parseDirectiveErrorIfe(IDLoc, true);
22852289
case DK_ERRNZ:
22862290
return parseDirectiveErrorIfe(IDLoc, false);
2291+
case DK_RADIX:
2292+
return parseDirectiveRadix(IDLoc);
22872293
case DK_ECHO:
22882294
return parseDirectiveEcho();
22892295
}
@@ -6343,6 +6349,7 @@ void MasmParser::initializeDirectiveKindMap() {
63436349
DirectiveKindMap[".savereg"] = DK_SAVEREG;
63446350
DirectiveKindMap[".savexmm128"] = DK_SAVEXMM128;
63456351
DirectiveKindMap[".setframe"] = DK_SETFRAME;
6352+
DirectiveKindMap[".radix"] = DK_RADIX;
63466353
// DirectiveKindMap[".altmacro"] = DK_ALTMACRO;
63476354
// DirectiveKindMap[".noaltmacro"] = DK_NOALTMACRO;
63486355
DirectiveKindMap["db"] = DK_DB;
@@ -6584,6 +6591,22 @@ bool MasmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
65846591
return false;
65856592
}
65866593

6594+
/// Handles the ".radix" directive.
///
/// The remainder of the statement (with surrounding whitespace trimmed) is
/// read as a base-10 number. If it is not a number, or lies outside 2..16, a
/// diagnostic is issued at the operand's location and the result of Error()
/// is returned. Otherwise the lexer's default radix is updated and false is
/// returned.
///
/// \param DirectiveLoc location of the directive itself; not used by this
///        implementation, which reports errors at the operand instead.
bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) {
  // Capture the operand position before consuming it so errors point at it.
  const SMLoc OperandLoc = getLexer().getLoc();
  StringRef Operand = parseStringToEndOfStatement().trim();

  unsigned NewRadix;
  const bool NotANumber = Operand.getAsInteger(10, NewRadix);
  if (NotANumber)
    return Error(OperandLoc,
                 "radix must be a decimal number in the range 2 to 16; was " +
                     Operand);

  const bool InRange = NewRadix >= 2 && NewRadix <= 16;
  if (!InRange)
    return Error(OperandLoc, "radix must be in the range 2 to 16; was " +
                                 std::to_string(NewRadix));

  getLexer().setDefaultRadix(NewRadix);
  return false;
}
6609+
65876610
bool MasmParser::parseDirectiveEcho() {
65886611
StringRef Message = parseStringToEndOfStatement();
65896612
Lex(); // eat end of statement

llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1662,6 +1662,9 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
16621662
if ((Done = SM.isValidEndState()))
16631663
break;
16641664
return Error(Tok.getLoc(), "unknown token in expression");
1665+
case AsmToken::Error:
1666+
return Error(getLexer().getErrLoc(), getLexer().getErr());
1667+
break;
16651668
case AsmToken::EndOfStatement:
16661669
Done = true;
16671670
break;

llvm/test/tools/llvm-ml/radix.test

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# RUN: llvm-ml -filetype=asm %s | FileCheck %s
2+
3+
.code
4+
5+
t1:
6+
mov eax, 100b
7+
mov eax, 100y
8+
9+
; CHECK-LABEL: t1:
10+
; CHECK-NEXT: mov eax, 4
11+
; CHECK-NEXT: mov eax, 4
12+
13+
t2:
14+
mov eax, 100o
15+
mov eax, 100q
16+
17+
; CHECK-LABEL: t2:
18+
; CHECK-NEXT: mov eax, 64
19+
; CHECK-NEXT: mov eax, 64
20+
21+
t3:
22+
mov eax, 100d
23+
mov eax, 100t
24+
25+
; CHECK-LABEL: t3:
26+
; CHECK-NEXT: mov eax, 100
27+
; CHECK-NEXT: mov eax, 100
28+
29+
t4:
30+
mov eax, 100h
31+
32+
; CHECK-LABEL: t4:
33+
; CHECK-NEXT: mov eax, 256
34+
35+
t5:
36+
mov eax, 100
37+
.radix 2
38+
mov eax, 100
39+
.radix 16
40+
mov eax, 100
41+
.radix 10
42+
mov eax, 100
43+
44+
; CHECK-LABEL: t5:
45+
; CHECK: mov eax, 100
46+
; CHECK: mov eax, 4
47+
; CHECK: mov eax, 256
48+
; CHECK: mov eax, 100
49+
50+
t6:
51+
.radix 9
52+
mov eax, 100
53+
.radix 10
54+
55+
; CHECK-LABEL: t6:
56+
; CHECK: mov eax, 81
57+
58+
t7:
59+
.radix 12
60+
mov eax, 100b
61+
mov eax, 100y
62+
.radix 10
63+
64+
; CHECK-LABEL: t7:
65+
; CHECK: mov eax, 1739
66+
; CHECK: mov eax, 4
67+
68+
t8:
69+
.radix 16
70+
mov eax, 100d
71+
mov eax, 100t
72+
.radix 10
73+
74+
; CHECK-LABEL: t8:
75+
; CHECK: mov eax, 4109
76+
; CHECK: mov eax, 100
77+
78+
t9:
79+
.radix 12
80+
mov eax, 102b
81+
.radix 16
82+
mov eax, 10fd
83+
.radix 10
84+
85+
; CHECK-LABEL: t9:
86+
; CHECK: mov eax, 1763
87+
; CHECK: mov eax, 4349
88+
89+
t10:
90+
.radix 16
91+
mov eax, 1e1
92+
.radix 10
93+
94+
; CHECK-LABEL: t10:
95+
; CHECK: mov eax, 481
96+
97+
END

0 commit comments

Comments
 (0)