Skip to content

Commit 95291a0

Browse files
committed
Reland "[ms] [llvm-ml] Improve data support, adding names and complex initializers."
This reverts commit 9fe769a, and re-lands commit c2e272f. Summary: Add support for ?, DUP, and string initializers, as well as MASM syntax for named data locations. This version avoids the use of a C++17-only feature, if-statements with initializer. Reviewers: rnk, thakis Reviewed By: thakis Tags: #llvm Differential Revision: https://reviews.llvm.org/D73226
1 parent 6f87b16 commit 95291a0

File tree

2 files changed

+225
-26
lines changed

2 files changed

+225
-26
lines changed

llvm/lib/MC/MCParser/MasmParser.cpp

Lines changed: 187 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -482,12 +482,25 @@ class MasmParser : public MCAsmParser {
482482
/// Codeview def_range types parsed by this class.
483483
StringMap<CVDefRangeType> CVDefRangeTypeMap;
484484

485+
bool parseInitValue(unsigned Size);
486+
485487
// ".ascii", ".asciz", ".string"
486488
bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
487-
bool parseDirectiveValue(StringRef IDVal,
488-
unsigned Size); // "byte", "word", ...
489-
bool parseDirectiveRealValue(StringRef IDVal,
490-
const fltSemantics &); // "real4", ...
489+
490+
// "byte", "word", ...
491+
bool parseScalarInstList(unsigned Size,
492+
SmallVectorImpl<const MCExpr *> &Values);
493+
bool parseDirectiveValue(StringRef IDVal, unsigned Size);
494+
bool parseDirectiveNamedValue(StringRef IDVal, unsigned Size, StringRef Name,
495+
SMLoc NameLoc);
496+
497+
// "real4", "real8"
498+
bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics);
499+
bool parseRealInstList(const fltSemantics &Semantics,
500+
SmallVectorImpl<APInt> &Values);
501+
bool parseDirectiveNamedRealValue(StringRef IDVal,
502+
const fltSemantics &Semantics,
503+
StringRef Name, SMLoc NameLoc);
491504

492505
// "=", "equ", "textequ"
493506
bool parseDirectiveEquate(StringRef IDVal, StringRef Name,
@@ -1903,6 +1916,33 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
19031916
case DK_TEXTEQU:
19041917
Lex();
19051918
return parseDirectiveEquate(nextVal, IDVal, DirKind);
1919+
case DK_BYTE:
1920+
case DK_DB:
1921+
Lex();
1922+
return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
1923+
case DK_WORD:
1924+
case DK_DW:
1925+
Lex();
1926+
return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
1927+
case DK_DWORD:
1928+
case DK_DD:
1929+
Lex();
1930+
return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
1931+
case DK_FWORD:
1932+
Lex();
1933+
return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
1934+
case DK_QWORD:
1935+
case DK_DQ:
1936+
Lex();
1937+
return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
1938+
case DK_REAL4:
1939+
Lex();
1940+
return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), IDVal,
1941+
IDLoc);
1942+
case DK_REAL8:
1943+
Lex();
1944+
return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), IDVal,
1945+
IDLoc);
19061946
}
19071947

19081948
// __asm _emit or __asm __emit
@@ -2739,31 +2779,101 @@ bool MasmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
27392779
return false;
27402780
}
27412781

2782+
bool MasmParser::parseScalarInstList(unsigned Size,
2783+
SmallVectorImpl<const MCExpr *> &Values) {
2784+
do {
2785+
if (getTok().is(AsmToken::String)) {
2786+
StringRef Value = getTok().getStringContents();
2787+
if (Size == 1) {
2788+
// Treat each character as an initializer.
2789+
for (const char CharVal : Value)
2790+
Values.push_back(MCConstantExpr::create(CharVal, getContext()));
2791+
} else {
2792+
// Treat the string as an initial value in big-endian representation.
2793+
if (Value.size() > Size)
2794+
return Error(getTok().getLoc(), "out of range literal value");
2795+
2796+
uint64_t IntValue = 0;
2797+
for (const unsigned char CharVal : Value.bytes())
2798+
IntValue = (IntValue << 8) | CharVal;
2799+
Values.push_back(MCConstantExpr::create(IntValue, getContext()));
2800+
}
2801+
Lex();
2802+
} else {
2803+
const MCExpr *Value;
2804+
if (checkForValidSection() || parseExpression(Value))
2805+
return true;
2806+
if (getTok().is(AsmToken::Identifier) &&
2807+
getTok().getString().equals_lower("dup")) {
2808+
Lex(); // eat 'dup'
2809+
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
2810+
if (!MCE)
2811+
return Error(Value->getLoc(),
2812+
"cannot repeat value a non-constant number of times");
2813+
const int64_t Repetitions = MCE->getValue();
2814+
if (Repetitions < 0)
2815+
return Error(Value->getLoc(),
2816+
"cannot repeat value a negative number of times");
2817+
2818+
SmallVector<const MCExpr *, 1> DuplicatedValues;
2819+
if (parseToken(AsmToken::LParen,
2820+
"parentheses required for 'dup' contents") ||
2821+
parseScalarInstList(Size, DuplicatedValues) ||
2822+
parseToken(AsmToken::RParen, "unmatched parentheses"))
2823+
return true;
2824+
2825+
for (int i = 0; i < Repetitions; ++i)
2826+
Values.append(DuplicatedValues.begin(), DuplicatedValues.end());
2827+
} else {
2828+
Values.push_back(Value);
2829+
}
2830+
}
2831+
2832+
// Continue if we see a comma. (Also, allow line continuation.)
2833+
} while (parseOptionalToken(AsmToken::Comma) &&
2834+
(getTok().isNot(AsmToken::EndOfStatement) ||
2835+
!parseToken(AsmToken::EndOfStatement)));
2836+
2837+
return false;
2838+
}
2839+
27422840
/// parseDirectiveValue
27432841
/// ::= (byte | word | ... ) [ expression (, expression)* ]
27442842
bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
2745-
auto parseOp = [&]() -> bool {
2746-
const MCExpr *Value;
2747-
SMLoc ExprLoc = getLexer().getLoc();
2748-
if (checkForValidSection() || parseExpression(Value))
2749-
return true;
2843+
SmallVector<const MCExpr *, 1> Values;
2844+
if (parseScalarInstList(Size, Values))
2845+
return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
2846+
2847+
for (const MCExpr *Value : Values) {
27502848
// Special case constant expressions to match code generator.
27512849
if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
27522850
assert(Size <= 8 && "Invalid size");
27532851
int64_t IntValue = MCE->getValue();
27542852
if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
2755-
return Error(ExprLoc, "out of range literal value");
2853+
return Error(MCE->getLoc(), "out of range literal value");
27562854
getStreamer().emitIntValue(IntValue, Size);
2757-
} else
2758-
getStreamer().emitValue(Value, Size, ExprLoc);
2759-
return false;
2760-
};
2761-
2762-
if (parseMany(parseOp))
2763-
return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
2855+
} else {
2856+
const MCSymbolRefExpr *MSE = dyn_cast<MCSymbolRefExpr>(Value);
2857+
if (MSE && MSE->getSymbol().getName() == "?") {
2858+
// ? initializer; treat as 0.
2859+
getStreamer().emitIntValue(0, Size);
2860+
} else {
2861+
getStreamer().emitValue(Value, Size, Value->getLoc());
2862+
}
2863+
}
2864+
}
27642865
return false;
27652866
}
27662867

2868+
/// parseDirectiveNamedValue
2869+
/// ::= name (byte | word | ... ) [ expression (, expression)* ]
2870+
bool MasmParser::parseDirectiveNamedValue(StringRef IDVal, unsigned Size,
2871+
StringRef Name, SMLoc NameLoc) {
2872+
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
2873+
getStreamer().emitLabel(Sym);
2874+
return parseDirectiveValue(IDVal, Size);
2875+
}
2876+
27672877
static bool parseHexOcta(MasmParser &Asm, uint64_t &hi, uint64_t &lo) {
27682878
if (Asm.getTok().isNot(AsmToken::Integer) &&
27692879
Asm.getTok().isNot(AsmToken::BigNum))
@@ -2824,24 +2934,75 @@ bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
28242934
return false;
28252935
}
28262936

2937+
bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
2938+
SmallVectorImpl<APInt> &ValuesAsInt) {
2939+
do {
2940+
const AsmToken NextTok = Lexer.peekTok();
2941+
if (NextTok.is(AsmToken::Identifier) &&
2942+
NextTok.getString().equals_lower("dup")) {
2943+
const MCExpr *Value;
2944+
if (parseExpression(Value) || parseToken(AsmToken::Identifier))
2945+
return true;
2946+
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
2947+
if (!MCE)
2948+
return Error(Value->getLoc(),
2949+
"cannot repeat value a non-constant number of times");
2950+
const int64_t Repetitions = MCE->getValue();
2951+
if (Repetitions < 0)
2952+
return Error(Value->getLoc(),
2953+
"cannot repeat value a negative number of times");
2954+
2955+
SmallVector<APInt, 1> DuplicatedValues;
2956+
if (parseToken(AsmToken::LParen,
2957+
"parentheses required for 'dup' contents") ||
2958+
parseRealInstList(Semantics, DuplicatedValues) ||
2959+
parseToken(AsmToken::RParen, "unmatched parentheses"))
2960+
return true;
2961+
2962+
for (int i = 0; i < Repetitions; ++i)
2963+
ValuesAsInt.append(DuplicatedValues.begin(), DuplicatedValues.end());
2964+
} else {
2965+
APInt AsInt;
2966+
if (parseRealValue(Semantics, AsInt))
2967+
return true;
2968+
ValuesAsInt.push_back(AsInt);
2969+
}
2970+
// Continue if we see a comma. (Also, allow line continuation.)
2971+
} while (parseOptionalToken(AsmToken::Comma) &&
2972+
(getTok().isNot(AsmToken::EndOfStatement) ||
2973+
!parseToken(AsmToken::EndOfStatement)));
2974+
2975+
return false;
2976+
}
2977+
28272978
/// parseDirectiveRealValue
28282979
/// ::= (real4 | real8) [ expression (, expression)* ]
28292980
bool MasmParser::parseDirectiveRealValue(StringRef IDVal,
28302981
const fltSemantics &Semantics) {
2831-
auto parseOp = [&]() -> bool {
2832-
APInt AsInt;
2833-
if (checkForValidSection() || parseRealValue(Semantics, AsInt))
2834-
return true;
2835-
getStreamer().emitIntValue(AsInt.getLimitedValue(),
2836-
AsInt.getBitWidth() / 8);
2837-
return false;
2838-
};
2982+
if (checkForValidSection())
2983+
return true;
28392984

2840-
if (parseMany(parseOp))
2985+
SmallVector<APInt, 1> ValuesAsInt;
2986+
if (parseRealInstList(Semantics, ValuesAsInt))
28412987
return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
2988+
2989+
for (const APInt &AsInt : ValuesAsInt) {
2990+
getStreamer().emitIntValue(AsInt.getLimitedValue(),
2991+
AsInt.getBitWidth() / 8);
2992+
}
28422993
return false;
28432994
}
28442995

2996+
/// parseDirectiveNamedRealValue
2997+
/// ::= name (real4 | real8) [ expression (, expression)* ]
2998+
bool MasmParser::parseDirectiveNamedRealValue(StringRef IDVal,
2999+
const fltSemantics &Semantics,
3000+
StringRef Name, SMLoc NameLoc) {
3001+
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3002+
getStreamer().emitLabel(Sym);
3003+
return parseDirectiveRealValue(IDVal, Semantics);
3004+
}
3005+
28453006
/// parseDirectiveOrg
28463007
/// ::= .org expression [ , expression ]
28473008
bool MasmParser::parseDirectiveOrg() {
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# RUN: llvm-ml -filetype=asm %s | FileCheck %s
2+
3+
.data
4+
BYTE 2, 4, 6, 8
5+
; CHECK: .data
6+
; CHECK-NEXT: .byte 2
7+
; CHECK-NEXT: .byte 4
8+
; CHECK-NEXT: .byte 6
9+
; CHECK-NEXT: .byte 8
10+
11+
BYTE 2 dup (1, 2 dup (2)),
12+
3
13+
; CHECK: .byte 1
14+
; CHECK-NEXT: .byte 2
15+
; CHECK-NEXT: .byte 2
16+
; CHECK-NEXT: .byte 1
17+
; CHECK-NEXT: .byte 2
18+
; CHECK-NEXT: .byte 2
19+
; CHECK-NEXT: .byte 3
20+
21+
REAL4 1, 0
22+
; CHECK: .long 1065353216
23+
; CHECK-NEXT: .long 0
24+
25+
REAL4 2 DUP (2.5, 2 dup (0)),
26+
4
27+
; CHECK: .long 1075838976
28+
; CHECK-NEXT: .long 0
29+
; CHECK-NEXT: .long 0
30+
; CHECK-NEXT: .long 1075838976
31+
; CHECK-NEXT: .long 0
32+
; CHECK-NEXT: .long 0
33+
; CHECK-NEXT: .long 1082130432
34+
35+
.code
36+
BYTE 5
37+
; CHECK: .text
38+
; CHECK-NEXT: .byte 5

0 commit comments

Comments
 (0)