Skip to content

Commit c2e272f

Browse files
committed
[ms] [llvm-ml] Improve data support, adding names and complex initializers.
Summary: Add support for ?, DUP, and string initializers, as well as MASM syntax for named data locations. Reviewers: rnk, thakis Reviewed By: thakis Subscribers: merge_guards_bot, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D73226
1 parent 28e8695 commit c2e272f

File tree

2 files changed

+223
-26
lines changed

2 files changed

+223
-26
lines changed

llvm/lib/MC/MCParser/MasmParser.cpp

Lines changed: 185 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -482,12 +482,25 @@ class MasmParser : public MCAsmParser {
482482
/// Codeview def_range types parsed by this class.
483483
StringMap<CVDefRangeType> CVDefRangeTypeMap;
484484

485+
bool parseInitValue(unsigned Size);
486+
485487
// ".ascii", ".asciz", ".string"
486488
bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
487-
bool parseDirectiveValue(StringRef IDVal,
488-
unsigned Size); // "byte", "word", ...
489-
bool parseDirectiveRealValue(StringRef IDVal,
490-
const fltSemantics &); // "real4", ...
489+
490+
// "byte", "word", ...
491+
bool parseScalarInstList(unsigned Size,
492+
SmallVectorImpl<const MCExpr *> &Values);
493+
bool parseDirectiveValue(StringRef IDVal, unsigned Size);
494+
bool parseDirectiveNamedValue(StringRef IDVal, unsigned Size, StringRef Name,
495+
SMLoc NameLoc);
496+
497+
// "real4", "real8"
498+
bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics);
499+
bool parseRealInstList(const fltSemantics &Semantics,
500+
SmallVectorImpl<APInt> &Values);
501+
bool parseDirectiveNamedRealValue(StringRef IDVal,
502+
const fltSemantics &Semantics,
503+
StringRef Name, SMLoc NameLoc);
491504

492505
// "=", "equ", "textequ"
493506
bool parseDirectiveEquate(StringRef IDVal, StringRef Name,
@@ -1903,6 +1916,33 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
19031916
case DK_TEXTEQU:
19041917
Lex();
19051918
return parseDirectiveEquate(nextVal, IDVal, DirKind);
1919+
case DK_BYTE:
1920+
case DK_DB:
1921+
Lex();
1922+
return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
1923+
case DK_WORD:
1924+
case DK_DW:
1925+
Lex();
1926+
return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
1927+
case DK_DWORD:
1928+
case DK_DD:
1929+
Lex();
1930+
return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
1931+
case DK_FWORD:
1932+
Lex();
1933+
return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
1934+
case DK_QWORD:
1935+
case DK_DQ:
1936+
Lex();
1937+
return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
1938+
case DK_REAL4:
1939+
Lex();
1940+
return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), IDVal,
1941+
IDLoc);
1942+
case DK_REAL8:
1943+
Lex();
1944+
return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), IDVal,
1945+
IDLoc);
19061946
}
19071947

19081948
// __asm _emit or __asm __emit
@@ -2739,31 +2779,99 @@ bool MasmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
27392779
return false;
27402780
}
27412781

2782+
bool MasmParser::parseScalarInstList(unsigned Size,
2783+
SmallVectorImpl<const MCExpr *> &Values) {
2784+
do {
2785+
if (getTok().is(AsmToken::String)) {
2786+
StringRef Value = getTok().getStringContents();
2787+
if (Size == 1) {
2788+
// Treat each character as an initializer.
2789+
for (const char CharVal : Value)
2790+
Values.push_back(MCConstantExpr::create(CharVal, getContext()));
2791+
} else {
2792+
// Treat the string as an initial value in big-endian representation.
2793+
if (Value.size() > Size)
2794+
return Error(getTok().getLoc(), "out of range literal value");
2795+
2796+
uint64_t IntValue = 0;
2797+
for (const unsigned char CharVal : Value.bytes())
2798+
IntValue = (IntValue << 8) | CharVal;
2799+
Values.push_back(MCConstantExpr::create(IntValue, getContext()));
2800+
}
2801+
Lex();
2802+
} else {
2803+
const MCExpr *Value;
2804+
if (checkForValidSection() || parseExpression(Value))
2805+
return true;
2806+
if (getTok().is(AsmToken::Identifier) &&
2807+
getTok().getString().equals_lower("dup")) {
2808+
Lex(); // eat 'dup'
2809+
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
2810+
if (!MCE)
2811+
return Error(Value->getLoc(),
2812+
"cannot repeat value a non-constant number of times");
2813+
const int64_t Repetitions = MCE->getValue();
2814+
if (Repetitions < 0)
2815+
return Error(Value->getLoc(),
2816+
"cannot repeat value a negative number of times");
2817+
2818+
SmallVector<const MCExpr *, 1> DuplicatedValues;
2819+
if (parseToken(AsmToken::LParen,
2820+
"parentheses required for 'dup' contents") ||
2821+
parseScalarInstList(Size, DuplicatedValues) ||
2822+
parseToken(AsmToken::RParen, "unmatched parentheses"))
2823+
return true;
2824+
2825+
for (int i = 0; i < Repetitions; ++i)
2826+
Values.append(DuplicatedValues.begin(), DuplicatedValues.end());
2827+
} else {
2828+
Values.push_back(Value);
2829+
}
2830+
}
2831+
2832+
// Continue if we see a comma. (Also, allow line continuation.)
2833+
} while (parseOptionalToken(AsmToken::Comma) &&
2834+
(getTok().isNot(AsmToken::EndOfStatement) ||
2835+
!parseToken(AsmToken::EndOfStatement)));
2836+
2837+
return false;
2838+
}
2839+
27422840
/// parseDirectiveValue
27432841
/// ::= (byte | word | ... ) [ expression (, expression)* ]
27442842
bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
2745-
auto parseOp = [&]() -> bool {
2746-
const MCExpr *Value;
2747-
SMLoc ExprLoc = getLexer().getLoc();
2748-
if (checkForValidSection() || parseExpression(Value))
2749-
return true;
2843+
SmallVector<const MCExpr *, 1> Values;
2844+
if (parseScalarInstList(Size, Values))
2845+
return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
2846+
2847+
for (const MCExpr *Value : Values) {
27502848
// Special case constant expressions to match code generator.
27512849
if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
27522850
assert(Size <= 8 && "Invalid size");
27532851
int64_t IntValue = MCE->getValue();
27542852
if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
2755-
return Error(ExprLoc, "out of range literal value");
2853+
return Error(MCE->getLoc(), "out of range literal value");
27562854
getStreamer().emitIntValue(IntValue, Size);
2757-
} else
2758-
getStreamer().emitValue(Value, Size, ExprLoc);
2759-
return false;
2760-
};
2761-
2762-
if (parseMany(parseOp))
2763-
return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
2855+
} else if (const MCSymbolRefExpr *MSE = dyn_cast<MCSymbolRefExpr>(Value);
2856+
MSE && MSE->getSymbol().getName() == "?") {
2857+
// ? initializer; treat as 0.
2858+
getStreamer().emitIntValue(0, Size);
2859+
} else {
2860+
getStreamer().emitValue(Value, Size, Value->getLoc());
2861+
}
2862+
}
27642863
return false;
27652864
}
27662865

2866+
/// parseDirectiveNamedValue
2867+
/// ::= name (byte | word | ... ) [ expression (, expression)* ]
2868+
bool MasmParser::parseDirectiveNamedValue(StringRef IDVal, unsigned Size,
2869+
StringRef Name, SMLoc NameLoc) {
2870+
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
2871+
getStreamer().emitLabel(Sym);
2872+
return parseDirectiveValue(IDVal, Size);
2873+
}
2874+
27672875
static bool parseHexOcta(MasmParser &Asm, uint64_t &hi, uint64_t &lo) {
27682876
if (Asm.getTok().isNot(AsmToken::Integer) &&
27692877
Asm.getTok().isNot(AsmToken::BigNum))
@@ -2824,24 +2932,75 @@ bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
28242932
return false;
28252933
}
28262934

2935+
bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
2936+
SmallVectorImpl<APInt> &ValuesAsInt) {
2937+
do {
2938+
const AsmToken NextTok = Lexer.peekTok();
2939+
if (NextTok.is(AsmToken::Identifier) &&
2940+
NextTok.getString().equals_lower("dup")) {
2941+
const MCExpr *Value;
2942+
if (parseExpression(Value) || parseToken(AsmToken::Identifier))
2943+
return true;
2944+
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
2945+
if (!MCE)
2946+
return Error(Value->getLoc(),
2947+
"cannot repeat value a non-constant number of times");
2948+
const int64_t Repetitions = MCE->getValue();
2949+
if (Repetitions < 0)
2950+
return Error(Value->getLoc(),
2951+
"cannot repeat value a negative number of times");
2952+
2953+
SmallVector<APInt, 1> DuplicatedValues;
2954+
if (parseToken(AsmToken::LParen,
2955+
"parentheses required for 'dup' contents") ||
2956+
parseRealInstList(Semantics, DuplicatedValues) ||
2957+
parseToken(AsmToken::RParen, "unmatched parentheses"))
2958+
return true;
2959+
2960+
for (int i = 0; i < Repetitions; ++i)
2961+
ValuesAsInt.append(DuplicatedValues.begin(), DuplicatedValues.end());
2962+
} else {
2963+
APInt AsInt;
2964+
if (parseRealValue(Semantics, AsInt))
2965+
return true;
2966+
ValuesAsInt.push_back(AsInt);
2967+
}
2968+
// Continue if we see a comma. (Also, allow line continuation.)
2969+
} while (parseOptionalToken(AsmToken::Comma) &&
2970+
(getTok().isNot(AsmToken::EndOfStatement) ||
2971+
!parseToken(AsmToken::EndOfStatement)));
2972+
2973+
return false;
2974+
}
2975+
28272976
/// parseDirectiveRealValue
28282977
/// ::= (real4 | real8) [ expression (, expression)* ]
28292978
bool MasmParser::parseDirectiveRealValue(StringRef IDVal,
28302979
const fltSemantics &Semantics) {
2831-
auto parseOp = [&]() -> bool {
2832-
APInt AsInt;
2833-
if (checkForValidSection() || parseRealValue(Semantics, AsInt))
2834-
return true;
2835-
getStreamer().emitIntValue(AsInt.getLimitedValue(),
2836-
AsInt.getBitWidth() / 8);
2837-
return false;
2838-
};
2980+
if (checkForValidSection())
2981+
return true;
28392982

2840-
if (parseMany(parseOp))
2983+
SmallVector<APInt, 1> ValuesAsInt;
2984+
if (parseRealInstList(Semantics, ValuesAsInt))
28412985
return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
2986+
2987+
for (const APInt &AsInt : ValuesAsInt) {
2988+
getStreamer().emitIntValue(AsInt.getLimitedValue(),
2989+
AsInt.getBitWidth() / 8);
2990+
}
28422991
return false;
28432992
}
28442993

2994+
/// parseDirectiveNamedRealValue
2995+
/// ::= name (real4 | real8) [ expression (, expression)* ]
2996+
bool MasmParser::parseDirectiveNamedRealValue(StringRef IDVal,
2997+
const fltSemantics &Semantics,
2998+
StringRef Name, SMLoc NameLoc) {
2999+
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3000+
getStreamer().emitLabel(Sym);
3001+
return parseDirectiveRealValue(IDVal, Semantics);
3002+
}
3003+
28453004
/// parseDirectiveOrg
28463005
/// ::= .org expression [ , expression ]
28473006
bool MasmParser::parseDirectiveOrg() {
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# RUN: llvm-ml -filetype=asm %s | FileCheck %s
2+
3+
.data
4+
BYTE 2, 4, 6, 8
5+
; CHECK: .data
6+
; CHECK-NEXT: .byte 2
7+
; CHECK-NEXT: .byte 4
8+
; CHECK-NEXT: .byte 6
9+
; CHECK-NEXT: .byte 8
10+
11+
BYTE 2 dup (1, 2 dup (2)),
12+
3
13+
; CHECK: .byte 1
14+
; CHECK-NEXT: .byte 2
15+
; CHECK-NEXT: .byte 2
16+
; CHECK-NEXT: .byte 1
17+
; CHECK-NEXT: .byte 2
18+
; CHECK-NEXT: .byte 2
19+
; CHECK-NEXT: .byte 3
20+
21+
REAL4 1, 0
22+
; CHECK: .long 1065353216
23+
; CHECK-NEXT: .long 0
24+
25+
REAL4 2 DUP (2.5, 2 dup (0)),
26+
4
27+
; CHECK: .long 1075838976
28+
; CHECK-NEXT: .long 0
29+
; CHECK-NEXT: .long 0
30+
; CHECK-NEXT: .long 1075838976
31+
; CHECK-NEXT: .long 0
32+
; CHECK-NEXT: .long 0
33+
; CHECK-NEXT: .long 1082130432
34+
35+
.code
36+
BYTE 5
37+
; CHECK: .text
38+
; CHECK-NEXT: .byte 5

0 commit comments

Comments
 (0)