Skip to content

Commit 36978fa

Browse files
committed
[MC] Add UseAtForSpecifier
Some ELF targets don't use @ for relocation specifiers. We should not report `error: invalid variant` when @ is used. Attempt to make expr@specifier parsing less hacky.
1 parent 7267dbf commit 36978fa

File tree

12 files changed

+50
-25
lines changed

12 files changed

+50
-25
lines changed

llvm/include/llvm/MC/MCAsmInfo.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -377,9 +377,12 @@ class MCAsmInfo {
377377
/// names in .cfi_* directives. Defaults to false.
378378
bool DwarfRegNumForCFI = false;
379379

380-
/// True if target uses parens to indicate the symbol variant instead of @.
381-
/// For example, foo(plt) instead of foo@plt. Defaults to false.
382-
bool UseParensForSymbolVariant = false;
380+
/// True if target uses @ (expr@specifier) for relocation specifiers.
381+
bool UseAtForSpecifier = true;
382+
383+
/// (ARM-specific) Uses parens for relocation specifier in data
384+
/// directives, e.g. .word foo(got).
385+
bool UseParensForSpecifier = false;
383386

384387
/// True if the target uses parens for symbol names starting with
385388
/// '$' character to distinguish them from absolute names.
@@ -649,7 +652,8 @@ class MCAsmInfo {
649652

650653
bool doDwarfFDESymbolsUseAbsDiff() const { return DwarfFDESymbolsUseAbsDiff; }
651654
bool useDwarfRegNumForCFI() const { return DwarfRegNumForCFI; }
652-
bool useParensForSymbolVariant() const { return UseParensForSymbolVariant; }
655+
bool useAtForSpecifier() const { return UseAtForSpecifier; }
656+
bool useParensForSpecifier() const { return UseParensForSpecifier; }
653657
bool supportsExtendedDwarfLocDirective() const {
654658
return SupportsExtendedDwarfLocDirective;
655659
}

llvm/include/llvm/MC/MCParser/MCAsmLexer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <cassert>
1616
#include <cstddef>
1717
#include <string>
18+
#include <utility>
1819

1920
namespace llvm {
2021

llvm/lib/MC/MCExpr.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI,
9393
if (Kind != MCSymbolRefExpr::VK_None) {
9494
if (!MAI) // should only be used by dump()
9595
OS << "@<variant " << Kind << '>';
96-
else if (MAI->useParensForSymbolVariant()) // ARM
96+
else if (MAI->useParensForSpecifier()) // ARM
9797
OS << '(' << MAI->getSpecifierName(Kind) << ')';
9898
else
9999
OS << '@' << MAI->getSpecifierName(Kind);

llvm/lib/MC/MCParser/AsmLexer.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,10 @@
3232
using namespace llvm;
3333

3434
AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
35-
AllowAtInIdentifier = !StringRef(MAI.getCommentString()).starts_with("@");
35+
// For COFF targets, this is true, while for ELF targets, it should be false.
36+
// Currently, @specifier parsing depends on '@' being included in the token.
37+
AllowAtInIdentifier = !StringRef(MAI.getCommentString()).starts_with("@") &&
38+
MAI.useAtForSpecifier();
3639
LexMotorolaIntegers = MAI.shouldUseMotorolaIntegers();
3740
}
3841

llvm/lib/MC/MCParser/AsmParser.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1191,9 +1191,9 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
11911191
return false;
11921192
}
11931193
}
1194-
// Parse symbol variant
1194+
// Parse an optional relocation specifier.
11951195
std::pair<StringRef, StringRef> Split;
1196-
if (!MAI.useParensForSymbolVariant()) {
1196+
if (MAI.useAtForSpecifier()) {
11971197
if (FirstTokenKind == AsmToken::String) {
11981198
if (Lexer.is(AsmToken::At)) {
11991199
Lex(); // eat @
@@ -1207,8 +1207,8 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
12071207
} else {
12081208
Split = Identifier.split('@');
12091209
}
1210-
} else if (Lexer.is(AsmToken::LParen)) {
1211-
Lex(); // eat '('.
1210+
} else if (MAI.useParensForSpecifier() &&
1211+
parseOptionalToken(AsmToken::LParen)) {
12121212
StringRef VName;
12131213
parseIdentifier(VName);
12141214
if (parseRParen())
@@ -1231,7 +1231,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
12311231
if (MaybeVariant) {
12321232
SymbolName = Split.first;
12331233
Variant = MCSymbolRefExpr::VariantKind(*MaybeVariant);
1234-
} else if (MAI.doesAllowAtInName() && !MAI.useParensForSymbolVariant()) {
1234+
} else if (MAI.doesAllowAtInName()) {
12351235
Variant = MCSymbolRefExpr::VK_None;
12361236
} else {
12371237
return Error(SMLoc::getFromPointer(Split.second.begin()),
@@ -1463,7 +1463,8 @@ bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
14631463
// As a special case, we support 'a op b @ modifier' by rewriting the
14641464
// expression to include the modifier. This is inefficient, but in general we
14651465
// expect users to use 'a@modifier op b'.
1466-
if (parseOptionalToken(AsmToken::At)) {
1466+
if (Ctx.getAsmInfo()->useAtForSpecifier() &&
1467+
parseOptionalToken(AsmToken::At)) {
14671468
if (Lexer.isNot(AsmToken::Identifier))
14681469
return TokError("unexpected symbol modifier following '@'");
14691470

llvm/lib/MC/MCParser/ELFAsmParser.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#include "llvm/ADT/ScopeExit.h"
910
#include "llvm/ADT/StringExtras.h"
1011
#include "llvm/ADT/StringRef.h"
1112
#include "llvm/ADT/StringSwitch.h"
@@ -741,6 +742,13 @@ bool ELFAsmParser::parseDirectiveType(StringRef, SMLoc) {
741742
// Handle the identifier as the key symbol.
742743
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
743744

745+
bool AllowAt = getLexer().getAllowAtInIdentifier();
746+
if (!AllowAt &&
747+
!getContext().getAsmInfo()->getCommentString().starts_with("@"))
748+
getLexer().setAllowAtInIdentifier(true);
749+
auto _ =
750+
make_scope_exit([&]() { getLexer().setAllowAtInIdentifier(AllowAt); });
751+
744752
// NOTE the comma is optional in all cases. It is only documented as being
745753
// optional in the first case, however, GAS will silently treat the comma as
746754
// optional in all cases. Furthermore, although the documentation states that

llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,8 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(const Triple &TheTriple) {
101101
}
102102

103103
// foo(plt) instead of foo@plt
104-
UseParensForSymbolVariant = true;
104+
UseAtForSpecifier = false;
105+
UseParensForSpecifier = true;
105106

106107
initializeVariantKinds(variantKindDescs);
107108
}
@@ -148,7 +149,8 @@ ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() {
148149
SupportsDebugInformation = true;
149150
ExceptionsType = ExceptionHandling::WinEH;
150151
WinEHEncodingType = WinEH::EncodingType::Itanium;
151-
UseParensForSymbolVariant = true;
152+
UseAtForSpecifier = false;
153+
UseParensForSpecifier = true;
152154

153155
DwarfRegNumForCFI = false;
154156

llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2079,19 +2079,23 @@ ParseStatus RISCVAsmParser::parseCallSymbol(OperandVector &Operands) {
20792079

20802080
if (getLexer().getKind() != AsmToken::Identifier)
20812081
return ParseStatus::NoMatch;
2082+
std::string Identifier(getTok().getIdentifier());
20822083

2083-
// Avoid parsing the register in `call rd, foo` as a call symbol.
2084-
if (getLexer().peekTok().getKind() != AsmToken::EndOfStatement)
2084+
if (getLexer().peekTok().is(AsmToken::At)) {
2085+
Lex();
2086+
Lex();
2087+
StringRef PLT;
2088+
if (getParser().parseIdentifier(PLT) || PLT != "plt")
2089+
return ParseStatus::Failure;
2090+
} else if (!getLexer().peekTok().is(AsmToken::EndOfStatement)) {
2091+
// Avoid parsing the register in `call rd, foo` as a call symbol.
20852092
return ParseStatus::NoMatch;
2086-
2087-
StringRef Identifier;
2088-
if (getParser().parseIdentifier(Identifier))
2089-
return ParseStatus::Failure;
2093+
} else {
2094+
Lex();
2095+
}
20902096

20912097
SMLoc E = SMLoc::getFromPointer(S.getPointer() + Identifier.size());
2092-
20932098
RISCVMCExpr::Specifier Kind = RISCVMCExpr::VK_CALL_PLT;
2094-
(void)Identifier.consume_back("@plt");
20952099

20962100
MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
20972101
Res = MCSymbolRefExpr::create(Sym, getContext());

llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ RISCVMCAsmInfo::RISCVMCAsmInfo(const Triple &TT) {
2626
AlignmentIsInBytes = false;
2727
SupportsDebugInformation = true;
2828
ExceptionsType = ExceptionHandling::DwarfCFI;
29+
UseAtForSpecifier = false;
2930
Data16bitsDirective = "\t.half\t";
3031
Data32bitsDirective = "\t.word\t";
3132
}
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# RUN: not llvm-mc -triple riscv32 < %s 2>&1 | FileCheck %s
22

33
jump 1234, x31 # CHECK: :[[@LINE]]:6: error: operand must be a valid jump target
4-
jump foo@plt, x31 # CHECK: :[[@LINE]]:10: error: invalid variant 'plt'
4+
jump foo@plt, x31 # CHECK: :[[@LINE]]:9: error: unexpected token
55
jump %pcrel_lo(1234), x31 # CHECK: :[[@LINE]]:6: error: unknown token in expression
6+
jump foo@xxx # CHECK: :[[@LINE]]:9: error: unexpected token

llvm/test/MC/RISCV/rv32i-aliases-invalid.s

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ lla x1, %hi(1234) # CHECK: :[[@LINE]]:9: error: operand either must be a bare sy
3232
lla x1, %lo(1234) # CHECK: :[[@LINE]]:9: error: operand either must be a bare symbol name or an immediate integer in the range [-2147483648, 4294967295]
3333
lla x1, %hi(foo) # CHECK: :[[@LINE]]:9: error: operand either must be a bare symbol name or an immediate integer in the range [-2147483648, 4294967295]
3434
lla x1, %lo(foo) # CHECK: :[[@LINE]]:9: error: operand either must be a bare symbol name or an immediate integer in the range [-2147483648, 4294967295]
35-
lla a2, foo@plt # CHECK: :[[@LINE]]:17: error: '@plt' operand not valid for instruction
35+
lla a2, foo@plt # CHECK: :[[@LINE]]:12: error: unexpected token
3636

3737
negw x1, x2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}
3838
sext.w x3, x4 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}

llvm/test/MC/RISCV/rv64i-aliases-invalid.s

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ lla x1, %lo(1234) # CHECK: :[[@LINE]]:9: error: operand either must be a constan
2626
lla x1, %hi(foo) # CHECK: :[[@LINE]]:9: error: operand either must be a constant 64-bit integer or a bare symbol name
2727
lla x1, %lo(foo) # CHECK: :[[@LINE]]:9: error: operand either must be a constant 64-bit integer or a bare symbol name
2828
lla a1, foo+foo # CHECK: :[[@LINE]]:9: error: operand either must be a constant 64-bit integer or a bare symbol name
29-
lla a2, foo@plt # CHECK: :[[@LINE]]:17: error: '@plt' operand not valid for instruction
29+
lla a2, foo@plt # CHECK: :[[@LINE]]:12: error: unexpected token
3030

3131
rdinstreth x29 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV32I Base Instruction Set{{$}}
3232
rdcycleh x27 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV32I Base Instruction Set{{$}}

0 commit comments

Comments
 (0)