Skip to content

Commit f6fb780

Browse files
committed
MC: Split the x86 asm matcher implementations by dialect
The existing matcher has lots of AT&T assembly dialect assumptions baked into it. In particular, the hack for resolving the size of a memory operand by appending the four most common suffixes doesn't work at all. The Intel assembly dialect mnemonic table has ambiguous entries, so we need to try matching multiple times with different operand sizes, since that's the only way to choose different instruction variants. This makes us more compatible with gas's implementation of Intel assembly syntax. MSVC assumes you want byte-sized operations for the instructions that we reject as ambiguous. Reviewed By: grosbach Differential Revision: http://reviews.llvm.org/D4747 llvm-svn: 216481
1 parent a2a1e53 commit f6fb780

File tree

5 files changed

+296
-33
lines changed

5 files changed

+296
-33
lines changed

llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp

Lines changed: 195 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,29 @@ class X86AsmParser : public MCTargetAsmParser {
697697
uint64_t &ErrorInfo,
698698
bool MatchingInlineAsm) override;
699699

700+
void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
701+
MCStreamer &Out, bool MatchingInlineAsm);
702+
703+
bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
704+
bool MatchingInlineAsm);
705+
706+
bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
707+
OperandVector &Operands, MCStreamer &Out,
708+
uint64_t &ErrorInfo,
709+
bool MatchingInlineAsm);
710+
711+
bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
712+
OperandVector &Operands, MCStreamer &Out,
713+
uint64_t &ErrorInfo,
714+
bool MatchingInlineAsm);
715+
716+
unsigned getPointerSize() {
717+
if (is16BitMode()) return 16;
718+
if (is32BitMode()) return 32;
719+
if (is64BitMode()) return 64;
720+
llvm_unreachable("invalid mode");
721+
}
722+
700723
virtual bool OmitRegisterFromClobberLists(unsigned RegNo) override;
701724

702725
/// doSrcDstMatch - Returns true if operands are matching in their
@@ -2309,12 +2332,16 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
23092332
OperandVector &Operands,
23102333
MCStreamer &Out, uint64_t &ErrorInfo,
23112334
bool MatchingInlineAsm) {
2312-
assert(!Operands.empty() && "Unexpect empty operand list!");
2313-
X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2314-
assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2315-
ArrayRef<SMRange> EmptyRanges = None;
2335+
if (isParsingIntelSyntax())
2336+
return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2337+
MatchingInlineAsm);
2338+
return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2339+
MatchingInlineAsm);
2340+
}
23162341

2317-
// First, handle aliases that expand to multiple instructions.
2342+
void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2343+
OperandVector &Operands, MCStreamer &Out,
2344+
bool MatchingInlineAsm) {
23182345
// FIXME: This should be replaced with a real .td file alias mechanism.
23192346
// Also, MatchInstructionImpl should actually *do* the EmitInstruction
23202347
// call.
@@ -2336,6 +2363,36 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
23362363
EmitInstruction(Inst, Operands, Out);
23372364
Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
23382365
}
2366+
}
2367+
2368+
/// Emit an "instruction requires: <features>" diagnostic at \p IDLoc.
///
/// \param IDLoc location the diagnostic is attached to.
/// \param ErrorInfo bit mask of the missing subtarget features; must be
///        non-zero. Each set bit is translated with
///        getSubtargetFeatureName and appended to the message.
/// \param MatchingInlineAsm forwarded unchanged to Error().
/// \returns whatever Error() returns.
bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
                                       bool MatchingInlineAsm) {
  assert(ErrorInfo && "Unknown missing feature!");
  ArrayRef<SMRange> EmptyRanges = None;
  SmallString<126> Msg;
  raw_svector_ostream OS(Msg);
  OS << "instruction requires:";
  // Visit every bit of the mask, including the most significant one. The
  // unsigned left shift discards the bit once it moves past position 63,
  // which makes Mask zero and terminates the loop.
  for (uint64_t Mask = 1; Mask != 0; Mask <<= 1) {
    if (ErrorInfo & Mask)
      OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
  }
  return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
}
2383+
2384+
bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2385+
OperandVector &Operands,
2386+
MCStreamer &Out,
2387+
uint64_t &ErrorInfo,
2388+
bool MatchingInlineAsm) {
2389+
assert(!Operands.empty() && "Unexpect empty operand list!");
2390+
X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2391+
assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2392+
ArrayRef<SMRange> EmptyRanges = None;
2393+
2394+
// First, handle aliases that expand to multiple instructions.
2395+
MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
23392396

23402397
bool WasOriginallyInvalidOperand = false;
23412398
MCInst Inst;
@@ -2358,21 +2415,8 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
23582415
EmitInstruction(Inst, Operands, Out);
23592416
Opcode = Inst.getOpcode();
23602417
return false;
2361-
case Match_MissingFeature: {
2362-
assert(ErrorInfo && "Unknown missing feature!");
2363-
// Special case the error message for the very common case where only
2364-
// a single subtarget feature is missing.
2365-
std::string Msg = "instruction requires:";
2366-
uint64_t Mask = 1;
2367-
for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2368-
if (ErrorInfo & Mask) {
2369-
Msg += " ";
2370-
Msg += getSubtargetFeatureName(ErrorInfo & Mask);
2371-
}
2372-
Mask <<= 1;
2373-
}
2374-
return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2375-
}
2418+
case Match_MissingFeature:
2419+
return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
23762420
case Match_InvalidOperand:
23772421
WasOriginallyInvalidOperand = true;
23782422
break;
@@ -2490,25 +2534,17 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
24902534
// missing feature.
24912535
if (std::count(std::begin(Match), std::end(Match),
24922536
Match_MissingFeature) == 1) {
2493-
std::string Msg = "instruction requires:";
2494-
uint64_t Mask = 1;
2495-
for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
2496-
if (ErrorInfoMissingFeature & Mask) {
2497-
Msg += " ";
2498-
Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
2499-
}
2500-
Mask <<= 1;
2501-
}
2502-
return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
2537+
ErrorInfo = ErrorInfoMissingFeature;
2538+
return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2539+
MatchingInlineAsm);
25032540
}
25042541

25052542
// If one instruction matched with an invalid operand, report this as an
25062543
// operand failure.
25072544
if (std::count(std::begin(Match), std::end(Match),
25082545
Match_InvalidOperand) == 1) {
2509-
Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2510-
MatchingInlineAsm);
2511-
return true;
2546+
return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2547+
MatchingInlineAsm);
25122548
}
25132549

25142550
// If all of these were an outright failure, report it in a useless way.
@@ -2517,6 +2553,132 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
25172553
return true;
25182554
}
25192555

2556+
/// Match and emit one instruction parsed in the Intel assembly dialect.
///
/// Intel mnemonics do not encode the operand size, so a memory operand
/// written without an explicit size leaves the instruction variant
/// undetermined. For 'call', 'jmp' and 'push' such an operand is given the
/// pointer size of the current mode; for everything else the matcher is run
/// once per candidate size and the results are tallied. Exactly one
/// successful match is accepted, several are diagnosed as an ambiguous
/// operand size, and none falls through to the most specific failure
/// diagnostic available.
///
/// \param IDLoc location attached to the emitted instruction and to most
///        diagnostics.
/// \param Opcode set to the opcode of the matched instruction on success.
/// \param Operands parsed operands; element 0 is the mnemonic token.
/// \param Out streamer handed to EmitInstruction on success.
/// \param ErrorInfo written by the matcher in the single-attempt path, and
///        set to the missing-feature mask when that diagnostic is issued.
/// \param MatchingInlineAsm when true, post-processing and emission are
///        skipped and diagnostics carry no source ranges.
/// \returns false when an instruction was matched; otherwise the result of
///          the Error()/ErrorMissingFeature() call that reported the failure.
bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
                                                OperandVector &Operands,
                                                MCStreamer &Out,
                                                uint64_t &ErrorInfo,
                                                bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpect empty operand list!");
  assert(static_cast<X86Operand &>(*Operands[0]).isToken() &&
         "Leading operand should always be a mnemonic!");
  // Capture the mnemonic as written, before any alias rewriting, so the
  // diagnostics below and the pointer-size check use the original spelling.
  StringRef Mnemonic = static_cast<X86Operand &>(*Operands[0]).getToken();
  ArrayRef<SMRange> EmptyRanges = None;

  // First, handle aliases that expand to multiple instructions.
  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
                    Out, MatchingInlineAsm);

  // MatchFPUWaitAlias may replace Operands[0] with a freshly created token,
  // so bind the reference to the mnemonic operand only after it has run; a
  // reference taken earlier would refer to the replaced operand.
  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);

  MCInst Inst;

  // Find one unsized memory operand, if present. The loop does not stop at
  // the first hit, so the last qualifying operand is the one remembered.
  X86Operand *UnsizedMemOp = nullptr;
  for (const auto &Operand : Operands) {
    X86Operand *X86Op = static_cast<X86Operand *>(Operand.get());
    // FIXME: Remove this exception for absolute memory references. Currently it
    // allows us to assemble 'call foo', because foo is represented as a memory
    // operand.
    if (X86Op->isMemUnsized() && !X86Op->isAbsMem())
      UnsizedMemOp = X86Op;
  }

  // Allow some instructions to have implicitly pointer-sized operands. This is
  // compatible with gas.
  if (UnsizedMemOp) {
    static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
    for (const char *Instr : PtrSizedInstrs) {
      if (Mnemonic == Instr) {
        UnsizedMemOp->Mem.Size = getPointerSize();
        break;
      }
    }
  }

  // If an unsized memory operand is present, try to match with each memory
  // operand size. In Intel assembly, the size is not part of the instruction
  // mnemonic.
  SmallVector<unsigned, 8> Match;
  uint64_t ErrorInfoMissingFeature = 0;
  if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
    static const unsigned MopSizes[] = {8, 16, 32, 64, 80};
    for (unsigned Size : MopSizes) {
      UnsizedMemOp->Mem.Size = Size;
      // Per-attempt error info is only kept for missing-feature failures.
      uint64_t ErrorInfoIgnore;
      Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
                                           MatchingInlineAsm,
                                           isParsingIntelSyntax()));
      // If this returned as a missing feature failure, remember that.
      if (Match.back() == Match_MissingFeature)
        ErrorInfoMissingFeature = ErrorInfoIgnore;
    }
  } else {
    // Either every memory operand is sized, or the pointer size was assigned
    // above: a single attempt is enough.
    Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
                                         MatchingInlineAsm,
                                         isParsingIntelSyntax()));
    // If this returned as a missing feature failure, remember that.
    if (Match.back() == Match_MissingFeature)
      ErrorInfoMissingFeature = ErrorInfo;
  }

  // Restore the size of the unsized memory operand if we modified it.
  if (UnsizedMemOp)
    UnsizedMemOp->Mem.Size = 0;

  // If it's a bad mnemonic, all results will be the same.
  if (Match.back() == Match_MnemonicFail) {
    ArrayRef<SMRange> Ranges =
        MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
    return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
                 Ranges, MatchingInlineAsm);
  }

  // If exactly one matched, then we treat that as a successful match (and the
  // instruction will already have been filled in correctly, since the failing
  // matches won't have modified it).
  unsigned NumSuccessfulMatches =
      std::count(std::begin(Match), std::end(Match), Match_Success);
  if (NumSuccessfulMatches == 1) {
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the individual
    // transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;
    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      EmitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  } else if (NumSuccessfulMatches > 1) {
    assert(UnsizedMemOp &&
           "multiple matches only possible with unsized memory operands");
    ArrayRef<SMRange> Ranges =
        MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
    return Error(UnsizedMemOp->getStartLoc(),
                 "ambiguous operand size for instruction '" + Mnemonic + "\'",
                 Ranges, MatchingInlineAsm);
  }

  // If one instruction matched with a missing feature, report this as a
  // missing feature.
  if (std::count(std::begin(Match), std::end(Match),
                 Match_MissingFeature) == 1) {
    ErrorInfo = ErrorInfoMissingFeature;
    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
                               MatchingInlineAsm);
  }

  // If one instruction matched with an invalid operand, report this as an
  // operand failure.
  if (std::count(std::begin(Match), std::end(Match),
                 Match_InvalidOperand) == 1) {
    return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
                 MatchingInlineAsm);
  }

  // If all of these were an outright failure, report it in a useless way.
  return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
               MatchingInlineAsm);
}
2681+
25202682
bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
25212683
return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
25222684
}

llvm/lib/Target/X86/AsmParser/X86Operand.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,9 @@ struct X86Operand : public MCParsedAsmOperand {
205205
}
206206

207207
bool isMem() const override { return Kind == Memory; }
208+
bool isMemUnsized() const {
209+
return Kind == Memory && Mem.Size == 0;
210+
}
208211
bool isMem8() const {
209212
return Kind == Memory && (!Mem.Size || Mem.Size == 8);
210213
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// RUN: not llvm-mc -triple i686-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s
2+
3+
.intel_syntax
4+
5+
// Basic case of ambiguity for inc.
6+
7+
inc [eax]
8+
// CHECK: error: ambiguous operand size for instruction 'inc'
9+
inc dword ptr [eax]
10+
inc word ptr [eax]
11+
inc byte ptr [eax]
12+
// CHECK-NOT: error:
13+
14+
// Other ambiguous instructions. Anything that doesn't take a register,
15+
// basically.
16+
17+
dec [eax]
18+
// CHECK: error: ambiguous operand size for instruction 'dec'
19+
mov [eax], 1
20+
// CHECK: error: ambiguous operand size for instruction 'mov'
21+
and [eax], 0
22+
// CHECK: error: ambiguous operand size for instruction 'and'
23+
or [eax], 1
24+
// CHECK: error: ambiguous operand size for instruction 'or'
25+
add [eax], 1
26+
// CHECK: error: ambiguous operand size for instruction 'add'
27+
sub [eax], 1
28+
// CHECK: error: ambiguous operand size for instruction 'sub'
29+
30+
// gas assumes these instructions are pointer-sized by default, and we follow
31+
// suit.
32+
push [eax]
33+
call [eax]
34+
jmp [eax]
35+
// CHECK-NOT: error:
36+
37+
add byte ptr [eax], eax
38+
// CHECK: error: invalid operand for instruction
39+
40+
add byte ptr [eax], eax
41+
// CHECK: error: invalid operand for instruction
42+
43+
add rax, 3
44+
// CHECK: error: register %rax is only available in 64-bit mode
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// RUN: llvm-mc %s -triple=i686-pc-windows | FileCheck %s
2+
3+
.intel_syntax
4+
5+
push [eax]
6+
// CHECK: pushl (%eax)
7+
call [eax]
8+
// CHECK: calll *(%eax)
9+
jmp [eax]
10+
// CHECK: jmpl *(%eax)
11+
12+
// mode switch
13+
.code16
14+
15+
push [eax]
16+
// CHECK: pushw (%eax)
17+
call [eax]
18+
// CHECK: callw *(%eax)
19+
jmp [eax]
20+
// CHECK: jmpw *(%eax)

llvm/test/MC/X86/intel-syntax.s

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -607,3 +607,37 @@ fadd "?half@?0??bar@@YAXXZ@4NA"
607607
fadd "?half@?0??bar@@YAXXZ@4NA"@IMGREL
608608
// CHECK: fadds "?half@?0??bar@@YAXXZ@4NA"
609609
// CHECK: fadds "?half@?0??bar@@YAXXZ@4NA"@IMGREL32
610+
611+
inc qword ptr [rax]
612+
inc dword ptr [rax]
613+
inc word ptr [rax]
614+
inc byte ptr [rax]
615+
// CHECK: incq (%rax)
616+
// CHECK: incl (%rax)
617+
// CHECK: incw (%rax)
618+
// CHECK: incb (%rax)
619+
620+
dec qword ptr [rax]
621+
dec dword ptr [rax]
622+
dec word ptr [rax]
623+
dec byte ptr [rax]
624+
// CHECK: decq (%rax)
625+
// CHECK: decl (%rax)
626+
// CHECK: decw (%rax)
627+
// CHECK: decb (%rax)
628+
629+
add qword ptr [rax], 1
630+
add dword ptr [rax], 1
631+
add word ptr [rax], 1
632+
add byte ptr [rax], 1
633+
// CHECK: addq $1, (%rax)
634+
// CHECK: addl $1, (%rax)
635+
// CHECK: addw $1, (%rax)
636+
// CHECK: addb $1, (%rax)
637+
638+
fstp xword ptr [rax]
639+
fstp qword ptr [rax]
640+
fstp dword ptr [rax]
641+
// CHECK: fstpt (%rax)
642+
// CHECK: fstpl (%rax)
643+
// CHECK: fstps (%rax)

0 commit comments

Comments
 (0)