Skip to content

Commit f8ec0f8

Browse files
ericastor authored and IanWood1 committed
[ms] [llvm-ml] Implement support for PROC NEAR/FAR (llvm#131707)
Matches ML.EXE by translating "ret" instructions inside a `PROC FAR` to "retf", and automatically prepending a `push cs` to all near calls to a `PROC FAR`.
1 parent 8eefa57 commit f8ec0f8

File tree

6 files changed

+214
-26
lines changed

6 files changed

+214
-26
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//===- llvm/MC/MCParser/MCMasmParser.h - MASM Parser Interface --*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_MC_MCPARSER_MCMASMPARSER_H
10+
#define LLVM_MC_MCPARSER_MCMASMPARSER_H
11+
12+
#include "llvm/MC/MCParser/MCAsmParser.h"
13+
14+
namespace llvm {
15+
16+
/// MASM-type assembler parser interface.
17+
class MCMasmParser : public MCAsmParser {
18+
public:
19+
virtual bool getDefaultRetIsFar() const = 0;
20+
virtual void setDefaultRetIsFar(bool IsFar) = 0;
21+
22+
bool isParsingMasm() const override { return true; }
23+
24+
static bool classof(const MCAsmParser *AP) { return AP->isParsingMasm(); }
25+
};
26+
27+
} // end namespace llvm
28+
29+
#endif // LLVM_MC_MCPARSER_MCMASMPARSER_H

llvm/include/llvm/MC/MCSymbolCOFF.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ class MCSymbolCOFF : public MCSymbol {
2525
SF_ClassShift = 0,
2626

2727
SF_SafeSEH = 0x0100,
28+
// Marks a symbol defined by a MASM `PROC FAR`. This must be a bit outside
// SF_WeakExternalCharacteristicsMask (0x0E00, i.e. bits 9-11); 0x0200 would
// alias the lowest bit of that field.
SF_FarProc = 0x1000,
2829
SF_WeakExternalCharacteristicsMask = 0x0E00,
2930
SF_WeakExternalCharacteristicsShift = 9,
3031
};
@@ -66,6 +67,9 @@ class MCSymbolCOFF : public MCSymbol {
6667
modifyFlags(SF_SafeSEH, SF_SafeSEH);
6768
}
6869

70+
/// Returns true if the SF_FarProc flag is set on this symbol.
bool isFarProc() const {
  return (getFlags() & SF_FarProc) != 0;
}
71+
/// Sets the SF_FarProc flag on this symbol.
void setIsFarProc() const {
  modifyFlags(SF_FarProc, SF_FarProc);
}
72+
6973
static bool classof(const MCSymbol *S) { return S->isCOFF(); }
7074
};
7175

llvm/lib/MC/MCParser/COFFMasmParser.cpp

Lines changed: 59 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@
1212
#include "llvm/MC/MCAsmMacro.h"
1313
#include "llvm/MC/MCContext.h"
1414
#include "llvm/MC/MCParser/MCAsmLexer.h"
15+
#include "llvm/MC/MCParser/MCAsmParser.h"
1516
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
17+
#include "llvm/MC/MCParser/MCMasmParser.h"
1618
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
1719
#include "llvm/MC/MCSectionCOFF.h"
1820
#include "llvm/MC/MCStreamer.h"
@@ -41,6 +43,7 @@ class COFFMasmParser : public MCAsmParserExtension {
4143
StringRef COMDATSymName, COFF::COMDATType Type,
4244
Align Alignment);
4345

46+
bool parseDirectiveModel(StringRef, SMLoc);
4447
bool parseDirectiveProc(StringRef, SMLoc);
4548
bool parseDirectiveEndProc(StringRef, SMLoc);
4649
bool parseDirectiveSegment(StringRef, SMLoc);
@@ -167,7 +170,7 @@ class COFFMasmParser : public MCAsmParserExtension {
167170
// .exit
168171
// .fardata
169172
// .fardata?
170-
addDirectiveHandler<&COFFMasmParser::IgnoreDirective>(".model");
173+
addDirectiveHandler<&COFFMasmParser::parseDirectiveModel>(".model");
171174
// .stack
172175
// .startup
173176

@@ -201,8 +204,13 @@ class COFFMasmParser : public MCAsmParserExtension {
201204
}
202205

203206
/// Per-procedure state kept for the stack of active procedure definitions.
204-
SmallVector<StringRef, 1> CurrentProcedures;
205-
SmallVector<bool, 1> CurrentProceduresFramed;
207+
enum ProcDistance {
  PROC_DISTANCE_NEAR = 0, // "near" attribute; also the default for ProcInfo.
  PROC_DISTANCE_FAR = 1   // "far" attribute.
};
208+
struct ProcInfo {
209+
StringRef Name;
210+
ProcDistance Distance = PROC_DISTANCE_NEAR;
211+
bool IsFramed = false;
212+
};
213+
SmallVector<ProcInfo, 1> CurrentProcedures;
206214

207215
public:
208216
COFFMasmParser() = default;
@@ -435,48 +443,75 @@ bool COFFMasmParser::parseDirectiveOption(StringRef Directive, SMLoc Loc) {
435443
return false;
436444
}
437445

446+
/// parseDirectiveModel
447+
/// ::= ".model" "flat"
448+
bool COFFMasmParser::parseDirectiveModel(StringRef Directive, SMLoc Loc) {
449+
if (!getLexer().is(AsmToken::Identifier))
450+
return TokError("expected identifier in directive");
451+
452+
StringRef ModelType = getTok().getIdentifier();
453+
if (!ModelType.equals_insensitive("flat")) {
454+
return TokError(
455+
"expected 'flat' for memory model; no other models supported");
456+
}
457+
458+
// Ignore; no action necessary.
459+
Lex();
460+
return false;
461+
}
462+
438463
/// parseDirectiveProc
439464
/// TODO(epastor): Implement parameters and other attributes.
440-
/// ::= label "proc" [[distance]]
465+
/// ::= label "proc" [[distance]] [[frame]]
441466
/// statements
442467
/// label "endproc"
443468
bool COFFMasmParser::parseDirectiveProc(StringRef Directive, SMLoc Loc) {
444469
if (!getStreamer().getCurrentFragment())
445470
return Error(getTok().getLoc(), "expected section directive");
446471

447-
StringRef Label;
448-
if (getParser().parseIdentifier(Label))
472+
ProcInfo Proc;
473+
if (getParser().parseIdentifier(Proc.Name))
449474
return Error(Loc, "expected identifier for procedure");
450-
if (getLexer().is(AsmToken::Identifier)) {
475+
while (getLexer().is(AsmToken::Identifier)) {
451476
StringRef nextVal = getTok().getString();
452477
SMLoc nextLoc = getTok().getLoc();
453478
if (nextVal.equals_insensitive("far")) {
454-
// TODO(epastor): Handle far procedure definitions.
455479
Lex();
456-
return Error(nextLoc, "far procedure definitions not yet supported");
480+
Proc.Distance = PROC_DISTANCE_FAR;
481+
nextVal = getTok().getString();
482+
nextLoc = getTok().getLoc();
457483
} else if (nextVal.equals_insensitive("near")) {
458484
Lex();
485+
Proc.Distance = PROC_DISTANCE_NEAR;
486+
nextVal = getTok().getString();
487+
nextLoc = getTok().getLoc();
488+
} else if (nextVal.equals_insensitive("frame")) {
489+
Lex();
490+
Proc.IsFramed = true;
459491
nextVal = getTok().getString();
460492
nextLoc = getTok().getLoc();
493+
} else {
494+
break;
461495
}
462496
}
463-
MCSymbolCOFF *Sym = cast<MCSymbolCOFF>(getContext().getOrCreateSymbol(Label));
497+
MCSymbolCOFF *Sym =
498+
cast<MCSymbolCOFF>(getContext().getOrCreateSymbol(Proc.Name));
464499

465500
// Define symbol as simple external function
466501
Sym->setExternal(true);
467502
Sym->setType(COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT);
503+
if (Proc.Distance == PROC_DISTANCE_FAR)
504+
Sym->setIsFarProc();
505+
506+
cast<MCMasmParser>(getParser())
507+
.setDefaultRetIsFar(Proc.Distance == PROC_DISTANCE_FAR);
468508

469-
bool Framed = false;
470-
if (getLexer().is(AsmToken::Identifier) &&
471-
getTok().getString().equals_insensitive("frame")) {
472-
Lex();
473-
Framed = true;
509+
if (Proc.IsFramed) {
474510
getStreamer().emitWinCFIStartProc(Sym, Loc);
475511
}
476512
getStreamer().emitLabel(Sym, Loc);
477513

478-
CurrentProcedures.push_back(Label);
479-
CurrentProceduresFramed.push_back(Framed);
514+
CurrentProcedures.push_back(std::move(Proc));
480515
return false;
481516
}
482517
bool COFFMasmParser::parseDirectiveEndProc(StringRef Directive, SMLoc Loc) {
@@ -487,15 +522,18 @@ bool COFFMasmParser::parseDirectiveEndProc(StringRef Directive, SMLoc Loc) {
487522

488523
if (CurrentProcedures.empty())
489524
return Error(Loc, "endp outside of procedure block");
490-
else if (!CurrentProcedures.back().equals_insensitive(Label))
525+
else if (!CurrentProcedures.back().Name.equals_insensitive(Label))
491526
return Error(LabelLoc, "endp does not match current procedure '" +
492-
CurrentProcedures.back() + "'");
527+
CurrentProcedures.back().Name + "'");
493528

494-
if (CurrentProceduresFramed.back()) {
529+
if (CurrentProcedures.back().IsFramed) {
495530
getStreamer().emitWinCFIEndProc(Loc);
496531
}
497532
CurrentProcedures.pop_back();
498-
CurrentProceduresFramed.pop_back();
533+
cast<MCMasmParser>(getParser())
534+
.setDefaultRetIsFar(!CurrentProcedures.empty() &&
535+
CurrentProcedures.back().Distance ==
536+
PROC_DISTANCE_FAR);
499537
return false;
500538
}
501539

llvm/lib/MC/MCParser/MasmParser.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "llvm/MC/MCParser/MCAsmLexer.h"
3737
#include "llvm/MC/MCParser/MCAsmParser.h"
3838
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
39+
#include "llvm/MC/MCParser/MCMasmParser.h"
3940
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
4041
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
4142
#include "llvm/MC/MCRegisterInfo.h"
@@ -65,6 +66,7 @@
6566
#include <memory>
6667
#include <optional>
6768
#include <sstream>
69+
#include <stdbool.h>
6870
#include <string>
6971
#include <tuple>
7072
#include <utility>
@@ -373,7 +375,7 @@ FieldInitializer &FieldInitializer::operator=(FieldInitializer &&Initializer) {
373375
/// The concrete assembly parser instance.
374376
// Note that this is a full MCAsmParser, not an MCAsmParserExtension!
375377
// It's a peer of AsmParser, not of COFFAsmParser, WasmAsmParser, etc.
376-
class MasmParser : public MCAsmParser {
378+
class MasmParser : public MCMasmParser {
377379
private:
378380
SourceMgr::DiagHandlerTy SavedDiagHandler;
379381
void *SavedDiagContext;
@@ -448,6 +450,9 @@ class MasmParser : public MCAsmParser {
448450
/// Are we parsing ms-style inline assembly?
449451
bool ParsingMSInlineAsm = false;
450452

453+
/// Is the current default `ret` instruction far?
454+
bool DefaultRetIsFar = false;
455+
451456
// Current <...> expression depth.
452457
unsigned AngleBracketDepth = 0U;
453458

@@ -473,6 +478,14 @@ class MasmParser : public MCAsmParser {
473478
DirectiveKindMap[Directive] = DirectiveKindMap[Alias];
474479
}
475480

481+
/// @name MCMasmParser Interface
482+
/// {
483+
484+
/// Reports the current value of DefaultRetIsFar.
bool getDefaultRetIsFar() const override {
  return DefaultRetIsFar;
}
485+
/// Stores \p IsFar as the new value of DefaultRetIsFar.
void setDefaultRetIsFar(bool IsFar) override {
  DefaultRetIsFar = IsFar;
}
486+
487+
/// }
488+
476489
/// @name MCAsmParser Interface
477490
/// {
478491

@@ -504,8 +517,6 @@ class MasmParser : public MCAsmParser {
504517
}
505518
bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; }
506519

507-
bool isParsingMasm() const override { return true; }
508-
509520
bool defineMacro(StringRef Name, StringRef Value) override;
510521

511522
bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override;

llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,15 @@
2525
#include "llvm/MC/MCInstrInfo.h"
2626
#include "llvm/MC/MCParser/MCAsmLexer.h"
2727
#include "llvm/MC/MCParser/MCAsmParser.h"
28+
#include "llvm/MC/MCParser/MCMasmParser.h"
2829
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
2930
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
3031
#include "llvm/MC/MCRegisterInfo.h"
3132
#include "llvm/MC/MCSection.h"
3233
#include "llvm/MC/MCStreamer.h"
3334
#include "llvm/MC/MCSubtargetInfo.h"
3435
#include "llvm/MC/MCSymbol.h"
36+
#include "llvm/MC/MCSymbolCOFF.h"
3537
#include "llvm/MC/TargetRegistry.h"
3638
#include "llvm/Support/CommandLine.h"
3739
#include "llvm/Support/Compiler.h"
@@ -1200,6 +1202,10 @@ class X86AsmParser : public MCTargetAsmParser {
12001202
void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
12011203
MCStreamer &Out, bool MatchingInlineAsm);
12021204

1205+
void MatchMASMFarCallToNear(SMLoc IDLoc, X86Operand &Op,
1206+
OperandVector &Operands, MCStreamer &Out,
1207+
bool MatchingInlineAsm);
1208+
12031209
bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
12041210
bool MatchingInlineAsm);
12051211

@@ -2738,11 +2744,11 @@ bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
27382744
if ((BaseReg || IndexReg || RegNo || DefaultBaseReg))
27392745
Operands.push_back(X86Operand::CreateMem(
27402746
getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
2741-
Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
2747+
Size, DefaultBaseReg, /*SymName=*/SM.getSymName(), /*OpDecl=*/nullptr,
27422748
/*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
27432749
else
27442750
Operands.push_back(X86Operand::CreateMem(
2745-
getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
2751+
getPointerWidth(), Disp, Start, End, Size, /*SymName=*/SM.getSymName(),
27462752
/*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
27472753
MaybeDirectBranchDest));
27482754
return false;
@@ -3440,6 +3446,14 @@ bool X86AsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
34403446
}
34413447
}
34423448

3449+
if (Parser.isParsingMasm() && !is64BitMode()) {
3450+
// MASM implicitly converts "ret" to "retf" in far procedures; this is
3451+
// reflected in the default return type tracked by the MCMasmParser.
3452+
if (PatchedName == "ret" &&
3453+
cast<MCMasmParser>(getParser()).getDefaultRetIsFar())
3454+
PatchedName = "retf";
3455+
}
3456+
34433457
// Determine whether this is an instruction prefix.
34443458
// FIXME:
34453459
// Enhance prefixes integrity robustness. for example, following forms
@@ -4128,6 +4142,11 @@ bool X86AsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
41284142
// First, handle aliases that expand to multiple instructions.
41294143
MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
41304144
Out, MatchingInlineAsm);
4145+
if (getParser().isParsingMasm() && !is64BitMode()) {
4146+
MatchMASMFarCallToNear(IDLoc, static_cast<X86Operand &>(*Operands[0]),
4147+
Operands, Out, MatchingInlineAsm);
4148+
}
4149+
41314150
unsigned Prefixes = getPrefixes(Operands);
41324151

41334152
MCInst Inst;
@@ -4189,6 +4208,37 @@ void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
41894208
}
41904209
}
41914210

4211+
/// MASM compatibility helper for `call` instructions.
///
/// If \p Op is the `call` mnemonic and its first operand names a COFF symbol
/// that is flagged as a far procedure and lives in the section currently being
/// emitted, a push of the CS register is emitted ahead of the call, which is
/// itself emitted as a near call by the normal matching path.
///
/// \param IDLoc             Location attached to the synthesized push.
/// \param Op                The mnemonic token operand (Operands[0]).
/// \param Operands          All parsed operands of the instruction.
/// \param Out               Streamer that receives the synthesized push.
/// \param MatchingInlineAsm When true nothing is emitted.
void X86AsmParser::MatchMASMFarCallToNear(SMLoc IDLoc, X86Operand &Op,
                                          OperandVector &Operands,
                                          MCStreamer &Out,
                                          bool MatchingInlineAsm) {
  // FIXME: This should be replaced with a real .td file alias mechanism.
  // Also, MatchInstructionImpl should actually *do* the EmitInstruction
  // call.
  if (Op.getToken() != "call")
    return;
  // This is a call instruction...

  // A `call` with no target operand has nothing to inspect; return before
  // indexing Operands[1] and leave the diagnostic to the regular matcher.
  if (Operands.size() < 2)
    return;

  X86Operand &Target = static_cast<X86Operand &>(*Operands[1]);
  MCSymbol *Sym = getContext().lookupSymbol(Target.getSymName());
  if (Sym == nullptr || !Sym->isInSection() || !Sym->isCOFF())
    return;
  // isCOFF() was checked above, so the checked cast<> is the right tool;
  // dyn_cast<> may return null and must not be dereferenced unchecked.
  if (!cast<MCSymbolCOFF>(Sym)->isFarProc())
    return;
  // Sym is a reference to a far proc in a code section....

  if (Out.getCurrentSectionOnly() != &Sym->getSection())
    return;

  // This is a call to a symbol declared as a far proc, and will be emitted as
  // a near call... so we need to explicitly push the code section register
  // before the call.
  // NOTE(review): PUSH32r is the general-purpose-register push; confirm that
  // it encodes correctly with a segment register operand (and in 16-bit mode),
  // or switch to a dedicated `push cs` opcode.
  MCInst Inst;
  Inst.setOpcode(X86::PUSH32r);
  Inst.addOperand(MCOperand::createReg(MCRegister(X86::CS)));
  Inst.setLoc(IDLoc);
  if (!MatchingInlineAsm)
    emitInstruction(Inst, Operands, Out);
}
4241+
41924242
bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
41934243
const FeatureBitset &MissingFeatures,
41944244
bool MatchingInlineAsm) {

0 commit comments

Comments
 (0)