Skip to content

Commit 29c2892

Browse files
committed
Implement Import Call Optimization for x64
1 parent db9caf6 commit 29c2892

15 files changed

+506
-27
lines changed

llvm/include/llvm/Transforms/CFGuard.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
namespace llvm {
1717

1818
class FunctionPass;
19+
class GlobalValue;
1920

2021
class CFGuardPass : public PassInfoMixin<CFGuardPass> {
2122
public:
@@ -34,6 +35,8 @@ FunctionPass *createCFGuardCheckPass();
3435
/// Insert Control Flow Guard dispatches on indirect function calls.
3536
FunctionPass *createCFGuardDispatchPass();
3637

38+
bool isCFGuardFunction(const GlobalValue *GV);
39+
3740
} // namespace llvm
3841

3942
#endif

llvm/lib/MC/MCObjectFileInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
599599
if (T.getArch() == Triple::aarch64) {
600600
ImportCallSection =
601601
Ctx->getCOFFSection(".impcall", COFF::IMAGE_SCN_LNK_INFO);
602+
} else if (T.getArch() == Triple::x86_64) {
603+
// Import Call Optimization on x64 leverages the same metadata as the
604+
// retpoline mitigation, hence the unusual section name.
605+
ImportCallSection =
606+
Ctx->getCOFFSection(".retplne", COFF::IMAGE_SCN_LNK_INFO);
602607
}
603608

604609
// Debug info.

llvm/lib/Target/X86/X86AsmPrinter.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -920,6 +920,9 @@ void X86AsmPrinter::emitStartOfAsmFile(Module &M) {
920920
OutStreamer->emitSymbolAttribute(S, MCSA_Global);
921921
OutStreamer->emitAssignment(
922922
S, MCConstantExpr::create(Feat00Value, MMI->getContext()));
923+
924+
if (M.getModuleFlag("import-call-optimization"))
925+
EnableImportCallOptimization = true;
923926
}
924927
OutStreamer->emitSyntaxDirective();
925928

@@ -1021,6 +1024,35 @@ void X86AsmPrinter::emitEndOfAsmFile(Module &M) {
10211024
// safe to set.
10221025
OutStreamer->emitAssemblerFlag(MCAF_SubsectionsViaSymbols);
10231026
} else if (TT.isOSBinFormatCOFF()) {
1027+
// If import call optimization is enabled, emit the appropriate section.
1028+
// We do this whether or not we recorded any items.
1029+
if (EnableImportCallOptimization) {
1030+
OutStreamer->switchSection(getObjFileLowering().getImportCallSection());
1031+
1032+
// Section always starts with some magic.
1033+
constexpr char ImpCallMagic[12] = "RetpolineV1";
1034+
OutStreamer->emitBytes(StringRef{ImpCallMagic, sizeof(ImpCallMagic)});
1035+
1036+
// Layout of this section is:
1037+
// Per section that contains an item to record:
1038+
// uint32_t SectionSize: Size in bytes for information in this section.
1039+
// uint32_t Section Number
1040+
// Per call to imported function in section:
1041+
// uint32_t Kind: the kind of item.
1042+
// uint32_t InstOffset: the offset of the instr in its parent section.
1043+
for (auto &[Section, CallsToImportedFuncs] :
1044+
SectionToImportedFunctionCalls) {
1045+
unsigned SectionSize =
1046+
sizeof(uint32_t) * (2 + 2 * CallsToImportedFuncs.size());
1047+
OutStreamer->emitInt32(SectionSize);
1048+
OutStreamer->emitCOFFSecNumber(Section->getBeginSymbol());
1049+
for (auto &[CallsiteSymbol, Kind] : CallsToImportedFuncs) {
1050+
OutStreamer->emitInt32(Kind);
1051+
OutStreamer->emitCOFFSecOffset(CallsiteSymbol);
1052+
}
1053+
}
1054+
}
1055+
10241056
if (usesMSVCFloatingPoint(TT, M)) {
10251057
// In Windows' libcmt.lib, there is a file which is linked in only if the
10261058
// symbol _fltused is referenced. Linking this in causes some

llvm/lib/Target/X86/X86AsmPrinter.h

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,26 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
3131
bool EmitFPOData = false;
3232
bool ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = false;
3333
bool IndCSPrefix = false;
34+
bool EnableImportCallOptimization = false;
35+
36+
// Kind codes for items recorded for Import Call Optimization. The kind stored
// with each recorded item is written out by emitEndOfAsmFile as the 32-bit
// "Kind" field that precedes the item's offset in the import call section.
// NOTE(review): the names suggest these mirror the Windows
// IMAGE_RETPOLINE_AMD64_* constants; confirm the values against the Windows
// SDK headers before changing any of them.
enum ImportCallKind : unsigned {
37+
IMAGE_RETPOLINE_AMD64_IMPORT_BR = 0x02,
38+
IMAGE_RETPOLINE_AMD64_IMPORT_CALL = 0x03,
39+
IMAGE_RETPOLINE_AMD64_INDIR_BR = 0x04,
40+
IMAGE_RETPOLINE_AMD64_INDIR_CALL = 0x05,
41+
IMAGE_RETPOLINE_AMD64_INDIR_BR_REX = 0x06,
42+
IMAGE_RETPOLINE_AMD64_CFG_BR = 0x08,
43+
IMAGE_RETPOLINE_AMD64_CFG_CALL = 0x09,
44+
IMAGE_RETPOLINE_AMD64_CFG_BR_REX = 0x0A,
45+
IMAGE_RETPOLINE_AMD64_SWITCHTABLE_FIRST = 0x010,
46+
IMAGE_RETPOLINE_AMD64_SWITCHTABLE_LAST = 0x01F,
47+
};
48+
// One item recorded for Import Call Optimization: a symbol together with the
// kind code that is emitted for it.
// NOTE(review): emitEndOfAsmFile binds the first member as `CallsiteSymbol`
// and emits its section offset, so despite the name `CalleeSymbol` this looks
// like the label placed at the call instruction rather than the callee;
// confirm and consider renaming.
struct ImportCallInfo {
49+
MCSymbol *CalleeSymbol;
50+
ImportCallKind Kind;
51+
};
52+
// Recorded items grouped by the section that contains them. emitEndOfAsmFile
// walks this map and emits one size-prefixed group per section.
// NOTE(review): this is a DenseMap keyed by pointer, so the order in which
// groups are emitted may not be deterministic across runs; confirm whether an
// insertion-ordered container such as MapVector is needed here.
DenseMap<MCSection *, std::vector<ImportCallInfo>>
53+
SectionToImportedFunctionCalls;
3454

3555
// This utility class tracks the length of a stackmap instruction's 'shadow'.
3656
// It is used by the X86AsmPrinter to ensure that the stackmap shadow
@@ -45,7 +65,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
4565
void startFunction(MachineFunction &MF) {
4666
this->MF = &MF;
4767
}
48-
void count(MCInst &Inst, const MCSubtargetInfo &STI,
68+
void count(const MCInst &Inst, const MCSubtargetInfo &STI,
4969
MCCodeEmitter *CodeEmitter);
5070

5171
// Called to signal the start of a shadow of RequiredSize bytes.
@@ -126,6 +146,17 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
126146
void emitMachOIFuncStubHelperBody(Module &M, const GlobalIFunc &GI,
127147
MCSymbol *LazyPointer) override;
128148

149+
void emitCallInstruction(const llvm::MCInst &MCI);
150+
151+
// Emits a label to mark the next instruction as being relevant to Import Call
152+
// Optimization.
153+
void emitLabelAndRecordForImportCallOptimization(ImportCallKind Kind);
154+
155+
// Ensure that rax is used as the operand for the given instruction.
156+
//
157+
// NOTE: This assumes that it is safe to clobber rax.
158+
void ensureRaxUsedForOperand(MCInst &TmpInst);
159+
129160
public:
130161
X86AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer);
131162

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18922,7 +18922,7 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1892218922

1892318923
SDValue X86TargetLowering::LowerExternalSymbol(SDValue Op,
1892418924
SelectionDAG &DAG) const {
18925-
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
18925+
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false, nullptr);
1892618926
}
1892718927

1892818928
SDValue
@@ -18950,7 +18950,8 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
1895018950
/// Creates target global address or external symbol nodes for calls or
1895118951
/// other uses.
1895218952
SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
18953-
bool ForCall) const {
18953+
bool ForCall,
18954+
bool *IsImpCall) const {
1895418955
// Unpack the global address or external symbol.
1895518956
SDLoc dl(Op);
1895618957
const GlobalValue *GV = nullptr;
@@ -19000,6 +19001,16 @@ SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1900019001
if (ForCall && !NeedsLoad && !HasPICReg && Offset == 0)
1900119002
return Result;
1900219003

19004+
// If Import Call Optimization is enabled and this is an imported function
19005+
// then make a note of it and return the global address without wrapping.
19006+
if (IsImpCall && (OpFlags == X86II::MO_DLLIMPORT) &&
19007+
Mod.getModuleFlag("import-call-optimization")) {
19008+
assert(ForCall && "Should only enable import call optimization if we are "
19009+
"lowering a call");
19010+
*IsImpCall = true;
19011+
return Result;
19012+
}
19013+
1900319014
Result = DAG.getNode(getGlobalWrapperKind(GV, OpFlags), dl, PtrVT, Result);
1900419015

1900519016
// With PIC, the address is actually $g + Offset.
@@ -19025,7 +19036,7 @@ SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1902519036

1902619037
SDValue
1902719038
X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
19028-
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
19039+
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false, nullptr);
1902919040
}
1903019041

1903119042
static SDValue GetTLSADDR(SelectionDAG &DAG, GlobalAddressSDNode *GA,
@@ -34562,6 +34573,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
3456234573
NODE_NAME_CASE(FST)
3456334574
NODE_NAME_CASE(CALL)
3456434575
NODE_NAME_CASE(CALL_RVMARKER)
34576+
NODE_NAME_CASE(IMP_CALL)
3456534577
NODE_NAME_CASE(BT)
3456634578
NODE_NAME_CASE(CMP)
3456734579
NODE_NAME_CASE(FCMP)

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,10 @@ namespace llvm {
8181
// marker instruction.
8282
CALL_RVMARKER,
8383

84+
// Pseudo for a call to an imported function to ensure the correct machine
85+
// instruction is emitted for Import Call Optimization.
86+
IMP_CALL,
87+
8488
/// X86 compare and logical compare instructions.
8589
CMP,
8690
FCMP,
@@ -1733,8 +1737,8 @@ namespace llvm {
17331737

17341738
/// Creates target global address or external symbol nodes for calls or
17351739
/// other uses.
1736-
SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1737-
bool ForCall) const;
1740+
SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG, bool ForCall,
1741+
bool *IsImpCall) const;
17381742

17391743
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
17401744
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;

llvm/lib/Target/X86/X86ISelLoweringCall.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2402,6 +2402,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
24022402
InGlue = Chain.getValue(1);
24032403
}
24042404

2405+
bool IsImpCall = false;
24052406
if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
24062407
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
24072408
// In the 64-bit large code model, we have to make all calls
@@ -2414,7 +2415,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
24142415
// ForCall to true here has the effect of removing WrapperRIP when possible
24152416
// to allow direct calls to be selected without first materializing the
24162417
// address into a register.
2417-
Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
2418+
Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true, &IsImpCall);
24182419
} else if (Subtarget.isTarget64BitILP32() &&
24192420
Callee.getValueType() == MVT::i32) {
24202421
// Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
@@ -2536,7 +2537,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
25362537

25372538
// Returns a chain & a glue for retval copy to use.
25382539
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2539-
if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
2540+
if (IsImpCall) {
2541+
Chain = DAG.getNode(X86ISD::IMP_CALL, dl, NodeTys, Ops);
2542+
} else if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
25402543
Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
25412544
} else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
25422545
// Calls with a "clang.arc.attachedcall" bundle are special. They should be

llvm/lib/Target/X86/X86InstrCompiler.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1309,6 +1309,8 @@ def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 texternalsym:$dst)),
13091309
def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 tglobaladdr:$dst)),
13101310
(CALL64pcrel32_RVMARKER tglobaladdr:$rvfunc, tglobaladdr:$dst)>;
13111311

1312+
def : Pat<(X86imp_call (i64 tglobaladdr:$dst)),
1313+
(CALL64pcrel32 tglobaladdr:$dst)>;
13121314

13131315
// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they
13141316
// can never use callee-saved registers. That is the purpose of the GR64_TC

llvm/lib/Target/X86/X86InstrFragments.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,9 @@ def X86call_rvmarker : SDNode<"X86ISD::CALL_RVMARKER", SDT_X86Call,
210210
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
211211
SDNPVariadic]>;
212212

213+
def X86imp_call : SDNode<"X86ISD::IMP_CALL", SDT_X86Call,
214+
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
215+
SDNPVariadic]>;
213216

214217
def X86NoTrackCall : SDNode<"X86ISD::NT_CALL", SDT_X86Call,
215218
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,

0 commit comments

Comments
 (0)