Skip to content

Commit 2caa871

Browse files
committed
Merging r325049:
------------------------------------------------------------------------
r325049 | rnk | 2018-02-13 12:47:49 -0800 (Tue, 13 Feb 2018) | 17 lines

[X86] Use EDI for retpoline when no scratch regs are left

Summary:
Instead of solving the hard problem of how to pass the callee to the indirect jump thunk without a register, just use a CSR. At a call boundary, there's nothing stopping us from using a CSR to hold the callee as long as we save and restore it in the prologue.

Also, add tests for this mregparm=3 case. I wrote execution tests for __llvm_retpoline_push, but they never got committed as lit tests, either because I never rewrote them or because they got lost in merge conflicts.

Reviewers: chandlerc, dwmw2

Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits

Differential Revision: https://reviews.llvm.org/D43214
------------------------------------------------------------------------

llvm-svn: 325090
1 parent 940d56c commit 2caa871

File tree

4 files changed

+76
-72
lines changed

4 files changed

+76
-72
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 18 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -26265,9 +26265,6 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
2626526265
// attempt to help out kernels and other systems where duplicating the
2626626266
// thunks is costly.
2626726267
switch (Reg) {
26268-
case 0:
26269-
assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
26270-
return "__x86_indirect_thunk";
2627126268
case X86::EAX:
2627226269
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
2627326270
return "__x86_indirect_thunk_eax";
@@ -26277,6 +26274,9 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
2627726274
case X86::EDX:
2627826275
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
2627926276
return "__x86_indirect_thunk_edx";
26277+
case X86::EDI:
26278+
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26279+
return "__x86_indirect_thunk_edi";
2628026280
case X86::R11:
2628126281
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
2628226282
return "__x86_indirect_thunk_r11";
@@ -26286,9 +26286,6 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
2628626286

2628726287
// When targeting an internal COMDAT thunk use an LLVM-specific name.
2628826288
switch (Reg) {
26289-
case 0:
26290-
assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
26291-
return "__llvm_retpoline_push";
2629226289
case X86::EAX:
2629326290
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
2629426291
return "__llvm_retpoline_eax";
@@ -26298,6 +26295,9 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
2629826295
case X86::EDX:
2629926296
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
2630026297
return "__llvm_retpoline_edx";
26298+
case X86::EDI:
26299+
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26300+
return "__llvm_retpoline_edi";
2630126301
case X86::R11:
2630226302
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
2630326303
return "__llvm_retpoline_r11";
@@ -26319,15 +26319,13 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
2631926319
// just use R11, but we scan for uses anyway to ensure we don't generate
2632026320
// incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't
2632126321
// already a register use operand to the call to hold the callee. If none
26322-
// are available, push the callee instead. This is less efficient, but is
26323-
// necessary for functions using 3 regparms. Such function calls are
26324-
// (currently) not eligible for tail call optimization, because there is no
26325-
// scratch register available to hold the address of the callee.
26322+
// are available, use EDI instead. EDI is chosen because EBX is the PIC base
26323+
// register and ESI is the base pointer to realigned stack frames with VLAs.
2632626324
SmallVector<unsigned, 3> AvailableRegs;
2632726325
if (Subtarget.is64Bit())
2632826326
AvailableRegs.push_back(X86::R11);
2632926327
else
26330-
AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX});
26328+
AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI});
2633126329

2633226330
// Zero out any registers that are already used.
2633326331
for (const auto &MO : MI.operands()) {
@@ -26345,30 +26343,18 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
2634526343
break;
2634626344
}
2634726345
}
26346+
if (!AvailableReg)
26347+
report_fatal_error("calling convention incompatible with retpoline, no "
26348+
"available registers");
2634826349

2634926350
const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);
2635026351

26351-
if (AvailableReg == 0) {
26352-
// No register available. Use PUSH. This must not be a tailcall, and this
26353-
// must not be x64.
26354-
if (Subtarget.is64Bit())
26355-
report_fatal_error(
26356-
"Cannot make an indirect call on x86-64 using both retpoline and a "
26357-
"calling convention that preservers r11");
26358-
if (Opc != X86::CALLpcrel32)
26359-
report_fatal_error("Cannot make an indirect tail call on x86 using "
26360-
"retpoline without a preserved register");
26361-
BuildMI(*BB, MI, DL, TII->get(X86::PUSH32r)).addReg(CalleeVReg);
26362-
MI.getOperand(0).ChangeToES(Symbol);
26363-
MI.setDesc(TII->get(Opc));
26364-
} else {
26365-
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
26366-
.addReg(CalleeVReg);
26367-
MI.getOperand(0).ChangeToES(Symbol);
26368-
MI.setDesc(TII->get(Opc));
26369-
MachineInstrBuilder(*BB->getParent(), &MI)
26370-
.addReg(AvailableReg, RegState::Implicit | RegState::Kill);
26371-
}
26352+
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
26353+
.addReg(CalleeVReg);
26354+
MI.getOperand(0).ChangeToES(Symbol);
26355+
MI.setDesc(TII->get(Opc));
26356+
MachineInstrBuilder(*BB->getParent(), &MI)
26357+
.addReg(AvailableReg, RegState::Implicit | RegState::Kill);
2637226358
return BB;
2637326359
}
2637426360

llvm/lib/Target/X86/X86RetpolineThunks.cpp

Lines changed: 11 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ static const char R11ThunkName[] = "__llvm_retpoline_r11";
4343
static const char EAXThunkName[] = "__llvm_retpoline_eax";
4444
static const char ECXThunkName[] = "__llvm_retpoline_ecx";
4545
static const char EDXThunkName[] = "__llvm_retpoline_edx";
46-
static const char PushThunkName[] = "__llvm_retpoline_push";
46+
static const char EDIThunkName[] = "__llvm_retpoline_edi";
4747

4848
namespace {
4949
class X86RetpolineThunks : public MachineFunctionPass {
@@ -127,7 +127,7 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
127127
createThunkFunction(M, R11ThunkName);
128128
else
129129
for (StringRef Name :
130-
{EAXThunkName, ECXThunkName, EDXThunkName, PushThunkName})
130+
{EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName})
131131
createThunkFunction(M, Name);
132132
InsertedThunks = true;
133133
return true;
@@ -151,9 +151,8 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
151151
populateThunk(MF, X86::R11);
152152
} else {
153153
// For 32-bit targets we need to emit a collection of thunks for various
154-
// possible scratch registers as well as a fallback that is used when
155-
// there are no scratch registers and assumes the retpoline target has
156-
// been pushed.
154+
// possible scratch registers as well as a fallback that uses EDI, which is
155+
// normally callee saved.
157156
// __llvm_retpoline_eax:
158157
// calll .Leax_call_target
159158
// .Leax_capture_spec:
@@ -174,32 +173,18 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
174173
// movl %edx, (%esp)
175174
// retl
176175
//
177-
// This last one is a bit more special and so needs a little extra
178-
// handling.
179-
// __llvm_retpoline_push:
180-
// calll .Lpush_call_target
181-
// .Lpush_capture_spec:
182-
// pause
183-
// lfence
184-
// jmp .Lpush_capture_spec
185-
// .align 16
186-
// .Lpush_call_target:
187-
// # Clear pause_loop return address.
188-
// addl $4, %esp
189-
// # Top of stack words are: Callee, RA. Exchange Callee and RA.
190-
// pushl 4(%esp) # Push callee
191-
// pushl 4(%esp) # Push RA
192-
// popl 8(%esp) # Pop RA to final RA
193-
// popl (%esp) # Pop callee to next top of stack
194-
// retl # Ret to callee
176+
// __llvm_retpoline_edi:
177+
// ... # Same setup
178+
// movl %edi, (%esp)
179+
// retl
195180
if (MF.getName() == EAXThunkName)
196181
populateThunk(MF, X86::EAX);
197182
else if (MF.getName() == ECXThunkName)
198183
populateThunk(MF, X86::ECX);
199184
else if (MF.getName() == EDXThunkName)
200185
populateThunk(MF, X86::EDX);
201-
else if (MF.getName() == PushThunkName)
202-
populateThunk(MF);
186+
else if (MF.getName() == EDIThunkName)
187+
populateThunk(MF, X86::EDI);
203188
else
204189
llvm_unreachable("Invalid thunk name on x86-32!");
205190
}
@@ -301,11 +286,6 @@ void X86RetpolineThunks::populateThunk(MachineFunction &MF,
301286
CaptureSpec->addSuccessor(CaptureSpec);
302287

303288
CallTarget->setAlignment(4);
304-
if (Reg) {
305-
insertRegReturnAddrClobber(*CallTarget, *Reg);
306-
} else {
307-
assert(!Is64Bit && "We only support non-reg thunks on 32-bit x86!");
308-
insert32BitPushReturnAddrClobber(*CallTarget);
309-
}
289+
insertRegReturnAddrClobber(*CallTarget, *Reg);
310290
BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
311291
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
; RUN: llc -mtriple=i686-linux < %s | FileCheck --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" %s
2+
3+
; Test 32-bit retpoline when -mregparm=3 is used. This case is interesting
4+
; because there are no available scratch registers. The Linux kernel builds
5+
; with -mregparm=3, so we need to support it. TCO should fail because we need
6+
; to restore EDI.
7+
8+
define void @call_edi(void (i32, i32, i32)* %fp) #0 {
9+
entry:
10+
tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0)
11+
ret void
12+
}
13+
14+
; CHECK-LABEL: call_edi:
15+
; EDI is used, so it must be saved.
16+
; CHECK: pushl %edi
17+
; CHECK-DAG: xorl %eax, %eax
18+
; CHECK-DAG: xorl %edx, %edx
19+
; CHECK-DAG: xorl %ecx, %ecx
20+
; CHECK-DAG: movl {{.*}}, %edi
21+
; CHECK: calll __llvm_retpoline_edi
22+
; CHECK: popl %edi
23+
; CHECK: retl
24+
25+
define void @edi_external(void (i32, i32, i32)* %fp) #1 {
26+
entry:
27+
tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0)
28+
ret void
29+
}
30+
31+
; CHECK-LABEL: edi_external:
32+
; CHECK: pushl %edi
33+
; CHECK-DAG: xorl %eax, %eax
34+
; CHECK-DAG: xorl %edx, %edx
35+
; CHECK-DAG: xorl %ecx, %ecx
36+
; CHECK-DAG: movl {{.*}}, %edi
37+
; CHECK: calll __x86_indirect_thunk_edi
38+
; CHECK: popl %edi
39+
; CHECK: retl
40+
41+
attributes #0 = { "target-features"="+retpoline" }
42+
attributes #1 = { "target-features"="+retpoline-external-thunk" }

llvm/test/CodeGen/X86/retpoline.ll

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -336,10 +336,10 @@ latch:
336336
; X86-NEXT: movl %edx, (%esp)
337337
; X86-NEXT: retl
338338
;
339-
; X86-LABEL: .section .text.__llvm_retpoline_push,{{.*}},__llvm_retpoline_push,comdat
340-
; X86-NEXT: .hidden __llvm_retpoline_push
341-
; X86-NEXT: .weak __llvm_retpoline_push
342-
; X86: __llvm_retpoline_push:
339+
; X86-LABEL: .section .text.__llvm_retpoline_edi,{{.*}},__llvm_retpoline_edi,comdat
340+
; X86-NEXT: .hidden __llvm_retpoline_edi
341+
; X86-NEXT: .weak __llvm_retpoline_edi
342+
; X86: __llvm_retpoline_edi:
343343
; X86-NEXT: # {{.*}} # %entry
344344
; X86-NEXT: calll [[CALL_TARGET:.*]]
345345
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
@@ -351,11 +351,7 @@ latch:
351351
; X86-NEXT: .p2align 4, 0x90
352352
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
353353
; X86-NEXT: # %entry
354-
; X86-NEXT: addl $4, %esp
355-
; X86-NEXT: pushl 4(%esp)
356-
; X86-NEXT: pushl 4(%esp)
357-
; X86-NEXT: popl 8(%esp)
358-
; X86-NEXT: popl (%esp)
354+
; X86-NEXT: movl %edi, (%esp)
359355
; X86-NEXT: retl
360356

361357

0 commit comments

Comments
 (0)