Skip to content

Commit 14b567d

Browse files
authored
[X86][IPRA] Add getIPRACSRegs since frame registers risk being optimized out. (#109597)
X86 IPRA had the correctness issue shown below: https://gcc.godbolt.org/z/6hh88xv9r This patch is a workaround that fixes it.
1 parent e237d8a commit 14b567d

File tree

6 files changed

+182
-6
lines changed

6 files changed

+182
-6
lines changed

llvm/include/llvm/CodeGen/TargetRegisterInfo.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,16 @@ class TargetRegisterInfo : public MCRegisterInfo {
489489
virtual const MCPhysReg*
490490
getCalleeSavedRegs(const MachineFunction *MF) const = 0;
491491

492+
/// Return a null-terminated list of all of the callee-saved registers on
493+
/// this target when IPRA is on. The list should include any non-allocatable
494+
/// registers that the backend uses and assumes will be saved by all calling
495+
/// conventions. This is typically the ISA-standard frame pointer, but could
496+
/// include the thread pointer, TOC pointer, or base pointer for different
497+
/// targets.
498+
virtual const MCPhysReg *getIPRACSRegs(const MachineFunction *MF) const {
499+
return nullptr;
500+
}
501+
492502
/// Return a mask of call-preserved registers for the given calling convention
493503
/// on the current function. The mask should include all call-preserved
494504
/// aliases. This is used by the register allocator to determine which

llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -103,15 +103,18 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
103103
// saved registers.
104104
SavedRegs.resize(TRI.getNumRegs());
105105

106-
// When interprocedural register allocation is enabled caller saved registers
107-
// are preferred over callee saved registers.
106+
// Get the callee saved register list...
107+
const MCPhysReg *CSRegs = nullptr;
108+
109+
// When interprocedural register allocation is enabled, callee saved register
110+
// list should be empty, since caller saved registers are preferred over
111+
// callee saved registers. Unless it has some risked CSR to be optimized out.
108112
if (MF.getTarget().Options.EnableIPRA &&
109113
isSafeForNoCSROpt(MF.getFunction()) &&
110114
isProfitableForNoCSROpt(MF.getFunction()))
111-
return;
112-
113-
// Get the callee saved register list...
114-
const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
115+
CSRegs = TRI.getIPRACSRegs(&MF);
116+
else
117+
CSRegs = MF.getRegInfo().getCalleeSavedRegs();
115118

116119
// Early exit if there are no callee saved registers.
117120
if (!CSRegs || CSRegs[0] == 0)

llvm/lib/Target/X86/X86CallingConv.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1104,6 +1104,9 @@ def CC_X86 : CallingConv<[
11041104

11051105
def CSR_NoRegs : CalleeSavedRegs<(add)>;
11061106

1107+
def CSR_IPRA_32 : CalleeSavedRegs<(add EBP, ESI)>;
1108+
def CSR_IPRA_64 : CalleeSavedRegs<(add RBP, RBX)>;
1109+
11071110
def CSR_32 : CalleeSavedRegs<(add ESI, EDI, EBX, EBP)>;
11081111
def CSR_64 : CalleeSavedRegs<(add RBX, R12, R13, R14, R15, RBP)>;
11091112

llvm/lib/Target/X86/X86RegisterInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,11 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
410410
return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList;
411411
}
412412

413+
const MCPhysReg *
414+
X86RegisterInfo::getIPRACSRegs(const MachineFunction *MF) const {
415+
return Is64Bit ? CSR_IPRA_64_SaveList : CSR_IPRA_32_SaveList;
416+
}
417+
413418
const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
414419
const MachineFunction *MF) const {
415420
assert(MF && "Invalid MachineFunction pointer.");

llvm/lib/Target/X86/X86RegisterInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,9 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
9999
/// callee-save registers on this target.
100100
const MCPhysReg *
101101
getCalleeSavedRegs(const MachineFunction* MF) const override;
102+
/// getIPRACSRegs - This API can be removed when rbp is safe to optimized out
103+
/// when IPRA is on.
104+
const MCPhysReg *getIPRACSRegs(const MachineFunction *MF) const override;
102105
const MCPhysReg *
103106
getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
104107
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc --mtriple=x86_64-- < %s | FileCheck --check-prefix=X64 %s
3+
; RUN: llc --mtriple=i386-- < %s | FileCheck --check-prefix=X86 %s
4+
5+
; This test is to ensure rbp/rbx/ebp/esi is correctly saved/restored before clobbered when enable ipra.
6+
7+
define internal void @callee_clobber_rbp() nounwind norecurse {
8+
; X64-LABEL: callee_clobber_rbp:
9+
; X64: # %bb.0:
10+
; X64-NEXT: pushq %rbp
11+
; X64-NEXT: #APP
12+
; X64-NEXT: xorl %ebp, %ebp
13+
; X64-NEXT: #NO_APP
14+
; X64-NEXT: popq %rbp
15+
; X64-NEXT: retq
16+
;
17+
; X86-LABEL: callee_clobber_rbp:
18+
; X86: # %bb.0:
19+
; X86-NEXT: pushl %ebp
20+
; X86-NEXT: #APP
21+
; X86-NEXT: xorl %ebp, %ebp
22+
; X86-NEXT: #NO_APP
23+
; X86-NEXT: popl %ebp
24+
; X86-NEXT: retl
25+
call void asm sideeffect "xor %ebp, %ebp", "~{ebp}"()
26+
ret void
27+
}
28+
29+
define internal void @callee_clobber_rbx(ptr %addr) nounwind norecurse {
30+
; X64-LABEL: callee_clobber_rbx:
31+
; X64: # %bb.0:
32+
; X64-NEXT: pushq %rbx
33+
; X64-NEXT: #APP
34+
; X64-NEXT: xorl %ebx, %ebx
35+
; X64-NEXT: #NO_APP
36+
; X64-NEXT: popq %rbx
37+
; X64-NEXT: retq
38+
call void asm sideeffect "xor %ebx, %ebx", "~{ebx}"()
39+
ret void
40+
}
41+
42+
define internal void @callee_clobber_esi(ptr %addr) nounwind norecurse {
43+
; X86-LABEL: callee_clobber_esi:
44+
; X86: # %bb.0:
45+
; X86-NEXT: pushl %esi
46+
; X86-NEXT: #APP
47+
; X86-NEXT: xorl %esi, %esi
48+
; X86-NEXT: #NO_APP
49+
; X86-NEXT: popl %esi
50+
; X86-NEXT: retl
51+
call void asm sideeffect "xor %esi, %esi", "~{esi}"()
52+
ret void
53+
}
54+
55+
define void @caller_use_rbp() "frame-pointer"="all" nounwind {
56+
; X64-LABEL: caller_use_rbp:
57+
; X64: # %bb.0:
58+
; X64-NEXT: pushq %rbp
59+
; X64-NEXT: movq %rsp, %rbp
60+
; X64-NEXT: subq $16, %rsp
61+
; X64-NEXT: callq callee_clobber_rbp
62+
; X64-NEXT: movl $5, -4(%rbp)
63+
; X64-NEXT: addq $16, %rsp
64+
; X64-NEXT: popq %rbp
65+
; X64-NEXT: retq
66+
;
67+
; X86-LABEL: caller_use_rbp:
68+
; X86: # %bb.0:
69+
; X86-NEXT: pushl %ebp
70+
; X86-NEXT: movl %esp, %ebp
71+
; X86-NEXT: pushl %eax
72+
; X86-NEXT: calll callee_clobber_rbp
73+
; X86-NEXT: movl $5, -4(%ebp)
74+
; X86-NEXT: addl $4, %esp
75+
; X86-NEXT: popl %ebp
76+
; X86-NEXT: retl
77+
call void @callee_clobber_rbp()
78+
%addr = alloca i32, align 4
79+
store i32 5, ptr %addr, align 4
80+
ret void
81+
}
82+
83+
define void @caller_use_rbx(i32 %X) nounwind ssp {
84+
; X64-LABEL: caller_use_rbx:
85+
; X64: # %bb.0:
86+
; X64-NEXT: pushq %rbp
87+
; X64-NEXT: movq %rsp, %rbp
88+
; X64-NEXT: pushq %rbx
89+
; X64-NEXT: andq $-32, %rsp
90+
; X64-NEXT: subq $64, %rsp
91+
; X64-NEXT: movq %rsp, %rbx
92+
; X64-NEXT: movq __stack_chk_guard(%rip), %rax
93+
; X64-NEXT: movq %rax, 32(%rbx)
94+
; X64-NEXT: movq %rsp, %rax
95+
; X64-NEXT: movl %edi, %ecx
96+
; X64-NEXT: leaq 15(,%rcx,4), %rcx
97+
; X64-NEXT: andq $-16, %rcx
98+
; X64-NEXT: subq %rcx, %rax
99+
; X64-NEXT: movq %rax, %rsp
100+
; X64-NEXT: movq %rbx, %rdi
101+
; X64-NEXT: callq callee_clobber_rbx
102+
; X64-NEXT: movq __stack_chk_guard(%rip), %rax
103+
; X64-NEXT: cmpq 32(%rbx), %rax
104+
; X64-NEXT: jne .LBB4_2
105+
; X64-NEXT: # %bb.1:
106+
; X64-NEXT: leaq -8(%rbp), %rsp
107+
; X64-NEXT: popq %rbx
108+
; X64-NEXT: popq %rbp
109+
; X64-NEXT: retq
110+
; X64-NEXT: .LBB4_2:
111+
; X64-NEXT: callq __stack_chk_fail@PLT
112+
%realign = alloca i32, align 32
113+
%addr = alloca i32, i32 %X
114+
call void @callee_clobber_rbx(ptr %realign)
115+
ret void
116+
}
117+
118+
define void @caller_use_esi(i32 %X) nounwind ssp {
119+
; X86-LABEL: caller_use_esi:
120+
; X86: # %bb.0:
121+
; X86-NEXT: pushl %ebp
122+
; X86-NEXT: movl %esp, %ebp
123+
; X86-NEXT: pushl %esi
124+
; X86-NEXT: andl $-32, %esp
125+
; X86-NEXT: subl $32, %esp
126+
; X86-NEXT: movl %esp, %esi
127+
; X86-NEXT: movl 8(%ebp), %eax
128+
; X86-NEXT: movl __stack_chk_guard, %ecx
129+
; X86-NEXT: movl %ecx, 16(%esi)
130+
; X86-NEXT: movl %esp, %ecx
131+
; X86-NEXT: shll $2, %eax
132+
; X86-NEXT: subl %eax, %ecx
133+
; X86-NEXT: movl %ecx, %esp
134+
; X86-NEXT: movl %esi, %eax
135+
; X86-NEXT: pushl %eax
136+
; X86-NEXT: calll callee_clobber_esi
137+
; X86-NEXT: addl $4, %esp
138+
; X86-NEXT: movl __stack_chk_guard, %eax
139+
; X86-NEXT: cmpl 16(%esi), %eax
140+
; X86-NEXT: jne .LBB5_2
141+
; X86-NEXT: # %bb.1:
142+
; X86-NEXT: leal -4(%ebp), %esp
143+
; X86-NEXT: popl %esi
144+
; X86-NEXT: popl %ebp
145+
; X86-NEXT: retl
146+
; X86-NEXT: .LBB5_2:
147+
; X86-NEXT: calll __stack_chk_fail
148+
%realign = alloca i32, align 32
149+
%addr = alloca i32, i32 %X
150+
call void @callee_clobber_esi(ptr %realign)
151+
ret void
152+
}

0 commit comments

Comments
 (0)