[X86][IPRA] Add getIPRACSRegs since frame registers risk being optimized out. #109597

Merged · 8 commits · Sep 26, 2024

Changes from all commits
10 changes: 10 additions & 0 deletions llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -489,6 +489,16 @@ class TargetRegisterInfo : public MCRegisterInfo {
virtual const MCPhysReg*
getCalleeSavedRegs(const MachineFunction *MF) const = 0;

/// Return a null-terminated list of all of the callee-saved registers on
/// this target when IPRA is on. The list should include any non-allocatable
/// registers that the backend uses and assumes will be saved by all calling
/// conventions. This is typically the ISA-standard frame pointer, but could
/// include the thread pointer, TOC pointer, or base pointer for different
/// targets.
virtual const MCPhysReg *getIPRACSRegs(const MachineFunction *MF) const {
return nullptr;
}

/// Return a mask of call-preserved registers for the given calling convention
/// on the current function. The mask should include all call-preserved
/// aliases. This is used by the register allocator to determine which
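
The contract here matches getCalleeSavedRegs(): targets return a pointer to a static, null-terminated array. A minimal sketch of what an override could look like, assuming a hypothetical target named Foo with placeholder register names (the real override added by this PR is the X86 one below):

// Hypothetical sketch only. "Foo", Foo::FP, and Foo::TP are placeholders,
// not part of this PR; this just shows the null-terminated list the hook
// is documented to return.
const MCPhysReg *
FooRegisterInfo::getIPRACSRegs(const MachineFunction *MF) const {
  // Frame pointer plus thread pointer, terminated by 0 as the doc comment
  // above requires.
  static const MCPhysReg SaveList[] = {Foo::FP, Foo::TP, /*Null*/ 0};
  return SaveList;
}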
15 changes: 9 additions & 6 deletions llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -103,15 +103,18 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
   // saved registers.
   SavedRegs.resize(TRI.getNumRegs());
 
-  // When interprocedural register allocation is enabled caller saved registers
-  // are preferred over callee saved registers.
+  // Get the callee saved register list...
+  const MCPhysReg *CSRegs = nullptr;
+
+  // When interprocedural register allocation is enabled, the callee saved
+  // register list should be empty, since caller saved registers are preferred
+  // over callee saved registers, unless some CSRs risk being optimized out.
   if (MF.getTarget().Options.EnableIPRA &&
       isSafeForNoCSROpt(MF.getFunction()) &&
       isProfitableForNoCSROpt(MF.getFunction()))
-    return;
-
-  // Get the callee saved register list...
-  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
+    CSRegs = TRI.getIPRACSRegs(&MF);
+  else
+    CSRegs = MF.getRegInfo().getCalleeSavedRegs();
 
   // Early exit if there are no callee saved registers.
   if (!CSRegs || CSRegs[0] == 0)
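
Downstream, determineCalleeSaves walks whichever list was selected above. A sketch of the consumption pattern, for illustration only (the real code after this hunk filters by actual register usage before marking anything saved):

// Illustrative sketch, not the code in this file: walking a null-terminated
// MCPhysReg list such as the one getIPRACSRegs returns. The real pass
// applies more checks before setting bits in SavedRegs.
for (unsigned i = 0; CSRegs[i]; ++i)
  SavedRegs.set(CSRegs[i]);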
3 changes: 3 additions & 0 deletions llvm/lib/Target/X86/X86CallingConv.td
@@ -1104,6 +1104,9 @@ def CC_X86 : CallingConv<[

def CSR_NoRegs : CalleeSavedRegs<(add)>;

def CSR_IPRA_32 : CalleeSavedRegs<(add EBP, ESI)>;
def CSR_IPRA_64 : CalleeSavedRegs<(add RBP, RBX)>;

def CSR_32 : CalleeSavedRegs<(add ESI, EDI, EBX, EBP)>;
def CSR_64 : CalleeSavedRegs<(add RBX, R12, R13, R14, R15, RBP)>;

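
TableGen expands each CalleeSavedRegs def into a null-terminated <Name>_SaveList array in the generated X86GenRegisterInfo.inc, which is what the C++ override below returns. Roughly, assuming the usual emitter output (a sketch, not verbatim generated code):

// Approximate shape of the generated arrays in X86GenRegisterInfo.inc
// (assumption about emitter output; exact contents and order may differ).
static const MCPhysReg CSR_IPRA_32_SaveList[] = { X86::EBP, X86::ESI, 0 };
static const MCPhysReg CSR_IPRA_64_SaveList[] = { X86::RBP, X86::RBX, 0 };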
5 changes: 5 additions & 0 deletions llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -410,6 +410,11 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList;
}

const MCPhysReg *
X86RegisterInfo::getIPRACSRegs(const MachineFunction *MF) const {
return Is64Bit ? CSR_IPRA_64_SaveList : CSR_IPRA_32_SaveList;
}

const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
3 changes: 3 additions & 0 deletions llvm/lib/Target/X86/X86RegisterInfo.h
@@ -99,6 +99,9 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
/// callee-save registers on this target.
const MCPhysReg *
getCalleeSavedRegs(const MachineFunction* MF) const override;
/// getIPRACSRegs - This API can be removed once rbp is safe to optimize out
/// when IPRA is on.
const MCPhysReg *getIPRACSRegs(const MachineFunction *MF) const override;
const MCPhysReg *
getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
152 changes: 152 additions & 0 deletions llvm/test/CodeGen/X86/ipra-local-linkage-2.ll
@@ -0,0 +1,152 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --mtriple=x86_64-- < %s | FileCheck --check-prefix=X64 %s
; RUN: llc --mtriple=i386-- < %s | FileCheck --check-prefix=X86 %s

; This test ensures that rbp/rbx/ebp/esi are correctly saved and restored
; before being clobbered when IPRA is enabled.

define internal void @callee_clobber_rbp() nounwind norecurse {
; X64-LABEL: callee_clobber_rbp:
; X64: # %bb.0:
; X64-NEXT: pushq %rbp
; X64-NEXT: #APP
; X64-NEXT: xorl %ebp, %ebp
; X64-NEXT: #NO_APP
; X64-NEXT: popq %rbp
; X64-NEXT: retq
;
; X86-LABEL: callee_clobber_rbp:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: #APP
; X86-NEXT: xorl %ebp, %ebp
; X86-NEXT: #NO_APP
; X86-NEXT: popl %ebp
; X86-NEXT: retl
call void asm sideeffect "xor %ebp, %ebp", "~{ebp}"()
ret void
}

define internal void @callee_clobber_rbx(ptr %addr) nounwind norecurse {
; X64-LABEL: callee_clobber_rbx:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: #APP
; X64-NEXT: xorl %ebx, %ebx
; X64-NEXT: #NO_APP
; X64-NEXT: popq %rbx
; X64-NEXT: retq
call void asm sideeffect "xor %ebx, %ebx", "~{ebx}"()
ret void
}

define internal void @callee_clobber_esi(ptr %addr) nounwind norecurse {
; X86-LABEL: callee_clobber_esi:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: #APP
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: #NO_APP
; X86-NEXT: popl %esi
; X86-NEXT: retl
call void asm sideeffect "xor %esi, %esi", "~{esi}"()
ret void
}

define void @caller_use_rbp() "frame-pointer"="all" nounwind {
; X64-LABEL: caller_use_rbp:
; X64: # %bb.0:
; X64-NEXT: pushq %rbp
; X64-NEXT: movq %rsp, %rbp
; X64-NEXT: subq $16, %rsp
; X64-NEXT: callq callee_clobber_rbp
; X64-NEXT: movl $5, -4(%rbp)
; X64-NEXT: addq $16, %rsp
; X64-NEXT: popq %rbp
; X64-NEXT: retq
;
; X86-LABEL: caller_use_rbp:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %eax
; X86-NEXT: calll callee_clobber_rbp
; X86-NEXT: movl $5, -4(%ebp)
; X86-NEXT: addl $4, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
call void @callee_clobber_rbp()
%addr = alloca i32, align 4
store i32 5, ptr %addr, align 4
ret void
}

define void @caller_use_rbx(i32 %X) nounwind ssp {
; X64-LABEL: caller_use_rbx:
; X64: # %bb.0:
; X64-NEXT: pushq %rbp
; X64-NEXT: movq %rsp, %rbp
; X64-NEXT: pushq %rbx
; X64-NEXT: andq $-32, %rsp
; X64-NEXT: subq $64, %rsp
; X64-NEXT: movq %rsp, %rbx
; X64-NEXT: movq __stack_chk_guard(%rip), %rax
; X64-NEXT: movq %rax, 32(%rbx)
; X64-NEXT: movq %rsp, %rax
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: leaq 15(,%rcx,4), %rcx
; X64-NEXT: andq $-16, %rcx
; X64-NEXT: subq %rcx, %rax
; X64-NEXT: movq %rax, %rsp
; X64-NEXT: movq %rbx, %rdi
; X64-NEXT: callq callee_clobber_rbx
; X64-NEXT: movq __stack_chk_guard(%rip), %rax
; X64-NEXT: cmpq 32(%rbx), %rax
; X64-NEXT: jne .LBB4_2
; X64-NEXT: # %bb.1:
; X64-NEXT: leaq -8(%rbp), %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: popq %rbp
; X64-NEXT: retq
; X64-NEXT: .LBB4_2:
; X64-NEXT: callq __stack_chk_fail@PLT
%realign = alloca i32, align 32
%addr = alloca i32, i32 %X
call void @callee_clobber_rbx(ptr %realign)
ret void
}

define void @caller_use_esi(i32 %X) nounwind ssp {
; X86-LABEL: caller_use_esi:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-32, %esp
; X86-NEXT: subl $32, %esp
; X86-NEXT: movl %esp, %esi
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl __stack_chk_guard, %ecx
; X86-NEXT: movl %ecx, 16(%esi)
; X86-NEXT: movl %esp, %ecx
; X86-NEXT: shll $2, %eax
; X86-NEXT: subl %eax, %ecx
; X86-NEXT: movl %ecx, %esp
; X86-NEXT: movl %esi, %eax
; X86-NEXT: pushl %eax
; X86-NEXT: calll callee_clobber_esi
; X86-NEXT: addl $4, %esp
; X86-NEXT: movl __stack_chk_guard, %eax
; X86-NEXT: cmpl 16(%esi), %eax
; X86-NEXT: jne .LBB5_2
; X86-NEXT: # %bb.1:
; X86-NEXT: leal -4(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %ebp
; X86-NEXT: retl
; X86-NEXT: .LBB5_2:
; X86-NEXT: calll __stack_chk_fail
%realign = alloca i32, align 32
%addr = alloca i32, i32 %X
call void @callee_clobber_esi(ptr %realign)
ret void
}