Skip to content

[ARM] Be more precise about conditions for indirect tail-calls #102451

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 21 additions & 12 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3024,18 +3024,27 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(

assert(Subtarget->supportsTailCall());

// Indirect tail calls cannot be optimized for Thumb1 if the args
// to the call take up r0-r3. The reason is that there are no legal registers
// left to hold the pointer to the function to be called.
// Similarly, if the function uses return address sign and authentication,
// r12 is needed to hold the PAC and is not available to hold the callee
// address.
if (Outs.size() >= 4 &&
(!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) {
if (Subtarget->isThumb1Only())
return false;
// Conservatively assume the function spills LR.
if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true))
// Indirect tail-calls require a register to hold the target address. That
// register must be:
// * Allocatable (i.e. r0-r7 if the target is Thumb1).
// * Not callee-saved, so must be one of r0-r3 or r12.
// * Not used to hold an argument to the tail-called function, which might be
// in r0-r3.
// * Not used to hold the return address authentication code, which is in r12
// if enabled.
// Sometimes, no register matches all of these conditions, so we can't do a
// tail-call.
if (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect) {
SmallSet<MCPhysReg, 5> AddressRegisters;
for (Register R : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
AddressRegisters.insert(R);
if (!(Subtarget->isThumb1Only() ||
MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true)))
AddressRegisters.insert(ARM::R12);
for (const CCValAssign &AL : ArgLocs)
if (AL.isRegLoc())
AddressRegisters.erase(AL.getLocReg());
if (AddressRegisters.empty())
return false;
}

Expand Down
111 changes: 111 additions & 0 deletions llvm/test/CodeGen/Thumb2/indirect-tail-call-free-registers.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
; RUN: llc %s -o - -mtriple=thumbv8m.main -mattr=+vfp4 | FileCheck %s

;; No outgoing arguments, plenty of free registers to hold the target address.
;; Any of r0-r3 or r12 is acceptable, and the call is expected to be emitted
;; as a tail-call (bx rather than blx).
define void @test0(ptr %fptr) {
; CHECK-LABEL: test0:
; CHECK: bx {{r0|r1|r2|r3|r12}}
entry:
tail call void %fptr()
ret void
}

;; Four integer outgoing arguments, which use up r0-r3, leaving r12 as the
;; only register able to hold the target address of the tail-call.
define void @test1(ptr %fptr) {
; CHECK-LABEL: test1:
; CHECK: bx r12
entry:
tail call void %fptr(i32 0, i32 0, i32 0, i32 0)
ret void
}

;; Four integer outgoing arguments, which use up r0-r3, and sign-return-address
;; uses r12, so we can never tail-call this. A normal call (blx) is expected
;; instead.
define void @test2(ptr %fptr) "sign-return-address"="all" {
; CHECK-LABEL: test2:
; CHECK: blx
entry:
tail call void %fptr(i32 0, i32 0, i32 0, i32 0)
ret void
}

;; An i32 and an i64 argument, which uses r0, r2 and r3 for arguments, leaving
;; r1 free for the address. r12 is also accepted, since return address signing
;; is not enabled for this function.
define void @test3(ptr %fptr) {
; CHECK-LABEL: test3:
; CHECK: bx {{r1|r12}}
entry:
tail call void %fptr(i32 0, i64 0)
ret void
}

;; Four float arguments, using the soft-float calling convention, which uses
;; r0-r3, leaving r12 as the only register able to hold the target address.
define void @test4(ptr %fptr) {
; CHECK-LABEL: test4:
; CHECK: bx r12
entry:
tail call arm_aapcscc void %fptr(float 0.0, float 0.0, float 0.0, float 0.0)
ret void
}

;; Four float arguments, using the soft-float calling convention, which uses
;; r0-r3, and sign-return-address uses r12, so no register is left to hold the
;; target address and a normal call (blx) is expected.
;; NOTE(review): this comment previously said the case "currently fails with
;; 'ran out of registers during register allocation'". The blx expectation
;; below implies that failure no longer occurs - confirm and drop this note.
define void @test5(ptr %fptr) "sign-return-address"="all" {
; CHECK-LABEL: test5:
; CHECK: blx
entry:
tail call arm_aapcscc void %fptr(float 0.0, float 0.0, float 0.0, float 0.0)
ret void
}

;; Four float arguments, using the hard-float calling convention, which uses
;; s0-s3, leaving all of the integer registers (r0-r3 and r12) free for the
;; address.
define void @test6(ptr %fptr) {
; CHECK-LABEL: test6:
; CHECK: bx {{r0|r1|r2|r3|r12}}
entry:
tail call arm_aapcs_vfpcc void %fptr(float 0.0, float 0.0, float 0.0, float 0.0)
ret void
}

;; Four float arguments, using the hard-float calling convention, which uses
;; s0-s3, leaving r0-r3 free for the address, with r12 used for
;; sign-return-address. A tail-call through one of r0-r3 is still expected.
define void @test7(ptr %fptr) "sign-return-address"="all" {
; CHECK-LABEL: test7:
; CHECK: bx {{r0|r1|r2|r3}}
entry:
tail call arm_aapcs_vfpcc void %fptr(float 0.0, float 0.0, float 0.0, float 0.0)
ret void
}

;; Two double arguments, using the soft-float calling convention, which uses
;; r0-r3, leaving r12 as the only register able to hold the target address.
define void @test8(ptr %fptr) {
; CHECK-LABEL: test8:
; CHECK: bx r12
entry:
tail call arm_aapcscc void %fptr(double 0.0, double 0.0)
ret void
}

;; Two double arguments, using the soft-float calling convention, which uses
;; r0-r3, and sign-return-address uses r12, so we can't tail-call this. A
;; normal call (blx) is expected instead.
define void @test9(ptr %fptr) "sign-return-address"="all" {
; CHECK-LABEL: test9:
; CHECK: blx
entry:
tail call arm_aapcscc void %fptr(double 0.0, double 0.0)
ret void
}

;; Three integer arguments (i32, i64, i32; one on the stack), but due to the
;; alignment of the i64, r1 is left empty, so can be used for the tail-call.
;; sign-return-address is enabled, so r12 is not available and r1 is the only
;; register expected to hold the address.
define void @test10(ptr %fptr, i64 %b, i32 %c) "sign-return-address"="all" {
; CHECK-LABEL: test10:
; CHECK: bx r1
entry:
tail call void %fptr(i32 0, i64 %b, i32 %c)
ret void
}
Loading