Skip to content

Commit 50a2b31

Browse files
authored
[ARM] Be more precise about conditions for indirect tail-calls (#102451)
This code was trying to predict the conditions in which an indirect tail call will have a free register to hold the target address, and falling back to a non-tail call if all non-callee-saved registers are used for arguments or return address authentication. However, it was only taking the number of arguments into account, not which registers they are allocated to, so floating-point arguments could cause this to give the wrong result, causing either a later error due to the lack of a free register, or a missed optimisation of not doing the tail call. The assignment of arguments to registers is available at this point in the code, so we can calculate exactly which registers will be available for the tail-call.
1 parent d38bae3 commit 50a2b31

File tree

2 files changed

+132
-12
lines changed

2 files changed

+132
-12
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3024,18 +3024,27 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
30243024

30253025
assert(Subtarget->supportsTailCall());
30263026

3027-
// Indirect tail calls cannot be optimized for Thumb1 if the args
3028-
// to the call take up r0-r3. The reason is that there are no legal registers
3029-
// left to hold the pointer to the function to be called.
3030-
// Similarly, if the function uses return address sign and authentication,
3031-
// r12 is needed to hold the PAC and is not available to hold the callee
3032-
// address.
3033-
if (Outs.size() >= 4 &&
3034-
(!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) {
3035-
if (Subtarget->isThumb1Only())
3036-
return false;
3037-
// Conservatively assume the function spills LR.
3038-
if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true))
3027+
// Indirect tail-calls require a register to hold the target address. That
3028+
// register must be:
3029+
// * Allocatable (i.e. r0-r7 if the target is Thumb1).
3030+
// * Not callee-saved, so must be one of r0-r3 or r12.
3031+
// * Not used to hold an argument to the tail-called function, which might be
3032+
// in r0-r3.
3033+
// * Not used to hold the return address authentication code, which is in r12
3034+
// if enabled.
3035+
// Sometimes, no register matches all of these conditions, so we can't do a
3036+
// tail-call.
3037+
if (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect) {
3038+
SmallSet<MCPhysReg, 5> AddressRegisters;
3039+
for (Register R : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
3040+
AddressRegisters.insert(R);
3041+
if (!(Subtarget->isThumb1Only() ||
3042+
MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true)))
3043+
AddressRegisters.insert(ARM::R12);
3044+
for (const CCValAssign &AL : ArgLocs)
3045+
if (AL.isRegLoc())
3046+
AddressRegisters.erase(AL.getLocReg());
3047+
if (AddressRegisters.empty())
30393048
return false;
30403049
}
30413050

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
; RUN: llc %s -o - -mtriple=thumbv8m.main -mattr=+vfp4 | FileCheck %s
2+
3+
;; No outgoing arguments, plenty of free registers to hold the target address.
4+
define void @test0(ptr %fptr) {
5+
; CHECK-LABEL: test0:
6+
; CHECK: bx {{r0|r1|r2|r3|r12}}
7+
entry:
8+
tail call void %fptr()
9+
ret void
10+
}
11+
12+
;; Four integer outgoing arguments, which use up r0-r3.
13+
define void @test1(ptr %fptr) {
14+
; CHECK-LABEL: test1:
15+
; CHECK: bx r12
16+
entry:
17+
tail call void %fptr(i32 0, i32 0, i32 0, i32 0)
18+
ret void
19+
}
20+
21+
;; Four integer outgoing arguments, which use up r0-r3, and sign-return-address
22+
;; uses r12, so we can never tail-call this.
23+
define void @test2(ptr %fptr) "sign-return-address"="all" {
24+
; CHECK-LABEL: test2:
25+
; CHECK: blx
26+
entry:
27+
tail call void %fptr(i32 0, i32 0, i32 0, i32 0)
28+
ret void
29+
}
30+
31+
;; An i32 and an i64 argument, which uses r0, r2 and r3 for arguments, leaving
32+
;; r1 free for the address.
33+
define void @test3(ptr %fptr) {
34+
; CHECK-LABEL: test3:
35+
; CHECK: bx {{r1|r12}}
36+
entry:
37+
tail call void %fptr(i32 0, i64 0)
38+
ret void
39+
}
40+
41+
;; Four float arguments, using the soft-float calling convention, which uses
42+
;; r0-r3.
43+
define void @test4(ptr %fptr) {
44+
; CHECK-LABEL: test4:
45+
; CHECK: bx r12
46+
entry:
47+
tail call arm_aapcscc void %fptr(float 0.0, float 0.0, float 0.0, float 0.0)
48+
ret void
49+
}
50+
51+
;; Four float arguments, using the soft-float calling convention, which uses
52+
;; r0-r3, and sign-return-address uses r12. Previously failed with "ran out of
53+
;; registers during register allocation".
54+
define void @test5(ptr %fptr) "sign-return-address"="all" {
55+
; CHECK-LABEL: test5:
56+
; CHECK: blx
57+
entry:
58+
tail call arm_aapcscc void %fptr(float 0.0, float 0.0, float 0.0, float 0.0)
59+
ret void
60+
}
61+
62+
;; Four float arguments, using the hard-float calling convention, which uses
63+
;; s0-s3, leaving all of the integer registers free for the address.
64+
define void @test6(ptr %fptr) {
65+
; CHECK-LABEL: test6:
66+
; CHECK: bx {{r0|r1|r2|r3|r12}}
67+
entry:
68+
tail call arm_aapcs_vfpcc void %fptr(float 0.0, float 0.0, float 0.0, float 0.0)
69+
ret void
70+
}
71+
72+
;; Four float arguments, using the hard-float calling convention, which uses
73+
;; s0-s3, leaving r0-r3 free for the address, with r12 used for
74+
;; sign-return-address.
75+
define void @test7(ptr %fptr) "sign-return-address"="all" {
76+
; CHECK-LABEL: test7:
77+
; CHECK: bx {{r0|r1|r2|r3}}
78+
entry:
79+
tail call arm_aapcs_vfpcc void %fptr(float 0.0, float 0.0, float 0.0, float 0.0)
80+
ret void
81+
}
82+
83+
;; Two double arguments, using the soft-float calling convention, which uses
84+
;; r0-r3.
85+
define void @test8(ptr %fptr) {
86+
; CHECK-LABEL: test8:
87+
; CHECK: bx r12
88+
entry:
89+
tail call arm_aapcscc void %fptr(double 0.0, double 0.0)
90+
ret void
91+
}
92+
93+
;; Two double arguments, using the soft-float calling convention, which uses
94+
;; r0-r3, and sign-return-address uses r12, so we can't tail-call this.
95+
define void @test9(ptr %fptr) "sign-return-address"="all" {
96+
; CHECK-LABEL: test9:
97+
; CHECK: blx
98+
entry:
99+
tail call arm_aapcscc void %fptr(double 0.0, double 0.0)
100+
ret void
101+
}
102+
103+
;; Four integer arguments (one on the stack), but due to alignment r1 is left
104+
;; empty, so can be used for the tail-call.
105+
define void @test10(ptr %fptr, i64 %b, i32 %c) "sign-return-address"="all" {
106+
; CHECK-LABEL: test10:
107+
; CHECK: bx r1
108+
entry:
109+
tail call void %fptr(i32 0, i64 %b, i32 %c)
110+
ret void
111+
}

0 commit comments

Comments
 (0)