Commit 727328a
Author: Jessica Paquette
[AArch64][GlobalISel] Tail call memory intrinsics
Because memory intrinsics are handled differently than other calls, we need to
check them for tail call eligibility in the legalizer. This allows us to still
inline them when it's beneficial to do so, but also tail call when possible.

This adds simple tail calling support for when the intrinsic is followed by a
return.

It ports the attribute checks from `TargetLowering::isInTailCallPosition` into
a similarly-named function in LegalizerHelper.cpp. The target-specific
`isUsedByReturnOnly` hook is not ported here.

Update tailcall-mem-intrinsics.ll to show that GlobalISel can now tail call
memory intrinsics.

Update legalize-memcpy-et-al.mir to have a case where we don't tail call.

Differential Revision: https://reviews.llvm.org/D67566

llvm-svn: 371893
1 parent d158cf6 commit 727328a

5 files changed, 131 insertions(+), 13 deletions(-)
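As an illustration before the per-file diffs (a hedged sketch, not part of the patch): the pattern this commit lets GlobalISel tail call is a memory intrinsic immediately followed by a plain return, as in the tail_memcpy function checked by tailcall-mem-intrinsics.ll. The typed-pointer signatures below are assumptions in the style of 2019-era IR.

; Sketch: the memcpy intrinsic is in tail position. The next instruction
; is a void return, and the caller's return carries no attributes that
; tail calling would drop, so the legalizer may lower the libcall as a
; tail call.
declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)

define void @tail_memcpy(i8* %p, i8* %q, i32 %n) {
entry:
  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false)
  ret void
}

With this patch, GlobalISel emits a single `b memcpy` for such a function instead of `bl memcpy` followed by `ret`, matching the SelectionDAG output the test already expects.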

llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h

Lines changed: 4 additions & 0 deletions
@@ -95,6 +95,10 @@ class CallLowering {
   /// optimization.
   bool IsTailCall = false;
 
+  /// True if the call was lowered as a tail call. This is consumed by the
+  /// legalizer. This allows the legalizer to lower libcalls as tail calls.
+  bool LoweredTailCall = false;
+
   /// True if the call is to a vararg function.
   bool IsVarArg = false;
 };

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 43 additions & 0 deletions
@@ -327,6 +327,35 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
   llvm_unreachable("Unknown libcall function");
 }
 
+/// True if an instruction is in tail position in its caller. Intended for
+/// legalizing libcalls as tail calls when possible.
+static bool isLibCallInTailPosition(MachineInstr &MI) {
+  const Function &F = MI.getParent()->getParent()->getFunction();
+
+  // Conservatively require the attributes of the call to match those of
+  // the return. Ignore NoAlias and NonNull because they don't affect the
+  // call sequence.
+  AttributeList CallerAttrs = F.getAttributes();
+  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
+          .removeAttribute(Attribute::NoAlias)
+          .removeAttribute(Attribute::NonNull)
+          .hasAttributes())
+    return false;
+
+  // It's not safe to eliminate the sign / zero extension of the return value.
+  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
+      CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
+    return false;
+
+  // Only tail call if the following instruction is a standard return.
+  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
+  MachineInstr *Next = MI.getNextNode();
+  if (!Next || TII.isTailCall(*Next) || !Next->isReturn())
+    return false;
+
+  return true;
+}
+
 LegalizerHelper::LegalizeResult
 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                     const CallLowering::ArgInfo &Result,
@@ -407,10 +436,24 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
   Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
   Info.Callee = MachineOperand::CreateES(Name);
   Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
+  Info.IsTailCall = isLibCallInTailPosition(MI);
+
   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
   if (!CLI.lowerCall(MIRBuilder, Info))
     return LegalizerHelper::UnableToLegalize;
 
+  if (Info.LoweredTailCall) {
+    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
+    // We must have a return following the call to get past
+    // isLibCallInTailPosition.
+    assert(MI.getNextNode() && MI.getNextNode()->isReturn() &&
+           "Expected instr following MI to be a return?");
+
+    // We lowered a tail call, so the call is now the return from the block.
+    // Delete the old return.
+    MI.getNextNode()->eraseFromParent();
+  }
+
   return LegalizerHelper::Legalized;
 }
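For contrast (a hedged sketch, not part of the patch): isLibCallInTailPosition conservatively refuses callers whose return carries attributes beyond NoAlias/NonNull, including sign/zero extensions, since tail calling would skip the code that honors them. The function name @no_tail and its signature below are hypothetical, again in 2019-era typed-pointer IR.

; Sketch: no tail call here. The caller returns zeroext i8, so the
; return-attribute checks in isLibCallInTailPosition return false and
; the memcpy libcall stays a normal BL followed by the return sequence.
declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)

define zeroext i8 @no_tail(i8* %p, i8* %q, i32 %n, i8 %r) {
entry:
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false)
  ret i8 %r
}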

llvm/lib/Target/AArch64/AArch64CallLowering.cpp

Lines changed: 3 additions & 1 deletion
@@ -788,8 +788,10 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
 
   // If we're tail calling, then we're the return from the block. So, we don't
   // want to copy anything.
-  if (IsSibCall)
+  if (IsSibCall) {
+    Info.LoweredTailCall = true;
     return true;
+  }
 
   // Finally we can copy the returned value back into its virtual-register. In
   // symmetry with the arugments, the physical register must be an

llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-et-al.mir

Lines changed: 80 additions & 12 deletions
@@ -13,13 +13,14 @@ body: |
     ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
     ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
     ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32)
-    ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
     ; CHECK: $x0 = COPY [[COPY]](p0)
     ; CHECK: $x1 = COPY [[COPY1]](p0)
     ; CHECK: $x2 = COPY [[ZEXT]](s64)
-    ; CHECK: BL &memcpy, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2
-    ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
-    ; CHECK: RET_ReallyLR
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[C1]], [[C]]
+    ; CHECK: $w3 = COPY [[AND]](s32)
+    ; CHECK: TCRETURNdi &memcpy, 0, csr_aarch64_aapcs, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $w3
     %0:_(p0) = COPY $x0
     %1:_(p0) = COPY $x1
     %2:_(s32) = COPY $w2
@@ -42,13 +43,14 @@ body: |
     ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
     ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
     ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32)
-    ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
     ; CHECK: $x0 = COPY [[COPY]](p0)
     ; CHECK: $x1 = COPY [[COPY1]](p0)
     ; CHECK: $x2 = COPY [[ZEXT]](s64)
-    ; CHECK: BL &memmove, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2
-    ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
-    ; CHECK: RET_ReallyLR
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[C1]], [[C]]
+    ; CHECK: $w3 = COPY [[AND]](s32)
+    ; CHECK: TCRETURNdi &memmove, 0, csr_aarch64_aapcs, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $w3
     %0:_(p0) = COPY $x0
     %1:_(p0) = COPY $x1
     %2:_(s32) = COPY $w2
@@ -71,14 +73,15 @@ body: |
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
     ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
     ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32)
-    ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
     ; CHECK: $x0 = COPY [[COPY]](p0)
     ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
     ; CHECK: $w1 = COPY [[COPY3]](s32)
     ; CHECK: $x2 = COPY [[ZEXT]](s64)
-    ; CHECK: BL &memset, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $w1, implicit $x2
-    ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
-    ; CHECK: RET_ReallyLR
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[C1]], [[C]]
+    ; CHECK: $w3 = COPY [[AND]](s32)
+    ; CHECK: TCRETURNdi &memset, 0, csr_aarch64_aapcs, implicit $sp, implicit $x0, implicit $w1, implicit $x2, implicit $w3
     %0:_(p0) = COPY $x0
     %1:_(s32) = COPY $w1
     %2:_(s32) = COPY $w2
@@ -89,3 +92,68 @@ body: |
     RET_ReallyLR
 
 ...
+---
+name: no_tail_call
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $w2, $x0, $x1
+
+    ; CHECK-LABEL: name: no_tail_call
+    ; CHECK: liveins: $w2, $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32)
+    ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+    ; CHECK: $x0 = COPY [[COPY]](p0)
+    ; CHECK: $x1 = COPY [[COPY1]](p0)
+    ; CHECK: $x2 = COPY [[ZEXT]](s64)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[C1]], [[C]]
+    ; CHECK: $w3 = COPY [[AND]](s32)
+    ; CHECK: BL &memcpy, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $w3
+    ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+    ; CHECK: $x0 = COPY [[ZEXT]](s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:_(p0) = COPY $x0
+    %1:_(p0) = COPY $x1
+    %2:_(s32) = COPY $w2
+    %4:_(s1) = G_CONSTANT i1 false
+    %3:_(s64) = G_ZEXT %2(s32)
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), %0(p0), %1(p0), %3(s64), %4(s1)
+    $x0 = COPY %3
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_tc_twice
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $w2, $x0, $x1
+    ; CHECK-LABEL: name: dont_tc_twice
+    ; CHECK: liveins: $w2, $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32)
+    ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+    ; CHECK: $x0 = COPY [[COPY]](p0)
+    ; CHECK: $x1 = COPY [[COPY1]](p0)
+    ; CHECK: $x2 = COPY [[ZEXT]](s64)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[C1]], [[C]]
+    ; CHECK: $w3 = COPY [[AND]](s32)
+    ; CHECK: BL &memcpy, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $w3
+    ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+    ; CHECK: TCRETURNdi &memset, 0, csr_aarch64_aapcs, implicit $sp
+    %0:_(p0) = COPY $x0
+    %1:_(p0) = COPY $x1
+    %2:_(s32) = COPY $w2
+    %4:_(s1) = G_CONSTANT i1 false
+    %3:_(s64) = G_ZEXT %2(s32)
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), %0(p0), %1(p0), %3(s64), %4(s1)
+    TCRETURNdi &memset, 0, csr_aarch64_aapcs, implicit $sp

llvm/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll

Lines changed: 1 addition & 0 deletions
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s
+; RUN: llc -global-isel-abort=1 -verify-machineinstrs -mtriple=aarch64-unknown-unknown -global-isel < %s | FileCheck %s
 
 ; CHECK-LABEL: tail_memcpy:
 ; CHECK: b memcpy
