Skip to content

Commit 8373cee

Browse files
[CGP] Extend dupRetToEnableTailCallOpts to known intrinsics
Hint further tail call optimization opportunities when the examined returned value is the return value of a known intrinsic or library function, and it appears as the first function argument. Fixes: #75455.
1 parent d1c481d commit 8373cee

File tree

2 files changed

+85
-37
lines changed

2 files changed

+85
-37
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 64 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2522,8 +2522,40 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
25222522
return false;
25232523
}
25242524

2525+
static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
2526+
const CallInst *CI) {
2527+
assert(CI && CI->use_empty());
2528+
2529+
if (const auto *II = dyn_cast<IntrinsicInst>(CI))
2530+
switch (II->getIntrinsicID()) {
2531+
case Intrinsic::memset:
2532+
case Intrinsic::memcpy:
2533+
case Intrinsic::memmove:
2534+
return true;
2535+
default:
2536+
return false;
2537+
}
2538+
2539+
LibFunc LF;
2540+
Function *Callee = CI->getCalledFunction();
2541+
if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
2542+
switch (LF) {
2543+
case LibFunc_strcpy:
2544+
case LibFunc_strncpy:
2545+
case LibFunc_strcat:
2546+
case LibFunc_strncat:
2547+
return true;
2548+
default:
2549+
return false;
2550+
}
2551+
2552+
return false;
2553+
}
2554+
25252555
/// Look for opportunities to duplicate return instructions to the predecessor
2526-
/// to enable tail call optimizations. The case it is currently looking for is:
2556+
/// to enable tail call optimizations. The case it is currently looking for is
2557+
/// the following one. Known intrinsics or library functions that may be tail
2558+
/// called are taken into account as well.
25272559
/// @code
25282560
/// bb0:
25292561
/// %tmp0 = tail call i32 @f0()
@@ -2580,8 +2612,6 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
25802612
}
25812613

25822614
PN = dyn_cast<PHINode>(V);
2583-
if (!PN)
2584-
return false;
25852615
}
25862616

25872617
if (PN && PN->getParent() != BB)
@@ -2620,8 +2650,30 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
26202650
// Make sure the phi value is indeed produced by the tail call.
26212651
if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
26222652
TLI->mayBeEmittedAsTailCall(CI) &&
2623-
attributesPermitTailCall(F, CI, RetI, *TLI))
2653+
attributesPermitTailCall(F, CI, RetI, *TLI)) {
26242654
TailCallBBs.push_back(PredBB);
2655+
} else {
2656+
// Consider the cases in which the phi value is indirectly produced by
2657+
// the tail call, for example when encountering memset(), memmove(),
2658+
// strcpy(), whose return value may have been optimized out. In such
2659+
// cases, the value needs to be the first function argument.
2660+
//
2661+
// bb0:
2662+
// tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
2663+
// br label %return
2664+
// return:
2665+
// %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
2666+
if (PredBB && PredBB->getSingleSuccessor() == BB)
2667+
CI = dyn_cast_or_null<CallInst>(
2668+
PredBB->getTerminator()->getPrevNonDebugInstruction(true));
2669+
2670+
if (CI && CI->use_empty() &&
2671+
isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
2672+
IncomingVal == CI->getArgOperand(0) &&
2673+
TLI->mayBeEmittedAsTailCall(CI) &&
2674+
attributesPermitTailCall(F, CI, RetI, *TLI))
2675+
TailCallBBs.push_back(PredBB);
2676+
}
26252677
}
26262678
} else {
26272679
SmallPtrSet<BasicBlock *, 4> VisitedBBs;
@@ -2631,8 +2683,14 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
26312683
if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {
26322684
CallInst *CI = dyn_cast<CallInst>(I);
26332685
if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
2634-
attributesPermitTailCall(F, CI, RetI, *TLI))
2635-
TailCallBBs.push_back(Pred);
2686+
attributesPermitTailCall(F, CI, RetI, *TLI)) {
2687+
// Either we return void or the return value must be the first
2688+
// argument of a known intrinsic or library function.
2689+
if (!V || (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
2690+
V == CI->getArgOperand(0))) {
2691+
TailCallBBs.push_back(Pred);
2692+
}
2693+
}
26362694
}
26372695
}
26382696
}

llvm/test/CodeGen/X86/tailcall-cgp-dup.ll

Lines changed: 21 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -188,18 +188,14 @@ return:
188188
define ptr @memset_tailc(ptr %ret_val, i64 %sz) nounwind {
189189
; CHECK-LABEL: memset_tailc:
190190
; CHECK: ## %bb.0: ## %entry
191-
; CHECK-NEXT: pushq %rbx
192-
; CHECK-NEXT: movq %rdi, %rbx
193191
; CHECK-NEXT: testq %rdi, %rdi
194-
; CHECK-NEXT: je LBB4_2
195-
; CHECK-NEXT: ## %bb.1: ## %if.then
192+
; CHECK-NEXT: je LBB4_1
193+
; CHECK-NEXT: ## %bb.2: ## %if.then
196194
; CHECK-NEXT: movq %rsi, %rdx
197-
; CHECK-NEXT: movq %rbx, %rdi
198195
; CHECK-NEXT: xorl %esi, %esi
199-
; CHECK-NEXT: callq _memset
200-
; CHECK-NEXT: LBB4_2: ## %return
201-
; CHECK-NEXT: movq %rbx, %rax
202-
; CHECK-NEXT: popq %rbx
196+
; CHECK-NEXT: jmp _memset ## TAILCALL
197+
; CHECK-NEXT: LBB4_1: ## %return
198+
; CHECK-NEXT: movq %rdi, %rax
203199
; CHECK-NEXT: retq
204200
entry:
205201
%cmp = icmp eq ptr %ret_val, null
@@ -216,21 +212,15 @@ return:
216212
define ptr @memcpy_tailc(ptr %ret_val, i64 %sz, ptr %src) nounwind {
217213
; CHECK-LABEL: memcpy_tailc:
218214
; CHECK: ## %bb.0: ## %entry
219-
; CHECK-NEXT: pushq %rbx
220215
; CHECK-NEXT: testq %rsi, %rsi
221216
; CHECK-NEXT: je LBB5_1
222217
; CHECK-NEXT: ## %bb.2: ## %if.then
223218
; CHECK-NEXT: movq %rsi, %rax
224-
; CHECK-NEXT: movq %rdi, %rbx
225219
; CHECK-NEXT: movq %rdx, %rsi
226220
; CHECK-NEXT: movq %rax, %rdx
227-
; CHECK-NEXT: callq _memcpy
228-
; CHECK-NEXT: jmp LBB5_3
229-
; CHECK-NEXT: LBB5_1:
230-
; CHECK-NEXT: movq %rdx, %rbx
231-
; CHECK-NEXT: LBB5_3: ## %return
232-
; CHECK-NEXT: movq %rbx, %rax
233-
; CHECK-NEXT: popq %rbx
221+
; CHECK-NEXT: jmp _memcpy ## TAILCALL
222+
; CHECK-NEXT: LBB5_1: ## %return
223+
; CHECK-NEXT: movq %rdx, %rax
234224
; CHECK-NEXT: retq
235225
entry:
236226
%cmp = icmp eq i64 %sz, 0
@@ -251,25 +241,25 @@ define ptr @strcpy_legal_and_baz_illegal(ptr %arg, i64 %sz, ptr %2) nounwind {
251241
; CHECK-NEXT: pushq %r15
252242
; CHECK-NEXT: pushq %r14
253243
; CHECK-NEXT: pushq %rbx
254-
; CHECK-NEXT: movq %rdx, %r14
244+
; CHECK-NEXT: movq %rdx, %rbx
255245
; CHECK-NEXT: movq %rsi, %r15
256-
; CHECK-NEXT: movq %rdi, %rbx
246+
; CHECK-NEXT: movq %rdi, %r14
257247
; CHECK-NEXT: movq %rsi, %rdi
258248
; CHECK-NEXT: callq _malloc
259249
; CHECK-NEXT: testq %r15, %r15
260-
; CHECK-NEXT: je LBB6_2
261-
; CHECK-NEXT: ## %bb.1: ## %if.then
250+
; CHECK-NEXT: je LBB6_1
251+
; CHECK-NEXT: ## %bb.2: ## %if.then
262252
; CHECK-NEXT: movq %rax, %rdi
263-
; CHECK-NEXT: movq %r14, %rsi
264-
; CHECK-NEXT: movq %rax, %rbx
265-
; CHECK-NEXT: callq _strcpy
266-
; CHECK-NEXT: jmp LBB6_3
267-
; CHECK-NEXT: LBB6_2: ## %if.else
268-
; CHECK-NEXT: movq %rbx, %rdi
269-
; CHECK-NEXT: movq %r14, %rsi
253+
; CHECK-NEXT: movq %rbx, %rsi
254+
; CHECK-NEXT: popq %rbx
255+
; CHECK-NEXT: popq %r14
256+
; CHECK-NEXT: popq %r15
257+
; CHECK-NEXT: jmp _strcpy ## TAILCALL
258+
; CHECK-NEXT: LBB6_1: ## %if.else
259+
; CHECK-NEXT: movq %r14, %rdi
260+
; CHECK-NEXT: movq %rbx, %rsi
270261
; CHECK-NEXT: callq _baz
271-
; CHECK-NEXT: LBB6_3: ## %return
272-
; CHECK-NEXT: movq %rbx, %rax
262+
; CHECK-NEXT: movq %r14, %rax
273263
; CHECK-NEXT: popq %rbx
274264
; CHECK-NEXT: popq %r14
275265
; CHECK-NEXT: popq %r15

0 commit comments

Comments
 (0)