Skip to content

Commit d2d77e0

Browse files
Ting Wang
authored and committed
[PowerPC][Coroutines] Add tail-call check with call information for coroutines
Fixes #56679. Reviewed By: ChuanqiXu, shchenz Differential Revision: https://reviews.llvm.org/D131953
1 parent 53bc7d5 commit d2d77e0

File tree

8 files changed

+113
-3
lines changed

8 files changed

+113
-3
lines changed

clang/test/CodeGenCoroutines/pr56329.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// Test for PR56919. Tests that we won't contain the resumption of the final suspend point.
22
//
33
// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %s -O3 -S -emit-llvm -o - | FileCheck %s
4+
// This test is expected to fail on PowerPC.
5+
// XFAIL: powerpc
46

57
#include "Inputs/coroutine.h"
68

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -765,6 +765,9 @@ class TargetTransformInfo {
765765
/// If the target supports tail calls.
766766
bool supportsTailCalls() const;
767767

768+
/// If the target supports a tail call at the call site \p CB.
769+
bool supportsTailCallFor(const CallBase *CB) const;
770+
768771
/// Don't restrict interleaved unrolling to small loops.
769772
bool enableAggressiveInterleaving(bool LoopHasReductions) const;
770773

@@ -1635,6 +1638,7 @@ class TargetTransformInfo::Concept {
16351638
ArrayRef<Type *> Tys) = 0;
16361639
virtual bool supportsEfficientVectorElementLoadStore() = 0;
16371640
virtual bool supportsTailCalls() = 0;
1641+
virtual bool supportsTailCallFor(const CallBase *CB) = 0;
16381642
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
16391643
virtual MemCmpExpansionOptions
16401644
enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
@@ -2109,6 +2113,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
21092113
}
21102114

21112115
bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
2116+
bool supportsTailCallFor(const CallBase *CB) override {
2117+
return Impl.supportsTailCallFor(CB);
2118+
}
21122119

21132120
bool enableAggressiveInterleaving(bool LoopHasReductions) override {
21142121
return Impl.enableAggressiveInterleaving(LoopHasReductions);

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,10 @@ class TargetTransformInfoImplBase {
343343

344344
bool supportsTailCalls() const { return true; }
345345

346+
bool supportsTailCallFor(const CallBase *CB) const {
347+
return supportsTailCalls();
348+
}
349+
346350
bool enableAggressiveInterleaving(bool LoopHasReductions) const {
347351
return false;
348352
}

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,10 @@ bool TargetTransformInfo::supportsTailCalls() const {
528528
return TTIImpl->supportsTailCalls();
529529
}
530530

531+
bool TargetTransformInfo::supportsTailCallFor(const CallBase *CB) const {
532+
return TTIImpl->supportsTailCallFor(CB);
533+
}
534+
531535
bool TargetTransformInfo::enableAggressiveInterleaving(
532536
bool LoopHasReductions) const {
533537
return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1461,3 +1461,19 @@ InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src,
14611461
// evl but no mask, on Power 9/10. Otherwise, we must scalarize.
14621462
return getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
14631463
}
1464+
1465+
bool PPCTTIImpl::supportsTailCallFor(const CallBase *CB) const {
1466+
// Subtargets using PC-Relative calls support the tail call for any call site.
1467+
if (ST->isUsingPCRelativeCalls())
1468+
return true;
1469+
1470+
const Function *Callee = CB->getCalledFunction();
1471+
// Indirect calls (no known called function) and calls to variadic functions are not supported.
1472+
if (!Callee || Callee->isVarArg())
1473+
return false;
1474+
1475+
const Function *Caller = CB->getCaller();
1476+
// Otherwise, support it only if the callee is assumed DSO-local to the caller's module, so that we can share the TOC base.
1477+
return ST->getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(),
1478+
Callee);
1479+
}

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
148148
unsigned AddressSpace,
149149
TTI::TargetCostKind CostKind,
150150
const Instruction *I = nullptr);
151+
bool supportsTailCallFor(const CallBase *CB) const;
151152

152153
private:
153154
// The following constant is used for estimating costs on power9.

llvm/lib/Transforms/Coroutines/CoroSplit.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1362,7 +1362,7 @@ static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
13621362
// for symmetrical coroutine control transfer (C++ Coroutines TS extension).
13631363
// This transformation is done only in the resume part of the coroutine that has
13641364
// identical signature and calling convention as the coro.resume call.
1365-
static void addMustTailToCoroResumes(Function &F) {
1365+
static void addMustTailToCoroResumes(Function &F, TargetTransformInfo &TTI) {
13661366
bool changed = false;
13671367

13681368
// Collect potential resume instructions.
@@ -1374,7 +1374,9 @@ static void addMustTailToCoroResumes(Function &F) {
13741374

13751375
// Set musttail on those that are followed by a ret instruction.
13761376
for (CallInst *Call : Resumes)
1377-
if (simplifyTerminatorLeadingToRet(Call->getNextNode())) {
1377+
// Skip calls for which the target does not support a tail call.
1378+
if (TTI.supportsTailCallFor(Call) &&
1379+
simplifyTerminatorLeadingToRet(Call->getNextNode())) {
13781380
Call->setTailCallKind(CallInst::TCK_MustTail);
13791381
changed = true;
13801382
}
@@ -1610,7 +1612,7 @@ static void splitSwitchCoroutine(Function &F, coro::Shape &Shape,
16101612
// FIXME: Could we support symmetric transfer effectively without musttail
16111613
// call?
16121614
if (TTI.supportsTailCalls())
1613-
addMustTailToCoroResumes(*ResumeClone);
1615+
addMustTailToCoroResumes(*ResumeClone, TTI);
16141616

16151617
// Store addresses resume/destroy/cleanup functions in the coroutine frame.
16161618
updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
; Tests that some targets (e.g. ppc) support tail calls only under certain conditions.
2+
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S \
3+
; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 | FileCheck %s
4+
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S \
5+
; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 --code-model=medium \
6+
; RUN: | FileCheck %s --check-prefix=CHECK-PCREL
7+
8+
define void @f() #0 {
9+
entry:
10+
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
11+
%alloc = call i8* @malloc(i64 16) #3
12+
%vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc)
13+
14+
%save = call token @llvm.coro.save(i8* null)
15+
%addr1 = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
16+
%pv1 = bitcast i8* %addr1 to void (i8*)*
17+
call fastcc void %pv1(i8* null)
18+
19+
%suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
20+
switch i8 %suspend, label %exit [
21+
i8 0, label %await.ready
22+
i8 1, label %exit
23+
]
24+
await.ready:
25+
%save2 = call token @llvm.coro.save(i8* null)
26+
%addr2 = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
27+
%pv2 = bitcast i8* %addr2 to void (i8*)*
28+
call fastcc void %pv2(i8* null)
29+
30+
%suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
31+
switch i8 %suspend2, label %exit [
32+
i8 0, label %exit
33+
i8 1, label %exit
34+
]
35+
exit:
36+
call i1 @llvm.coro.end(i8* null, i1 false)
37+
ret void
38+
}
39+
40+
; Verify that in the initial function the resume call is not marked with musttail.
41+
; CHECK-LABEL: @f(
42+
; CHECK: %[[addr1:.+]] = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
43+
; CHECK-NEXT: %[[pv1:.+]] = bitcast i8* %[[addr1]] to void (i8*)*
44+
; CHECK-NOT: musttail call fastcc void %[[pv1]](i8* null)
45+
46+
; Verify that, for a ppc target not using PC-Relative addressing, the resume call in the resume part is not marked with musttail.
47+
; CHECK-LABEL: @f.resume(
48+
; CHECK: %[[addr2:.+]] = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
49+
; CHECK-NEXT: %[[pv2:.+]] = bitcast i8* %[[addr2]] to void (i8*)*
50+
; CHECK-NEXT: call fastcc void %[[pv2]](i8* null)
51+
52+
; Verify that, for a ppc target using PC-Relative addressing, the resume call in the resume part is marked with musttail.
53+
; CHECK-PCREL-LABEL: @f.resume(
54+
; CHECK-PCREL: %[[addr2:.+]] = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
55+
; CHECK-PCREL-NEXT: %[[pv2:.+]] = bitcast i8* %[[addr2]] to void (i8*)*
56+
; CHECK-PCREL-NEXT: musttail call fastcc void %[[pv2]](i8* null)
57+
; CHECK-PCREL-NEXT: ret void
58+
59+
declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1
60+
declare i1 @llvm.coro.alloc(token) #2
61+
declare i64 @llvm.coro.size.i64() #3
62+
declare i8* @llvm.coro.begin(token, i8* writeonly) #2
63+
declare token @llvm.coro.save(i8*) #2
64+
declare i8* @llvm.coro.frame() #3
65+
declare i8 @llvm.coro.suspend(token, i1) #2
66+
declare i8* @llvm.coro.free(token, i8* nocapture readonly) #1
67+
declare i1 @llvm.coro.end(i8*, i1) #2
68+
declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #1
69+
declare i8* @malloc(i64)
70+
71+
attributes #0 = { presplitcoroutine }
72+
attributes #1 = { argmemonly nounwind readonly }
73+
attributes #2 = { nounwind }
74+
attributes #3 = { nounwind readnone }

0 commit comments

Comments
 (0)