
Commit bd9145c

Reapply [AMDGPU] Avoid resource propagation for recursion through multiple functions (#112251)
I was wrong in the last patch: I treated the `Visited` set purely as a recursion deterrent, assuming that functions calling the same callee multiple times are handled elsewhere. That view misses cases where a function is called multiple times by different callers that are still part of the same call graph. The new test shows this case. Reapplies #111004, fixes #115562.
1 parent 098b0d1 commit bd9145c
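For context, here is a minimal C++ sketch (my illustration, not part of the commit) of the call-graph shape the new test exercises: two mutually recursive functions reached from a caller that also calls unrelated functions, so each function's resource symbols end up expressed in terms of the other's.

// Illustrative only; mirrors the shape of the added function-resource-usage.ll
// test multi_call_with_multi_stage_recurse, not actual compiler code.
void multi_stage_recurse2(int val);
void multi_stage_recurse1(int val) { multi_stage_recurse2(val); } // 1 -> 2
void multi_stage_recurse2(int val) { multi_stage_recurse1(val); } // 2 -> 1
void use_stack0() {}
void use_stack1() {}
// The entry point reaches the recursive pair alongside other callees, so
// multi_stage_recurse1 is reached through more than one caller in the same
// call graph; its resource symbols must not be defined in terms of themselves.
void multi_call_with_multi_stage_recurse(int n) {
  use_stack0();
  use_stack1();
  multi_stage_recurse1(n);
}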

File tree: 9 files changed (+397, -38 lines)
llvm/include/llvm/MC/MCExpr.h

Lines changed: 7 additions & 0 deletions
@@ -86,6 +86,10 @@ class MCExpr {
              bool InParens = false) const;
   void dump() const;
 
+  /// Returns whether the given symbol is used anywhere in the expression or
+  /// subexpressions.
+  bool isSymbolUsedInExpression(const MCSymbol *Sym) const;
+
   /// @}
   /// \name Expression Evaluation
   /// @{
@@ -663,6 +667,9 @@ class MCTargetExpr : public MCExpr {
                                          const MCFixup *Fixup) const = 0;
   // allow Target Expressions to be checked for equality
   virtual bool isEqualTo(const MCExpr *x) const { return false; }
+  virtual bool isSymbolUsedInExpression(const MCSymbol *Sym) const {
+    return false;
+  }
   // This should be set when assigned expressions are not valid ".set"
   // expressions, e.g. registers, and must be inlined.
   virtual bool inlineAssignedExpr() const { return false; }

llvm/lib/MC/MCExpr.cpp

Lines changed: 29 additions & 0 deletions
@@ -177,6 +177,35 @@ LLVM_DUMP_METHOD void MCExpr::dump() const {
 }
 #endif
 
+bool MCExpr::isSymbolUsedInExpression(const MCSymbol *Sym) const {
+  switch (getKind()) {
+  case MCExpr::Binary: {
+    const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(this);
+    return BE->getLHS()->isSymbolUsedInExpression(Sym) ||
+           BE->getRHS()->isSymbolUsedInExpression(Sym);
+  }
+  case MCExpr::Target: {
+    const MCTargetExpr *TE = static_cast<const MCTargetExpr *>(this);
+    return TE->isSymbolUsedInExpression(Sym);
+  }
+  case MCExpr::Constant:
+    return false;
+  case MCExpr::SymbolRef: {
+    const MCSymbol &S = static_cast<const MCSymbolRefExpr *>(this)->getSymbol();
+    if (S.isVariable() && !S.isWeakExternal())
+      return S.getVariableValue()->isSymbolUsedInExpression(Sym);
+    return &S == Sym;
+  }
+  case MCExpr::Unary: {
+    const MCExpr *SubExpr =
+        static_cast<const MCUnaryExpr *>(this)->getSubExpr();
+    return SubExpr->isSymbolUsedInExpression(Sym);
+  }
+  }
+
+  llvm_unreachable("Unknown expr kind!");
+}
+
 /* *** */
 
 const MCBinaryExpr *MCBinaryExpr::create(Opcode Opc, const MCExpr *LHS,
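A small usage sketch of the hoisted helper (my addition, not part of the patch), assuming an existing MCContext &Ctx: it builds the expression `foo + 1` and checks that `foo` is detected, the same self-reference case AsmParser guards against below.

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"

using namespace llvm;

// Returns true: the expression `foo + 1` references the symbol `foo`.
bool detectsSelfReference(MCContext &Ctx) {
  MCSymbol *Foo = Ctx.getOrCreateSymbol("foo");
  const MCExpr *Value = MCBinaryExpr::createAdd(
      MCSymbolRefExpr::create(Foo, Ctx), MCConstantExpr::create(1, Ctx), Ctx);
  // Walks Binary/Unary/SymbolRef/Target nodes; for variable symbols it also
  // recurses into their assigned values.
  return Value->isSymbolUsedInExpression(Foo);
}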

llvm/lib/MC/MCParser/AsmParser.cpp

Lines changed: 1 addition & 28 deletions
@@ -6417,33 +6417,6 @@ bool HLASMAsmParser::parseStatement(ParseStatementInfo &Info,
 namespace llvm {
 namespace MCParserUtils {
 
-/// Returns whether the given symbol is used anywhere in the given expression,
-/// or subexpressions.
-static bool isSymbolUsedInExpression(const MCSymbol *Sym, const MCExpr *Value) {
-  switch (Value->getKind()) {
-  case MCExpr::Binary: {
-    const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Value);
-    return isSymbolUsedInExpression(Sym, BE->getLHS()) ||
-           isSymbolUsedInExpression(Sym, BE->getRHS());
-  }
-  case MCExpr::Target:
-  case MCExpr::Constant:
-    return false;
-  case MCExpr::SymbolRef: {
-    const MCSymbol &S =
-        static_cast<const MCSymbolRefExpr *>(Value)->getSymbol();
-    if (S.isVariable() && !S.isWeakExternal())
-      return isSymbolUsedInExpression(Sym, S.getVariableValue());
-    return &S == Sym;
-  }
-  case MCExpr::Unary:
-    return isSymbolUsedInExpression(
-        Sym, static_cast<const MCUnaryExpr *>(Value)->getSubExpr());
-  }
-
-  llvm_unreachable("Unknown expr kind!");
-}
-
 bool parseAssignmentExpression(StringRef Name, bool allow_redef,
                                MCAsmParser &Parser, MCSymbol *&Sym,
                                const MCExpr *&Value) {
@@ -6468,7 +6441,7 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef,
   //
   // FIXME: Diagnostics. Note the location of the definition as a label.
   // FIXME: Diagnose assignment to protected identifier (e.g., register name).
-  if (isSymbolUsedInExpression(Sym, Value))
+  if (Value->isSymbolUsedInExpression(Sym))
     return Parser.Error(EqualLoc, "Recursive use of '" + Name + "'");
   else if (Sym->isUndefined(/*SetUsed*/ false) && !Sym->isUsed() &&
            !Sym->isVariable())

llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp

Lines changed: 42 additions & 10 deletions
@@ -100,25 +100,50 @@ void MCResourceInfo::assignResourceInfoExpr(
   const MCConstantExpr *LocalConstExpr =
       MCConstantExpr::create(LocalValue, OutContext);
   const MCExpr *SymVal = LocalConstExpr;
+  MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
   if (!Callees.empty()) {
     SmallVector<const MCExpr *, 8> ArgExprs;
-    // Avoid recursive symbol assignment.
     SmallPtrSet<const Function *, 8> Seen;
     ArgExprs.push_back(LocalConstExpr);
-    const Function &F = MF.getFunction();
-    Seen.insert(&F);
 
     for (const Function *Callee : Callees) {
       if (!Seen.insert(Callee).second)
         continue;
+
       MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
       MCSymbol *CalleeValSym =
           getSymbol(CalleeFnSym->getName(), RIK, OutContext);
-      ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
+
+      // Avoid constructing recursive definitions by detecting whether `Sym` is
+      // found transitively within any of its `CalleeValSym`.
+      if (!CalleeValSym->isVariable() ||
+          !CalleeValSym->getVariableValue(/*isUsed=*/false)
+               ->isSymbolUsedInExpression(Sym)) {
+        ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
+      } else {
+        // In case of recursion: make sure to use conservative register counts
+        // (i.e., specifically for VGPR/SGPR/AGPR).
+        switch (RIK) {
+        default:
+          break;
+        case RIK_NumVGPR:
+          ArgExprs.push_back(MCSymbolRefExpr::create(
+              getMaxVGPRSymbol(OutContext), OutContext));
+          break;
+        case RIK_NumSGPR:
+          ArgExprs.push_back(MCSymbolRefExpr::create(
+              getMaxSGPRSymbol(OutContext), OutContext));
+          break;
+        case RIK_NumAGPR:
+          ArgExprs.push_back(MCSymbolRefExpr::create(
+              getMaxAGPRSymbol(OutContext), OutContext));
+          break;
+        }
+      }
     }
-    SymVal = AMDGPUMCExpr::create(Kind, ArgExprs, OutContext);
+    if (ArgExprs.size() > 1)
+      SymVal = AMDGPUMCExpr::create(Kind, ArgExprs, OutContext);
   }
-  MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
   Sym->setVariableValue(SymVal);
 }
 
@@ -162,6 +187,7 @@ void MCResourceInfo::gatherResourceInfo(
     // The expression for private segment size should be: FRI.PrivateSegmentSize
     // + max(FRI.Callees, FRI.CalleeSegmentSize)
     SmallVector<const MCExpr *, 8> ArgExprs;
+    MCSymbol *Sym = getSymbol(FnSym->getName(), RIK_PrivateSegSize, OutContext);
    if (FRI.CalleeSegmentSize)
       ArgExprs.push_back(
           MCConstantExpr::create(FRI.CalleeSegmentSize, OutContext));
@@ -173,9 +199,16 @@ void MCResourceInfo::gatherResourceInfo(
         continue;
       if (!Callee->isDeclaration()) {
         MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
-        MCSymbol *calleeValSym =
+        MCSymbol *CalleeValSym =
             getSymbol(CalleeFnSym->getName(), RIK_PrivateSegSize, OutContext);
-        ArgExprs.push_back(MCSymbolRefExpr::create(calleeValSym, OutContext));
+
+        // Avoid constructing recursive definitions by detecting whether `Sym`
+        // is found transitively within any of its `CalleeValSym`.
+        if (!CalleeValSym->isVariable() ||
+            !CalleeValSym->getVariableValue(/*isUsed=*/false)
+                 ->isSymbolUsedInExpression(Sym)) {
+          ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
+        }
       }
     }
     const MCExpr *localConstExpr =
@@ -186,8 +219,7 @@ void MCResourceInfo::gatherResourceInfo(
       localConstExpr =
           MCBinaryExpr::createAdd(localConstExpr, transitiveExpr, OutContext);
     }
-    getSymbol(FnSym->getName(), RIK_PrivateSegSize, OutContext)
-        ->setVariableValue(localConstExpr);
+    Sym->setVariableValue(localConstExpr);
   }
 
   auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) {

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp

Lines changed: 8 additions & 0 deletions
@@ -305,6 +305,14 @@ const AMDGPUMCExpr *AMDGPUMCExpr::createOccupancy(unsigned InitOcc,
                                Ctx);
 }
 
+bool AMDGPUMCExpr::isSymbolUsedInExpression(const MCSymbol *Sym) const {
+  for (const MCExpr *E : getArgs()) {
+    if (E->isSymbolUsedInExpression(Sym))
+      return true;
+  }
+  return false;
+}
+
 static KnownBits fromOptionalToKnownBits(std::optional<bool> CompareResult) {
   static constexpr unsigned BitWidth = 64;
   const APInt True(BitWidth, 1);

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h

Lines changed: 1 addition & 0 deletions
@@ -97,6 +97,7 @@ class AMDGPUMCExpr : public MCTargetExpr {
   void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
   bool evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
                                  const MCFixup *Fixup) const override;
+  bool isSymbolUsedInExpression(const MCSymbol *Sym) const override;
   void visitUsedExpr(MCStreamer &Streamer) const override;
   MCFragment *findAssociatedFragment() const override;
   void fixELFSymbolsInTLSFixups(MCAssembler &) const override{};

llvm/test/CodeGen/AMDGPU/function-resource-usage.ll

Lines changed: 130 additions & 0 deletions
@@ -481,6 +481,136 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
   ret void
 }
 
+; GCN-LABEL: {{^}}multi_stage_recurse2:
+; GCN: .set multi_stage_recurse2.num_vgpr, max(43, multi_stage_recurse1.num_vgpr)
+; GCN: .set multi_stage_recurse2.num_agpr, max(0, multi_stage_recurse1.num_agpr)
+; GCN: .set multi_stage_recurse2.numbered_sgpr, max(34, multi_stage_recurse1.numbered_sgpr)
+; GCN: .set multi_stage_recurse2.private_seg_size, 16+(max(multi_stage_recurse1.private_seg_size))
+; GCN: .set multi_stage_recurse2.uses_vcc, or(1, multi_stage_recurse1.uses_vcc)
+; GCN: .set multi_stage_recurse2.uses_flat_scratch, or(0, multi_stage_recurse1.uses_flat_scratch)
+; GCN: .set multi_stage_recurse2.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack)
+; GCN: .set multi_stage_recurse2.has_recursion, or(1, multi_stage_recurse1.has_recursion)
+; GCN: .set multi_stage_recurse2.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
+; GCN: TotalNumSgprs: multi_stage_recurse2.numbered_sgpr+(extrasgprs(multi_stage_recurse2.uses_vcc, multi_stage_recurse2.uses_flat_scratch, 1))
+; GCN: NumVgprs: max(43, multi_stage_recurse1.num_vgpr)
+; GCN: ScratchSize: 16+(max(multi_stage_recurse1.private_seg_size))
+; GCN-LABEL: {{^}}multi_stage_recurse1:
+; GCN: .set multi_stage_recurse1.num_vgpr, max(48, amdgpu.max_num_vgpr)
+; GCN: .set multi_stage_recurse1.num_agpr, max(0, amdgpu.max_num_agpr)
+; GCN: .set multi_stage_recurse1.numbered_sgpr, max(34, amdgpu.max_num_sgpr)
+; GCN: .set multi_stage_recurse1.private_seg_size, 16
+; GCN: .set multi_stage_recurse1.uses_vcc, 1
+; GCN: .set multi_stage_recurse1.uses_flat_scratch, 0
+; GCN: .set multi_stage_recurse1.has_dyn_sized_stack, 0
+; GCN: .set multi_stage_recurse1.has_recursion, 1
+; GCN: .set multi_stage_recurse1.has_indirect_call, 0
+; GCN: TotalNumSgprs: multi_stage_recurse1.numbered_sgpr+4
+; GCN: NumVgprs: max(48, amdgpu.max_num_vgpr)
+; GCN: ScratchSize: 16
+define void @multi_stage_recurse1(i32 %val) #2 {
+  call void @multi_stage_recurse2(i32 %val)
+  call void asm sideeffect "", "~{v47}"() #0
+  ret void
+}
+define void @multi_stage_recurse2(i32 %val) #2 {
+  call void @multi_stage_recurse1(i32 %val)
+  call void asm sideeffect "", "~{v42}"() #0
+  ret void
+}
+
+; GCN-LABEL: {{^}}usage_multi_stage_recurse:
+; GCN: .set usage_multi_stage_recurse.num_vgpr, max(32, multi_stage_recurse1.num_vgpr)
+; GCN: .set usage_multi_stage_recurse.num_agpr, max(0, multi_stage_recurse1.num_agpr)
+; GCN: .set usage_multi_stage_recurse.numbered_sgpr, max(33, multi_stage_recurse1.numbered_sgpr)
+; GCN: .set usage_multi_stage_recurse.private_seg_size, 0+(max(multi_stage_recurse1.private_seg_size))
+; GCN: .set usage_multi_stage_recurse.uses_vcc, or(1, multi_stage_recurse1.uses_vcc)
+; GCN: .set usage_multi_stage_recurse.uses_flat_scratch, or(1, multi_stage_recurse1.uses_flat_scratch)
+; GCN: .set usage_multi_stage_recurse.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack)
+; GCN: .set usage_multi_stage_recurse.has_recursion, or(1, multi_stage_recurse1.has_recursion)
+; GCN: .set usage_multi_stage_recurse.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
+; GCN: TotalNumSgprs: usage_multi_stage_recurse.numbered_sgpr+6
+; GCN: NumVgprs: usage_multi_stage_recurse.num_vgpr
+; GCN: ScratchSize: 16
+define amdgpu_kernel void @usage_multi_stage_recurse(i32 %n) #0 {
+  call void @multi_stage_recurse1(i32 %n)
+  ret void
+}
+
+; GCN-LABEL: {{^}}multi_stage_recurse_noattr2:
+; GCN: .set multi_stage_recurse_noattr2.num_vgpr, max(41, multi_stage_recurse_noattr1.num_vgpr)
+; GCN: .set multi_stage_recurse_noattr2.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr)
+; GCN: .set multi_stage_recurse_noattr2.numbered_sgpr, max(54, multi_stage_recurse_noattr1.numbered_sgpr)
+; GCN: .set multi_stage_recurse_noattr2.private_seg_size, 16+(max(multi_stage_recurse_noattr1.private_seg_size))
+; GCN: .set multi_stage_recurse_noattr2.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc)
+; GCN: .set multi_stage_recurse_noattr2.uses_flat_scratch, or(0, multi_stage_recurse_noattr1.uses_flat_scratch)
+; GCN: .set multi_stage_recurse_noattr2.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack)
+; GCN: .set multi_stage_recurse_noattr2.has_recursion, or(0, multi_stage_recurse_noattr1.has_recursion)
+; GCN: .set multi_stage_recurse_noattr2.has_indirect_call, or(0, multi_stage_recurse_noattr1.has_indirect_call)
+; GCN: TotalNumSgprs: multi_stage_recurse_noattr2.numbered_sgpr+(extrasgprs(multi_stage_recurse_noattr2.uses_vcc, multi_stage_recurse_noattr2.uses_flat_scratch, 1))
+; GCN: NumVgprs: max(41, multi_stage_recurse_noattr1.num_vgpr)
+; GCN: ScratchSize: 16+(max(multi_stage_recurse_noattr1.private_seg_size))
+; GCN-LABEL: {{^}}multi_stage_recurse_noattr1:
+; GCN: .set multi_stage_recurse_noattr1.num_vgpr, max(41, amdgpu.max_num_vgpr)
+; GCN: .set multi_stage_recurse_noattr1.num_agpr, max(0, amdgpu.max_num_agpr)
+; GCN: .set multi_stage_recurse_noattr1.numbered_sgpr, max(57, amdgpu.max_num_sgpr)
+; GCN: .set multi_stage_recurse_noattr1.private_seg_size, 16
+; GCN: .set multi_stage_recurse_noattr1.uses_vcc, 1
+; GCN: .set multi_stage_recurse_noattr1.uses_flat_scratch, 0
+; GCN: .set multi_stage_recurse_noattr1.has_dyn_sized_stack, 0
+; GCN: .set multi_stage_recurse_noattr1.has_recursion, 0
+; GCN: .set multi_stage_recurse_noattr1.has_indirect_call, 0
+; GCN: TotalNumSgprs: multi_stage_recurse_noattr1.numbered_sgpr+4
+; GCN: NumVgprs: max(41, amdgpu.max_num_vgpr)
+; GCN: ScratchSize: 16
+define void @multi_stage_recurse_noattr1(i32 %val) #0 {
+  call void @multi_stage_recurse_noattr2(i32 %val)
+  call void asm sideeffect "", "~{s56}"() #0
+  ret void
+}
+define void @multi_stage_recurse_noattr2(i32 %val) #0 {
+  call void @multi_stage_recurse_noattr1(i32 %val)
+  call void asm sideeffect "", "~{s53}"() #0
+  ret void
+}
+
+; GCN-LABEL: {{^}}usage_multi_stage_recurse_noattrs:
+; GCN: .set usage_multi_stage_recurse_noattrs.num_vgpr, max(32, multi_stage_recurse_noattr1.num_vgpr)
+; GCN: .set usage_multi_stage_recurse_noattrs.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr)
+; GCN: .set usage_multi_stage_recurse_noattrs.numbered_sgpr, max(33, multi_stage_recurse_noattr1.numbered_sgpr)
+; GCN: .set usage_multi_stage_recurse_noattrs.private_seg_size, 0+(max(multi_stage_recurse_noattr1.private_seg_size))
+; GCN: .set usage_multi_stage_recurse_noattrs.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc)
+; GCN: .set usage_multi_stage_recurse_noattrs.uses_flat_scratch, or(1, multi_stage_recurse_noattr1.uses_flat_scratch)
+; GCN: .set usage_multi_stage_recurse_noattrs.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack)
+; GCN: .set usage_multi_stage_recurse_noattrs.has_recursion, or(0, multi_stage_recurse_noattr1.has_recursion)
+; GCN: .set usage_multi_stage_recurse_noattrs.has_indirect_call, or(0, multi_stage_recurse_noattr1.has_indirect_call)
+; GCN: TotalNumSgprs: usage_multi_stage_recurse_noattrs.numbered_sgpr+6
+; GCN: NumVgprs: usage_multi_stage_recurse_noattrs.num_vgpr
+; GCN: ScratchSize: 16
+define amdgpu_kernel void @usage_multi_stage_recurse_noattrs(i32 %n) #0 {
+  call void @multi_stage_recurse_noattr1(i32 %n)
+  ret void
+}
+
+; GCN-LABEL: {{^}}multi_call_with_multi_stage_recurse:
+; GCN: .set multi_call_with_multi_stage_recurse.num_vgpr, max(41, use_stack0.num_vgpr, use_stack1.num_vgpr, multi_stage_recurse1.num_vgpr)
+; GCN: .set multi_call_with_multi_stage_recurse.num_agpr, max(0, use_stack0.num_agpr, use_stack1.num_agpr, multi_stage_recurse1.num_agpr)
+; GCN: .set multi_call_with_multi_stage_recurse.numbered_sgpr, max(43, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr, multi_stage_recurse1.numbered_sgpr)
+; GCN: .set multi_call_with_multi_stage_recurse.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size, multi_stage_recurse1.private_seg_size))
+; GCN: .set multi_call_with_multi_stage_recurse.uses_vcc, or(1, use_stack0.uses_vcc, use_stack1.uses_vcc, multi_stage_recurse1.uses_vcc)
+; GCN: .set multi_call_with_multi_stage_recurse.uses_flat_scratch, or(1, use_stack0.uses_flat_scratch, use_stack1.uses_flat_scratch, multi_stage_recurse1.uses_flat_scratch)
+; GCN: .set multi_call_with_multi_stage_recurse.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack, use_stack1.has_dyn_sized_stack, multi_stage_recurse1.has_dyn_sized_stack)
+; GCN: .set multi_call_with_multi_stage_recurse.has_recursion, or(1, use_stack0.has_recursion, use_stack1.has_recursion, multi_stage_recurse1.has_recursion)
+; GCN: .set multi_call_with_multi_stage_recurse.has_indirect_call, or(0, use_stack0.has_indirect_call, use_stack1.has_indirect_call, multi_stage_recurse1.has_indirect_call)
+; GCN: TotalNumSgprs: multi_call_with_multi_stage_recurse.numbered_sgpr+6
+; GCN: NumVgprs: multi_call_with_multi_stage_recurse.num_vgpr
+; GCN: ScratchSize: 2052
+define amdgpu_kernel void @multi_call_with_multi_stage_recurse(i32 %n) #0 {
+  call void @use_stack0()
+  call void @use_stack1()
+  call void @multi_stage_recurse1(i32 %n)
+  ret void
+}
+
 ; Make sure there's no assert when a sgpr96 is used.
 ; GCN-LABEL: {{^}}count_use_sgpr96_external_call
 ; GCN: .set count_use_sgpr96_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
