Skip to content

Commit e353195

Browse files
authored
[AMDGPU] Fix stack size metadata for functions with direct and indirect calls (#110828)
When a function has an external call, it should still use the stack sizes of its direct, known callees to calculate its own stack size.
1 parent d07dc5a commit e353195

File tree

2 files changed

+31
-4
lines changed

2 files changed

+31
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,12 @@ void MCResourceInfo::gatherResourceInfo(
159159
ArgExprs.push_back(
160160
MCConstantExpr::create(FRI.CalleeSegmentSize, OutContext));
161161

162-
if (!FRI.HasIndirectCall) {
163-
for (const Function *Callee : FRI.Callees) {
162+
SmallPtrSet<const Function *, 8> Seen;
163+
Seen.insert(&MF.getFunction());
164+
for (const Function *Callee : FRI.Callees) {
165+
if (!Seen.insert(Callee).second)
166+
continue;
167+
if (!Callee->isDeclaration()) {
164168
MCSymbol *calleeValSym =
165169
getSymbol(Callee->getName(), RIK_PrivateSegSize, OutContext);
166170
ArgExprs.push_back(MCSymbolRefExpr::create(calleeValSym, OutContext));

llvm/test/CodeGen/AMDGPU/function-resource-usage.ll

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -358,22 +358,45 @@ declare void @external() #0
358358
; GCN: .set multi_call_with_external.num_vgpr, max(41, amdgpu.max_num_vgpr)
359359
; GCN: .set multi_call_with_external.num_agpr, max(0, amdgpu.max_num_agpr)
360360
; GCN: .set multi_call_with_external.numbered_sgpr, max(42, amdgpu.max_num_sgpr)
361-
; GCN: .set multi_call_with_external.private_seg_size, 0
361+
; GCN: .set multi_call_with_external.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size))
362362
; GCN: .set multi_call_with_external.uses_vcc, 1
363363
; GCN: .set multi_call_with_external.uses_flat_scratch, 1
364364
; GCN: .set multi_call_with_external.has_dyn_sized_stack, 1
365365
; GCN: .set multi_call_with_external.has_recursion, 0
366366
; GCN: .set multi_call_with_external.has_indirect_call, 1
367367
; GCN: TotalNumSgprs: multi_call_with_external.numbered_sgpr+6
368368
; GCN: NumVgprs: multi_call_with_external.num_vgpr
369-
; GCN: ScratchSize: 0
369+
; GCN: ScratchSize: 2052
370370
define amdgpu_kernel void @multi_call_with_external() #0 {
371371
call void @use_stack0()
372372
call void @use_stack1()
373373
call void @external()
374374
ret void
375375
}
376376

377+
; GCN-LABEL: {{^}}multi_call_with_external_and_duplicates:
378+
; GCN: .set multi_call_with_external_and_duplicates.num_vgpr, max(41, amdgpu.max_num_vgpr)
379+
; GCN: .set multi_call_with_external_and_duplicates.num_agpr, max(0, amdgpu.max_num_agpr)
380+
; GCN: .set multi_call_with_external_and_duplicates.numbered_sgpr, max(44, amdgpu.max_num_sgpr)
381+
; GCN: .set multi_call_with_external_and_duplicates.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size))
382+
; GCN: .set multi_call_with_external_and_duplicates.uses_vcc, 1
383+
; GCN: .set multi_call_with_external_and_duplicates.uses_flat_scratch, 1
384+
; GCN: .set multi_call_with_external_and_duplicates.has_dyn_sized_stack, 1
385+
; GCN: .set multi_call_with_external_and_duplicates.has_recursion, 0
386+
; GCN: .set multi_call_with_external_and_duplicates.has_indirect_call, 1
387+
; GCN: TotalNumSgprs: multi_call_with_external_and_duplicates.numbered_sgpr+6
388+
; GCN: NumVgprs: multi_call_with_external_and_duplicates.num_vgpr
389+
; GCN: ScratchSize: 2052
390+
define amdgpu_kernel void @multi_call_with_external_and_duplicates() #0 {
391+
call void @use_stack0()
392+
call void @use_stack0()
393+
call void @use_stack1()
394+
call void @use_stack1()
395+
call void @external()
396+
call void @external()
397+
ret void
398+
}
399+
377400
; GCN-LABEL: {{^}}usage_external:
378401
; GCN: .set usage_external.num_vgpr, max(32, amdgpu.max_num_vgpr)
379402
; GCN: .set usage_external.num_agpr, max(0, amdgpu.max_num_agpr)

0 commit comments

Comments
 (0)