Skip to content

Commit 3d7d691

Browse files
jhuber6 authored and yuxuanchen1997 committed
[AMDGPU] Fix resource analysis crash on alias-to-alias function (#99034)
Summary: Previously, this code looked through only a single level of aliases to find the underlying function, so a call through an alias of an alias crashed the resource usage analysis. This patch keeps following the alias chain until it reaches a global that is not itself an alias. Aliases that form a cycle are illegal IR, so we shouldn't need to worry about infinite loops. Fixes #96812. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60251684
1 parent 98618b9 commit 3d7d691

File tree

2 files changed

+60
-3
lines changed

2 files changed

+60
-3
lines changed

llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -65,9 +65,10 @@ static const Function *getCalleeFunction(const MachineOperand &Op) {
6565
assert(Op.getImm() == 0);
6666
return nullptr;
6767
}
68-
if (auto *GA = dyn_cast<GlobalAlias>(Op.getGlobal()))
69-
return cast<Function>(GA->getOperand(0));
70-
return cast<Function>(Op.getGlobal());
68+
const GlobalValue *GV = Op.getGlobal();
69+
while (auto *GA = dyn_cast<GlobalAlias>(GV))
70+
GV = cast<GlobalValue>(GA->getOperand(0));
71+
return cast<Function>(GV);
7172
}
7273

7374
static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,56 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn -verify-machineinstrs %s -o - | FileCheck %s
3+
4+
@foo_a = alias void (ptr), ptr @foo
5+
@bar_a = alias void (ptr), ptr @foo_a
6+
7+
define void @foo() {
8+
; CHECK-LABEL: foo:
9+
; CHECK: ; %bb.0: ; %entry
10+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11+
; CHECK-NEXT: s_setpc_b64 s[30:31]
12+
entry:
13+
ret void
14+
}
15+
16+
define void @bar() {
17+
; CHECK-LABEL: bar:
18+
; CHECK: ; %bb.0: ; %entry
19+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20+
; CHECK-NEXT: s_mov_b32 s16, s33
21+
; CHECK-NEXT: s_mov_b32 s33, s32
22+
; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1
23+
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
24+
; CHECK-NEXT: s_mov_b64 exec, s[18:19]
25+
; CHECK-NEXT: s_waitcnt expcnt(0)
26+
; CHECK-NEXT: v_writelane_b32 v40, s16, 2
27+
; CHECK-NEXT: s_addk_i32 s32, 0x400
28+
; CHECK-NEXT: v_writelane_b32 v40, s30, 0
29+
; CHECK-NEXT: v_writelane_b32 v40, s31, 1
30+
; CHECK-NEXT: s_getpc_b64 s[16:17]
31+
; CHECK-NEXT: s_add_u32 s16, s16, bar_a@gotpcrel32@lo+4
32+
; CHECK-NEXT: s_addc_u32 s17, s17, bar_a@gotpcrel32@hi+12
33+
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
34+
; CHECK-NEXT: v_mov_b32_e32 v0, 0
35+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
36+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
37+
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
38+
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
39+
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
40+
; CHECK-NEXT: v_readlane_b32 s4, v40, 2
41+
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
42+
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
43+
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
44+
; CHECK-NEXT: s_addk_i32 s32, 0xfc00
45+
; CHECK-NEXT: s_mov_b32 s33, s4
46+
; CHECK-NEXT: s_waitcnt vmcnt(0)
47+
; CHECK-NEXT: s_setpc_b64 s[30:31]
48+
entry:
49+
call void @bar_a(ptr null)
50+
ret void
51+
}
52+
53+
; UTC_ARGS: --disable
54+
; CHECK: .set foo_a, foo
55+
; CHECK: .set bar_a, foo_a
56+
; UTC_ARGS: --enable

0 commit comments

Comments
 (0)