
Commit 93220e7

RegAllocGreedy: Fix use after free during last chance recoloring (#120697)
Last chance recoloring can delete the current fixed interval during recursive assignment of interfering live intervals. Check whether the virtual register is still assigned before attempting the unassignment, as is done in other scenarios. This relies on the fact that we do not recycle virtual register numbers.

I have only seen this occur in error situations where the allocation will fail, but I think it can theoretically happen in working allocations.

This feels very brute force, but I've spent over a week debugging this, and it is what works without any lit regressions. The surprising piece to me was that unspillable live ranges may be spilled, and a number of tests rely on optimizations occurring on them. My other attempts to fix this mostly revolved around not identifying unspillable live ranges as snippet copies.

I've also discovered we're making some unproductive live range splits with subranges. If we avoid such splits, some of the unspillable copies disappear, but mandating that the splitting be precise in order to fix a use after free doesn't sound right.
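The shape of the fix, stated abstractly: copy the stable virtual register number out of the interval before recursing, and afterwards consult the allocator's maps by that number instead of trusting the possibly freed interval reference. Below is a minimal standalone C++ sketch of that pattern; the types and helpers (Interval, Intervals, hasPhys) are hypothetical stand-ins, not LLVM's actual API.

#include <cstdio>
#include <map>
#include <memory>

using Register = unsigned;

struct Interval {
  Register Reg;
  Register reg() const { return Reg; }
};

// Owns every live interval, keyed by virtual register number. Numbers are
// never reused, so a stale key can only miss; it never aliases a new interval.
static std::map<Register, std::unique_ptr<Interval>> Intervals;

static bool hasPhys(Register R) { return Intervals.count(R) != 0; }

// Stand-in for the recursive recoloring step: assigning interfering registers
// may spill and thereby delete other intervals, including the caller's.
static void tryRecoloringCandidates() { Intervals.erase(1); }

static unsigned tryLastChanceRecoloring(const Interval &VirtReg,
                                        unsigned PhysReg) {
  Register ThisVirtReg = VirtReg.reg(); // save the number *before* recursing
  tryRecoloringCandidates();
  if (hasPhys(ThisVirtReg)) {
    // Interval still alive: safe to dereference VirtReg and unassign it.
    return PhysReg;
  }
  // Interval was deleted; VirtReg now dangles, so roll back without
  // touching it and report that no register was assigned.
  std::printf("interval %%%u was deleted; rolling back\n", ThisVirtReg);
  return 0;
}

int main() {
  Intervals.emplace(1, std::make_unique<Interval>(Interval{1}));
  const Interval &VR = *Intervals.at(1);
  return tryLastChanceRecoloring(VR, 42) == 0 ? 0 : 1; // expect the rollback path
}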
1 parent 10fb5d2 commit 93220e7

File tree

3 files changed: +309 −2 lines changed

llvm/lib/CodeGen/RegAllocGreedy.cpp

Lines changed: 13 additions & 2 deletions
@@ -2035,6 +2035,9 @@ unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg,
   // available colors.
   Matrix->assign(VirtReg, PhysReg);
 
+  // VirtReg may be deleted during tryRecoloringCandidates, save a copy.
+  Register ThisVirtReg = VirtReg.reg();
+
   // Save the current recoloring state.
   // If we cannot recolor all the interferences, we will have to start again
   // at this point for the next physical register.
@@ -2046,8 +2049,16 @@ unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg,
     NewVRegs.push_back(NewVReg);
     // Do not mess up with the global assignment process.
     // I.e., VirtReg must be unassigned.
-    Matrix->unassign(VirtReg);
-    return PhysReg;
+    if (VRM->hasPhys(ThisVirtReg)) {
+      Matrix->unassign(VirtReg);
+      return PhysReg;
+    }
+
+    // It is possible VirtReg will be deleted during tryRecoloringCandidates.
+    LLVM_DEBUG(dbgs() << "tryRecoloringCandidates deleted a fixed register "
+                      << printReg(ThisVirtReg) << '\n');
+    FixedRegisters.erase(ThisVirtReg);
+    return 0;
   }
 
   LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
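A note on why the guard above is sound: it keys off the saved register number rather than the interval object, so it depends on the commit message's observation that virtual register numbers are never recycled. If numbers could be reused, the check would be open to an ABA-style mixup, as this hypothetical sketch (stand-in types again, not LLVM code) illustrates:

#include <cassert>
#include <map>

using Register = unsigned;

// vreg number -> assigned physical register (stand-in for a VirtRegMap).
static std::map<Register, unsigned> PhysMap;

int main() {
  PhysMap[1] = 42;    // %1 currently assigned
  Register Saved = 1; // key saved before the recursive recoloring step
  PhysMap.erase(1);   // recursion deletes %1 ...
  PhysMap[1] = 7;     // ... and, hypothetically, the number gets recycled
  // The stale key now matches an unrelated register, so a rollback keyed on
  // it would unassign the wrong interval. LLVM sidesteps this by never
  // reusing virtual register numbers: a stale key can only miss.
  assert(PhysMap.count(Saved) && "ABA hazard: stale key aliases a new vreg");
  return 0;
}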
Lines changed: 204 additions & 0 deletions
@@ -0,0 +1,204 @@
# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -simplify-mir -start-before=greedy,2 -stress-regalloc=4 -stop-before=virtregrewriter,2 -o - -verify-regalloc %s 2> %t.err | FileCheck %s
# RUN: FileCheck -check-prefix=ERR %s < %t.err

# To allocate the vreg_512_align2, the allocation will attempt to
# inflate the register class to av_512_align2. This will ultimately
# not work, and the allocation will fail. There is an unproductive
# live range split, and we end up with a snippet copy of an
# unspillable register. Recursive assignment of interfering ranges
# during last chance recoloring would delete the unspillable snippet
# live range. Make sure there's no use after free when rolling back
# the last chance assignment.

# ERR: error: <unknown>:0:0: ran out of registers during register allocation in function 'inflated_reg_class_copy_use_after_free'
# ERR: error: <unknown>:0:0: ran out of registers during register allocation in function 'inflated_reg_class_copy_use_after_free_lane_subset'

--- |
  define amdgpu_kernel void @inflated_reg_class_copy_use_after_free() {
    ret void
  }

  define amdgpu_kernel void @inflated_reg_class_copy_use_after_free_lane_subset() {
    ret void
  }

...

# CHECK-LABEL: name: inflated_reg_class_copy_use_after_free
# CHECK: S_NOP 0, implicit-def [[ORIG_REG:%[0-9]+]].sub0_sub1_sub2_sub3
# CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
# CHECK-NEXT: [[RESTORE0:%[0-9]+]]:vreg_512_align2 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
# CHECK-NEXT: early-clobber [[MFMA0:%[0-9]+]]:vreg_512_align2 = V_MFMA_F32_16X16X1F32_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, [[RESTORE0]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[MFMA0]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY [[MFMA0]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: undef [[SPLIT2:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT1]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT2]].sub0:av_512_align2 = COPY [[SPLIT1]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: SI_SPILL_AV512_SAVE [[SPLIT2]], %stack.1, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.1, align 4, addrspace 5)
# CHECK-NEXT: [[RESTORE1:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
# CHECK-NEXT: undef [[SPLIT3:%[0-9]+]].sub0_sub1:av_512_align2 = COPY [[RESTORE1]].sub0_sub1
# CHECK-NEXT: [[RESTORE2:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.1, align 4, addrspace 5)
# CHECK-NEXT: undef [[SPLIT3:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[RESTORE2]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT3]].sub0:av_512_align2 = COPY [[RESTORE2]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: undef [[SPLIT4:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT3]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT4]].sub0:av_512_align2 = COPY [[SPLIT3]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: [[SPLIT5:%[0-9]+]].sub2:av_512_align2 = COPY [[SPLIT4]].sub3
# CHECK-NEXT: undef [[SPLIT6:%[0-9]+]].sub0_sub1_sub2:av_512_align2 = COPY [[SPLIT5]].sub0_sub1_sub2
# CHECK-NEXT: undef [[SPLIT7:%[0-9]+]].sub0_sub1_sub2:av_512_align2 = COPY [[SPLIT6]].sub0_sub1_sub2
# CHECK-NEXT: undef [[SPLIT8:%[0-9]+]].sub0:av_512_align2 = COPY [[SPLIT4]].sub0 {
# CHECK-NEXT: internal [[SPLIT8]].sub2:av_512_align2 = COPY [[SPLIT4]].sub2
# CHECK-NEXT: }
# CHECK-NEXT: [[SPLIT9:%[0-9]+]].sub3:av_512_align2 = COPY [[SPLIT8]].sub2
# CHECK-NEXT: undef [[SPLIT10:%[0-9]+]].sub0_sub1_sub2_sub3:av_512_align2 = COPY [[SPLIT9]].sub0_sub1_sub2_sub3
# CHECK-NEXT: undef [[SPLIT13:%[0-9]+]].sub0_sub1_sub2_sub3:vreg_512_align2 = COPY [[SPLIT10]].sub0_sub1_sub2_sub3
# CHECK-NEXT: [[MFMA_USE1:%[0-9]+]].sub4:vreg_512_align2 = COPY [[SPLIT8]].sub0
# CHECK-NEXT: [[MFMA_USE1]].sub5:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]].sub6:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]].sub7:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]].sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]].sub9:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]].sub10:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]].sub11:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]].sub12:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]].sub13:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]].sub14:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]].sub15:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]]:vreg_512_align2 = V_MFMA_F32_16X16X1F32_mac_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, [[MFMA_USE1]], 0, 0, 0, implicit $mode, implicit $exec

---
name: inflated_reg_class_copy_use_after_free
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg: '$sgpr72_sgpr73_sgpr74_sgpr75'
  stackPtrOffsetReg: '$sgpr32'
  occupancy: 7
  vgprForAGPRCopy: '$vgpr255'
  sgprForEXECCopy: '$sgpr74_sgpr75'
body: |
  bb.0:
    liveins: $vgpr0, $sgpr4_sgpr5

    %0:vgpr_32 = IMPLICIT_DEF
    renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed undef renamable $sgpr4_sgpr5, 0, 0 :: (load (s64), addrspace 4)
    S_NOP 0, implicit-def undef %1.sub12_sub13_sub14_sub15:vreg_512_align2
    S_NOP 0, implicit-def %1.sub8_sub9_sub10_sub11:vreg_512_align2
    S_NOP 0, implicit-def %1.sub4_sub5_sub6_sub7:vreg_512_align2
    S_NOP 0, implicit-def %1.sub0_sub1_sub2_sub3:vreg_512_align2
    early-clobber %2:vreg_512_align2 = V_MFMA_F32_16X16X1F32_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, %1, 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec
    %1.sub2:vreg_512_align2 = COPY %2.sub3
    %1.sub3:vreg_512_align2 = COPY %2.sub2
    %1.sub4:vreg_512_align2 = COPY %2.sub0
    %1.sub5:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub6:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub7:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub9:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub10:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub11:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub12:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub13:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub14:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub15:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1:vreg_512_align2 = V_MFMA_F32_16X16X1F32_mac_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, %1, 0, 0, 0, implicit $mode, implicit $exec
    GLOBAL_STORE_DWORDX4_SADDR undef %3:vgpr_32, %1.sub12_sub13_sub14_sub15, undef renamable $sgpr0_sgpr1, 96, 0, implicit $exec :: (store (s128), addrspace 1)
    S_ENDPGM 0

...

# This test is similar to the one above, except it is still broken when the use
# instruction does not read the full set of lanes after one attempted fix.

# CHECK-LABEL: name: inflated_reg_class_copy_use_after_free_lane_subset
# CHECK: S_NOP 0, implicit-def [[ORIG_REG:%[0-9]+]].sub0_sub1_sub2_sub3
# CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
# CHECK-NEXT: [[RESTORE_0:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
# CHECK-NEXT: S_NOP 0, implicit-def early-clobber [[REG1:%[0-9]+]], implicit [[RESTORE_0]].sub0_sub1_sub2_sub3, implicit [[RESTORE_0]].sub4_sub5_sub6_sub7
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[REG1]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY [[REG1]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: undef [[SPLIT2:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT1]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT2]].sub0:av_512_align2 = COPY [[SPLIT1]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: SI_SPILL_AV512_SAVE [[SPLIT2]], %stack.1, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.1, align 4, addrspace 5)
# CHECK-NEXT: [[RESTORE_1:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
# CHECK-NEXT: undef [[SPLIT3:%[0-9]+]].sub0_sub1:av_512_align2 = COPY [[RESTORE_1]].sub0_sub1
# CHECK-NEXT: [[RESTORE_2:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.1, align 4, addrspace 5)
# CHECK-NEXT: undef [[SPLIT4:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[RESTORE_2]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT4]].sub0:av_512_align2 = COPY [[RESTORE_2]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: undef [[SPLIT5:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT4]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT5]].sub0:av_512_align2 = COPY [[SPLIT4]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: [[SPLIT3]].sub2:av_512_align2 = COPY [[SPLIT5]].sub3
# CHECK-NEXT: undef [[SPLIT6:%[0-9]+]].sub0_sub1_sub2:av_512_align2 = COPY [[SPLIT3]].sub0_sub1_sub2
# CHECK-NEXT: undef [[SPLIT7:%[0-9]+]].sub0_sub1_sub2:av_512_align2 = COPY [[SPLIT6]].sub0_sub1_sub2
# CHECK-NEXT: undef [[SPLIT8:%[0-9]+]].sub0:av_512_align2 = COPY [[SPLIT5]].sub0 {
# CHECK-NEXT: internal [[SPLIT8]].sub2:av_512_align2 = COPY [[SPLIT5]].sub2
# CHECK-NEXT: }
# CHECK-NEXT: [[SPLIT7]].sub3:av_512_align2 = COPY [[SPLIT8]].sub2
# CHECK-NEXT: undef [[SPLIT9:%[0-9]+]].sub0_sub1_sub2_sub3:av_512_align2 = COPY [[SPLIT7]].sub0_sub1_sub2_sub3
# CHECK-NEXT: undef [[LAST_USE:%[0-9]+]].sub0_sub1_sub2_sub3:vreg_512_align2 = COPY [[SPLIT9]].sub0_sub1_sub2_sub3
# CHECK-NEXT: [[LAST_USE]].sub4:vreg_512_align2 = COPY [[SPLIT8]].sub0
# CHECK-NEXT: [[LAST_USE]].sub5:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[LAST_USE]].sub6:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[LAST_USE]].sub7:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[LAST_USE]].sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[LAST_USE]].sub9:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[LAST_USE]].sub10:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[LAST_USE]].sub11:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[LAST_USE]].sub12:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[LAST_USE]].sub13:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[LAST_USE]].sub14:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[LAST_USE]].sub15:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: S_NOP 0, implicit-def [[LAST_USE]], implicit [[LAST_USE]].sub0_sub1_sub2_sub3, implicit [[LAST_USE]].sub4_sub5_sub6_sub7, implicit [[LAST_USE]].sub8_sub9_sub10_sub11

---
name: inflated_reg_class_copy_use_after_free_lane_subset
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg: '$sgpr72_sgpr73_sgpr74_sgpr75'
  stackPtrOffsetReg: '$sgpr32'
  occupancy: 7
  vgprForAGPRCopy: '$vgpr255'
  sgprForEXECCopy: '$sgpr74_sgpr75'
body: |
  bb.0:
    liveins: $vgpr0, $sgpr4_sgpr5

    %0:vgpr_32 = IMPLICIT_DEF
    renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed undef renamable $sgpr4_sgpr5, 0, 0 :: (load (s64), addrspace 4)
    S_NOP 0, implicit-def undef %1.sub12_sub13_sub14_sub15:vreg_512_align2
    S_NOP 0, implicit-def %1.sub8_sub9_sub10_sub11:vreg_512_align2
    S_NOP 0, implicit-def %1.sub4_sub5_sub6_sub7:vreg_512_align2
    S_NOP 0, implicit-def %1.sub0_sub1_sub2_sub3:vreg_512_align2
    S_NOP 0, implicit-def early-clobber %2:vreg_512_align2, implicit %1.sub0_sub1_sub2_sub3, implicit %1.sub4_sub5_sub6_sub7
    %1.sub2:vreg_512_align2 = COPY %2.sub3
    %1.sub3:vreg_512_align2 = COPY %2.sub2
    %1.sub4:vreg_512_align2 = COPY %2.sub0
    %1.sub5:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub6:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub7:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub9:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub10:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub11:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub12:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub13:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub14:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub15:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    S_NOP 0, implicit-def %1:vreg_512_align2, implicit %1.sub0_sub1_sub2_sub3, implicit %1.sub4_sub5_sub6_sub7, implicit %1.sub8_sub9_sub10_sub11
    GLOBAL_STORE_DWORDX4_SADDR undef %3:vgpr_32, %1.sub12_sub13_sub14_sub15, undef renamable $sgpr0_sgpr1, 96, 0, implicit $exec :: (store (s128), addrspace 1)
    S_ENDPGM 0

...
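A note on how the RUN line drives the failure (based on the flags it uses): -start-before=greedy,2 begins the pipeline just before the second run of the greedy allocator, -stress-regalloc=4 artificially caps each register class at four registers so allocation is guaranteed to run out and exercise the last chance recoloring rollback, and -verify-regalloc asks the allocator to run verification during allocation, which is what would trip over a dangling interval.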
