Skip to content

Commit c8e70d9

Browse files
committed
Reapply "RegAlloc: Fix verifier error after failed allocation (#119690)"
This reverts commit 0c50054. Reapply with more fixes to avoid expensive_checks failures. Make sure to call splitSeparateComponents after shrinkToUses, and update the VirtRegMap with the split registers. Also set undef on the uses of all physical registers that alias the assigned register.
1 parent d93b454 commit c8e70d9

10 files changed

+165
-13
lines changed

llvm/lib/CodeGen/RegAllocBase.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis,
6565
Matrix = &mat;
6666
MRI->freezeReservedRegs();
6767
RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
68+
FailedVRegs.clear();
6869
}
6970

7071
// Visit all the live registers. If they are already assigned to a physical
@@ -128,6 +129,7 @@ void RegAllocBase::allocatePhysRegs() {
128129

129130
// Keep going after reporting the error.
130131
VRM->assignVirt2Phys(VirtReg->reg(), AvailablePhysReg);
132+
FailedVRegs.insert(VirtReg->reg());
131133
} else if (AvailablePhysReg)
132134
Matrix->assign(*VirtReg, AvailablePhysReg);
133135

@@ -161,6 +163,59 @@ void RegAllocBase::postOptimization() {
161163
DeadRemats.clear();
162164
}
163165

166+
/// Clean up after every virtual register recorded in FailedVRegs.
///
/// For each failed register this gathers the virtual registers the matrix
/// reports as interfering with it on the register units of its recorded
/// physical register, and marks reading operands of that physical register's
/// aliases as undef. Every gathered register then has its reading operands
/// marked undef and its live interval shrunk; intervals split off as a result
/// are registered in the VirtRegMap with the same physical register.
void RegAllocBase::cleanupFailedVRegs() {
167+
// Virtual registers to scrub: the failed registers themselves plus anything
// found interfering with them in the first loop below.
SmallSet<Register, 8> JunkRegs;
168+
169+
for (Register FailedReg : FailedVRegs) {
170+
JunkRegs.insert(FailedReg);
171+
172+
// Physical register the VirtRegMap holds for the failed register.
MCRegister PhysReg = VRM->getPhys(FailedReg);
173+
LiveInterval &FailedInterval = LIS->getInterval(FailedReg);
174+
175+
// The liveness information for the failed register and anything interfering
176+
// with the physical register we arbitrarily chose is junk and needs to be
177+
// deleted.
178+
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
179+
LiveIntervalUnion::Query &Q = Matrix->query(FailedInterval, *Units);
180+
for (const LiveInterval *InterferingReg : Q.interferingVRegs())
181+
JunkRegs.insert(InterferingReg->reg());
182+
}
183+
184+
// The liveness of the assigned physical register is also now unreliable.
185+
// NOTE(review): the trailing `true` is presumably the iterator's
// include-self flag, so PhysReg itself is visited too -- confirm.
for (MCRegAliasIterator Aliases(PhysReg, TRI, true); Aliases.isValid();
186+
++Aliases) {
187+
for (MachineOperand &MO : MRI->reg_operands(*Aliases)) {
188+
if (MO.readsReg())
189+
MO.setIsUndef(true);
190+
}
191+
}
192+
}
193+
194+
for (Register JunkReg : JunkRegs) {
195+
// Fetch the assignment up front; it is reused for any registers split off
// from JunkReg further down.
MCRegister PhysReg = VRM->getPhys(JunkReg);
196+
// We still should produce valid IR. Kill all the uses and reduce the live
197+
// ranges so that we don't think it's possible to introduce kill flags
198+
// later which will fail the verifier.
199+
for (MachineOperand &MO : MRI->reg_operands(JunkReg)) {
200+
if (MO.readsReg())
201+
MO.setIsUndef(true);
202+
}
203+
204+
LiveInterval &JunkLI = LIS->getInterval(JunkReg);
205+
// Splitting only happens when shrinkToUses returns true.
// NOTE(review): presumably that signals the interval may now consist of
// separate components -- confirm against LiveIntervals.
if (LIS->shrinkToUses(&JunkLI)) {
206+
SmallVector<LiveInterval *, 8> SplitLIs;
207+
LIS->splitSeparateComponents(JunkLI, SplitLIs);
208+
209+
// NOTE(review): presumably resizes the VirtRegMap to cover the registers
// created by the split -- confirm.
VRM->grow();
210+
Register Original = VRM->getOriginal(JunkReg);
211+
// Record each split register as derived from the same original register
// and give it the physical register JunkReg was mapped to.
for (LiveInterval *SplitLI : SplitLIs) {
212+
VRM->setIsSplitFromReg(SplitLI->reg(), Original);
213+
VRM->assignVirt2Phys(SplitLI->reg(), PhysReg);
214+
}
215+
}
216+
}
217+
}
218+
164219
void RegAllocBase::enqueue(const LiveInterval *LI) {
165220
const Register Reg = LI->reg();
166221

llvm/lib/CodeGen/RegAllocBase.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#define LLVM_LIB_CODEGEN_REGALLOCBASE_H
3838

3939
#include "llvm/ADT/SmallPtrSet.h"
40+
#include "llvm/ADT/SmallSet.h"
4041
#include "llvm/CodeGen/MachineRegisterInfo.h"
4142
#include "llvm/CodeGen/RegAllocCommon.h"
4243
#include "llvm/CodeGen/RegisterClassInfo.h"
@@ -81,6 +82,7 @@ class RegAllocBase {
8182
/// always available for the remat of all the siblings of the original reg.
8283
SmallPtrSet<MachineInstr *, 32> DeadRemats;
8384

85+
SmallSet<Register, 2> FailedVRegs;
8486
RegAllocBase(const RegAllocFilterFunc F = nullptr)
8587
: shouldAllocateRegisterImpl(F) {}
8688

@@ -104,6 +106,10 @@ class RegAllocBase {
104106
// rematerialization.
105107
virtual void postOptimization();
106108

109+
/// Perform cleanups on registers that failed to allocate. This hacks on the
110+
/// liveness in order to avoid spurious verifier errors in later passes.
111+
void cleanupFailedVRegs();
112+
107113
// Get a temporary reference to a Spiller instance.
108114
virtual Spiller &spiller() = 0;
109115

llvm/lib/CodeGen/RegAllocBasic.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
329329

330330
allocatePhysRegs();
331331
postOptimization();
332+
cleanupFailedVRegs();
332333

333334
// Diagnostic output before rewriting
334335
LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");

llvm/lib/CodeGen/RegAllocGreedy.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2795,6 +2795,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
27952795
if (VerifyEnabled)
27962796
MF->verify(this, "Before post optimization", &errs());
27972797
postOptimization();
2798+
cleanupFailedVRegs();
27982799
reportStats();
27992800

28002801
releaseMemory();

llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@
1313
ret void
1414
}
1515

16-
attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
16+
attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
1717

1818
...
1919

20-
# CHECK: S_NOP 0, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def $vgpr28_vgpr29_vgpr30_vgpr31, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
21-
# CHECK: S_NOP 0, implicit killed undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit killed undef $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit killed undef $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed undef $vgpr28_vgpr29_vgpr30_vgpr31, implicit killed undef $vgpr0_vgpr1_vgpr2_vgpr3
20+
# CHECK: S_NOP 0, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit-def dead $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def $vgpr28_vgpr29_vgpr30_vgpr31, implicit-def dead $vgpr0_vgpr1_vgpr2_vgpr3
21+
# CHECK: S_NOP 0, implicit killed undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit killed undef $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed undef $vgpr28_vgpr29_vgpr30_vgpr31, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3
2222

2323
---
2424
name: foo

llvm/test/CodeGen/AMDGPU/inflated-reg-class-snippet-copy-use-after-free.mir

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@
2727
# CHECK-LABEL: name: inflated_reg_class_copy_use_after_free
2828
# CHECK: S_NOP 0, implicit-def [[ORIG_REG:%[0-9]+]].sub0_sub1_sub2_sub3
2929
# CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
30-
# CHECK-NEXT: [[RESTORE0:%[0-9]+]]:vreg_512_align2 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
31-
# CHECK-NEXT: early-clobber [[MFMA0:%[0-9]+]]:vreg_512_align2 = V_MFMA_F32_16X16X1F32_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, [[RESTORE0]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec
32-
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[MFMA0]].sub2_sub3 {
33-
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY [[MFMA0]].sub0
30+
# CHECK-NEXT: dead [[RESTORE0:%[0-9]+]]:vreg_512_align2 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
31+
# CHECK-NEXT: dead early-clobber [[MFMA0:%[0-9]+]]:vreg_512_align2 = V_MFMA_F32_16X16X1F32_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, undef [[RESTORE0]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec
32+
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY undef [[MFMA0]].sub2_sub3 {
33+
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY undef [[MFMA0]].sub0
3434
# CHECK-NEXT: }
3535
# CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 {
3636
# CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0
@@ -118,10 +118,10 @@ body: |
118118
# CHECK-LABEL: name: inflated_reg_class_copy_use_after_free_lane_subset
119119
# CHECK: S_NOP 0, implicit-def [[ORIG_REG:%[0-9]+]].sub0_sub1_sub2_sub3
120120
# CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
121-
# CHECK-NEXT: [[RESTORE_0:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
122-
# CHECK-NEXT: S_NOP 0, implicit-def early-clobber [[REG1:%[0-9]+]], implicit [[RESTORE_0]].sub0_sub1_sub2_sub3, implicit [[RESTORE_0]].sub4_sub5_sub6_sub7
123-
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[REG1]].sub2_sub3 {
124-
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY [[REG1]].sub0
121+
# CHECK-NEXT: dead [[RESTORE_0:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
122+
# CHECK-NEXT: S_NOP 0, implicit-def dead early-clobber [[REG1:%[0-9]+]], implicit undef [[RESTORE_0]].sub0_sub1_sub2_sub3, implicit undef [[RESTORE_0]].sub4_sub5_sub6_sub7
123+
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY undef [[REG1]].sub2_sub3 {
124+
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY undef [[REG1]].sub0
125125
# CHECK-NEXT: }
126126
# CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 {
127127
# CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0

llvm/test/CodeGen/AMDGPU/issue48473.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
# %25 to $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
4444

4545
# CHECK-LABEL: name: issue48473
46-
# CHECK: S_NOP 0, implicit killed undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed undef $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed undef $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed undef $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed undef $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed undef $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed undef $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed undef $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed undef $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed undef $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit killed undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed undef $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed undef $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed undef $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit undef $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed undef $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed undef $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed undef $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
46+
# CHECK: S_NOP 0, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed undef $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed undef $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed undef $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed undef $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed undef $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed undef $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed undef $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed undef $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed undef $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed undef $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed undef $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed undef $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit undef $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed undef $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed undef $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed undef $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
4747

4848
---
4949
name: issue48473
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -vgpr-regalloc=basic -sgpr-regalloc=basic -start-before=regallocbasic,0 -stop-after=virtregrewriter,2 -verify-regalloc -o - %s 2> %t.basic.err | FileCheck -check-prefix=BASIC %s
2+
# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=greedy,0 -stop-after=virtregrewriter,2 -verify-regalloc -o - %s 2> %t.greedy.err | FileCheck -check-prefix=GREEDY %s
3+
4+
# RUN: FileCheck -check-prefix=ERR -implicit-check-not=error %s < %t.basic.err
5+
# RUN: FileCheck -check-prefix=ERR -implicit-check-not=error %s < %t.greedy.err
6+
7+
# This testcase must fail register allocation. It should also not
8+
# produce a verifier error after doing so. Previously, it would not
9+
# properly update the liveness for the dummy selected register. As a
10+
# result, VirtRegRewriter would incorrectly add kill flags which
11+
# combined with other uses of the physical register produced a
12+
# verifier error.
13+
14+
# ERR: error: <unknown>:0:0: ran out of registers during register allocation
15+
16+
# GREEDY: SI_SPILL_V256_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
17+
# GREEDY-NEXT: SI_SPILL_V512_SAVE undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19
18+
# GREEDY-NEXT: SI_SPILL_V128_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3
19+
20+
# GREEDY: dead $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19 = SI_SPILL_V512_RESTORE
21+
# GREEDY: dead $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_V256_RESTORE
22+
# GREEDY: S_NOP 0, implicit undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3
23+
# GREEDY: S_NOP 0, implicit killed undef $vgpr20_vgpr21
24+
25+
26+
# BASIC: SI_SPILL_V128_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3
27+
# BASIC: SI_SPILL_V256_SAVE killed undef $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
28+
# BASIC: SI_SPILL_V512_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
29+
# BASIC: SI_SPILL_V64_SAVE killed undef $vgpr0_vgpr1, %stack.{{[0-9]+}}, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.{{[0-9]+}}, align 4, addrspace 5)
30+
# BASIC: dead $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE
31+
# BASIC: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = SI_SPILL_V256_RESTORE
32+
# BASIC: dead $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE
33+
# BASIC: S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit killed undef $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3
34+
# BASIC: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE
35+
36+
--- |
37+
define void @killed_reg_after_regalloc_failure() #0 {
38+
ret void
39+
}
40+
41+
attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
42+
43+
...
44+
---
45+
name: killed_reg_after_regalloc_failure
46+
tracksRegLiveness: true
47+
machineFunctionInfo:
48+
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
49+
frameOffsetReg: '$sgpr33'
50+
stackPtrOffsetReg: '$sgpr32'
51+
body: |
52+
bb.0:
53+
S_NOP 0, implicit-def %0:vreg_512, implicit-def %1:vreg_256, implicit-def %2:vreg_128
54+
S_NOP 0, implicit-def %3:vreg_64
55+
S_NOP 0, implicit %0, implicit %1, implicit %2
56+
S_NOP 0, implicit %3
57+
S_ENDPGM 0
58+
59+
...
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
; RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR -implicit-check-not=error %s
2+
3+
; ERR: error: inline assembly requires more registers than available
4+
; ERR-NOT: ERROR
5+
; ERR-NOT: Bad machine code
6+
7+
; This test requires respecting undef on the spill source operand when
8+
; expanding the pseudos to avoid all verifier errors
9+
10+
%asm.output = type { <16 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, <3 x i32> }
11+
12+
define void @foo(<32 x i32> addrspace(1)* %arg) #0 {
13+
%agpr0 = call i32 asm sideeffect "; def $0","=${a0}"()
14+
%asm = call %asm.output asm sideeffect "; def $0 $1 $2 $3 $4","=v,=v,=v,=v,=v"()
15+
%vgpr0 = extractvalue %asm.output %asm, 0
16+
%vgpr1 = extractvalue %asm.output %asm, 1
17+
%vgpr2 = extractvalue %asm.output %asm, 2
18+
%vgpr3 = extractvalue %asm.output %asm, 3
19+
%vgpr4 = extractvalue %asm.output %asm, 4
20+
call void asm sideeffect "; clobber", "~{a[0:31]},~{v[0:31]}"()
21+
call void asm sideeffect "; use $0","v"(<16 x i32> %vgpr0)
22+
call void asm sideeffect "; use $0","v"(<8 x i32> %vgpr1)
23+
call void asm sideeffect "; use $0","v"(<4 x i32> %vgpr2)
24+
call void asm sideeffect "; use $0","v"(<3 x i32> %vgpr3)
25+
call void asm sideeffect "; use $0","v"(<3 x i32> %vgpr4)
26+
call void asm sideeffect "; use $0","{a1}"(i32 %agpr0)
27+
ret void
28+
}
29+
30+
attributes #0 = { "amdgpu-waves-per-eu"="8,8" }

llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs < %s 2>&1 | FileCheck %s
1+
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
22

33
; This testcase fails register allocation at the same time it performs
44
; virtual register splitting (by introducing VGPR to AGPR copies). We

0 commit comments

Comments
 (0)