Skip to content

RegAlloc: Fix verifier error after failed allocation #119690

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions llvm/lib/CodeGen/RegAllocBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis,
Matrix = &mat;
MRI->freezeReservedRegs();
RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
FailedVRegs.clear();
}

// Visit all the live registers. If they are already assigned to a physical
Expand Down Expand Up @@ -128,6 +129,7 @@ void RegAllocBase::allocatePhysRegs() {

// Keep going after reporting the error.
VRM->assignVirt2Phys(VirtReg->reg(), AvailablePhysReg);
FailedVRegs.insert(VirtReg->reg());
} else if (AvailablePhysReg)
Matrix->assign(*VirtReg, AvailablePhysReg);

Expand Down Expand Up @@ -161,6 +163,40 @@ void RegAllocBase::postOptimization() {
DeadRemats.clear();
}

void RegAllocBase::cleanupFailedVRegs() {
SmallSet<Register, 8> JunkRegs;

for (Register FailedReg : FailedVRegs) {
JunkRegs.insert(FailedReg);

MCRegister PhysReg = VRM->getPhys(FailedReg);
LiveInterval &FailedInterval = LIS->getInterval(FailedReg);

// The liveness information for the failed register and anything interfering
// with the physical register we arbitrarily chose is junk and needs to be
// deleted.
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(FailedInterval, *Units);
for (const LiveInterval *InterferingReg : Q.interferingVRegs())
JunkRegs.insert(InterferingReg->reg());
}
}

// TODO: Probably need to set undef on any physreg uses not associated with
// a virtual register.
for (Register JunkReg : JunkRegs) {
// We still should produce valid IR. Kill all the uses and reduce the live
// ranges so that we don't think it's possible to introduce kill flags
// later which will fail the verifier.
for (MachineOperand &MO : MRI->reg_operands(JunkReg)) {
if (MO.readsReg())
MO.setIsUndef(true);
}

LIS->shrinkToUses(&LIS->getInterval(JunkReg));
}
}

void RegAllocBase::enqueue(const LiveInterval *LI) {
const Register Reg = LI->reg();

Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/CodeGen/RegAllocBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#define LLVM_LIB_CODEGEN_REGALLOCBASE_H

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegAllocCommon.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
Expand Down Expand Up @@ -81,6 +82,7 @@ class RegAllocBase {
/// always available for the remat of all the siblings of the original reg.
SmallPtrSet<MachineInstr *, 32> DeadRemats;

SmallSet<Register, 2> FailedVRegs;
RegAllocBase(const RegAllocFilterFunc F = nullptr)
: shouldAllocateRegisterImpl(F) {}

Expand All @@ -104,6 +106,10 @@ class RegAllocBase {
// rematerialization.
virtual void postOptimization();

/// Perform cleanups on registers that failed to allocate. This hacks on the
/// liveness in order to avoid spurious verifier errors in later passes.
void cleanupFailedVRegs();

// Get a temporary reference to a Spiller instance.
virtual Spiller &spiller() = 0;

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/RegAllocBasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {

allocatePhysRegs();
postOptimization();
cleanupFailedVRegs();

// Diagnostic output before rewriting
LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/RegAllocGreedy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2795,6 +2795,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
if (VerifyEnabled)
MF->verify(this, "Before post optimization", &errs());
postOptimization();
cleanupFailedVRegs();
reportStats();

releaseMemory();
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

...

# CHECK: S_NOP 0, implicit-def renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit-def renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3
# CHECK: S_NOP 0, implicit killed renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3
# CHECK: S_NOP 0, implicit-def renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit-def renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit-def dead renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit-def dead renamable $vgpr0_vgpr1_vgpr2_vgpr3
# CHECK: S_NOP 0, implicit killed renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3

---
name: foo
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@
# CHECK-LABEL: name: inflated_reg_class_copy_use_after_free
# CHECK: S_NOP 0, implicit-def [[ORIG_REG:%[0-9]+]].sub0_sub1_sub2_sub3
# CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
# CHECK-NEXT: [[RESTORE0:%[0-9]+]]:vreg_512_align2 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
# CHECK-NEXT: early-clobber [[MFMA0:%[0-9]+]]:vreg_512_align2 = V_MFMA_F32_16X16X1F32_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, [[RESTORE0]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[MFMA0]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY [[MFMA0]].sub0
# CHECK-NEXT: dead [[RESTORE0:%[0-9]+]]:vreg_512_align2 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
# CHECK-NEXT: dead early-clobber [[MFMA0:%[0-9]+]]:vreg_512_align2 = V_MFMA_F32_16X16X1F32_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, undef [[RESTORE0]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY undef [[MFMA0]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY undef [[MFMA0]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0
Expand Down Expand Up @@ -118,10 +118,10 @@ body: |
# CHECK-LABEL: name: inflated_reg_class_copy_use_after_free_lane_subset
# CHECK: S_NOP 0, implicit-def [[ORIG_REG:%[0-9]+]].sub0_sub1_sub2_sub3
# CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
# CHECK-NEXT: [[RESTORE_0:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
# CHECK-NEXT: S_NOP 0, implicit-def early-clobber [[REG1:%[0-9]+]], implicit [[RESTORE_0]].sub0_sub1_sub2_sub3, implicit [[RESTORE_0]].sub4_sub5_sub6_sub7
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[REG1]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY [[REG1]].sub0
# CHECK-NEXT: dead [[RESTORE_0:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
# CHECK-NEXT: S_NOP 0, implicit-def dead early-clobber [[REG1:%[0-9]+]], implicit undef [[RESTORE_0]].sub0_sub1_sub2_sub3, implicit undef [[RESTORE_0]].sub4_sub5_sub6_sub7
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY undef [[REG1]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY undef [[REG1]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0
Expand Down
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/AMDGPU/issue48473.mir
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@
# %25 to $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67

# CHECK-LABEL: name: issue48473
# CHECK: S_NOP 0, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
# CHECK: S_NOP 0, implicit undef renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit undef renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67


---
name: issue48473
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=greedy -verify-machineinstrs=0 -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=basic -verify-machineinstrs=0 -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=fast -verify-machineinstrs=0 -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s

; FIXME: Should pass verifier after failure.
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=greedy -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=basic -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=fast -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s

declare <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32, i32, <32 x i32>, i32 immarg, i32 immarg, i32 immarg)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
# RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs=0 -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s
# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -o /dev/null 2>&1 | FileCheck -check-prefixes=ERR,VERIFIER %s

# FIXME: We should not produce a verifier error after erroring
# RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -filetype=null 2>&1 | FileCheck -check-prefix=ERR %s

# ERR: error: inline assembly requires more registers than available
# VERIFIER: *** Bad machine code: Using an undefined physical register ***
# ERR-NOT: Bad machine code

# This testcase cannot be compiled with the enforced register
# budget. Previously, tryLastChanceRecoloring would assert here. It
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -vgpr-regalloc=basic -sgpr-regalloc=basic -start-before=regallocbasic,0 -stop-after=virtregrewriter,2 -verify-regalloc -o - %s 2> %t.basic.err | FileCheck -check-prefix=BASIC %s
# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=greedy,0 -stop-after=virtregrewriter,2 -verify-regalloc -o - %s 2> %t.greedy.err | FileCheck -check-prefix=GREEDY %s

# RUN: FileCheck -check-prefix=ERR -implicit-check-not=error %s < %t.basic.err
# RUN: FileCheck -check-prefix=ERR -implicit-check-not=error %s < %t.greedy.err

# This testcase must fail register allocation. It should also not
# produce a verifier error after doing so. Previously, it would not
# properly update the liveness for the dummy selected register. As a
# result, VirtRegRewriter would incorrectly add kill flags which
# combined with other uses of the physical register produced a
# verifier error.

# ERR: error: <unknown>:0:0: ran out of registers during register allocation

# GREEDY: SI_SPILL_V256_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
# GREEDY-NEXT: SI_SPILL_V512_SAVE undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19
# GREEDY-NEXT: SI_SPILL_V128_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3

# GREEDY: dead renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19 = SI_SPILL_V512_RESTORE
# GREEDY: dead renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_V256_RESTORE
# GREEDY: S_NOP 0, implicit undef renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3
# GREEDY: S_NOP 0, implicit killed renamable $vgpr20_vgpr21


# BASIC: SI_SPILL_V128_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3
# BASIC: SI_SPILL_V256_SAVE killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
# BASIC: SI_SPILL_V512_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
# BASIC: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.{{[0-9]+}}, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.{{[0-9]+}}, align 4, addrspace 5)
# BASIC: dead renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE
# BASIC: renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = SI_SPILL_V256_RESTORE
# BASIC: dead renamable $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE
# BASIC: S_NOP 0, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit killed renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3
# BASIC: renamable $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE

--- |
define void @killed_reg_after_regalloc_failure() #0 {
ret void
}

attributes #0 = { "amdgpu-waves-per-eu"="10,10" }

...
---
name: killed_reg_after_regalloc_failure
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
frameOffsetReg: '$sgpr33'
stackPtrOffsetReg: '$sgpr32'
body: |
bb.0:
S_NOP 0, implicit-def %0:vreg_512, implicit-def %1:vreg_256, implicit-def %2:vreg_128
S_NOP 0, implicit-def %3:vreg_64
S_NOP 0, implicit %0, implicit %1, implicit %2
S_NOP 0, implicit %3
S_ENDPGM 0

...
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
; RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR -implicit-check-not=error %s

; ERR: error: inline assembly requires more registers than available
; ERR-NOT: ERROR
; ERR-NOT: Bad machine code

; This test requires respecting undef on the spill source operand when
; expanding the pseudos to avoid all verifier errors

%asm.output = type { <16 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, <3 x i32> }

define void @foo(<32 x i32> addrspace(1)* %arg) #0 {
%agpr0 = call i32 asm sideeffect "; def $0","=${a0}"()
%asm = call %asm.output asm sideeffect "; def $0 $1 $2 $3 $4","=v,=v,=v,=v,=v"()
%vgpr0 = extractvalue %asm.output %asm, 0
%vgpr1 = extractvalue %asm.output %asm, 1
%vgpr2 = extractvalue %asm.output %asm, 2
%vgpr3 = extractvalue %asm.output %asm, 3
%vgpr4 = extractvalue %asm.output %asm, 4
call void asm sideeffect "; clobber", "~{a[0:31]},~{v[0:31]}"()
call void asm sideeffect "; use $0","v"(<16 x i32> %vgpr0)
call void asm sideeffect "; use $0","v"(<8 x i32> %vgpr1)
call void asm sideeffect "; use $0","v"(<4 x i32> %vgpr2)
call void asm sideeffect "; use $0","v"(<3 x i32> %vgpr3)
call void asm sideeffect "; use $0","v"(<3 x i32> %vgpr4)
call void asm sideeffect "; use $0","{a1}"(i32 %agpr0)
ret void
}

attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs < %s 2>&1 | FileCheck %s
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s

; This testcase fails register allocation at the same time it performs
; virtual register splitting (by introducing VGPR to AGPR copies). We
Expand All @@ -11,7 +11,6 @@
; it takes the first avialable register.

; CHECK: error: <unknown>:0:0: ran out of registers during register allocation
; CHECK: Bad machine code: Using an undefined physical register
define amdgpu_kernel void @alloc_failure_with_split_vregs(float %v0, float %v1) #0 {
%agpr0 = call float asm sideeffect "; def $0", "=${a0}"()
%agpr.vec = insertelement <16 x float> undef, float %agpr0, i32 0
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/inline-asm-assertion.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; RUN: not llc -verify-machineinstrs -O0 < %s 2>&1 | FileCheck %s
; RUN: not --crash llc -verify-machineinstrs -O2 < %s 2>&1 | FileCheck %s --check-prefix=CHECK-O2
; RUN: not llc -verify-machineinstrs -O2 < %s 2>&1 | FileCheck %s --check-prefix=CHECK-O2
; CHECK: error: inline assembly requires more registers than available
; CHECK: .size main, .Lfunc_end0-main
; CHECK-O2: error: inline assembly requires more registers than available
Expand Down
Loading