[AMDGPU] Support wwm-reg AV spill pseudos

cdevadas · cdevadas · commit 81827f8cfb22 · 2023-08-17T20:04:18.000+05:30
The wwm register spill pseudos are currently defined for VGPR_32 regclass. It causes a verifier error for gfx908 or above as the regalloc sometimes restores the values to the vector superclass AV_32. Fixing it by supporting AV wwm-spill pseudos as well. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D155646
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1589,11 +1589,15 @@ static unsigned getAVSpillSaveOpcode(unsigned Size) {
   }
 }
 
-static unsigned getWWMRegSpillSaveOpcode(unsigned Size) {
+static unsigned getWWMRegSpillSaveOpcode(unsigned Size,
+                                         bool IsVectorSuperClass) {
   // Currently, there is only 32-bit WWM register spills needed.
   if (Size != 4)
     llvm_unreachable("unknown wwm register spill size");
 
+  if (IsVectorSuperClass)
+    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
+
   return AMDGPU::SI_SPILL_WWM_V32_SAVE;
 }
 
@@ -1602,11 +1606,13 @@ static unsigned getVectorRegSpillSaveOpcode(Register Reg,
                                             unsigned Size,
                                             const SIRegisterInfo &TRI,
                                             const SIMachineFunctionInfo &MFI) {
+  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);
+
   // Choose the right opcode if spilling a WWM register.
   if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG))
-    return getWWMRegSpillSaveOpcode(Size);
+    return getWWMRegSpillSaveOpcode(Size, IsVectorSuperClass);
 
-  if (TRI.isVectorSuperClass(RC))
+  if (IsVectorSuperClass)
     return getAVSpillSaveOpcode(Size);
 
   return TRI.isAGPRClass(RC) ? getAGPRSpillSaveOpcode(Size)
@@ -1809,23 +1815,29 @@ static unsigned getAVSpillRestoreOpcode(unsigned Size) {
   }
 }
 
-static unsigned getWWMRegSpillRestoreOpcode(unsigned Size) {
+static unsigned getWWMRegSpillRestoreOpcode(unsigned Size,
+                                            bool IsVectorSuperClass) {
   // Currently, there is only 32-bit WWM register spills needed.
   if (Size != 4)
     llvm_unreachable("unknown wwm register spill size");
 
+  if (IsVectorSuperClass)
+    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
+
   return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
 }
 
 static unsigned
 getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC,
                                unsigned Size, const SIRegisterInfo &TRI,
                                const SIMachineFunctionInfo &MFI) {
+  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);
+
   // Choose the right opcode if restoring a WWM register.
   if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG))
-    return getWWMRegSpillRestoreOpcode(Size);
+    return getWWMRegSpillRestoreOpcode(Size, IsVectorSuperClass);
 
-  if (TRI.isVectorSuperClass(RC))
+  if (IsVectorSuperClass)
     return getAVSpillRestoreOpcode(Size);
 
   return TRI.isAGPRClass(RC) ? getAGPRSpillRestoreOpcode(Size)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -670,7 +670,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
 
   static bool isWWMRegSpillOpcode(uint16_t Opcode) {
     return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
-           Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE;
+           Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||
+           Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE ||
+           Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
   }
 
   static bool isDPP(const MachineInstr &MI) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -952,8 +952,10 @@ defm SI_SPILL_AV384 : SI_SPILL_VGPR <AV_384, 1>;
 defm SI_SPILL_AV512 : SI_SPILL_VGPR <AV_512, 1>;
 defm SI_SPILL_AV1024 : SI_SPILL_VGPR <AV_1024, 1>;
 
-let isConvergent = 1 in
-defm SI_SPILL_WWM_V32 : SI_SPILL_VGPR <VGPR_32>;
+let isConvergent = 1 in {
+  defm SI_SPILL_WWM_V32  : SI_SPILL_VGPR <VGPR_32>;
+  defm SI_SPILL_WWM_AV32 : SI_SPILL_VGPR <AV_32, 1>;
+}
 
 def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
   (outs SReg_64:$dst),
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1065,6 +1065,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
   case AMDGPU::SI_SPILL_AV32_RESTORE:
   case AMDGPU::SI_SPILL_WWM_V32_SAVE:
   case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
+  case AMDGPU::SI_SPILL_WWM_AV32_SAVE:
+  case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:
     return 1;
   default: llvm_unreachable("Invalid spill opcode");
   }
@@ -2144,7 +2146,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_AV96_SAVE:
     case AMDGPU::SI_SPILL_AV64_SAVE:
     case AMDGPU::SI_SPILL_AV32_SAVE:
-    case AMDGPU::SI_SPILL_WWM_V32_SAVE: {
+    case AMDGPU::SI_SPILL_WWM_V32_SAVE:
+    case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
       const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                          AMDGPU::OpName::vdata);
       assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
@@ -2211,7 +2214,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_AV384_RESTORE:
     case AMDGPU::SI_SPILL_AV512_RESTORE:
     case AMDGPU::SI_SPILL_AV1024_RESTORE:
-    case AMDGPU::SI_SPILL_WWM_V32_RESTORE: {
+    case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
+    case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {
       const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                          AMDGPU::OpName::vdata);
       assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-spill-superclass-pseudo.mir b/llvm/test/CodeGen/AMDGPU/wwm-spill-superclass-pseudo.mir
@@ -1,12 +1,5 @@
-# RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -start-before=si-lower-sgpr-spills -stop-after=greedy,1 -verify-machineinstrs --stress-regalloc=2 %s -o /dev/null 2>&1 | FileCheck %s
-
-# This test would fail as there is no wwm-register spill pseudo instructions supported for the vector superclass (AV).
-# Currently there is only VGPR_32 regclass spilling allowed for wwm-registers.
-
-# CHECK: Bad machine code: Illegal virtual register for instruction
-# CHECK: instruction: {{.*}}    [[AV_REG:%[0-9]+]]:av_32 = SI_SPILL_WWM_V32_RESTORE
-# CHECK-NEXT: - operand 0:   [[AV_REG]]:av_32
-# CHECK-NEXT: Expected a VGPR_32 register, but got a AV_32 register
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -start-before=si-lower-sgpr-spills -stop-after=greedy,1 -verify-machineinstrs --stress-regalloc=2 -o - %s | FileCheck -check-prefix GCN-REGALLOC %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -start-before=si-lower-sgpr-spills -stop-after=virtregrewriter,1 -verify-machineinstrs --stress-regalloc=2 -o - %s | FileCheck -check-prefix GCN-REWRITER %s
 
 name:            test_wwm_reg_superclass_spill
 tracksRegLiveness: true
@@ -19,6 +12,11 @@ machineFunctionInfo:
   sgprForEXECCopy:   '$sgpr100_sgpr101'
 body:             |
   bb.0:
+    ; GCN-REGALLOC-NUM-2: %{{[0-9]+}}:av_32 = SI_SPILL_WWM_AV32_RESTORE
+    ; GCN-REGALLOC: S_ENDPGM 0
+    ;
+    ; GCN-REWRITER-NUM-2: renamable $vgpr0 = SI_SPILL_WWM_AV32_RESTORE
+    ; GCN-REWRITER: S_ENDPGM 0
     $vgpr0 = IMPLICIT_DEF
     $sgpr0_sgpr1 = IMPLICIT_DEF
     %temp0:vgpr_32(s32) = COPY $vgpr0

Original file line number	Diff line number	Diff line change
`@@ -670,7 +670,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {`
`670`	`670`
`671`	`671`	`static bool isWWMRegSpillOpcode(uint16_t Opcode) {`
`672`	`672`	`return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE \|\|`
`673`		`- Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE;`
	`673`	`+ Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE \|\|`
	`674`	`+ Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE \|\|`
	`675`	`+ Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE;`
`674`	`676`	`}`
`675`	`677`
`676`	`678`	`static bool isDPP(const MachineInstr &MI) {`