Skip to content

Commit f9cd789

Browse files
authored
[AMDGPU] Add pseudo instructions for SGPR spill to VGPR (#69923)
For a future patch, it is important that the lowered SGPR spills are still recognized as spill instructions during regalloc. Directly lowering them into V_WRITELANE/V_READLANE would not allow us to attach the SPILL flag to the resulting instructions. This patch introduces pseudo instructions with the SGPRSpill flag set in their Desc. They are lowered to the equivalent V_WRITELANE/V_READLANE instructions later, during post-RA pseudo expansion.
1 parent 9bcb30d commit f9cd789

34 files changed

+852
-821
lines changed

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,8 @@ class PrologEpilogSGPRSpillBuilder {
274274
Register SubReg = NumSubRegs == 1
275275
? SuperReg
276276
: Register(TRI.getSubReg(SuperReg, SplitParts[I]));
277-
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[I].VGPR)
277+
BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
278+
Spill[I].VGPR)
278279
.addReg(SubReg)
279280
.addImm(Spill[I].Lane)
280281
.addReg(Spill[I].VGPR, RegState::Undef);
@@ -319,7 +320,7 @@ class PrologEpilogSGPRSpillBuilder {
319320
Register SubReg = NumSubRegs == 1
320321
? SuperReg
321322
: Register(TRI.getSubReg(SuperReg, SplitParts[I]));
322-
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
323+
BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
323324
.addReg(Spill[I].VGPR)
324325
.addImm(Spill[I].Lane);
325326
}
@@ -1554,12 +1555,10 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
15541555
// TODO: Handle this elsewhere at an early point. Walking through all MBBs
15551556
// here would be a bad heuristic. A better way should be by calling
15561557
// allocateWWMSpill during the regalloc pipeline whenever a physical
1557-
// register is allocated for the intended virtual registers. That will
1558-
// also help excluding the general use of WRITELANE/READLANE intrinsics
1559-
// that won't really need any such special handling.
1560-
if (MI.getOpcode() == AMDGPU::V_WRITELANE_B32)
1558+
// register is allocated for the intended virtual registers.
1559+
if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR)
15611560
MFI->allocateWWMSpill(MF, MI.getOperand(0).getReg());
1562-
else if (MI.getOpcode() == AMDGPU::V_READLANE_B32)
1561+
else if (MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
15631562
MFI->allocateWWMSpill(MF, MI.getOperand(1).getReg());
15641563
else if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
15651564
NeedExecCopyReservedReg = true;

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2111,6 +2111,14 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
21112111
MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));
21122112
break;
21132113

2114+
case AMDGPU::SI_SPILL_S32_TO_VGPR:
2115+
MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
2116+
break;
2117+
2118+
case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
2119+
MI.setDesc(get(AMDGPU::V_READLANE_B32));
2120+
break;
2121+
21142122
case AMDGPU::V_MOV_B64_PSEUDO: {
21152123
Register Dst = MI.getOperand(0).getReg();
21162124
Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
@@ -4014,7 +4022,9 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
40144022
// However, executing them with EXEC = 0 causes them to operate on undefined
40154023
// data, which we avoid by returning true here.
40164024
if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
4017-
Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32)
4025+
Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
4026+
Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
4027+
Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
40184028
return true;
40194029

40204030
return false;
@@ -4408,7 +4418,9 @@ static bool shouldReadExec(const MachineInstr &MI) {
44084418
if (SIInstrInfo::isVALU(MI)) {
44094419
switch (MI.getOpcode()) {
44104420
case AMDGPU::V_READLANE_B32:
4421+
case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
44114422
case AMDGPU::V_WRITELANE_B32:
4423+
case AMDGPU::SI_SPILL_S32_TO_VGPR:
44124424
return false;
44134425
}
44144426

@@ -9071,7 +9083,9 @@ SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
90719083
return InstructionUniformity::NeverUniform;
90729084

90739085
unsigned opcode = MI.getOpcode();
9074-
if (opcode == AMDGPU::V_READLANE_B32 || opcode == AMDGPU::V_READFIRSTLANE_B32)
9086+
if (opcode == AMDGPU::V_READLANE_B32 ||
9087+
opcode == AMDGPU::V_READFIRSTLANE_B32 ||
9088+
opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
90759089
return InstructionUniformity::AlwaysUniform;
90769090

90779091
if (isCopyInstr(MI)) {

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -875,6 +875,28 @@ defm SI_SPILL_S384 : SI_SPILL_SGPR <SReg_384>;
875875
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
876876
defm SI_SPILL_S1024 : SI_SPILL_SGPR <SReg_1024>;
877877

878+
let SGPRSpill = 1, VALU = 1, isConvergent = 1 in {
879+
def SI_SPILL_S32_TO_VGPR : PseudoInstSI <(outs VGPR_32:$vdst),
880+
(ins SReg_32:$src0, i32imm:$src1, VGPR_32:$vdst_in)> {
881+
let Size = 4;
882+
let FixedSize = 1;
883+
let IsNeverUniform = 1;
884+
let hasSideEffects = 0;
885+
let mayLoad = 0;
886+
let mayStore = 0;
887+
let Constraints = "$vdst = $vdst_in";
888+
}
889+
890+
def SI_RESTORE_S32_FROM_VGPR : PseudoInstSI <(outs SReg_32:$sdst),
891+
(ins VGPR_32:$src0, i32imm:$src1)> {
892+
let Size = 4;
893+
let FixedSize = 1;
894+
let hasSideEffects = 0;
895+
let mayLoad = 0;
896+
let mayStore = 0;
897+
}
898+
} // End SGPRSpill = 1, VALU = 1, isConvergent = 1
899+
878900
// VGPR or AGPR spill instructions. In case of AGPR spilling a temp register
879901
// needs to be used and an extra instruction to move between VGPR and AGPR.
880902
// UsesTmp adds to the total size of an expanded spill in this case.

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1769,7 +1769,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
17691769
// Mark the "old value of vgpr" input undef only if this is the first sgpr
17701770
// spill to this specific vgpr in the first basic block.
17711771
auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1772-
SB.TII.get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
1772+
SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), Spill.VGPR)
17731773
.addReg(SubReg, getKillRegState(UseKill))
17741774
.addImm(Spill.Lane)
17751775
.addReg(Spill.VGPR);
@@ -1815,8 +1815,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
18151815
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
18161816

18171817
MachineInstrBuilder WriteLane =
1818-
BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
1819-
SB.TmpVGPR)
1818+
BuildMI(*SB.MBB, MI, SB.DL,
1819+
SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), SB.TmpVGPR)
18201820
.addReg(SubReg, SubKillState)
18211821
.addImm(i % PVD.PerVGPR)
18221822
.addReg(SB.TmpVGPR, TmpVGPRFlags);
@@ -1877,8 +1877,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
18771877
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
18781878

18791879
SpilledReg Spill = VGPRSpills[i];
1880-
auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
1881-
SubReg)
1880+
auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1881+
SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
18821882
.addReg(Spill.VGPR)
18831883
.addImm(Spill.Lane);
18841884
if (SB.NumSubRegs > 1 && i == 0)
@@ -1911,7 +1911,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
19111911

19121912
bool LastSubReg = (i + 1 == e);
19131913
auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1914-
SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
1914+
SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
19151915
.addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
19161916
.addImm(i);
19171917
if (SB.NumSubRegs > 1 && i == 0)

llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@ body: |
6060
; GCN-NEXT: renamable $vgpr46 = COPY $vgpr1, implicit $exec
6161
; GCN-NEXT: renamable $vgpr45 = COPY $vgpr0, implicit $exec
6262
; GCN-NEXT: renamable $sgpr16_sgpr17 = IMPLICIT_DEF
63-
; GCN-NEXT: $vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
64-
; GCN-NEXT: $vgpr40 = V_WRITELANE_B32 $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31
63+
; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
64+
; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31
6565
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15, implicit $vgpr14_vgpr15 :: (store (s32) into %stack.1, addrspace 5)
6666
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec, implicit killed $vgpr14_vgpr15 :: (store (s32) into %stack.1 + 4, addrspace 5)
6767
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit-def $vgpr10_vgpr11, implicit $vgpr10_vgpr11 :: (store (s32) into %stack.2, addrspace 5)
@@ -124,8 +124,8 @@ body: |
124124
125125
ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
126126
renamable $sgpr16_sgpr17 = IMPLICIT_DEF
127-
$vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
128-
$vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr40, implicit killed $sgpr30_sgpr31
127+
$vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
128+
$vgpr40 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr40, implicit killed $sgpr30_sgpr31
129129
dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu, implicit-def dead $vgpr0
130130
%8:vreg_64 = nofpexcept V_FMA_F64_e64 0, %7, 0, %6, 0, %5, 0, 0, implicit $mode, implicit $exec
131131
ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32

llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
2222
; REGALLOC-NEXT: renamable $sgpr6_sgpr7 = COPY $exec, implicit-def $exec
2323
; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_AND_B64 renamable $sgpr6_sgpr7, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
2424
; REGALLOC-NEXT: renamable $sgpr6_sgpr7 = S_XOR_B64 renamable $sgpr4_sgpr5, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
25-
; REGALLOC-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr6, 0, $vgpr0, implicit-def $sgpr6_sgpr7, implicit $sgpr6_sgpr7
26-
; REGALLOC-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr7, 1, $vgpr0, implicit killed $sgpr6_sgpr7
25+
; REGALLOC-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr6, 0, $vgpr0, implicit-def $sgpr6_sgpr7, implicit $sgpr6_sgpr7
26+
; REGALLOC-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr7, 1, $vgpr0, implicit killed $sgpr6_sgpr7
2727
; REGALLOC-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr0, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
2828
; REGALLOC-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr4_sgpr5
2929
; REGALLOC-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
@@ -34,13 +34,13 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
3434
; REGALLOC-NEXT: {{ $}}
3535
; REGALLOC-NEXT: $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
3636
; REGALLOC-NEXT: $vgpr1 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
37-
; REGALLOC-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr4_sgpr5
38-
; REGALLOC-NEXT: $sgpr5 = V_READLANE_B32 $vgpr0, 1
37+
; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr4_sgpr5
38+
; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
3939
; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 killed renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def dead $scc, implicit $exec
4040
; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
4141
; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_AND_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
42-
; REGALLOC-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr4, 2, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
43-
; REGALLOC-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr5, 3, $vgpr0, implicit $sgpr4_sgpr5
42+
; REGALLOC-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr4, 2, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
43+
; REGALLOC-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr5, 3, $vgpr0, implicit $sgpr4_sgpr5
4444
; REGALLOC-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr0, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
4545
; REGALLOC-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
4646
; REGALLOC-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec
@@ -67,8 +67,8 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
6767
; REGALLOC-NEXT: bb.4.bb.3:
6868
; REGALLOC-NEXT: $vgpr1 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
6969
; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
70-
; REGALLOC-NEXT: $sgpr4 = V_READLANE_B32 $vgpr1, 2, implicit-def $sgpr4_sgpr5
71-
; REGALLOC-NEXT: $sgpr5 = V_READLANE_B32 $vgpr1, 3
70+
; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 2, implicit-def $sgpr4_sgpr5
71+
; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 3
7272
; REGALLOC-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
7373
; REGALLOC-NEXT: renamable $sgpr4 = S_MOV_B32 5
7474
; REGALLOC-NEXT: renamable $vgpr0 = V_MUL_LO_U32_e64 killed $vgpr0, killed $sgpr4, implicit $exec

llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@ body: |
1919
; CHECK-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
2020
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
2121
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
22-
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr42, 0, $vgpr0
23-
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr43, 1, $vgpr0
24-
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr46, 2, $vgpr0
25-
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr47, 3, $vgpr0
22+
; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr42, 0, $vgpr0
23+
; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr43, 1, $vgpr0
24+
; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr46, 2, $vgpr0
25+
; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr47, 3, $vgpr0
2626
; CHECK-NEXT: S_NOP 0
2727
; CHECK-NEXT: {{ $}}
2828
; CHECK-NEXT: bb.1:

llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ body: |
2626
; GCN: liveins: $sgpr4, $vgpr2_vgpr3
2727
; GCN-NEXT: {{ $}}
2828
; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
29-
; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, killed $vgpr0
29+
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
3030
; GCN-NEXT: S_NOP 0
31-
; GCN-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0
31+
; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
3232
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
3333
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
3434
; GCN-NEXT: KILL killed renamable $vgpr0
@@ -77,9 +77,9 @@ body: |
7777
; GCN-NEXT: successors: %bb.3(0x80000000)
7878
; GCN-NEXT: liveins: $sgpr6, $vgpr0, $sgpr10_sgpr11
7979
; GCN-NEXT: {{ $}}
80-
; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr6, 0, killed $vgpr0
80+
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr6, 0, killed $vgpr0
8181
; GCN-NEXT: S_NOP 0
82-
; GCN-NEXT: $sgpr6 = V_READLANE_B32 $vgpr0, 0
82+
; GCN-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
8383
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
8484
; GCN-NEXT: S_BRANCH %bb.3
8585
; GCN-NEXT: {{ $}}
@@ -143,9 +143,9 @@ body: |
143143
; GCN-NEXT: successors: %bb.2(0x80000000)
144144
; GCN-NEXT: liveins: $sgpr4, $vgpr0, $sgpr10_sgpr11
145145
; GCN-NEXT: {{ $}}
146-
; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, killed $vgpr0
146+
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
147147
; GCN-NEXT: S_NOP 0
148-
; GCN-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0
148+
; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
149149
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
150150
; GCN-NEXT: S_BRANCH %bb.2
151151
; GCN-NEXT: {{ $}}
@@ -245,9 +245,9 @@ body: |
245245
; GCN-NEXT: bb.1:
246246
; GCN-NEXT: liveins: $sgpr4, $vgpr0, $vgpr2_vgpr3
247247
; GCN-NEXT: {{ $}}
248-
; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, killed $vgpr0
248+
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
249249
; GCN-NEXT: S_NOP 0
250-
; GCN-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0
250+
; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
251251
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec
252252
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
253253
; GCN-NEXT: KILL killed renamable $vgpr0

llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ body: |
3636
; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
3737
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
3838
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
39-
; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2
39+
; CHECK-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
4040
; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
4141
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
4242
; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
@@ -50,7 +50,7 @@ body: |
5050
; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -16384, implicit-def $scc
5151
; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
5252
; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
53-
; CHECK-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0
53+
; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
5454
; CHECK-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
5555
; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
5656
; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)

0 commit comments

Comments
 (0)