Skip to content

Commit 5567f46

Browse files
cdevadas and zhang2amd
authored and committed
[AMDGPU] Add pseudo instructions for SGPR spill to VGPR (llvm#69923)
For a future patch, it is important that the lowered SGPR spills remain recognizable as spill instructions during regalloc. Directly lowering them into V_WRITELANE/V_READLANE won't allow us to attach the SPILL flag to those instructions. This patch introduces the pseudo instructions SI_SPILL_S32_TO_VGPR and SI_RESTORE_S32_FROM_VGPR, which have the SGPRSpill flag set in their Desc. They are lowered to the equivalent V_WRITELANE_B32 and V_READLANE_B32 instructions later, during post-RA pseudo expansion. Change-Id: I3f98352642089e5d2ecae3b5fe7a75b61a917862
1 parent 6fbcfa7 commit 5567f46

33 files changed

+877
-827
lines changed

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,8 @@ class PrologEpilogSGPRSpillBuilder {
361361
Register SubReg = NumSubRegs == 1
362362
? SuperReg
363363
: Register(TRI.getSubReg(SuperReg, SplitParts[I]));
364-
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[I].VGPR)
364+
BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
365+
Spill[I].VGPR)
365366
.addReg(SubReg)
366367
.addImm(Spill[I].Lane)
367368
.addReg(Spill[I].VGPR, RegState::Undef);
@@ -444,7 +445,7 @@ class PrologEpilogSGPRSpillBuilder {
444445
Register SubReg = NumSubRegs == 1
445446
? SuperReg
446447
: Register(TRI.getSubReg(SuperReg, SplitParts[I]));
447-
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
448+
BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
448449
.addReg(Spill[I].VGPR)
449450
.addImm(Spill[I].Lane);
450451
}
@@ -1817,12 +1818,10 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
18171818
// TODO: Handle this elsewhere at an early point. Walking through all MBBs
18181819
// here would be a bad heuristic. A better way should be by calling
18191820
// allocateWWMSpill during the regalloc pipeline whenever a physical
1820-
// register is allocated for the intended virtual registers. That will
1821-
// also help excluding the general use of WRITELANE/READLANE intrinsics
1822-
// that won't really need any such special handling.
1823-
if (MI.getOpcode() == AMDGPU::V_WRITELANE_B32)
1821+
// register is allocated for the intended virtual registers.
1822+
if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR)
18241823
MFI->allocateWWMSpill(MF, MI.getOperand(0).getReg());
1825-
else if (MI.getOpcode() == AMDGPU::V_READLANE_B32)
1824+
else if (MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
18261825
MFI->allocateWWMSpill(MF, MI.getOperand(1).getReg());
18271826
else if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
18281827
NeedExecCopyReservedReg = true;

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2046,6 +2046,14 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
20462046
MI.setDesc(get(AMDGPU::S_AND_B32));
20472047
break;
20482048

2049+
case AMDGPU::SI_SPILL_S32_TO_VGPR:
2050+
MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
2051+
break;
2052+
2053+
case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
2054+
MI.setDesc(get(AMDGPU::V_READLANE_B32));
2055+
break;
2056+
20492057
case AMDGPU::V_MOV_B64_PSEUDO: {
20502058
Register Dst = MI.getOperand(0).getReg();
20512059
Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
@@ -3807,7 +3815,9 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
38073815
// However, executing them with EXEC = 0 causes them to operate on undefined
38083816
// data, which we avoid by returning true here.
38093817
if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
3810-
Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32)
3818+
Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
3819+
Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
3820+
Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
38113821
return true;
38123822

38133823
return false;
@@ -4188,7 +4198,9 @@ static bool shouldReadExec(const MachineInstr &MI) {
41884198
if (SIInstrInfo::isVALU(MI)) {
41894199
switch (MI.getOpcode()) {
41904200
case AMDGPU::V_READLANE_B32:
4201+
case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
41914202
case AMDGPU::V_WRITELANE_B32:
4203+
case AMDGPU::SI_SPILL_S32_TO_VGPR:
41924204
return false;
41934205
}
41944206

@@ -8635,7 +8647,9 @@ SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
86358647
return InstructionUniformity::NeverUniform;
86368648

86378649
unsigned opcode = MI.getOpcode();
8638-
if (opcode == AMDGPU::V_READLANE_B32 || opcode == AMDGPU::V_READFIRSTLANE_B32)
8650+
if (opcode == AMDGPU::V_READLANE_B32 ||
8651+
opcode == AMDGPU::V_READFIRSTLANE_B32 ||
8652+
opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
86398653
return InstructionUniformity::AlwaysUniform;
86408654

86418655
if (MI.isCopy()) {

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -840,6 +840,30 @@ defm SI_SPILL_S384 : SI_SPILL_SGPR <SReg_384>;
840840
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
841841
defm SI_SPILL_S1024 : SI_SPILL_SGPR <SReg_1024>;
842842

843+
let SGPRSpill = 1, VALU = 1, isConvergent = 1 in {
844+
def SI_SPILL_S32_TO_VGPR : PseudoInstSI <(outs VGPR_32:$vdst),
845+
(ins SReg_32:$src0, i32imm:$src1, VGPR_32:$vdst_in)> {
846+
let Size = 4;
847+
let FixedSize = 1;
848+
let IsNeverUniform = 1;
849+
let hasSideEffects = 0;
850+
let mayLoad = 0;
851+
let mayStore = 0;
852+
let VALU = 1;
853+
let Constraints = "$vdst = $vdst_in";
854+
}
855+
856+
def SI_RESTORE_S32_FROM_VGPR : PseudoInstSI <(outs SReg_32:$sdst),
857+
(ins VGPR_32:$src0, i32imm:$src1)> {
858+
let Size = 4;
859+
let FixedSize = 1;
860+
let hasSideEffects = 0;
861+
let mayLoad = 0;
862+
let mayStore = 0;
863+
let VALU = 1;
864+
}
865+
} // End SGPRSpill = 1, VALU = 1, isConvergent = 1
866+
843867
// VGPR or AGPR spill instructions. In case of AGPR spilling a temp register
844868
// needs to be used and an extra instruction to move between VGPR and AGPR.
845869
// UsesTmp adds to the total size of an expanded spill in this case.

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1809,7 +1809,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
18091809
// Mark the "old value of vgpr" input undef only if this is the first sgpr
18101810
// spill to this specific vgpr in the first basic block.
18111811
auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1812-
SB.TII.get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
1812+
SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), Spill.VGPR)
18131813
.addReg(SubReg, getKillRegState(UseKill))
18141814
.addImm(Spill.Lane)
18151815
.addReg(Spill.VGPR);
@@ -1871,8 +1871,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
18711871
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
18721872

18731873
MachineInstrBuilder WriteLane =
1874-
BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
1875-
SB.TmpVGPR)
1874+
BuildMI(*SB.MBB, MI, SB.DL,
1875+
SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), SB.TmpVGPR)
18761876
.addReg(SubReg, SubKillState)
18771877
.addImm(i % PVD.PerVGPR)
18781878
.addReg(SB.TmpVGPR, TmpVGPRFlags);
@@ -1945,8 +1945,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
19451945
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
19461946

19471947
SpilledReg Spill = VGPRSpills[i];
1948-
auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
1949-
SubReg)
1948+
auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1949+
SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
19501950
.addReg(Spill.VGPR)
19511951
.addImm(Spill.Lane);
19521952
if (SB.NumSubRegs > 1 && i == 0)
@@ -1979,7 +1979,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
19791979

19801980
bool LastSubReg = (i + 1 == e);
19811981
auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1982-
SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
1982+
SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
19831983
.addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
19841984
.addImm(i);
19851985
if (SB.NumSubRegs > 1 && i == 0)

llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -505,8 +505,8 @@ body: |
505505
; GCN-NEXT: renamable $vgpr46 = COPY $vgpr1, implicit $exec
506506
; GCN-NEXT: renamable $vgpr45 = COPY $vgpr0, implicit $exec
507507
; GCN-NEXT: renamable $sgpr16_sgpr17 = IMPLICIT_DEF
508-
; GCN-NEXT: $vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
509-
; GCN-NEXT: $vgpr40 = V_WRITELANE_B32 $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31
508+
; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
509+
; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31
510510
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15, implicit $vgpr14_vgpr15 :: (store (s32) into %stack.1, addrspace 5)
511511
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec, implicit killed $vgpr14_vgpr15 :: (store (s32) into %stack.1 + 4, addrspace 5)
512512
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit-def $vgpr10_vgpr11, implicit $vgpr10_vgpr11 :: (store (s32) into %stack.2, addrspace 5)
@@ -569,8 +569,8 @@ body: |
569569
570570
ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
571571
renamable $sgpr16_sgpr17 = IMPLICIT_DEF
572-
$vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
573-
$vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr40, implicit killed $sgpr30_sgpr31
572+
$vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
573+
$vgpr40 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr40, implicit killed $sgpr30_sgpr31
574574
dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu, implicit-def dead $vgpr0
575575
%8:vreg_64 = nofpexcept V_FMA_F64_e64 0, %7, 0, %6, 0, %5, 0, 0, implicit $mode, implicit $exec
576576
ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32

llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,13 @@ body: |
2323
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
2424
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0_lo16, 0
2525
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
26-
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr42, 0, $vgpr0
26+
; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr42, 0, $vgpr0
2727
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4a, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x00
28-
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr43, 1, $vgpr0
28+
; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr43, 1, $vgpr0
2929
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4b, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x04
30-
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr46, 2, $vgpr0
30+
; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr46, 2, $vgpr0
3131
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4e, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x08
32-
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr47, 3, $vgpr0
32+
; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr47, 3, $vgpr0
3333
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x4f, 0x05, 0x90, 0x80, 0x14, 0xe4, 0x0c
3434
; CHECK-NEXT: S_NOP 0
3535
; CHECK-NEXT: {{ $}}

llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ body: |
2626
; GCN: liveins: $sgpr4, $vgpr2_vgpr3
2727
; GCN-NEXT: {{ $}}
2828
; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
29-
; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, killed $vgpr0
29+
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
3030
; GCN-NEXT: S_NOP 0
31-
; GCN-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0
31+
; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
3232
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
3333
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
3434
; GCN-NEXT: KILL killed renamable $vgpr0
@@ -77,9 +77,9 @@ body: |
7777
; GCN-NEXT: successors: %bb.3(0x80000000)
7878
; GCN-NEXT: liveins: $sgpr6, $vgpr0, $sgpr10_sgpr11
7979
; GCN-NEXT: {{ $}}
80-
; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr6, 0, killed $vgpr0
80+
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr6, 0, killed $vgpr0
8181
; GCN-NEXT: S_NOP 0
82-
; GCN-NEXT: $sgpr6 = V_READLANE_B32 $vgpr0, 0
82+
; GCN-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
8383
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
8484
; GCN-NEXT: S_BRANCH %bb.3
8585
; GCN-NEXT: {{ $}}
@@ -143,9 +143,9 @@ body: |
143143
; GCN-NEXT: successors: %bb.2(0x80000000)
144144
; GCN-NEXT: liveins: $sgpr4, $vgpr0, $sgpr10_sgpr11
145145
; GCN-NEXT: {{ $}}
146-
; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, killed $vgpr0
146+
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
147147
; GCN-NEXT: S_NOP 0
148-
; GCN-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0
148+
; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
149149
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
150150
; GCN-NEXT: S_BRANCH %bb.2
151151
; GCN-NEXT: {{ $}}
@@ -245,9 +245,9 @@ body: |
245245
; GCN-NEXT: bb.1:
246246
; GCN-NEXT: liveins: $sgpr4, $vgpr0, $vgpr2_vgpr3
247247
; GCN-NEXT: {{ $}}
248-
; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, killed $vgpr0
248+
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
249249
; GCN-NEXT: S_NOP 0
250-
; GCN-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0
250+
; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
251251
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec
252252
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
253253
; GCN-NEXT: KILL killed renamable $vgpr0

llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ body: |
6666
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
6767
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2_lo16, 1048832
6868
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
69-
; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2
69+
; CHECK-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
7070
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x41, 0x05, 0x90, 0x82, 0x14, 0xe4, 0x00
7171
; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33_lo16
7272
; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
@@ -82,7 +82,7 @@ body: |
8282
; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -16384, implicit-def $scc
8383
; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
8484
; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
85-
; CHECK-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0
85+
; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
8686
; CHECK-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
8787
; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
8888
; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)

llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ body: |
6262
; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
6363
; MUBUF-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2_lo16, 1048832
6464
; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
65-
; MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2
65+
; MUBUF-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
6666
; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x41, 0x05, 0x90, 0x82, 0x14, 0xe4, 0x00
6767
; MUBUF-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33_lo16
6868
; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
@@ -72,7 +72,7 @@ body: |
7272
; MUBUF-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
7373
; MUBUF-NEXT: $vgpr3 = V_ADD_U32_e32 16384, killed $vgpr3, implicit $exec
7474
; MUBUF-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
75-
; MUBUF-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0
75+
; MUBUF-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
7676
; MUBUF-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
7777
; MUBUF-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
7878
; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
@@ -81,6 +81,7 @@ body: |
8181
; MUBUF-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32_lo16
8282
; MUBUF-NEXT: $sgpr33 = COPY $sgpr4
8383
; MUBUF-NEXT: S_ENDPGM 0, implicit $vcc
84+
;
8485
; FLATSCR-LABEL: name: scavenge_sgpr_pei_no_sgprs
8586
; FLATSCR: liveins: $vgpr1, $vgpr2
8687
; FLATSCR-NEXT: {{ $}}
@@ -121,7 +122,7 @@ body: |
121122
; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
122123
; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2_lo16, 1048832
123124
; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
124-
; FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2
125+
; FLATSCR-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
125126
; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x10, 0x41, 0x05, 0x90, 0x82, 0x14, 0xe4, 0x00
126127
; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe1
127128
; FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
@@ -132,7 +133,7 @@ body: |
132133
; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, 16384, implicit-def $scc
133134
; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 $sgpr33, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
134135
; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, -16384, implicit-def $scc
135-
; FLATSCR-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0
136+
; FLATSCR-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
136137
; FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
137138
; FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc
138139
; FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)

0 commit comments

Comments
 (0)