Skip to content

Commit ded07b8

Browse files
committed
Update [AMDGPU] Support block load/store for CSR llvm#130013
1 parent 833fdac commit ded07b8

File tree

7 files changed

+36
-53
lines changed

7 files changed

+36
-53
lines changed

llvm/lib/CodeGen/PrologEpilogInserter.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -476,8 +476,8 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
476476
// Now that we know which registers need to be saved and restored, allocate
477477
// stack slots for them.
478478
for (auto &CS : CSI) {
479-
// If the target has spilled this register to another register, we don't
480-
// need to allocate a stack slot.
479+
// If the target has spilled this register to another register or already
480+
// handled it, we don't need to allocate a stack slot.
481481
if (CS.isSpilledToReg() || CS.isHandledByTarget())
482482
continue;
483483

llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -182,28 +182,26 @@ const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) {
182182
return AsmPrinter::lowerConstant(CV);
183183
}
184184

185-
static void emitVGPRBlockComment(const MachineInstr *MI, MCStreamer &OS) {
185+
static void emitVGPRBlockComment(const MachineInstr *MI, const SIInstrInfo *TII,
186+
const TargetRegisterInfo *TRI,
187+
const SIMachineFunctionInfo *MFI,
188+
MCStreamer &OS) {
186189
// The instruction will only transfer a subset of the registers in the block,
187190
// based on the mask that is stored in m0. We could search for the instruction
188191
// that sets m0, but most of the time we'll already have the mask stored in
189192
// the machine function info. Try to use that. This assumes that we only use
190193
// block loads/stores for CSR spills.
191-
const MachineFunction *MF = MI->getParent()->getParent();
192-
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
193-
const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
194-
const SIInstrInfo *TII = MF->getSubtarget<GCNSubtarget>().getInstrInfo();
195-
196194
Register RegBlock =
197195
TII->getNamedOperand(*MI, MI->mayLoad() ? AMDGPU::OpName::vdst
198196
: AMDGPU::OpName::vdata)
199197
->getReg();
200-
Register FirstRegInBlock = TRI.getSubReg(RegBlock, AMDGPU::sub0);
198+
Register FirstRegInBlock = TRI->getSubReg(RegBlock, AMDGPU::sub0);
201199
uint32_t Mask = MFI->getMaskForVGPRBlockOps(RegBlock);
202200

203201
SmallString<512> TransferredRegs;
204202
for (unsigned I = 0; I < 32; ++I) {
205203
if (Mask & (1 << I)) {
206-
(llvm::Twine(" ") + TRI.getName(FirstRegInBlock + I))
204+
(llvm::Twine(" ") + TRI->getRegAsmName(FirstRegInBlock + I))
207205
.toVector(TransferredRegs);
208206
}
209207
}
@@ -300,9 +298,11 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
300298
return;
301299
}
302300

303-
if (STI.getInstrInfo()->isBlockLoadStore(MI->getOpcode()))
304-
if (isVerbose())
305-
emitVGPRBlockComment(MI, *OutStreamer);
301+
if (isVerbose())
302+
if (STI.getInstrInfo()->isBlockLoadStore(MI->getOpcode()))
303+
emitVGPRBlockComment(MI, STI.getInstrInfo(), STI.getRegisterInfo(),
304+
MF->getInfo<SIMachineFunctionInfo>(),
305+
*OutStreamer);
306306

307307
MCInst TmpInst;
308308
MCInstLowering.lower(MI, TmpInst);

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1894,7 +1894,6 @@ void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
18941894

18951895
static void assignSlotsUsingVGPRBlocks(MachineFunction &MF,
18961896
const GCNSubtarget &ST,
1897-
const TargetRegisterInfo *TRI,
18981897
std::vector<CalleeSavedInfo> &CSI,
18991898
unsigned &MinCSFrameIndex,
19001899
unsigned &MaxCSFrameIndex) {
@@ -1921,8 +1920,8 @@ static void assignSlotsUsingVGPRBlocks(MachineFunction &MF,
19211920
if (!CanUseBlockOps(*CSIt))
19221921
continue;
19231922

1924-
// Find all the regs that will fit in a 32-bit block starting at the current
1925-
// reg and build the mask. It should have 1 for every register that's
1923+
// Find all the regs that will fit in a 32-bit mask starting at the current
1924+
// reg and build said mask. It should have 1 for every register that's
19261925
// included, with the current register as the least significant bit.
19271926
uint32_t Mask = 1;
19281927
CSEnd = std::remove_if(
@@ -1935,8 +1934,7 @@ static void assignSlotsUsingVGPRBlocks(MachineFunction &MF,
19351934
}
19361935
});
19371936

1938-
const TargetRegisterClass *BlockRegClass =
1939-
TII->getRegClassForBlockOp(TRI, MF);
1937+
const TargetRegisterClass *BlockRegClass = &AMDGPU::VReg_1024RegClass;
19401938
Register RegBlock =
19411939
MRI->getMatchingSuperReg(Reg, AMDGPU::sub0, BlockRegClass);
19421940
if (!RegBlock) {
@@ -1990,8 +1988,7 @@ bool SIFrameLowering::assignCalleeSavedSpillSlots(
19901988
bool UseVGPRBlocks = ST.useVGPRBlockOpsForCSR();
19911989

19921990
if (UseVGPRBlocks)
1993-
assignSlotsUsingVGPRBlocks(MF, ST, TRI, CSI, MinCSFrameIndex,
1994-
MaxCSFrameIndex);
1991+
assignSlotsUsingVGPRBlocks(MF, ST, CSI, MinCSFrameIndex, MaxCSFrameIndex);
19951992

19961993
return assignCalleeSavedSpillSlots(MF, TRI, CSI);
19971994
}
@@ -2152,10 +2149,10 @@ bool SIFrameLowering::restoreCalleeSavedRegisters(
21522149
// VGPRs in the register block is reserved (e.g. if it's a WWM register),
21532150
// then the whole block will be marked as reserved and `updateLiveness` will
21542151
// skip it.
2155-
if (!MBB.isLiveIn(Reg))
2156-
MBB.addLiveIn(Reg);
2152+
MBB.addLiveIn(Reg);
21572153
}
21582154

2155+
MBB.sortUniqueLiveIns();
21592156
return false;
21602157
}
21612158

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5769,16 +5769,6 @@ const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID,
57695769
IsAllocatable);
57705770
}
57715771

5772-
const TargetRegisterClass *
5773-
SIInstrInfo::getRegClassForBlockOp(const TargetRegisterInfo *TRI,
5774-
const MachineFunction &MF) const {
5775-
const MCInstrDesc &ScratchStoreBlockOp =
5776-
get(AMDGPU::SCRATCH_STORE_BLOCK_SADDR);
5777-
int VDataIdx = AMDGPU::getNamedOperandIdx(ScratchStoreBlockOp.getOpcode(),
5778-
AMDGPU::OpName::vdata);
5779-
return getRegClass(ScratchStoreBlockOp, VDataIdx, TRI, MF);
5780-
}
5781-
57825772
const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
57835773
unsigned OpNo) const {
57845774
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1441,10 +1441,6 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
14411441
const MachineFunction &MF)
14421442
const override;
14431443

1444-
const TargetRegisterClass *
1445-
getRegClassForBlockOp(const TargetRegisterInfo *TRI,
1446-
const MachineFunction &MF) const;
1447-
14481444
void fixImplicitOperands(MachineInstr &MI) const;
14491445

14501446
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,

llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ body: |
180180
bb.0:
181181
liveins: $sgpr30_sgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40
182182
; W32-LABEL: name: other_regs
183-
; W32: liveins: $sgpr42, $sgpr30_sgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71
183+
; W32: liveins: $sgpr48, $sgpr30_sgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71
184184
; W32-NEXT: {{ $}}
185185
; W32-NEXT: $sgpr0 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
186186
; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
@@ -189,10 +189,10 @@ body: |
189189
; W32-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0
190190
; W32-NEXT: $m0 = S_MOV_B32 9
191191
; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.4, align 4, addrspace 5)
192-
; W32-NEXT: $vgpr44 = SI_SPILL_S32_TO_VGPR $sgpr42, 0, $vgpr44
193-
; W32-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr42, implicit-def $m0, implicit-def $exec
192+
; W32-NEXT: $vgpr44 = SI_SPILL_S32_TO_VGPR $sgpr48, 0, $vgpr44
193+
; W32-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr48, implicit-def $m0, implicit-def $exec
194194
; W32-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40
195-
; W32-NEXT: $sgpr42 = SI_RESTORE_S32_FROM_VGPR $vgpr44, 0
195+
; W32-NEXT: $sgpr48 = SI_RESTORE_S32_FROM_VGPR $vgpr44, 0
196196
; W32-NEXT: $m0 = S_MOV_B32 9
197197
; W32-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.4, align 4, addrspace 5)
198198
; W32-NEXT: $sgpr0 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
@@ -203,7 +203,7 @@ body: |
203203
; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
204204
;
205205
; W64-LABEL: name: other_regs
206-
; W64: liveins: $sgpr42, $sgpr30_sgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71
206+
; W64: liveins: $sgpr48, $sgpr30_sgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71
207207
; W64-NEXT: {{ $}}
208208
; W64-NEXT: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
209209
; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
@@ -212,10 +212,10 @@ body: |
212212
; W64-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
213213
; W64-NEXT: $m0 = S_MOV_B32 9
214214
; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.4, align 4, addrspace 5)
215-
; W64-NEXT: $vgpr44 = SI_SPILL_S32_TO_VGPR $sgpr42, 0, $vgpr44
216-
; W64-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr42, implicit-def $m0, implicit-def $exec
215+
; W64-NEXT: $vgpr44 = SI_SPILL_S32_TO_VGPR $sgpr48, 0, $vgpr44
216+
; W64-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr48, implicit-def $m0, implicit-def $exec
217217
; W64-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40
218-
; W64-NEXT: $sgpr42 = SI_RESTORE_S32_FROM_VGPR $vgpr44, 0
218+
; W64-NEXT: $sgpr48 = SI_RESTORE_S32_FROM_VGPR $vgpr44, 0
219219
; W64-NEXT: $m0 = S_MOV_B32 9
220220
; W64-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.4, align 4, addrspace 5)
221221
; W64-NEXT: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
@@ -224,7 +224,7 @@ body: |
224224
; W64-NEXT: $vgpr44 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
225225
; W64-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
226226
; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
227-
S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr42, implicit-def $m0, implicit-def $exec
227+
S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr48, implicit-def $m0, implicit-def $exec
228228
S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40
229229
230230
S_SETPC_B64_return $sgpr30_sgpr31

llvm/test/CodeGen/AMDGPU/spill-vgpr-block.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,24 +15,24 @@ define i32 @non_entry_func(i32 %x) {
1515
; CHECK-NEXT: s_wait_alu 0xfffe
1616
; CHECK-NEXT: s_mov_b32 exec_lo, s0
1717
; CHECK-NEXT: s_mov_b32 m0, 0x110003
18-
; CHECK-NEXT: v_writelane_b32 v2, s40, 0
19-
; CHECK-NEXT: ; transferring at most VGPR40 VGPR41 VGPR56 VGPR60 ; 128-byte Folded Spill
18+
; CHECK-NEXT: v_writelane_b32 v2, s48, 0
19+
; CHECK-NEXT: ; transferring at most v40 v41 v56 v60 ; 128-byte Folded Spill
2020
; CHECK-NEXT: scratch_store_block off, v[40:71], s32 offset:4
2121
; CHECK-NEXT: s_mov_b32 m0, 1
2222
; CHECK-NEXT: v_mov_b32_e32 v1, v0
23-
; CHECK-NEXT: ; transferring at most VGPR120 ; 128-byte Folded Spill
23+
; CHECK-NEXT: ; transferring at most v120 ; 128-byte Folded Spill
2424
; CHECK-NEXT: scratch_store_block off, v[120:151], s32
2525
; CHECK-NEXT: ;;#ASMSTART
2626
; CHECK-NEXT: s_nop
2727
; CHECK-NEXT: ;;#ASMEND
28-
; CHECK-NEXT: ; transferring at most VGPR120 ; 128-byte Folded Reload
28+
; CHECK-NEXT: ; transferring at most v120 ; 128-byte Folded Reload
2929
; CHECK-NEXT: scratch_load_block v[120:151], off, s32
3030
; CHECK-NEXT: s_mov_b32 m0, 0x110003
3131
; CHECK-NEXT: scratch_store_b32 off, v1, s32 offset:88
32-
; CHECK-NEXT: ; transferring at most VGPR40 VGPR41 VGPR56 VGPR60 ; 128-byte Folded Reload
32+
; CHECK-NEXT: ; transferring at most v40 v41 v56 v60 ; 128-byte Folded Reload
3333
; CHECK-NEXT: scratch_load_block v[40:71], off, s32 offset:4
3434
; CHECK-NEXT: v_mov_b32_e32 v0, v1
35-
; CHECK-NEXT: v_readlane_b32 s40, v2, 0
35+
; CHECK-NEXT: v_readlane_b32 s48, v2, 0
3636
; CHECK-NEXT: s_xor_saveexec_b32 s0, -1
3737
; CHECK-NEXT: scratch_load_b32 v2, off, s32 offset:100 ; 4-byte Folded Reload
3838
; CHECK-NEXT: s_wait_alu 0xfffe
@@ -41,7 +41,7 @@ define i32 @non_entry_func(i32 %x) {
4141
; CHECK-NEXT: s_setpc_b64 s[30:31]
4242
%local = alloca i32, i32 3, addrspace(5)
4343
store i32 %x, ptr addrspace(5) %local
44-
call void asm "s_nop", "~{v0},~{v8},~{v40},~{v41},~{v49},~{v52},~{v56},~{v60},~{v120},~{s0},~{s40}"()
44+
call void asm "s_nop", "~{v0},~{v8},~{v40},~{v41},~{v49},~{v52},~{v56},~{v60},~{v120},~{s0},~{s48}"()
4545
ret i32 %x
4646
}
4747

@@ -87,7 +87,7 @@ define amdgpu_kernel void @entry_func(i32 %x) {
8787
; DAGISEL-NEXT: v_mov_b32_e32 v0, s12
8888
; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
8989
; DAGISEL-NEXT: s_endpgm
90-
call void asm "s_nop", "~{v0},~{v8},~{v40},~{v41},~{v49},~{v52},~{v56},~{v60},~{v120},~{s0},~{s40}"()
90+
call void asm "s_nop", "~{v0},~{v8},~{v40},~{v41},~{v49},~{v52},~{v56},~{v60},~{v120},~{s0},~{s48}"()
9191
%res = call i32 @non_entry_func(i32 %x)
9292
ret void
9393
}

0 commit comments

Comments
 (0)