Skip to content

Commit 42cfb9f

Browse files
committed
Revert "Revert "[AMDGPU] RA inserted scalar instructions can be at the BB top (llvm#72140)""
This reverts commit 675ba5e.

Change-Id: I19b8d08af0dbea3d08d3ff310f90cad11fb80351
1 parent 208cfa0 commit 42cfb9f

21 files changed

+781
-652
lines changed

llvm/include/llvm/CodeGen/MachineBasicBlock.h

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -846,8 +846,10 @@ class MachineBasicBlock
846846

847847
/// Return the first instruction in MBB after I that is not a PHI, label or
848848
/// debug. This is the correct point to insert copies at the beginning of a
849-
/// basic block.
850-
iterator SkipPHIsLabelsAndDebug(iterator I, bool SkipPseudoOp = true);
849+
/// basic block. \p Reg is the register being used by a spill or defined for a
850+
/// restore/split during register allocation.
851+
iterator SkipPHIsLabelsAndDebug(iterator I, Register Reg = Register(),
852+
bool SkipPseudoOp = true);
851853

852854
/// Returns an iterator to the first terminator instruction of this basic
853855
/// block. If a terminator does not exist, it returns end().

llvm/include/llvm/CodeGen/TargetInstrInfo.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2013,8 +2013,10 @@ class TargetInstrInfo : public MCInstrInfo {
20132013

20142014
/// True if the instruction is bound to the top of its basic block and no
20152015
/// other instructions shall be inserted before it. This can be implemented
2016-
/// to prevent register allocator to insert spills before such instructions.
2017-
virtual bool isBasicBlockPrologue(const MachineInstr &MI) const {
2016+
/// to prevent register allocator to insert spills for \p Reg before such
2017+
/// instructions.
2018+
virtual bool isBasicBlockPrologue(const MachineInstr &MI,
2019+
Register Reg = Register()) const {
20182020
return false;
20192021
}
20202022

llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,8 @@ class StatepointState {
461461

462462
if (EHPad && !RC.hasReload(Reg, RegToSlotIdx[Reg], EHPad)) {
463463
RC.recordReload(Reg, RegToSlotIdx[Reg], EHPad);
464-
auto EHPadInsertPoint = EHPad->SkipPHIsLabelsAndDebug(EHPad->begin());
464+
auto EHPadInsertPoint =
465+
EHPad->SkipPHIsLabelsAndDebug(EHPad->begin(), Reg);
465466
insertReloadBefore(Reg, EHPadInsertPoint, EHPad);
466467
LLVM_DEBUG(dbgs() << "...also reload at EHPad "
467468
<< printMBBReference(*EHPad) << "\n");

llvm/lib/CodeGen/InlineSpiller.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -463,7 +463,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
463463
MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def);
464464
MachineBasicBlock::iterator MII;
465465
if (SrcVNI->isPHIDef())
466-
MII = MBB->SkipPHIsLabelsAndDebug(MBB->begin());
466+
MII = MBB->SkipPHIsLabelsAndDebug(MBB->begin(), SrcReg);
467467
else {
468468
MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def);
469469
assert(DefMI && "Defining instruction disappeared");

llvm/lib/CodeGen/MachineBasicBlock.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -223,13 +223,13 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
223223

224224
MachineBasicBlock::iterator
225225
MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I,
226-
bool SkipPseudoOp) {
226+
Register Reg, bool SkipPseudoOp) {
227227
const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
228228

229229
iterator E = end();
230230
while (I != E && (I->isPHI() || I->isPosition() || I->isDebugInstr() ||
231231
(SkipPseudoOp && I->isPseudoProbe()) ||
232-
TII->isBasicBlockPrologue(*I)))
232+
TII->isBasicBlockPrologue(*I, Reg)))
233233
++I;
234234
// FIXME: This needs to change if we wish to bundle labels / dbg_values
235235
// inside the bundle.

llvm/lib/CodeGen/SplitKit.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -807,8 +807,10 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
807807
return Start;
808808
}
809809

810-
VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB,
811-
MBB.SkipPHIsLabelsAndDebug(MBB.begin()));
810+
unsigned RegIdx = 0;
811+
Register Reg = LIS.getInterval(Edit->get(RegIdx)).reg();
812+
VNInfo *VNI = defFromParent(RegIdx, ParentVNI, Start, MBB,
813+
MBB.SkipPHIsLabelsAndDebug(MBB.begin(), Reg));
812814
RegAssign.insert(Start, VNI->def, OpenIdx);
813815
LLVM_DEBUG(dump());
814816
return VNI->def;

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8922,16 +8922,25 @@ unsigned SIInstrInfo::getLiveRangeSplitOpcode(Register SrcReg,
89228922
return AMDGPU::COPY;
89238923
}
89248924

8925-
bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
8925+
bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI,
8926+
Register Reg) const {
89268927
// We need to handle instructions which may be inserted during register
89278928
// allocation to handle the prolog. The initial prolog instruction may have
89288929
// been separated from the start of the block by spills and copies inserted
8929-
// needed by the prolog.
8930-
uint16_t Opc = MI.getOpcode();
8930+
// needed by the prolog. However, the insertions for scalar registers can
8931+
// always be placed at the BB top as they are independent of the exec mask
8932+
// value.
8933+
bool IsNullOrVectorRegister = true;
8934+
if (Reg) {
8935+
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
8936+
IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
8937+
}
89318938

8939+
uint16_t Opc = MI.getOpcode();
89328940
// FIXME: Copies inserted in the block prolog for live-range split should also
89338941
// be included.
8934-
return (isSpillOpcode(Opc) || (!MI.isTerminator() && Opc != AMDGPU::COPY &&
8942+
return IsNullOrVectorRegister &&
8943+
(isSpillOpcode(Opc) || (!MI.isTerminator() && Opc != AMDGPU::COPY &&
89358944
MI.modifiesRegister(AMDGPU::EXEC, &RI)));
89368945
}
89378946

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1291,7 +1291,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
12911291
unsigned getLiveRangeSplitOpcode(Register Reg,
12921292
const MachineFunction &MF) const override;
12931293

1294-
bool isBasicBlockPrologue(const MachineInstr &MI) const override;
1294+
bool isBasicBlockPrologue(const MachineInstr &MI,
1295+
Register Reg = Register()) const override;
12951296

12961297
MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
12971298
MachineBasicBlock::iterator InsPt,

llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
3232
; REGALLOC-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
3333
; REGALLOC-NEXT: {{ $}}
3434
; REGALLOC-NEXT: $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
35-
; REGALLOC-NEXT: $vgpr1 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
3635
; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr4_sgpr5
3736
; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
3837
; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 killed renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def dead $scc, implicit $exec
38+
; REGALLOC-NEXT: $vgpr1 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
3939
; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
4040
; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_AND_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
4141
; REGALLOC-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr4, 2, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
@@ -65,10 +65,10 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
6565
; REGALLOC-NEXT: {{ $}}
6666
; REGALLOC-NEXT: bb.4.bb.3:
6767
; REGALLOC-NEXT: $vgpr1 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
68-
; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
6968
; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 2, implicit-def $sgpr4_sgpr5
7069
; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 3
7170
; REGALLOC-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
71+
; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
7272
; REGALLOC-NEXT: renamable $vgpr0 = V_LSHL_ADD_U32_e64 killed $vgpr0, 2, $vgpr0, implicit $exec
7373
; REGALLOC-NEXT: KILL killed renamable $vgpr1
7474
; REGALLOC-NEXT: SI_RETURN implicit killed $vgpr0

0 commit comments

Comments
 (0)