Skip to content

Commit 96b5455

Browse files
cdevadas authored and zhang2amd committed
[AMDGPU] RA inserted scalar instructions can be at the BB top (llvm#72140)
We adjust the insertion point at the BB top for spills/copies during RA to ensure they are placed after the exec restore instructions required for the divergent control flow execution. This is, however, required only for the vector operations. The insertions for scalar registers can still go to the BB top.

Change-Id: I0ee60b84c53c73d65d8bc9b6fdfc0bcb1e86c4fe
1 parent 12fa283 commit 96b5455

File tree

9 files changed

+324
-20
lines changed

9 files changed

+324
-20
lines changed

llvm/include/llvm/CodeGen/MachineBasicBlock.h

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -824,8 +824,10 @@ class MachineBasicBlock
824824

825825
/// Return the first instruction in MBB after I that is not a PHI, label or
826826
/// debug. This is the correct point to insert copies at the beginning of a
827-
/// basic block.
828-
iterator SkipPHIsLabelsAndDebug(iterator I, bool SkipPseudoOp = true);
827+
/// basic block. \p Reg is the register being used by a spill or defined for a
828+
/// restore/split during register allocation.
829+
iterator SkipPHIsLabelsAndDebug(iterator I, Register Reg = Register(),
830+
bool SkipPseudoOp = true);
829831

830832
/// Returns an iterator to the first terminator instruction of this basic
831833
/// block. If a terminator does not exist, it returns end().

llvm/include/llvm/CodeGen/TargetInstrInfo.h

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1940,8 +1940,10 @@ class TargetInstrInfo : public MCInstrInfo {
19401940

19411941
/// True if the instruction is bound to the top of its basic block and no
19421942
/// other instructions shall be inserted before it. This can be implemented
1943-
/// to prevent register allocator to insert spills before such instructions.
1944-
virtual bool isBasicBlockPrologue(const MachineInstr &MI) const {
1943+
/// to prevent register allocator to insert spills for \p Reg before such
1944+
/// instructions.
1945+
virtual bool isBasicBlockPrologue(const MachineInstr &MI,
1946+
Register Reg = Register()) const {
19451947
return false;
19461948
}
19471949

llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -461,7 +461,8 @@ class StatepointState {
461461

462462
if (EHPad && !RC.hasReload(Reg, RegToSlotIdx[Reg], EHPad)) {
463463
RC.recordReload(Reg, RegToSlotIdx[Reg], EHPad);
464-
auto EHPadInsertPoint = EHPad->SkipPHIsLabelsAndDebug(EHPad->begin());
464+
auto EHPadInsertPoint =
465+
EHPad->SkipPHIsLabelsAndDebug(EHPad->begin(), Reg);
465466
insertReloadBefore(Reg, EHPadInsertPoint, EHPad);
466467
LLVM_DEBUG(dbgs() << "...also reload at EHPad "
467468
<< printMBBReference(*EHPad) << "\n");

llvm/lib/CodeGen/InlineSpiller.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -424,7 +424,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
424424
MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def);
425425
MachineBasicBlock::iterator MII;
426426
if (SrcVNI->isPHIDef())
427-
MII = MBB->SkipPHIsLabelsAndDebug(MBB->begin());
427+
MII = MBB->SkipPHIsLabelsAndDebug(MBB->begin(), SrcReg);
428428
else {
429429
MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def);
430430
assert(DefMI && "Defining instruction disappeared");

llvm/lib/CodeGen/MachineBasicBlock.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -221,13 +221,13 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
221221

222222
MachineBasicBlock::iterator
223223
MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I,
224-
bool SkipPseudoOp) {
224+
Register Reg, bool SkipPseudoOp) {
225225
const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
226226

227227
iterator E = end();
228228
while (I != E && (I->isPHI() || I->isPosition() || I->isDebugInstr() ||
229229
(SkipPseudoOp && I->isPseudoProbe()) ||
230-
TII->isBasicBlockPrologue(*I)))
230+
TII->isBasicBlockPrologue(*I, Reg)))
231231
++I;
232232
// FIXME: This needs to change if we wish to bundle labels / dbg_values
233233
// inside the bundle.

llvm/lib/CodeGen/SplitKit.cpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -795,8 +795,10 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
795795
return Start;
796796
}
797797

798-
VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB,
799-
MBB.SkipPHIsLabelsAndDebug(MBB.begin()));
798+
unsigned RegIdx = 0;
799+
Register Reg = LIS.getInterval(Edit->get(RegIdx)).reg();
800+
VNInfo *VNI = defFromParent(RegIdx, ParentVNI, Start, MBB,
801+
MBB.SkipPHIsLabelsAndDebug(MBB.begin(), Reg));
800802
RegAssign.insert(Start, VNI->def, OpenIdx);
801803
LLVM_DEBUG(dump());
802804
return VNI->def;

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7987,16 +7987,25 @@ SIInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
79877987
return ArrayRef(TargetFlags);
79887988
}
79897989

7990-
bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
7990+
bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI,
7991+
Register Reg) const {
79917992
// We need to handle instructions which may be inserted during register
79927993
// allocation to handle the prolog. The initial prolog instruction may have
79937994
// been separated from the start of the block by spills and copies inserted
7994-
// needed by the prolog.
7995-
uint16_t Opc = MI.getOpcode();
7995+
// needed by the prolog. However, the insertions for scalar registers can
7996+
// always be placed at the BB top as they are independent of the exec mask
7997+
// value.
7998+
bool IsNullOrVectorRegister = true;
7999+
if (Reg) {
8000+
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
8001+
IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
8002+
}
79968003

8004+
uint16_t Opc = MI.getOpcode();
79978005
// FIXME: Copies inserted in the block prolog for live-range split should also
79988006
// be included.
7999-
return (isSpillOpcode(Opc) || (!MI.isTerminator() && !MI.isCopy() &&
8007+
return IsNullOrVectorRegister &&
8008+
(isSpillOpcode(Opc) || (!MI.isTerminator() && !MI.isCopy() &&
80008009
MI.modifiesRegister(AMDGPU::EXEC, &RI)));
80018010
}
80028011

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1132,7 +1132,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
11321132
CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
11331133
const ScheduleDAGMI *DAG) const override;
11341134

1135-
bool isBasicBlockPrologue(const MachineInstr &MI) const override;
1135+
bool isBasicBlockPrologue(const MachineInstr &MI,
1136+
Register Reg = Register()) const override;
11361137

11371138
MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
11381139
MachineBasicBlock::iterator InsPt,

0 commit comments

Comments
 (0)