Skip to content

Commit 6670c73

Browse files
committed
[AMDGPU][Waterfall] Put exec save in the loop header
Add a dedicated loop header block for the waterfall loop, where the current exec mask is saved. The extra block will be cleaned up by the branch-folder pass, but having a dedicated block for the exec setup code ensures that the scheduler will not reorder it with the rest of the code. This patch effectively works around a problem in the si-wqm pass, which inserts extra exec operations that could otherwise interfere with the waterfall exec setup. Change-Id: Iada97dd47cb3bf3869c8d2347cc3337c251f3d45
1 parent 6418bf6 commit 6670c73

File tree

4 files changed

+245
-235
lines changed

4 files changed

+245
-235
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaterfall.cpp

Lines changed: 20 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -605,34 +605,38 @@ bool SIInsertWaterfall::processWaterfall(MachineBasicBlock &MBB) {
605605

606606
// EXEC mask handling
607607
Register Exec = ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
608-
unsigned SaveExecOpc =
609-
ST->isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
608+
unsigned SaveExecOpc = ST->isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32
609+
: AMDGPU::S_AND_SAVEEXEC_B64;
610610
unsigned XorTermOpc =
611-
ST->isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
612-
unsigned MovOpc =
613-
ST->isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
611+
ST->isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
612+
unsigned MovOpc = ST->isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
614613
const auto *BoolXExecRC = RI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
615614

616-
Register SaveExec = MRI->createVirtualRegister(BoolXExecRC);
617-
Register TmpExec = MRI->createVirtualRegister(BoolXExecRC);
618-
619-
BuildMI(*CurrMBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), TmpExec);
620-
621-
// Save the EXEC mask
622-
BuildMI(*CurrMBB, I, DL, TII->get(MovOpc), SaveExec)
623-
.addReg(Exec);
624-
615+
MachineBasicBlock &LoopHeaderBB = *MF.CreateMachineBasicBlock();
625616
MachineBasicBlock &LoopBB = *MF.CreateMachineBasicBlock();
626617
MachineBasicBlock &RemainderBB = *MF.CreateMachineBasicBlock();
627618
MachineFunction::iterator MBBI(*CurrMBB);
628619
++MBBI;
629620

621+
MF.insert(MBBI, &LoopHeaderBB);
630622
MF.insert(MBBI, &LoopBB);
631623
MF.insert(MBBI, &RemainderBB);
632624

625+
LoopHeaderBB.addSuccessor(&LoopBB);
633626
LoopBB.addSuccessor(&LoopBB);
634627
LoopBB.addSuccessor(&RemainderBB);
635628

629+
Register SaveExec = MRI->createVirtualRegister(BoolXExecRC);
630+
Register TmpExec = MRI->createVirtualRegister(BoolXExecRC);
631+
632+
// Put TmpExec and SaveExec in the loop header.
633+
MachineBasicBlock::iterator LH = LoopHeaderBB.begin();
634+
BuildMI(LoopHeaderBB, LH, DL, TII->get(TargetOpcode::IMPLICIT_DEF),
635+
TmpExec);
636+
637+
// Save the EXEC mask
638+
BuildMI(LoopHeaderBB, LH, DL, TII->get(MovOpc), SaveExec).addReg(Exec);
639+
636640
// Move all instructions from the SI_WATERFALL_BEGIN to the last
637641
// SI_WATERFALL_END or last use tagged from SI_WATERFALL_LAST_USE
638642
// into the new LoopBB
@@ -651,7 +655,7 @@ bool SIInsertWaterfall::processWaterfall(MachineBasicBlock &MBB) {
651655
MachineBasicBlock::iterator E(Item.Final);
652656
++E;
653657

654-
CurrMBB->addSuccessor(&LoopBB);
658+
CurrMBB->addSuccessor(&LoopHeaderBB);
655659

656660
MachineBasicBlock::iterator J = LoopBB.begin();
657661

@@ -663,7 +667,7 @@ bool SIInsertWaterfall::processWaterfall(MachineBasicBlock &MBB) {
663667

664668
BuildMI(LoopBB, J, DL, TII->get(TargetOpcode::PHI), PhiExec)
665669
.addReg(TmpExec)
666-
.addMBB(CurrMBB)
670+
.addMBB(&LoopHeaderBB)
667671
.addReg(NewExec)
668672
.addMBB(&LoopBB);
669673

0 commit comments

Comments
 (0)