Skip to content

Commit 78347c9

Browse files
committed
[AMDGPU] Prevent VGPR copies from moving across the EXEC mask definitions
Differential Revision: https://reviews.llvm.org/D63731 Reviewers: qcolombet, rampitec llvm-svn: 369532
1 parent 1c18a9c commit 78347c9

File tree

5 files changed

+38
-2
lines changed

5 files changed

+38
-2
lines changed

llvm/include/llvm/CodeGen/MachineInstr.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,22 @@ class MachineInstr
427427
return getNumExplicitDefs() + MCID->getNumImplicitDefs();
428428
}
429429

430+
/// Returns true if the instruction has an implicit definition.
431+
bool hasImplicitDef() const {
432+
for (unsigned I = getNumExplicitOperands(), E = getNumOperands();
433+
I != E; ++I) {
434+
const MachineOperand &MO = getOperand(I);
435+
if (MO.isDef() && MO.isImplicit())
436+
return true;
437+
}
438+
return false;
439+
}
440+
441+
/// Returns the number of implicit operands.
442+
unsigned getNumImplicitOperands() const {
443+
return getNumOperands() - getNumExplicitOperands();
444+
}
445+
430446
/// Return true if operand \p OpIdx is a subregister index.
431447
bool isOperandSubregIdx(unsigned OpIdx) const {
432448
assert(getOperand(OpIdx).getType() == MachineOperand::MO_Immediate &&

llvm/lib/CodeGen/PeepholeOptimizer.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1808,7 +1808,11 @@ ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
18081808
assert(Def->isCopy() && "Invalid definition");
18091809
// Copy instructions are supposed to be: Def = Src.
18101810
// If someone breaks this assumption, bad things will happen everywhere.
1811-
assert(Def->getNumOperands() == 2 && "Invalid number of operands");
1811+
// There may be implicit uses preventing the copy from being moved across
1812+
// some target-specific register definitions.
1813+
assert(Def->getNumOperands() - Def->getNumImplicitOperands() == 2 &&
1814+
"Invalid number of operands");
1815+
assert(!Def->hasImplicitDef() && "Only implicit uses are allowed");
18121816

18131817
if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
18141818
// If we look for a different subreg, it means we want a subreg of src.

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -612,6 +612,13 @@ void SIFoldOperands::foldOperand(
612612
return;
613613

614614
UseMI->setDesc(TII->get(MovOp));
615+
MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
616+
MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
617+
while (ImpOpI != ImpOpE) {
618+
MachineInstr::mop_iterator Tmp = ImpOpI;
619+
ImpOpI++;
620+
UseMI->RemoveOperand(UseMI->getOperandNo(Tmp));
621+
}
615622
CopiesToReplace.push_back(UseMI);
616623
} else {
617624
if (UseMI->isCopy() && OpToFold.isReg() &&

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4200,6 +4200,15 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
42004200
// Try to eliminate the copy if it is copying an immediate value.
42014201
if (Def->isMoveImmediate())
42024202
FoldImmediate(*Copy, *Def, OpReg, &MRI);
4203+
4204+
bool ImpDef = Def->isImplicitDef();
4205+
while (!ImpDef && Def && Def->isCopy()) {
4206+
Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
4207+
ImpDef = Def && Def->isImplicitDef();
4208+
}
4209+
if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
4210+
!ImpDef)
4211+
Copy->addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
42034212
}
42044213

42054214
// Emit the actual waterfall loop, executing the wrapped instruction for each

llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -882,7 +882,7 @@ body: |
882882
---
883883

884884
# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_immreg{{$}}
885-
# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec, implicit $exec
885+
# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
886886
# GCN-NEXT: S_ENDPGM 0, implicit %3
887887

888888
name: constant_fold_lshl_or_reg0_immreg_immreg

0 commit comments

Comments
 (0)