Skip to content

Commit a3d70da

Browse files
AMDGPU: update GFX11 wmma hazards
One V_NOP or unrelated VALU instruction in between is required for correctness when matrix A or B of the current WMMA instruction overlaps with matrix D of the previous WMMA instruction. The remaining cases of WMMA operand overlap are handled by the hardware and do not require handling in the hazard recognizer. Hardware may stall in cases where:
- matrix C of the current WMMA instruction overlaps with matrix D of the previous WMMA instruction;
- a VALU instruction reads matrix D of the previous WMMA instruction;
- matrix A, B or C of a WMMA instruction reads the result of a previous VALU instruction.
1 parent 18af032 commit a3d70da

File tree

2 files changed

+35
-36
lines changed

2 files changed

+35
-36
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1706,8 +1706,8 @@ bool GCNHazardRecognizer::fixWMMAHazards(MachineInstr *MI) {
17061706
if (!SIInstrInfo::isWMMA(I))
17071707
return false;
17081708

1709-
// Src0 or Src1 of the current wmma instruction overlaps with the dest of
1710-
// the previous wmma.
1709+
// Src0(matrix A) or Src1(matrix B) of the current wmma instruction overlaps
1710+
// with the dest(matrix D) of the previous wmma.
17111711
const Register CurSrc0Reg =
17121712
TII->getNamedOperand(*MI, AMDGPU::OpName::src0)->getReg();
17131713
const Register CurSrc1Reg =
@@ -1721,25 +1721,6 @@ bool GCNHazardRecognizer::fixWMMAHazards(MachineInstr *MI) {
17211721
return true;
17221722
}
17231723

1724-
// Src2 of the current wmma instruction overlaps with the dest of the
1725-
// previous wmma.
1726-
const MachineOperand *Src2 =
1727-
TII->getNamedOperand(*MI, AMDGPU::OpName::src2);
1728-
const Register CurSrc2Reg = Src2->isReg() ? Src2->getReg() : Register();
1729-
1730-
if (CurSrc2Reg != AMDGPU::NoRegister &&
1731-
TRI->regsOverlap(PrevDstReg, CurSrc2Reg)) {
1732-
1733-
const MachineOperand *Src2Mods =
1734-
TII->getNamedOperand(*MI, AMDGPU::OpName::src2_modifiers);
1735-
const bool NoSrc2Mods =
1736-
(Src2Mods->getImm() & (SISrcMods::NEG | SISrcMods::NEG_HI)) == 0;
1737-
// Exception: there is no hazard if the wmma instructions are of the same
1738-
// type and there is no input modifier on src2 of the current instruction.
1739-
return !(NoSrc2Mods && (TII->pseudoToMCOpcode(I.getOpcode()) ==
1740-
TII->pseudoToMCOpcode(MI->getOpcode())));
1741-
}
1742-
17431724
return false;
17441725
};
17451726

0 commit comments

Comments
 (0)