Skip to content

Commit 45ec3a3

Browse files
author
Baptiste Saleil
committed
[PowerPC] Fix for excessive ACC copies due to PHI nodes
When using accumulators in loops, they are passed around in PHI nodes of unprimed accumulators, causing the generation of additional prime/unprime instructions. This patch detects these cases and changes these PHI nodes to primed accumulator PHI nodes. We also add IR and MIR test cases for several PHI node cases. Differential Revision: https://reviews.llvm.org/D91391
1 parent aa11556 commit 45ec3a3

File tree

3 files changed

+1300
-0
lines changed

3 files changed

+1300
-0
lines changed

llvm/lib/Target/PowerPC/PPCMIPeephole.cpp

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,113 @@ void PPCMIPeephole::UpdateTOCSaves(
267267
TOCSaves[MI] = Keep;
268268
}
269269

270+
// This function returns a list of all PHI nodes in the tree starting from
271+
// the RootPHI node. We perform a BFS traversal to get an ordered list of nodes.
272+
// The list initially only contains the root PHI. When we visit a PHI node, we
273+
// add it to the list. We continue to look for other PHI node operands while
274+
// there are nodes to visit in the list. The function returns false if the
275+
// optimization cannot be applied on this tree.
276+
static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI,
277+
MachineInstr *RootPHI,
278+
SmallVectorImpl<MachineInstr *> &PHIs) {
279+
PHIs.push_back(RootPHI);
280+
unsigned VisitedIndex = 0;
281+
while (VisitedIndex < PHIs.size()) {
282+
MachineInstr *VisitedPHI = PHIs[VisitedIndex];
283+
for (unsigned PHIOp = 1, NumOps = VisitedPHI->getNumOperands();
284+
PHIOp != NumOps; PHIOp += 2) {
285+
Register RegOp = VisitedPHI->getOperand(PHIOp).getReg();
286+
if (!Register::isVirtualRegister(RegOp))
287+
return false;
288+
MachineInstr *Instr = MRI->getVRegDef(RegOp);
289+
// While collecting the PHI nodes, we check if they can be converted (i.e.
290+
// all the operands are either copies, implicit defs or PHI nodes).
291+
unsigned Opcode = Instr->getOpcode();
292+
if (Opcode == PPC::COPY) {
293+
Register Reg = Instr->getOperand(1).getReg();
294+
if (!Register::isVirtualRegister(Reg) ||
295+
MRI->getRegClass(Reg) != &PPC::ACCRCRegClass)
296+
return false;
297+
} else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI)
298+
return false;
299+
// If we detect a cycle in the PHI nodes, we exit. It would be
300+
// possible to change cycles as well, but that would add a lot
301+
// of complexity for a case that is unlikely to occur with MMA
302+
// code.
303+
if (Opcode != PPC::PHI)
304+
continue;
305+
if (std::find(PHIs.begin(), PHIs.end(), Instr) != PHIs.end())
306+
return false;
307+
PHIs.push_back(Instr);
308+
}
309+
VisitedIndex++;
310+
}
311+
return true;
312+
}
313+
314+
// This function changes the unprimed accumulator PHI nodes in the PHIs list to
315+
// primed accumulator PHI nodes. The list is traversed in reverse order to
316+
// change all the PHI operands of a PHI node before changing the node itself.
317+
// We keep a map to associate each changed PHI node to its non-changed form.
318+
static void convertUnprimedAccPHIs(const PPCInstrInfo *TII,
319+
MachineRegisterInfo *MRI,
320+
SmallVectorImpl<MachineInstr *> &PHIs,
321+
Register Dst) {
322+
DenseMap<MachineInstr *, MachineInstr *> ChangedPHIMap;
323+
for (auto It = PHIs.rbegin(), End = PHIs.rend(); It != End; ++It) {
324+
MachineInstr *PHI = *It;
325+
SmallVector<std::pair<MachineOperand, MachineOperand>, 4> PHIOps;
326+
// We check if the current PHI node can be changed by looking at its
327+
// operands. If all the operands are either copies from primed
328+
// accumulators, implicit definitions or other unprimed accumulator
329+
// PHI nodes, we change it.
330+
for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
331+
PHIOp += 2) {
332+
Register RegOp = PHI->getOperand(PHIOp).getReg();
333+
MachineInstr *PHIInput = MRI->getVRegDef(RegOp);
334+
unsigned Opcode = PHIInput->getOpcode();
335+
assert((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF ||
336+
Opcode == PPC::PHI) &&
337+
"Unexpected instruction");
338+
if (Opcode == PPC::COPY) {
339+
assert(MRI->getRegClass(PHIInput->getOperand(1).getReg()) ==
340+
&PPC::ACCRCRegClass &&
341+
"Unexpected register class");
342+
PHIOps.push_back({PHIInput->getOperand(1), PHI->getOperand(PHIOp + 1)});
343+
} else if (Opcode == PPC::IMPLICIT_DEF) {
344+
Register AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
345+
BuildMI(*PHIInput->getParent(), PHIInput, PHIInput->getDebugLoc(),
346+
TII->get(PPC::IMPLICIT_DEF), AccReg);
347+
PHIOps.push_back({MachineOperand::CreateReg(AccReg, false),
348+
PHI->getOperand(PHIOp + 1)});
349+
} else if (Opcode == PPC::PHI) {
350+
// We found a PHI operand. At this point we know this operand
351+
// has already been changed so we get its associated changed form
352+
// from the map.
353+
assert(ChangedPHIMap.count(PHIInput) == 1 &&
354+
"This PHI node should have already been changed.");
355+
MachineInstr *PrimedAccPHI = ChangedPHIMap.lookup(PHIInput);
356+
PHIOps.push_back({MachineOperand::CreateReg(
357+
PrimedAccPHI->getOperand(0).getReg(), false),
358+
PHI->getOperand(PHIOp + 1)});
359+
}
360+
}
361+
Register AccReg = Dst;
362+
// If the PHI node we are changing is the root node, the register it defines
363+
// will be the destination register of the original copy (of the PHI def).
364+
// For all other PHI's in the list, we need to create another primed
365+
// accumulator virtual register as the PHI will no longer define the
366+
// unprimed accumulator.
367+
if (PHI != PHIs[0])
368+
AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
369+
MachineInstrBuilder NewPHI = BuildMI(
370+
*PHI->getParent(), PHI, PHI->getDebugLoc(), TII->get(PPC::PHI), AccReg);
371+
for (auto RegMBB : PHIOps)
372+
NewPHI.add(RegMBB.first).add(RegMBB.second);
373+
ChangedPHIMap[PHI] = NewPHI.getInstr();
374+
}
375+
}
376+
270377
// Perform peephole optimizations.
271378
bool PPCMIPeephole::simplifyCode(void) {
272379
bool Simplified = false;
@@ -321,6 +428,38 @@ bool PPCMIPeephole::simplifyCode(void) {
321428

322429
default:
323430
break;
431+
case PPC::COPY: {
432+
Register Src = MI.getOperand(1).getReg();
433+
Register Dst = MI.getOperand(0).getReg();
434+
if (!Register::isVirtualRegister(Src) ||
435+
!Register::isVirtualRegister(Dst))
436+
break;
437+
if (MRI->getRegClass(Src) != &PPC::UACCRCRegClass ||
438+
MRI->getRegClass(Dst) != &PPC::ACCRCRegClass)
439+
break;
440+
441+
// We are copying an unprimed accumulator to a primed accumulator.
442+
// If the input to the copy is a PHI that is fed only by (i) copies in
443+
// the other direction (ii) implicitly defined unprimed accumulators or
444+
// (iii) other PHI nodes satisfying (i) and (ii), we can change
445+
// the PHI to a PHI on primed accumulators (as long as we also change
446+
// its operands). To detect and change such copies, we first get a list
447+
// of all the PHI nodes starting from the root PHI node in BFS order.
448+
// We then visit all these PHI nodes to check if they can be changed to
449+
// primed accumulator PHI nodes and if so, we change them.
450+
MachineInstr *RootPHI = MRI->getVRegDef(Src);
451+
if (RootPHI->getOpcode() != PPC::PHI)
452+
break;
453+
454+
SmallVector<MachineInstr *, 4> PHIs;
455+
if (!collectUnprimedAccPHIs(MRI, RootPHI, PHIs))
456+
break;
457+
458+
convertUnprimedAccPHIs(TII, MRI, PHIs, Dst);
459+
460+
ToErase = &MI;
461+
break;
462+
}
324463
case PPC::LI:
325464
case PPC::LI8: {
326465
// If we are materializing a zero, look for any use operands for which

0 commit comments

Comments
 (0)