@@ -267,6 +267,113 @@ void PPCMIPeephole::UpdateTOCSaves(
267
267
TOCSaves[MI] = Keep;
268
268
}
269
269
270
+ // This function returns a list of all PHI nodes in the tree starting from
271
+ // the RootPHI node. We perform a BFS traversal to get an ordered list of nodes.
272
+ // The list initially only contains the root PHI. When we visit a PHI node, we
273
+ // add it to the list. We continue to look for other PHI node operands while
274
+ // there are nodes to visit in the list. The function returns false if the
275
+ // optimization cannot be applied on this tree.
276
+ static bool collectUnprimedAccPHIs (MachineRegisterInfo *MRI,
277
+ MachineInstr *RootPHI,
278
+ SmallVectorImpl<MachineInstr *> &PHIs) {
279
+ PHIs.push_back (RootPHI);
280
+ unsigned VisitedIndex = 0 ;
281
+ while (VisitedIndex < PHIs.size ()) {
282
+ MachineInstr *VisitedPHI = PHIs[VisitedIndex];
283
+ for (unsigned PHIOp = 1 , NumOps = VisitedPHI->getNumOperands ();
284
+ PHIOp != NumOps; PHIOp += 2 ) {
285
+ Register RegOp = VisitedPHI->getOperand (PHIOp).getReg ();
286
+ if (!Register::isVirtualRegister (RegOp))
287
+ return false ;
288
+ MachineInstr *Instr = MRI->getVRegDef (RegOp);
289
+ // While collecting the PHI nodes, we check if they can be converted (i.e.
290
+ // all the operands are either copies, implicit defs or PHI nodes).
291
+ unsigned Opcode = Instr->getOpcode ();
292
+ if (Opcode == PPC::COPY) {
293
+ Register Reg = Instr->getOperand (1 ).getReg ();
294
+ if (!Register::isVirtualRegister (Reg) ||
295
+ MRI->getRegClass (Reg) != &PPC::ACCRCRegClass)
296
+ return false ;
297
+ } else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI)
298
+ return false ;
299
+ // If we detect a cycle in the PHI nodes, we exit. It would be
300
+ // possible to change cycles as well, but that would add a lot
301
+ // of complexity for a case that is unlikely to occur with MMA
302
+ // code.
303
+ if (Opcode != PPC::PHI)
304
+ continue ;
305
+ if (std::find (PHIs.begin (), PHIs.end (), Instr) != PHIs.end ())
306
+ return false ;
307
+ PHIs.push_back (Instr);
308
+ }
309
+ VisitedIndex++;
310
+ }
311
+ return true ;
312
+ }
313
+
314
+ // This function changes the unprimed accumulator PHI nodes in the PHIs list to
315
+ // primed accumulator PHI nodes. The list is traversed in reverse order to
316
+ // change all the PHI operands of a PHI node before changing the node itself.
317
+ // We keep a map to associate each changed PHI node to its non-changed form.
318
+ static void convertUnprimedAccPHIs (const PPCInstrInfo *TII,
319
+ MachineRegisterInfo *MRI,
320
+ SmallVectorImpl<MachineInstr *> &PHIs,
321
+ Register Dst) {
322
+ DenseMap<MachineInstr *, MachineInstr *> ChangedPHIMap;
323
+ for (auto It = PHIs.rbegin (), End = PHIs.rend (); It != End; ++It) {
324
+ MachineInstr *PHI = *It;
325
+ SmallVector<std::pair<MachineOperand, MachineOperand>, 4 > PHIOps;
326
+ // We check if the current PHI node can be changed by looking at its
327
+ // operands. If all the operands are either copies from primed
328
+ // accumulators, implicit definitions or other unprimed accumulator
329
+ // PHI nodes, we change it.
330
+ for (unsigned PHIOp = 1 , NumOps = PHI->getNumOperands (); PHIOp != NumOps;
331
+ PHIOp += 2 ) {
332
+ Register RegOp = PHI->getOperand (PHIOp).getReg ();
333
+ MachineInstr *PHIInput = MRI->getVRegDef (RegOp);
334
+ unsigned Opcode = PHIInput->getOpcode ();
335
+ assert ((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF ||
336
+ Opcode == PPC::PHI) &&
337
+ " Unexpected instruction" );
338
+ if (Opcode == PPC::COPY) {
339
+ assert (MRI->getRegClass (PHIInput->getOperand (1 ).getReg ()) ==
340
+ &PPC::ACCRCRegClass &&
341
+ " Unexpected register class" );
342
+ PHIOps.push_back ({PHIInput->getOperand (1 ), PHI->getOperand (PHIOp + 1 )});
343
+ } else if (Opcode == PPC::IMPLICIT_DEF) {
344
+ Register AccReg = MRI->createVirtualRegister (&PPC::ACCRCRegClass);
345
+ BuildMI (*PHIInput->getParent (), PHIInput, PHIInput->getDebugLoc (),
346
+ TII->get (PPC::IMPLICIT_DEF), AccReg);
347
+ PHIOps.push_back ({MachineOperand::CreateReg (AccReg, false ),
348
+ PHI->getOperand (PHIOp + 1 )});
349
+ } else if (Opcode == PPC::PHI) {
350
+ // We found a PHI operand. At this point we know this operand
351
+ // has already been changed so we get its associated changed form
352
+ // from the map.
353
+ assert (ChangedPHIMap.count (PHIInput) == 1 &&
354
+ " This PHI node should have already been changed." );
355
+ MachineInstr *PrimedAccPHI = ChangedPHIMap.lookup (PHIInput);
356
+ PHIOps.push_back ({MachineOperand::CreateReg (
357
+ PrimedAccPHI->getOperand (0 ).getReg (), false ),
358
+ PHI->getOperand (PHIOp + 1 )});
359
+ }
360
+ }
361
+ Register AccReg = Dst;
362
+ // If the PHI node we are changing is the root node, the register it defines
363
+ // will be the destination register of the original copy (of the PHI def).
364
+ // For all other PHI's in the list, we need to create another primed
365
+ // accumulator virtual register as the PHI will no longer define the
366
+ // unprimed accumulator.
367
+ if (PHI != PHIs[0 ])
368
+ AccReg = MRI->createVirtualRegister (&PPC::ACCRCRegClass);
369
+ MachineInstrBuilder NewPHI = BuildMI (
370
+ *PHI->getParent (), PHI, PHI->getDebugLoc (), TII->get (PPC::PHI), AccReg);
371
+ for (auto RegMBB : PHIOps)
372
+ NewPHI.add (RegMBB.first ).add (RegMBB.second );
373
+ ChangedPHIMap[PHI] = NewPHI.getInstr ();
374
+ }
375
+ }
376
+
270
377
// Perform peephole optimizations.
271
378
bool PPCMIPeephole::simplifyCode (void ) {
272
379
bool Simplified = false ;
@@ -321,6 +428,38 @@ bool PPCMIPeephole::simplifyCode(void) {
321
428
322
429
default :
323
430
break ;
431
+ case PPC::COPY: {
432
+ Register Src = MI.getOperand (1 ).getReg ();
433
+ Register Dst = MI.getOperand (0 ).getReg ();
434
+ if (!Register::isVirtualRegister (Src) ||
435
+ !Register::isVirtualRegister (Dst))
436
+ break ;
437
+ if (MRI->getRegClass (Src) != &PPC::UACCRCRegClass ||
438
+ MRI->getRegClass (Dst) != &PPC::ACCRCRegClass)
439
+ break ;
440
+
441
+ // We are copying an unprimed accumulator to a primed accumulator.
442
+ // If the input to the copy is a PHI that is fed only by (i) copies in
443
+ // the other direction (ii) implicitly defined unprimed accumulators or
444
+ // (iii) other PHI nodes satisfying (i) and (ii), we can change
445
+ // the PHI to a PHI on primed accumulators (as long as we also change
446
+ // its operands). To detect and change such copies, we first get a list
447
+ // of all the PHI nodes starting from the root PHI node in BFS order.
448
+ // We then visit all these PHI nodes to check if they can be changed to
449
+ // primed accumulator PHI nodes and if so, we change them.
450
+ MachineInstr *RootPHI = MRI->getVRegDef (Src);
451
+ if (RootPHI->getOpcode () != PPC::PHI)
452
+ break ;
453
+
454
+ SmallVector<MachineInstr *, 4 > PHIs;
455
+ if (!collectUnprimedAccPHIs (MRI, RootPHI, PHIs))
456
+ break ;
457
+
458
+ convertUnprimedAccPHIs (TII, MRI, PHIs, Dst);
459
+
460
+ ToErase = &MI;
461
+ break ;
462
+ }
324
463
case PPC::LI:
325
464
case PPC::LI8: {
326
465
// If we are materializing a zero, look for any use operands for which
0 commit comments