@@ -27,6 +27,7 @@
 #include "X86Subtarget.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -102,7 +103,7 @@ class X86FlagsCopyLoweringPass : public MachineFunctionPass {
   MachineDominatorTree *MDT;
 
   CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
-                                  MachineInstr &CopyDefI);
+                                  MachineBasicBlock::iterator CopyDefI);
 
   unsigned promoteCondToReg(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator TestPos,
@@ -356,9 +357,14 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
     // Nothing to do for a degenerate empty function...
     return false;
 
+  // Collect the copies in RPO so that when there are chains where a copy is in
+  // turn copied again we visit the first one first. This ensures we can find
+  // viable locations for testing the original EFLAGS that dominate all the
+  // uses across complex CFGs.
   SmallVector<MachineInstr *, 4> Copies;
-  for (MachineBasicBlock &MBB : MF)
-    for (MachineInstr &MI : MBB)
+  ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
+  for (MachineBasicBlock *MBB : RPOT)
+    for (MachineInstr &MI : *MBB)
       if (MI.getOpcode() == TargetOpcode::COPY &&
           MI.getOperand(0).getReg() == X86::EFLAGS)
         Copies.push_back(&MI);
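
The RPO collection above matters for copy chains: in reverse post-order every block is visited before its successors (back edges aside), so a def of EFLAGS is seen before any copy of it that is itself copied again. A minimal standalone sketch of the idea — not LLVM's ReversePostOrderTraversal, just the same traversal over a hypothetical four-block diamond CFG:

// Standalone sketch: compute post-order by DFS, then reverse it to get RPO.
#include <algorithm>
#include <iostream>
#include <vector>

static void postOrder(int Node, const std::vector<std::vector<int>> &Succs,
                      std::vector<bool> &Visited, std::vector<int> &Order) {
  Visited[Node] = true;
  for (int Succ : Succs[Node])
    if (!Visited[Succ])
      postOrder(Succ, Succs, Visited, Order);
  Order.push_back(Node); // Record after all successors: post-order.
}

int main() {
  // A diamond CFG: 0 -> {1, 2}, 1 -> 3, 2 -> 3.
  std::vector<std::vector<int>> Succs = {{1, 2}, {3}, {3}, {}};
  std::vector<bool> Visited(Succs.size(), false);
  std::vector<int> Order;
  postOrder(0, Succs, Visited, Order);
  std::reverse(Order.begin(), Order.end()); // Reversing post-order gives RPO.
  // In RPO each block prints before its successors (ignoring back edges), so
  // a def of EFLAGS is visited before any copy chained from it.
  for (int Node : Order)
    std::cout << Node << ' ';
  std::cout << '\n'; // Prints: 0 2 1 3 — the entry first, the join last.
}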
@@ -407,12 +413,99 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
     if (DOp.isDead())
       continue;
 
-    MachineBasicBlock &TestMBB = *CopyDefI.getParent();
+    MachineBasicBlock *TestMBB = CopyDefI.getParent();
     auto TestPos = CopyDefI.getIterator();
     DebugLoc TestLoc = CopyDefI.getDebugLoc();
 
     LLVM_DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump());
 
+    // Walk up across live-in EFLAGS to find where they were actually def'ed.
+    //
+    // This copy's def may just be part of a region of blocks covered by
+    // a single def of EFLAGS and we want to find the top of that region where
+    // possible.
+    //
+    // This is essentially a search for a *candidate* reaching definition
+    // location. We don't need to ever find the actual reaching definition here,
+    // but we want to walk up the dominator tree to find the highest point which
+    // would be viable for such a definition.
+    auto HasEFLAGSClobber = [&](MachineBasicBlock::iterator Begin,
+                                MachineBasicBlock::iterator End) {
+      // Scan backwards as we expect these to be relatively short and often find
+      // a clobber near the end.
+      return llvm::any_of(
+          llvm::reverse(llvm::make_range(Begin, End)), [&](MachineInstr &MI) {
+            // Flag any instruction (other than the copy we are
+            // currently rewriting) that defs EFLAGS.
+            return &MI != CopyI && MI.findRegisterDefOperand(X86::EFLAGS);
+          });
+    };
+    auto HasEFLAGSClobberPath = [&](MachineBasicBlock *BeginMBB,
+                                    MachineBasicBlock *EndMBB) {
+      assert(MDT->dominates(BeginMBB, EndMBB) &&
+             "Only support paths down the dominator tree!");
+      SmallPtrSet<MachineBasicBlock *, 4> Visited;
+      SmallVector<MachineBasicBlock *, 4> Worklist;
+      // We terminate at the beginning. No need to scan it.
+      Visited.insert(BeginMBB);
+      Worklist.push_back(EndMBB);
+      do {
+        auto *MBB = Worklist.pop_back_val();
+        for (auto *PredMBB : MBB->predecessors()) {
+          if (!Visited.insert(PredMBB).second)
+            continue;
+          if (HasEFLAGSClobber(PredMBB->begin(), PredMBB->end()))
+            return true;
+          // Enqueue this block to walk its predecessors.
+          Worklist.push_back(PredMBB);
+        }
+      } while (!Worklist.empty());
+      // No clobber found along a path from the begin to end.
+      return false;
+    };
+    while (TestMBB->isLiveIn(X86::EFLAGS) && !TestMBB->pred_empty() &&
+           !HasEFLAGSClobber(TestMBB->begin(), TestPos)) {
+      // Find the nearest common dominator of the predecessors, as
+      // that will be the best candidate to hoist into.
+      MachineBasicBlock *HoistMBB =
+          std::accumulate(std::next(TestMBB->pred_begin()), TestMBB->pred_end(),
+                          *TestMBB->pred_begin(),
+                          [&](MachineBasicBlock *LHS, MachineBasicBlock *RHS) {
+                            return MDT->findNearestCommonDominator(LHS, RHS);
+                          });
+
+      // Now we need to scan all predecessors that may be reached along paths to
+      // the hoist block. A clobber anywhere in any of these blocks prevents the
+      // hoist. Note that this even handles loops because we require *no*
+      // clobbers.
+      if (HasEFLAGSClobberPath(HoistMBB, TestMBB))
+        break;
+
+      // We also need the terminators to not sneakily clobber flags.
+      if (HasEFLAGSClobber(HoistMBB->getFirstTerminator()->getIterator(),
+                           HoistMBB->instr_end()))
+        break;
+
+      // We found a viable location, hoist our test position to it.
+      TestMBB = HoistMBB;
+      TestPos = TestMBB->getFirstTerminator()->getIterator();
+      // Clear the debug location as it would just be confusing after hoisting.
+      TestLoc = DebugLoc();
+    }
+    LLVM_DEBUG({
+      auto DefIt = llvm::find_if(
+          llvm::reverse(llvm::make_range(TestMBB->instr_begin(), TestPos)),
+          [&](MachineInstr &MI) {
+            return MI.findRegisterDefOperand(X86::EFLAGS);
+          });
+      if (DefIt.base() != TestMBB->instr_begin()) {
+        dbgs() << "  Using EFLAGS defined by: ";
+        DefIt->dump();
+      } else {
+        dbgs() << "  Using live-in flags for BB:\n";
+        TestMBB->dump();
+      }
+    });
+
     // While rewriting uses, we buffer jumps and rewrite them in a second pass
     // because doing so will perturb the CFG that we are walking to find the
     // uses in the first place.
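
The hoisting loop above folds every predecessor through MDT->findNearestCommonDominator via std::accumulate. The same fold shape, sketched standalone with std::gcd standing in as a hypothetical "meet" operator over a hypothetical list:

// Standalone sketch of the predecessor fold: seed with the first element,
// then meet each remaining element into the running result.
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  std::vector<int> Preds = {12, 18, 30};
  int Meet = std::accumulate(std::next(Preds.begin()), Preds.end(),
                             *Preds.begin(),
                             [](int L, int R) { return std::gcd(L, R); });
  std::cout << Meet << '\n'; // 6: the common "ancestor" of all entries.
}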
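HasEFLAGSClobberPath walks predecessor edges backwards from the end block until every path reaches the pre-visited begin block, failing on any intervening clobber. A standalone sketch under the same worklist scheme, with plain integers and a hypothetical Clobbers set standing in for basic blocks:

// Standalone sketch: is there a clobbering block on some path Begin -> End?
#include <iostream>
#include <set>
#include <vector>

static bool hasClobberPath(int Begin, int End,
                           const std::vector<std::vector<int>> &Preds,
                           const std::set<int> &Clobbers) {
  std::set<int> Visited{Begin}; // Terminate at Begin; never scan it.
  std::vector<int> Worklist{End};
  do {
    int Block = Worklist.back();
    Worklist.pop_back();
    for (int Pred : Preds[Block]) {
      if (!Visited.insert(Pred).second)
        continue; // Already seen; this also keeps loops from diverging.
      if (Clobbers.count(Pred))
        return true; // A clobber on some path from Begin to End.
      Worklist.push_back(Pred);
    }
  } while (!Worklist.empty());
  return false;
}

int main() {
  // Predecessor lists for a diamond: block 3's preds are {1, 2}, theirs {0}.
  std::vector<std::vector<int>> Preds = {{}, {0}, {0}, {1, 2}};
  std::cout << hasClobberPath(0, 3, Preds, /*Clobbers=*/{2}) << '\n'; // 1
  std::cout << hasClobberPath(0, 3, Preds, /*Clobbers=*/{}) << '\n';  // 0
}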
@@ -423,52 +516,47 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
     // very few of them and we expect to not revisit the same copy definition
     // many times. If either of those change sufficiently we could build a map
     // of these up front instead.
-    CondRegArray CondRegs = collectCondsInRegs(TestMBB, CopyDefI);
+    CondRegArray CondRegs = collectCondsInRegs(*TestMBB, TestPos);
 
     // Collect the basic blocks we need to scan. Typically this will just be
     // a single basic block but we may have to scan multiple blocks if the
     // EFLAGS copy lives into successors.
     SmallVector<MachineBasicBlock *, 2> Blocks;
     SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks;
     Blocks.push_back(&MBB);
-    VisitedBlocks.insert(&MBB);
 
     do {
       MachineBasicBlock &UseMBB = *Blocks.pop_back_val();
 
       // Track if/when we find a kill of the flags in this block.
       bool FlagsKilled = false;
 
-      // We currently don't do any PHI insertion and so we require that the
-      // test basic block dominates all of the use basic blocks.
-      //
-      // We could in theory do PHI insertion here if it becomes useful by just
-      // taking undef values in along every edge that we don't trace this
-      // EFLAGS copy along. This isn't as bad as fully general PHI insertion,
-      // but still seems like a great deal of complexity.
-      //
-      // Because it is theoretically possible that some earlier MI pass or
-      // other lowering transformation could induce this to happen, we do
-      // a hard check even in non-debug builds here.
-      if (&TestMBB != &UseMBB && !MDT->dominates(&TestMBB, &UseMBB)) {
-        LLVM_DEBUG({
-          dbgs() << "ERROR: Encountered use that is not dominated by our test "
-                    "basic block! Rewriting this would require inserting PHI "
-                    "nodes to track the flag state across the CFG.\n\nTest "
-                    "block:\n";
-          TestMBB.dump();
-          dbgs() << "Use block:\n";
-          UseMBB.dump();
-        });
-        report_fatal_error("Cannot lower EFLAGS copy when original copy def "
-                           "does not dominate all uses.");
-      }
-
-      for (auto MII = &UseMBB == &MBB ? std::next(CopyI->getIterator())
-                                      : UseMBB.instr_begin(),
+      // In most cases, we walk from the beginning to the end of the block. But
+      // when the block is the same block as the copy is from, we will visit it
+      // twice. The first time we start from the copy and go to the end. The
+      // second time we start from the beginning and go to the copy. This lets
+      // us handle copies inside of cycles.
+      // FIXME: This loop is *super* confusing. This is at least in part
+      // a symptom of all of this routine needing to be refactored into
+      // documentable components. Once done, there may be a better way to write
+      // this loop.
+      for (auto MII = (&UseMBB == &MBB && !VisitedBlocks.count(&UseMBB))
+                          ? std::next(CopyI->getIterator())
+                          : UseMBB.instr_begin(),
                 MIE = UseMBB.instr_end();
            MII != MIE;) {
         MachineInstr &MI = *MII++;
+        // If we are in the original copy block and encounter either the copy
+        // def or the copy itself, break so that we don't re-process any part of
+        // the block or process the instructions in the range that was copied
+        // over.
+        if (&MI == CopyI || &MI == &CopyDefI) {
+          assert(&UseMBB == &MBB && VisitedBlocks.count(&MBB) &&
+                 "Should only encounter these on the second pass over the "
+                 "original block.");
+          break;
+        }
+
         MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS);
         if (!FlagUse) {
           if (MI.findRegisterDefOperand(X86::EFLAGS)) {
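
The two-start iteration above visits the copy's own block twice: first from just past the copy to the end, then, if the block is re-reached through a cycle, from the top down to the copy, so every instruction is scanned exactly once. A standalone sketch of that split walk over a hypothetical vector of "instructions":

// Standalone sketch of the two-pass walk over the copy's own block.
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  std::vector<int> Block = {10, 11, 12, 13, 14}; // "Instructions."
  std::size_t CopyIdx = 2;                       // Position of the "copy."

  for (int Pass = 0; Pass < 2; ++Pass) {
    bool FirstVisit = (Pass == 0);
    // First visit: start after the copy. Second visit: start at the top.
    std::size_t I = FirstVisit ? CopyIdx + 1 : 0;
    for (; I != Block.size(); ++I) {
      if (!FirstVisit && I == CopyIdx)
        break; // Second pass stops at the copy: every element seen once.
      std::cout << Block[I] << ' ';
    }
  }
  std::cout << '\n'; // Prints: 13 14 10 11 — each instruction exactly once.
}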
@@ -512,10 +600,10 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
 
         // Otherwise we can just rewrite in-place.
         if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) {
-          rewriteCMov(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
+          rewriteCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
         } else if (X86::getCondFromSETOpc(MI.getOpcode()) !=
                    X86::COND_INVALID) {
-          rewriteSetCC(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
+          rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
         } else if (MI.getOpcode() == TargetOpcode::COPY) {
           rewriteCopy(MI, *FlagUse, CopyDefI);
         } else {
@@ -538,13 +626,13 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
           case X86::SETB_C64r:
             // Use custom lowering for arithmetic that is merely extending the
             // carry flag. We model this as the SETB_C* pseudo instructions.
-            rewriteSetCarryExtended(TestMBB, TestPos, TestLoc, MI, *FlagUse,
+            rewriteSetCarryExtended(*TestMBB, TestPos, TestLoc, MI, *FlagUse,
                                     CondRegs);
             break;
 
           default:
             // Generically handle remaining uses as arithmetic instructions.
-            rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse,
+            rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, *FlagUse,
                               CondRegs);
             break;
           }
@@ -564,8 +652,38 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
       // and queue those up for processing.
       for (MachineBasicBlock *SuccMBB : UseMBB.successors())
         if (SuccMBB->isLiveIn(X86::EFLAGS) &&
-            VisitedBlocks.insert(SuccMBB).second)
+            VisitedBlocks.insert(SuccMBB).second) {
+          // We currently don't do any PHI insertion and so we require that the
+          // test basic block dominates all of the use basic blocks. Further, we
+          // can't have a cycle from the test block back to itself as that would
+          // create a cycle requiring a PHI to break it.
+          //
+          // We could in theory do PHI insertion here if it becomes useful by
+          // just taking undef values in along every edge that we don't trace
+          // this EFLAGS copy along. This isn't as bad as fully general PHI
+          // insertion, but still seems like a great deal of complexity.
+          //
+          // Because it is theoretically possible that some earlier MI pass or
+          // other lowering transformation could induce this to happen, we do
+          // a hard check even in non-debug builds here.
+          if (SuccMBB == TestMBB || !MDT->dominates(TestMBB, SuccMBB)) {
+            LLVM_DEBUG({
+              dbgs()
+                  << "ERROR: Encountered use that is not dominated by our test "
+                     "basic block! Rewriting this would require inserting PHI "
+                     "nodes to track the flag state across the CFG.\n\nTest "
+                     "block:\n";
+              TestMBB->dump();
+              dbgs() << "Use block:\n";
+              SuccMBB->dump();
+            });
+            report_fatal_error(
+                "Cannot lower EFLAGS copy when original copy def "
+                "does not dominate all uses.");
+          }
+
           Blocks.push_back(SuccMBB);
+        }
     } while (!Blocks.empty());
 
     // Now rewrite the jumps that use the flags. These we handle specially
@@ -580,7 +698,7 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
       else
         LastJmpMBB = JmpI->getParent();
 
-      rewriteCondJmp(TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
+      rewriteCondJmp(*TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
     }
 
     // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if
// FIXME: Mark the last use of EFLAGS before the copy's def as a kill if
@@ -604,14 +722,13 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
604
722
605
723
// / Collect any conditions that have already been set in registers so that we
606
724
// / can re-use them rather than adding duplicates.
607
- CondRegArray
608
- X86FlagsCopyLoweringPass::collectCondsInRegs (MachineBasicBlock &MBB,
609
- MachineInstr &CopyDefI) {
725
+ CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs (
726
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator TestPos) {
610
727
CondRegArray CondRegs = {};
611
728
612
729
// Scan backwards across the range of instructions with live EFLAGS.
613
- for (MachineInstr &MI : llvm::reverse (
614
- llvm::make_range (MBB.instr_begin (), CopyDefI. getIterator () ))) {
730
+ for (MachineInstr &MI :
731
+ llvm::reverse ( llvm:: make_range (MBB.begin (), TestPos ))) {
615
732
X86::CondCode Cond = X86::getCondFromSETOpc (MI.getOpcode ());
616
733
if (Cond != X86::COND_INVALID && MI.getOperand (0 ).isReg () &&
617
734
TRI->isVirtualRegister (MI.getOperand (0 ).getReg ()))
0 commit comments