@@ -2523,9 +2523,104 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2523
2523
return scheduleFound && Schedule.getMaxStageCount () > 0 ;
2524
2524
}
2525
2525
2526
+ static Register findUniqueOperandDefinedInLoop (const MachineInstr &MI) {
2527
+ const MachineRegisterInfo &MRI = MI.getParent ()->getParent ()->getRegInfo ();
2528
+ Register Result;
2529
+ for (const MachineOperand &Use : MI.all_uses ()) {
2530
+ Register Reg = Use.getReg ();
2531
+ if (!Reg.isVirtual ())
2532
+ return Register ();
2533
+ if (MRI.getVRegDef (Reg)->getParent () != MI.getParent ())
2534
+ continue ;
2535
+ if (Result)
2536
+ return Register ();
2537
+ Result = Reg;
2538
+ }
2539
+ return Result;
2540
+ }
2541
+
2542
+ // / When Op is a value that is incremented recursively in a loop and there is a
2543
+ // / unique instruction that increments it, returns true and sets Value.
2544
+ static bool findLoopIncrementValue (const MachineOperand &Op, int &Value) {
2545
+ if (!Op.isReg () || !Op.getReg ().isVirtual ())
2546
+ return false ;
2547
+
2548
+ Register OrgReg = Op.getReg ();
2549
+ Register CurReg = OrgReg;
2550
+ const MachineBasicBlock *LoopBB = Op.getParent ()->getParent ();
2551
+ const MachineRegisterInfo &MRI = LoopBB->getParent ()->getRegInfo ();
2552
+
2553
+ const TargetInstrInfo *TII =
2554
+ LoopBB->getParent ()->getSubtarget ().getInstrInfo ();
2555
+ const TargetRegisterInfo *TRI =
2556
+ LoopBB->getParent ()->getSubtarget ().getRegisterInfo ();
2557
+
2558
+ MachineInstr *Phi = nullptr ;
2559
+ MachineInstr *Increment = nullptr ;
2560
+
2561
+ // Traverse definitions until it reaches Op or an instruction that does not
2562
+ // satisfy the condition.
2563
+ // Acceptable example:
2564
+ // bb.0:
2565
+ // %0 = PHI %3, %bb.0, ...
2566
+ // %2 = ADD %0, Value
2567
+ // ... = LOAD %2(Op)
2568
+ // %3 = COPY %2
2569
+ while (true ) {
2570
+ if (!CurReg.isValid () || !CurReg.isVirtual ())
2571
+ return false ;
2572
+ MachineInstr *Def = MRI.getVRegDef (CurReg);
2573
+ if (Def->getParent () != LoopBB)
2574
+ return false ;
2575
+
2576
+ if (Def->isCopy ()) {
2577
+ // Ignore copy instructions unless they contain subregisters
2578
+ if (Def->getOperand (0 ).getSubReg () || Def->getOperand (1 ).getSubReg ())
2579
+ return false ;
2580
+ CurReg = Def->getOperand (1 ).getReg ();
2581
+ } else if (Def->isPHI ()) {
2582
+ // There must be just one Phi
2583
+ if (Phi)
2584
+ return false ;
2585
+ Phi = Def;
2586
+ CurReg = getLoopPhiReg (*Def, LoopBB);
2587
+ } else if (TII->getIncrementValue (*Def, Value)) {
2588
+ // Potentially a unique increment
2589
+ if (Increment)
2590
+ // Multiple increments exist
2591
+ return false ;
2592
+
2593
+ const MachineOperand *BaseOp;
2594
+ int64_t Offset;
2595
+ bool OffsetIsScalable;
2596
+ if (TII->getMemOperandWithOffset (*Def, BaseOp, Offset, OffsetIsScalable,
2597
+ TRI)) {
2598
+ // Pre/post increment instruction
2599
+ CurReg = BaseOp->getReg ();
2600
+ } else {
2601
+ // If only one of the operands is defined within the loop, it is assumed
2602
+ // to be an incremented value.
2603
+ CurReg = findUniqueOperandDefinedInLoop (*Def);
2604
+ if (!CurReg.isValid ())
2605
+ return false ;
2606
+ }
2607
+ Increment = Def;
2608
+ } else {
2609
+ return false ;
2610
+ }
2611
+ if (CurReg == OrgReg)
2612
+ break ;
2613
+ }
2614
+
2615
+ if (!Phi || !Increment)
2616
+ return false ;
2617
+
2618
+ return true ;
2619
+ }
2620
+
2526
2621
// / Return true if we can compute the amount the instruction changes
2527
2622
// / during each iteration. Set Delta to the amount of the change.
2528
- bool SwingSchedulerDAG::computeDelta (MachineInstr &MI, unsigned &Delta) const {
2623
+ bool SwingSchedulerDAG::computeDelta (const MachineInstr &MI, int &Delta) const {
2529
2624
const TargetRegisterInfo *TRI = MF.getSubtarget ().getRegisterInfo ();
2530
2625
const MachineOperand *BaseOp;
2531
2626
int64_t Offset;
@@ -2540,24 +2635,7 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) const {
2540
2635
if (!BaseOp->isReg ())
2541
2636
return false ;
2542
2637
2543
- Register BaseReg = BaseOp->getReg ();
2544
-
2545
- MachineRegisterInfo &MRI = MF.getRegInfo ();
2546
- // Check if there is a Phi. If so, get the definition in the loop.
2547
- MachineInstr *BaseDef = MRI.getVRegDef (BaseReg);
2548
- if (BaseDef && BaseDef->isPHI ()) {
2549
- BaseReg = getLoopPhiReg (*BaseDef, MI.getParent ());
2550
- BaseDef = MRI.getVRegDef (BaseReg);
2551
- }
2552
- if (!BaseDef)
2553
- return false ;
2554
-
2555
- int D = 0 ;
2556
- if (!TII->getIncrementValue (*BaseDef, D) && D >= 0 )
2557
- return false ;
2558
-
2559
- Delta = D;
2560
- return true ;
2638
+ return findLoopIncrementValue (*BaseOp, Delta);
2561
2639
}
2562
2640
2563
2641
// / Check if we can change the instruction to use an offset value from the
@@ -2675,6 +2753,96 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
2675
2753
return Def;
2676
2754
}
2677
2755
2756
+ // / Return false if there is no overlap between the region accessed by BaseMI in
2757
+ // / an iteration and the region accessed by OtherMI in subsequent iterations.
2758
+ bool SwingSchedulerDAG::mayOverlapInLaterIter (
2759
+ const MachineInstr *BaseMI, const MachineInstr *OtherMI) const {
2760
+ int DeltaB, DeltaO, Delta;
2761
+ if (!computeDelta (*BaseMI, DeltaB) || !computeDelta (*OtherMI, DeltaO) ||
2762
+ DeltaB != DeltaO)
2763
+ return true ;
2764
+ Delta = DeltaB;
2765
+
2766
+ const MachineOperand *BaseOpB, *BaseOpO;
2767
+ int64_t OffsetB, OffsetO;
2768
+ bool OffsetBIsScalable, OffsetOIsScalable;
2769
+ const TargetRegisterInfo *TRI = MF.getSubtarget ().getRegisterInfo ();
2770
+ if (!TII->getMemOperandWithOffset (*BaseMI, BaseOpB, OffsetB,
2771
+ OffsetBIsScalable, TRI) ||
2772
+ !TII->getMemOperandWithOffset (*OtherMI, BaseOpO, OffsetO,
2773
+ OffsetOIsScalable, TRI))
2774
+ return true ;
2775
+
2776
+ if (OffsetBIsScalable || OffsetOIsScalable)
2777
+ return true ;
2778
+
2779
+ if (!BaseOpB->isIdenticalTo (*BaseOpO)) {
2780
+ // Pass cases with different base operands but same initial values.
2781
+ // Typically for when pre/post increment is used.
2782
+
2783
+ if (!BaseOpB->isReg () || !BaseOpO->isReg ())
2784
+ return true ;
2785
+ Register RegB = BaseOpB->getReg (), RegO = BaseOpO->getReg ();
2786
+ if (!RegB.isVirtual () || !RegO.isVirtual ())
2787
+ return true ;
2788
+
2789
+ MachineInstr *DefB = MRI.getVRegDef (BaseOpB->getReg ());
2790
+ MachineInstr *DefO = MRI.getVRegDef (BaseOpO->getReg ());
2791
+ if (!DefB || !DefO || !DefB->isPHI () || !DefO->isPHI ())
2792
+ return true ;
2793
+
2794
+ unsigned InitValB = 0 ;
2795
+ unsigned LoopValB = 0 ;
2796
+ unsigned InitValO = 0 ;
2797
+ unsigned LoopValO = 0 ;
2798
+ getPhiRegs (*DefB, BB, InitValB, LoopValB);
2799
+ getPhiRegs (*DefO, BB, InitValO, LoopValO);
2800
+ MachineInstr *InitDefB = MRI.getVRegDef (InitValB);
2801
+ MachineInstr *InitDefO = MRI.getVRegDef (InitValO);
2802
+
2803
+ if (!InitDefB->isIdenticalTo (*InitDefO))
2804
+ return true ;
2805
+ }
2806
+
2807
+ LocationSize AccessSizeB = (*BaseMI->memoperands_begin ())->getSize ();
2808
+ LocationSize AccessSizeO = (*OtherMI->memoperands_begin ())->getSize ();
2809
+
2810
+ // This is the main test, which checks the offset values and the loop
2811
+ // increment value to determine if the accesses may be loop carried.
2812
+ if (!AccessSizeB.hasValue () || !AccessSizeO.hasValue ())
2813
+ return true ;
2814
+
2815
+ LLVM_DEBUG ({
2816
+ dbgs () << " Overlap check:\n " ;
2817
+ dbgs () << " BaseMI: " ;
2818
+ BaseMI->dump ();
2819
+ dbgs () << " Base + " << OffsetB << " + I * " << Delta
2820
+ << " , Len: " << AccessSizeB.getValue () << " \n " ;
2821
+ dbgs () << " OtherMI: " ;
2822
+ OtherMI->dump ();
2823
+ dbgs () << " Base + " << OffsetO << " + I * " << Delta
2824
+ << " , Len: " << AccessSizeO.getValue () << " \n " ;
2825
+ });
2826
+
2827
+ if (Delta < 0 ) {
2828
+ int64_t BaseMinAddr = OffsetB;
2829
+ int64_t OhterNextIterMaxAddr = OffsetO + Delta + AccessSizeO.getValue () - 1 ;
2830
+ if (BaseMinAddr > OhterNextIterMaxAddr) {
2831
+ LLVM_DEBUG (dbgs () << " Result: No overlap\n " );
2832
+ return false ;
2833
+ }
2834
+ } else {
2835
+ int64_t BaseMaxAddr = OffsetB + AccessSizeB.getValue () - 1 ;
2836
+ int64_t OtherNextIterMinAddr = OffsetO + Delta;
2837
+ if (BaseMaxAddr < OtherNextIterMinAddr) {
2838
+ LLVM_DEBUG (dbgs () << " Result: No overlap\n " );
2839
+ return false ;
2840
+ }
2841
+ }
2842
+ LLVM_DEBUG (dbgs () << " Result: Overlap\n " );
2843
+ return true ;
2844
+ }
2845
+
2678
2846
// / Return true for an order or output dependence that is loop carried
2679
2847
// / potentially. A dependence is loop carried if the destination defines a value
2680
2848
// / that may be used or defined by the source in a subsequent iteration.
@@ -2706,61 +2874,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep(
2706
2874
// The conservative assumption is that a dependence between memory operations
2707
2875
// may be loop carried. The following code checks when it can be proved that
2708
2876
// there is no loop carried dependence.
2709
- unsigned DeltaS, DeltaD;
2710
- if (!computeDelta (*SI, DeltaS) || !computeDelta (*DI, DeltaD))
2711
- return true ;
2712
-
2713
- const MachineOperand *BaseOpS, *BaseOpD;
2714
- int64_t OffsetS, OffsetD;
2715
- bool OffsetSIsScalable, OffsetDIsScalable;
2716
- const TargetRegisterInfo *TRI = MF.getSubtarget ().getRegisterInfo ();
2717
- if (!TII->getMemOperandWithOffset (*SI, BaseOpS, OffsetS, OffsetSIsScalable,
2718
- TRI) ||
2719
- !TII->getMemOperandWithOffset (*DI, BaseOpD, OffsetD, OffsetDIsScalable,
2720
- TRI))
2721
- return true ;
2722
-
2723
- assert (!OffsetSIsScalable && !OffsetDIsScalable &&
2724
- " Expected offsets to be byte offsets" );
2725
-
2726
- MachineInstr *DefS = MRI.getVRegDef (BaseOpS->getReg ());
2727
- MachineInstr *DefD = MRI.getVRegDef (BaseOpD->getReg ());
2728
- if (!DefS || !DefD || !DefS->isPHI () || !DefD->isPHI ())
2729
- return true ;
2730
-
2731
- unsigned InitValS = 0 ;
2732
- unsigned LoopValS = 0 ;
2733
- unsigned InitValD = 0 ;
2734
- unsigned LoopValD = 0 ;
2735
- getPhiRegs (*DefS, BB, InitValS, LoopValS);
2736
- getPhiRegs (*DefD, BB, InitValD, LoopValD);
2737
- MachineInstr *InitDefS = MRI.getVRegDef (InitValS);
2738
- MachineInstr *InitDefD = MRI.getVRegDef (InitValD);
2739
-
2740
- if (!InitDefS->isIdenticalTo (*InitDefD))
2741
- return true ;
2742
-
2743
- // Check that the base register is incremented by a constant value for each
2744
- // iteration.
2745
- MachineInstr *LoopDefS = MRI.getVRegDef (LoopValS);
2746
- int D = 0 ;
2747
- if (!LoopDefS || !TII->getIncrementValue (*LoopDefS, D))
2748
- return true ;
2749
-
2750
- LocationSize AccessSizeS = (*SI->memoperands_begin ())->getSize ();
2751
- LocationSize AccessSizeD = (*DI->memoperands_begin ())->getSize ();
2752
-
2753
- // This is the main test, which checks the offset values and the loop
2754
- // increment value to determine if the accesses may be loop carried.
2755
- if (!AccessSizeS.hasValue () || !AccessSizeD.hasValue ())
2756
- return true ;
2757
-
2758
- if (DeltaS != DeltaD || DeltaS < AccessSizeS.getValue () ||
2759
- DeltaD < AccessSizeD.getValue ())
2760
- return true ;
2761
-
2762
- return (OffsetS + (int64_t )AccessSizeS.getValue () <
2763
- OffsetD + (int64_t )AccessSizeD.getValue ());
2877
+ return mayOverlapInLaterIter (DI, SI);
2764
2878
}
2765
2879
2766
2880
void SwingSchedulerDAG::postProcessDAG () {
0 commit comments