@@ -255,13 +255,15 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
255
255
<< " Failed to pipeline loop" ;
256
256
});
257
257
258
+ LI.LoopPipelinerInfo .reset ();
258
259
return Changed;
259
260
}
260
261
261
262
++NumTrytoPipeline;
262
263
263
264
Changed = swingModuloScheduler (L);
264
265
266
+ LI.LoopPipelinerInfo .reset ();
265
267
return Changed;
266
268
}
267
269
@@ -354,7 +356,8 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
354
356
355
357
LI.LoopInductionVar = nullptr ;
356
358
LI.LoopCompare = nullptr ;
357
- if (!TII->analyzeLoopForPipelining (L.getTopBlock ())) {
359
+ LI.LoopPipelinerInfo = TII->analyzeLoopForPipelining (L.getTopBlock ());
360
+ if (!LI.LoopPipelinerInfo ) {
358
361
LLVM_DEBUG (dbgs () << " Unable to analyzeLoop, can NOT pipeline Loop\n " );
359
362
NumFailLoop++;
360
363
ORE->emit ([&]() {
@@ -419,7 +422,7 @@ bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
419
422
assert (L.getBlocks ().size () == 1 && " SMS works on single blocks only." );
420
423
421
424
SwingSchedulerDAG SMS (*this , L, getAnalysis<LiveIntervals>(), RegClassInfo,
422
- II_setByPragma);
425
+ II_setByPragma, LI. LoopPipelinerInfo . get () );
423
426
424
427
MachineBasicBlock *MBB = L.getHeader ();
425
428
// The kernel should not include any terminator instructions. These
@@ -1422,7 +1425,7 @@ void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
1422
1425
// / We ignore the back-edge recurrence in order to avoid unbounded recursion
1423
1426
// / in the calculation of the ASAP, ALAP, etc functions.
1424
1427
static bool ignoreDependence (const SDep &D, bool isPred) {
1425
- if (D.isArtificial ())
1428
+ if (D.isArtificial () || D. getSUnit ()-> isBoundaryNode () )
1426
1429
return true ;
1427
1430
return D.getKind () == SDep::Anti && isPred;
1428
1431
}
@@ -1471,6 +1474,8 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1471
1474
SUnit *SU = &SUnits[I];
1472
1475
for (const SDep &S : SU->Succs ) {
1473
1476
SUnit *succ = S.getSUnit ();
1477
+ if (succ->isBoundaryNode ())
1478
+ continue ;
1474
1479
if (S.getLatency () == 0 )
1475
1480
zeroLatencyHeight =
1476
1481
std::max (zeroLatencyHeight, getZeroLatencyHeight (succ) + 1 );
@@ -1788,7 +1793,8 @@ void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
1788
1793
NodesAdded.insert (SU);
1789
1794
for (auto &SI : SU->Succs ) {
1790
1795
SUnit *Successor = SI.getSUnit ();
1791
- if (!SI.isArtificial () && NodesAdded.count (Successor) == 0 )
1796
+ if (!SI.isArtificial () && !Successor->isBoundaryNode () &&
1797
+ NodesAdded.count (Successor) == 0 )
1792
1798
addConnectedNodes (Successor, NewSet, NodesAdded);
1793
1799
}
1794
1800
for (auto &PI : SU->Preds ) {
@@ -2080,6 +2086,11 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2080
2086
});
2081
2087
} while (++NI != NE && scheduleFound);
2082
2088
2089
+ // If a schedule is found, ensure non-pipelined instructions are in stage 0
2090
+ if (scheduleFound)
2091
+ scheduleFound =
2092
+ Schedule.normalizeNonPipelinedInstructions (this , LoopPipelinerInfo);
2093
+
2083
2094
// If a schedule is found, check if it is a valid schedule too.
2084
2095
if (scheduleFound)
2085
2096
scheduleFound = Schedule.isValidSchedule (this );
@@ -2263,7 +2274,7 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
2263
2274
bool SwingSchedulerDAG::isLoopCarriedDep (SUnit *Source, const SDep &Dep,
2264
2275
bool isSucc) {
2265
2276
if ((Dep.getKind () != SDep::Order && Dep.getKind () != SDep::Output) ||
2266
- Dep.isArtificial ())
2277
+ Dep.isArtificial () || Dep. getSUnit ()-> isBoundaryNode () )
2267
2278
return false ;
2268
2279
2269
2280
if (!SwpPruneLoopCarried)
@@ -2430,7 +2441,7 @@ int SMSchedule::latestCycleInChain(const SDep &Dep) {
2430
2441
while (!Worklist.empty ()) {
2431
2442
const SDep &Cur = Worklist.pop_back_val ();
2432
2443
SUnit *SuccSU = Cur.getSUnit ();
2433
- if (Visited.count (SuccSU))
2444
+ if (Visited.count (SuccSU) || SuccSU-> isBoundaryNode () )
2434
2445
continue ;
2435
2446
std::map<SUnit *, int >::const_iterator it = InstrToCycle.find (SuccSU);
2436
2447
if (it == InstrToCycle.end ())
@@ -2697,21 +2708,91 @@ bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
2697
2708
return false ;
2698
2709
}
2699
2710
2711
+ // / Determine transitive dependences of unpipelineable instructions
2712
+ SmallSet<SUnit *, 8 > SMSchedule::computeUnpipelineableNodes (
2713
+ SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) {
2714
+ SmallSet<SUnit *, 8 > DoNotPipeline;
2715
+ SmallVector<SUnit *, 8 > Worklist;
2716
+
2717
+ for (auto &SU : SSD->SUnits )
2718
+ if (SU.isInstr () && PLI->shouldIgnoreForPipelining (SU.getInstr ()))
2719
+ Worklist.push_back (&SU);
2720
+
2721
+ while (!Worklist.empty ()) {
2722
+ auto SU = Worklist.pop_back_val ();
2723
+ if (DoNotPipeline.count (SU))
2724
+ continue ;
2725
+ LLVM_DEBUG (dbgs () << " Do not pipeline SU(" << SU->NodeNum << " )\n " );
2726
+ DoNotPipeline.insert (SU);
2727
+ for (auto &Dep : SU->Preds )
2728
+ Worklist.push_back (Dep.getSUnit ());
2729
+ if (SU->getInstr ()->isPHI ())
2730
+ for (auto &Dep : SU->Succs )
2731
+ if (Dep.getKind () == SDep::Anti)
2732
+ Worklist.push_back (Dep.getSUnit ());
2733
+ }
2734
+ return DoNotPipeline;
2735
+ }
2736
+
2737
+ // Determine all instructions upon which any unpipelineable instruction depends
2738
+ // and ensure that they are in stage 0. If unable to do so, return false.
2739
+ bool SMSchedule::normalizeNonPipelinedInstructions (
2740
+ SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) {
2741
+ SmallSet<SUnit *, 8 > DNP = computeUnpipelineableNodes (SSD, PLI);
2742
+
2743
+ int NewLastCycle = INT_MIN;
2744
+ for (SUnit &SU : SSD->SUnits ) {
2745
+ if (!SU.isInstr ())
2746
+ continue ;
2747
+ if (!DNP.contains (&SU) || stageScheduled (&SU) == 0 ) {
2748
+ NewLastCycle = std::max (NewLastCycle, InstrToCycle[&SU]);
2749
+ continue ;
2750
+ }
2751
+
2752
+ // Put the non-pipelined instruction as early as possible in the schedule
2753
+ int NewCycle = getFirstCycle ();
2754
+ for (auto &Dep : SU.Preds )
2755
+ NewCycle = std::max (InstrToCycle[Dep.getSUnit ()], NewCycle);
2756
+
2757
+ int OldCycle = InstrToCycle[&SU];
2758
+ if (OldCycle != NewCycle) {
2759
+ InstrToCycle[&SU] = NewCycle;
2760
+ auto &OldS = getInstructions (OldCycle);
2761
+ OldS.erase (std::remove (OldS.begin (), OldS.end (), &SU), OldS.end ());
2762
+ getInstructions (NewCycle).emplace_back (&SU);
2763
+ LLVM_DEBUG (dbgs () << " SU(" << SU.NodeNum
2764
+ << " ) is not pipelined; moving from cycle " << OldCycle
2765
+ << " to " << NewCycle << " Instr:" << *SU.getInstr ());
2766
+ }
2767
+ NewLastCycle = std::max (NewLastCycle, NewCycle);
2768
+ }
2769
+ LastCycle = NewLastCycle;
2770
+ return true ;
2771
+ }
2772
+
2700
2773
// Check if the generated schedule is valid. This function checks if
2701
2774
// an instruction that uses a physical register is scheduled in a
2702
2775
// different stage than the definition. The pipeliner does not handle
2703
2776
// physical register values that may cross a basic block boundary.
2777
+ // Furthermore, if a physical def/use pair is assigned to the same
2778
+ // cycle, orderDependence does not guarantee def/use ordering, so that
2779
+ // case should be considered invalid. (The test checks for both
2780
+ // earlier and same-cycle use to be more robust.)
2704
2781
bool SMSchedule::isValidSchedule (SwingSchedulerDAG *SSD) {
2705
2782
for (SUnit &SU : SSD->SUnits ) {
2706
2783
if (!SU.hasPhysRegDefs )
2707
2784
continue ;
2708
2785
int StageDef = stageScheduled (&SU);
2786
+ int CycleDef = InstrToCycle[&SU];
2709
2787
assert (StageDef != -1 && " Instruction should have been scheduled." );
2710
2788
for (auto &SI : SU.Succs )
2711
- if (SI.isAssignedRegDep ())
2712
- if (Register::isPhysicalRegister (SI.getReg ()))
2789
+ if (SI.isAssignedRegDep () && !SI. getSUnit ()-> isBoundaryNode () )
2790
+ if (Register::isPhysicalRegister (SI.getReg ())) {
2713
2791
if (stageScheduled (SI.getSUnit ()) != StageDef)
2714
2792
return false ;
2793
+ if (InstrToCycle[SI.getSUnit ()] <= CycleDef)
2794
+ return false ;
2795
+ }
2715
2796
}
2716
2797
return true ;
2717
2798
}
0 commit comments