@@ -1309,7 +1309,9 @@ DDD::DDD(G4_BB *bb, const LatencyTable <, G4_Kernel *k, PointsToAnalysis &p)
1309
1309
TOTAL_BUCKETS = OTHER_ARF_BUCKET + 1 ;
1310
1310
1311
1311
LiveBuckets LB (this , GRF_BUCKET, TOTAL_BUCKETS);
1312
-
1312
+ for (int i = 0 ; i < PIPE_ALL; i++) {
1313
+ latestInstOfEachPipe[i] = nullptr ;
1314
+ }
1313
1315
// Building the graph in reverse relative to the original instruction
1314
1316
// order, to naturally take care of the liveness of operands.
1315
1317
std::list<G4_INST *>::reverse_iterator iInst (bb->rbegin ()),
@@ -2497,6 +2499,34 @@ uint32_t DDD::listSchedule(G4_BB_Schedule *schedule) {
2497
2499
}
2498
2500
};
2499
2501
2502
+ auto getStepCycle = [&](Node *n, uint32_t currCycle) -> uint32_t {
2503
+ uint32_t stepCycle = 1 ;
2504
+ for (unsigned i = PIPE_INT; i < PIPE_ALL; i++) {
2505
+ if (latestInstOfEachPipe[i] == nullptr ) {
2506
+ continue ;
2507
+ }
2508
+
2509
+ if ((latestInstOfEachPipe[i]->schedTime +
2510
+ latestInstOfEachPipe[i]->getOccupancy ()) <= currCycle) {
2511
+ latestInstOfEachPipe[i] = nullptr ;
2512
+ continue ;
2513
+ }
2514
+
2515
+ if (i == n->instPipe ) {
2516
+ stepCycle = std::max (
2517
+ stepCycle, latestInstOfEachPipe[i]->schedTime +
2518
+ latestInstOfEachPipe[i]->getOccupancy () - currCycle);
2519
+ } else {
2520
+ if (latestInstOfEachPipe[i]->schedTime + 1 > currCycle) {
2521
+ stepCycle = std::max (stepCycle, latestInstOfEachPipe[i]->schedTime +
2522
+ 1 - currCycle);
2523
+ }
2524
+ }
2525
+ }
2526
+
2527
+ return stepCycle;
2528
+ };
2529
+
2500
2530
auto updateForScheduled = [&](Node *scheduled) {
2501
2531
// Append the scheduled node to the end of the schedule.
2502
2532
schedule->scheduledNodes .push_back (scheduled);
@@ -2519,9 +2549,15 @@ uint32_t DDD::listSchedule(G4_BB_Schedule *schedule) {
2519
2549
2520
2550
2521
2551
updateForSucc (scheduled);
2552
+ if (getOptions ()->getOption (vISA_multiplePipeSched)) {
2553
+ // Increment the scheduler's clock after each scheduled node
2554
+ currCycle += getStepCycle (scheduled, currCycle);
2522
2555
2523
- // Increment the scheduler's clock after each scheduled node
2524
- currCycle += scheduled->getOccupancy ();
2556
+ latestInstOfEachPipe[scheduled->instPipe ] = scheduled;
2557
+ } else {
2558
+ // Increment the scheduler's clock after each scheduled node
2559
+ currCycle += scheduled->getOccupancy ();
2560
+ }
2525
2561
};
2526
2562
2527
2563
auto scheduleForSuppression = [&]() -> bool {
@@ -2596,6 +2632,32 @@ uint32_t DDD::listSchedule(G4_BB_Schedule *schedule) {
2596
2632
return scheduled;
2597
2633
};
2598
2634
2635
+ auto getHigestOccupancyStallPipe = [&](uint32_t currCycle) -> SB_INST_PIPE {
2636
+ SB_INST_PIPE pipe = PIPE_NONE;
2637
+ uint32_t mostOccupancyStallCycle = 0 ;
2638
+ for (unsigned i = PIPE_INT; i < PIPE_ALL; i++) {
2639
+ if (latestInstOfEachPipe[i] == nullptr ) {
2640
+ continue ;
2641
+ }
2642
+
2643
+ if ((latestInstOfEachPipe[i]->schedTime +
2644
+ latestInstOfEachPipe[i]->getOccupancy ()) <= currCycle) {
2645
+ latestInstOfEachPipe[i] = nullptr ;
2646
+ continue ;
2647
+ }
2648
+
2649
+ if (latestInstOfEachPipe[i]->schedTime +
2650
+ latestInstOfEachPipe[i]->getOccupancy () >
2651
+ mostOccupancyStallCycle) {
2652
+ pipe = (SB_INST_PIPE)i;
2653
+ mostOccupancyStallCycle = latestInstOfEachPipe[i]->schedTime +
2654
+ latestInstOfEachPipe[i]->getOccupancy ();
2655
+ }
2656
+ }
2657
+
2658
+ return pipe;
2659
+ };
2660
+
2599
2661
// Try to avoid b2b math if possible as there are pipeline stalls.
2600
2662
auto scheduleForB2BMathReduction = [&](Node *scheduled) -> bool {
2601
2663
return !readyList.empty () && lastScheduled &&
@@ -2604,7 +2666,7 @@ uint32_t DDD::listSchedule(G4_BB_Schedule *schedule) {
2604
2666
};
2605
2667
2606
2668
auto applyB2BMathReductionHeuristic = [&](Node *scheduled,
2607
- Node *lastScheduled) -> Node * {
2669
+ Node *lastScheduled) -> Node * {
2608
2670
// pick another node on the ready list if it's not math and won't cause
2609
2671
// a longer stall to save compile time we currently limit search size to 2
2610
2672
std::vector<Node *> popped;
@@ -2629,6 +2691,28 @@ uint32_t DDD::listSchedule(G4_BB_Schedule *schedule) {
2629
2691
return scheduled;
2630
2692
};
2631
2693
2694
+ auto applyMultiplePipelineHeuristic = [&](Node *scheduled, SB_INST_PIPE stallPipe) -> Node * {
2695
+ // pick another node on the ready list if it's not math and won't cause
2696
+ // a longer stall to save compile time we currently limit search size to 2
2697
+ std::vector<Node *> popped;
2698
+ for (size_t i = 0 ; i < readyList.size (); ++i) {
2699
+ Node *next = readyList.top ();
2700
+ readyList.pop ();
2701
+ if (next->instPipe != stallPipe) {
2702
+ readyList.push (scheduled);
2703
+ scheduled = next;
2704
+ break ;
2705
+ } else {
2706
+ // keep searching
2707
+ popped.push_back (next);
2708
+ }
2709
+ }
2710
+ for (auto nodes : popped) {
2711
+ readyList.push (nodes);
2712
+ }
2713
+ return scheduled;
2714
+ };
2715
+
2632
2716
// Avoid WAW subreg hazard by skipping nodes that cause a WAW subreg
2633
2717
// hazard with the lastScheduled instruction.
2634
2718
auto scheduleForWAWSubregHazardReduction = [&]() -> bool {
@@ -2829,9 +2913,17 @@ uint32_t DDD::listSchedule(G4_BB_Schedule *schedule) {
2829
2913
heuCandidate =
2830
2914
applyBankConflictReductionHeuristic (scheduled, lastScheduled);
2831
2915
}
2832
- if (!heuCandidate && scheduleForB2BMathReduction (scheduled)) {
2833
- heuCandidate =
2834
- applyB2BMathReductionHeuristic (scheduled, lastScheduled);
2916
+ if (!heuCandidate) {
2917
+ if (getOptions ()->getOption (vISA_multiplePipeSched)) {
2918
+ auto occupancyPipe = getHigestOccupancyStallPipe (currCycle);
2919
+ if (occupancyPipe != PIPE_NONE &&
2920
+ occupancyPipe == scheduled->instPipe ) {
2921
+ heuCandidate =
2922
+ applyMultiplePipelineHeuristic (scheduled, occupancyPipe);
2923
+ }
2924
+ } else if (scheduleForB2BMathReduction (scheduled)){
2925
+ heuCandidate = applyB2BMathReductionHeuristic (scheduled, lastScheduled);
2926
+ }
2835
2927
}
2836
2928
if (!heuCandidate && scheduleForWAWSubregHazardReduction ()) {
2837
2929
heuCandidate =
@@ -2958,6 +3050,10 @@ Node::Node(uint32_t id, G4_INST *inst, Edge_Allocator &depEdgeAllocator,
2958
3050
priority = occupancy;
2959
3051
2960
3052
barrier = CheckBarrier (inst);
3053
+
3054
+ if (!inst->isLabel ()) {
3055
+ instPipe = inst->getInstructionPipeXe ();
3056
+ }
2961
3057
}
2962
3058
2963
3059
void LocalScheduler::EmitNode (Node *node) {
0 commit comments