Skip to content

Commit f5c3537

Browse files
bcheng0127, igcbot
authored and committed
Add heuristic to support issues of multiple different pipelines
Occupancy used to be the step for the instruction issue. However, with multiple pipelines, the issue step cycle can be 1 if adjacent instructions are going to different pipelines.
1 parent 0736e99 commit f5c3537

File tree

5 files changed

+113
-8
lines changed

5 files changed

+113
-8
lines changed

visa/G4_IR.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -854,7 +854,12 @@ bool G4_INST::hasNoPipe() const {
854854
if (op == G4_wait || op == G4_halt || op == G4_nop) {
855855
return true;
856856
}
857-
857+
if (op == G4_label) {
858+
return true;
859+
}
860+
if (op == G4_intrinsic) {
861+
return true;
862+
}
858863
if (op == G4_sync_fence || op == G4_sync_nop || op == G4_sync_allrd ||
859864
op == G4_sync_allwr) {
860865
return true;

visa/G4_IR.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ enum SB_INST_PIPE {
9292
PIPE_MATH = 4,
9393
PIPE_DPAS = 6,
9494
PIPE_SEND = 7,
95+
PIPE_ALL = 8
9596
};
9697
// forward declaration for the binary of an instruction
9798
class BinInst;

visa/LocalScheduler/LocalScheduler_G4IR.cpp

Lines changed: 103 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1309,7 +1309,9 @@ DDD::DDD(G4_BB *bb, const LatencyTable &lt, G4_Kernel *k, PointsToAnalysis &p)
13091309
TOTAL_BUCKETS = OTHER_ARF_BUCKET + 1;
13101310

13111311
LiveBuckets LB(this, GRF_BUCKET, TOTAL_BUCKETS);
1312-
1312+
for (int i = 0; i < PIPE_ALL; i++) {
1313+
latestInstOfEachPipe[i] = nullptr;
1314+
}
13131315
// Building the graph in reverse relative to the original instruction
13141316
// order, to naturally take care of the liveness of operands.
13151317
std::list<G4_INST *>::reverse_iterator iInst(bb->rbegin()),
@@ -2497,6 +2499,34 @@ uint32_t DDD::listSchedule(G4_BB_Schedule *schedule) {
24972499
}
24982500
};
24992501

2502+
auto getStepCycle = [&](Node *n, uint32_t currCycle) -> uint32_t {
2503+
uint32_t stepCycle = 1;
2504+
for (unsigned i = PIPE_INT; i < PIPE_ALL; i++) {
2505+
if (latestInstOfEachPipe[i] == nullptr) {
2506+
continue;
2507+
}
2508+
2509+
if ((latestInstOfEachPipe[i]->schedTime +
2510+
latestInstOfEachPipe[i]->getOccupancy()) <= currCycle) {
2511+
latestInstOfEachPipe[i] = nullptr;
2512+
continue;
2513+
}
2514+
2515+
if (i == n->instPipe) {
2516+
stepCycle = std::max(
2517+
stepCycle, latestInstOfEachPipe[i]->schedTime +
2518+
latestInstOfEachPipe[i]->getOccupancy() - currCycle);
2519+
} else {
2520+
if (latestInstOfEachPipe[i]->schedTime + 1 > currCycle) {
2521+
stepCycle = std::max(stepCycle, latestInstOfEachPipe[i]->schedTime +
2522+
1 - currCycle);
2523+
}
2524+
}
2525+
}
2526+
2527+
return stepCycle;
2528+
};
2529+
25002530
auto updateForScheduled = [&](Node *scheduled) {
25012531
// Append the scheduled node to the end of the schedule.
25022532
schedule->scheduledNodes.push_back(scheduled);
@@ -2519,9 +2549,15 @@ uint32_t DDD::listSchedule(G4_BB_Schedule *schedule) {
25192549

25202550

25212551
updateForSucc(scheduled);
2552+
if (getOptions()->getOption(vISA_multiplePipeSched)) {
2553+
// Increment the scheduler's clock after each scheduled node
2554+
currCycle += getStepCycle(scheduled, currCycle);
25222555

2523-
// Increment the scheduler's clock after each scheduled node
2524-
currCycle += scheduled->getOccupancy();
2556+
latestInstOfEachPipe[scheduled->instPipe] = scheduled;
2557+
} else {
2558+
// Increment the scheduler's clock after each scheduled node
2559+
currCycle += scheduled->getOccupancy();
2560+
}
25252561
};
25262562

25272563
auto scheduleForSuppression = [&]() -> bool {
@@ -2596,6 +2632,32 @@ uint32_t DDD::listSchedule(G4_BB_Schedule *schedule) {
25962632
return scheduled;
25972633
};
25982634

2635+
auto getHigestOccupancyStallPipe = [&](uint32_t currCycle) -> SB_INST_PIPE {
2636+
SB_INST_PIPE pipe = PIPE_NONE;
2637+
uint32_t mostOccupancyStallCycle = 0;
2638+
for (unsigned i = PIPE_INT; i < PIPE_ALL; i++) {
2639+
if (latestInstOfEachPipe[i] == nullptr) {
2640+
continue;
2641+
}
2642+
2643+
if ((latestInstOfEachPipe[i]->schedTime +
2644+
latestInstOfEachPipe[i]->getOccupancy()) <= currCycle) {
2645+
latestInstOfEachPipe[i] = nullptr;
2646+
continue;
2647+
}
2648+
2649+
if (latestInstOfEachPipe[i]->schedTime +
2650+
latestInstOfEachPipe[i]->getOccupancy() >
2651+
mostOccupancyStallCycle) {
2652+
pipe = (SB_INST_PIPE)i;
2653+
mostOccupancyStallCycle = latestInstOfEachPipe[i]->schedTime +
2654+
latestInstOfEachPipe[i]->getOccupancy();
2655+
}
2656+
}
2657+
2658+
return pipe;
2659+
};
2660+
25992661
// Try to avoid b2b math if possible as there are pipeline stalls.
26002662
auto scheduleForB2BMathReduction = [&](Node *scheduled) -> bool {
26012663
return !readyList.empty() && lastScheduled &&
@@ -2604,7 +2666,7 @@ uint32_t DDD::listSchedule(G4_BB_Schedule *schedule) {
26042666
};
26052667

26062668
auto applyB2BMathReductionHeuristic = [&](Node *scheduled,
2607-
Node *lastScheduled) -> Node * {
2669+
Node *lastScheduled) -> Node * {
26082670
// pick another node on the ready list if it's not math and won't cause
26092671
// a longer stall to save compile time we currently limit search size to 2
26102672
std::vector<Node *> popped;
@@ -2629,6 +2691,28 @@ uint32_t DDD::listSchedule(G4_BB_Schedule *schedule) {
26292691
return scheduled;
26302692
};
26312693

2694+
auto applyMultiplePipelineHeuristic = [&](Node *scheduled, SB_INST_PIPE stallPipe) -> Node * {
2695+
// pick another node on the ready list if it's not math and won't cause
2696+
// a longer stall to save compile time we currently limit search size to 2
2697+
std::vector<Node *> popped;
2698+
for (size_t i = 0; i < readyList.size(); ++i) {
2699+
Node *next = readyList.top();
2700+
readyList.pop();
2701+
if (next->instPipe != stallPipe) {
2702+
readyList.push(scheduled);
2703+
scheduled = next;
2704+
break;
2705+
} else {
2706+
// keep searching
2707+
popped.push_back(next);
2708+
}
2709+
}
2710+
for (auto nodes : popped) {
2711+
readyList.push(nodes);
2712+
}
2713+
return scheduled;
2714+
};
2715+
26322716
// Avoid WAW subreg hazard by skipping nodes that cause a WAW subreg
26332717
// hazard with the lastScheduled instruction.
26342718
auto scheduleForWAWSubregHazardReduction = [&]() -> bool {
@@ -2829,9 +2913,17 @@ uint32_t DDD::listSchedule(G4_BB_Schedule *schedule) {
28292913
heuCandidate =
28302914
applyBankConflictReductionHeuristic(scheduled, lastScheduled);
28312915
}
2832-
if (!heuCandidate && scheduleForB2BMathReduction(scheduled)) {
2833-
heuCandidate =
2834-
applyB2BMathReductionHeuristic(scheduled, lastScheduled);
2916+
if (!heuCandidate) {
2917+
if (getOptions()->getOption(vISA_multiplePipeSched)) {
2918+
auto occupancyPipe = getHigestOccupancyStallPipe(currCycle);
2919+
if (occupancyPipe != PIPE_NONE &&
2920+
occupancyPipe == scheduled->instPipe) {
2921+
heuCandidate =
2922+
applyMultiplePipelineHeuristic(scheduled, occupancyPipe);
2923+
}
2924+
} else if (scheduleForB2BMathReduction(scheduled)){
2925+
heuCandidate = applyB2BMathReductionHeuristic(scheduled, lastScheduled);
2926+
}
28352927
}
28362928
if (!heuCandidate && scheduleForWAWSubregHazardReduction()) {
28372929
heuCandidate =
@@ -2958,6 +3050,10 @@ Node::Node(uint32_t id, G4_INST *inst, Edge_Allocator &depEdgeAllocator,
29583050
priority = occupancy;
29593051

29603052
barrier = CheckBarrier(inst);
3053+
3054+
if (!inst->isLabel()) {
3055+
instPipe = inst->getInstructionPipeXe();
3056+
}
29613057
}
29623058

29633059
void LocalScheduler::EmitNode(Node *node) {

visa/LocalScheduler/LocalScheduler_G4IR.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ class Node {
104104
unsigned getNodeID() const { return nodeID; };
105105

106106
uint32_t schedTime = 0;
107+
SB_INST_PIPE instPipe = PIPE_NONE;
107108

108109
// Number of predecessor nodes not scheduled
109110
uint16_t predsNotScheduled = 0;
@@ -322,6 +323,7 @@ class DDD {
322323
typedef std::pair<Node *, Node *> instrPair_t;
323324
typedef std::vector<instrPair_t> instrPairVec_t;
324325
NODE_LIST Roots;
326+
Node *latestInstOfEachPipe[PIPE_ALL];
325327
NodeAlloc NodeAllocator;
326328
void moveDeps(Node *fromNode, Node *toNode);
327329
void pairTypedWriteOrURBWriteNodes(G4_BB *bb);

visa/include/VISAOptionsDefs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -778,3 +778,4 @@ DEF_VISA_OPTION(
778778
"HW thread scheduling policy: 0: single thread first; 1: round robin;", 0)
779779
DEF_VISA_OPTION(vISA_SendQueueEntries, ET_INT32, "-sendQueueEntries", UNUSED, 0)
780780
DEF_VISA_OPTION(vISA_SendQueueSched, ET_BOOL, "-sendQueueSched", UNUSED, false)
781+
DEF_VISA_OPTION(vISA_multiplePipeSched, ET_BOOL, "-multiplePipeSched", UNUSED, false)

0 commit comments

Comments
 (0)