Skip to content

Commit 8598367

Browse files
Gang Y Chen authored and igcbot committed
Latency scheduling: add hold-list
Real latency scheduling needs a hold-list. It is not enabled yet; it is controlled by scheduler-config.
1 parent ab5a7cf commit 8598367

File tree

1 file changed

+120
-8
lines changed

1 file changed

+120
-8
lines changed

visa/LocalScheduler/G4_Sched.cpp

Lines changed: 120 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ class preEdge {
3838
// Build an edge that ends at node N and carries dependence type Ty.
// The latency starts at -1, meaning it has not been set yet.
preEdge(preNode* N, DepType Ty) : mNode(N), mType(Ty), mLatency(-1) {}
4344

@@ -59,12 +60,23 @@ class preEdge {
5960
return false;
6061
}
6162

63+
// Record the latency L on this edge.
void setLatency(int L)
{
    mLatency = L;
}
64+
int getLatency()
65+
{
66+
return isDataDep() ? mLatency : 0;
67+
}
68+
6269
private:
6370
// Node at the end of this edge.
6471
preNode* mNode;
6572

6673
// Type of dependence (RAW, WAW, WAR, etc.).
6774
DepType mType;
75+
76+
// data-dependence Latency used in Latency scheduling.
77+
// only exists (i.e. >=0) on succ-edge during latency scheduling.
78+
// set in LatencyQueue::calculatePriority
79+
int mLatency;
6880
};
6981

7082
class preNode {
@@ -139,9 +151,13 @@ class preNode {
139151
return TupleParts;
140152
return TupleLead->TupleParts;
141153
}
142-
154+
// Used in latency scheduling
155+
// Store cyc as this node's ready cycle (used in latency scheduling).
void setReadyCycle(unsigned cyc)
{
    ReadyCycle = cyc;
}
156+
// Return this node's ready cycle (used in latency scheduling).
unsigned getReadyCycle()
{
    return ReadyCycle;
}
157+
// Used in ACC scheduling
143158
// Flag this node as an ACC candidate (used in ACC scheduling).
void setACCCandidate()
{
    ACCCandidate = true;
}
144159
// Report whether this node has been flagged as an ACC candidate.
bool isACCCandidate()
{
    return ACCCandidate;
}
160+
145161
void print(std::ostream& os) const;
146162
void dump() const;
147163

@@ -171,6 +187,9 @@ class preNode {
171187
// # of succs not scheduled.
172188
unsigned NumSuccsLeft = 0;
173189

190+
// the earliest cycle for latency scheduling
191+
unsigned ReadyCycle = 0;
192+
174193
// True once scheduled.
175194
bool isScheduled = false;
176195
bool isClustered = false;
@@ -433,17 +452,20 @@ struct SchedConfig
433452
MASK_LATENCY = 1U << 1,
434453
MASK_SETHI_ULLMAN = 1U << 2,
435454
MASK_CLUSTTERING = 1U << 3,
455+
MASK_HOLD_LIST = 1U << 4,
436456
};
437457
unsigned Dump : 1;
438458
unsigned UseLatency : 1;
439459
unsigned UseSethiUllman : 1;
440460
unsigned DoClustering : 1;
461+
unsigned UseHoldList : 1;
441462

442463
explicit SchedConfig(unsigned Config)
443464
: Dump((Config & MASK_DUMP) != 0)
444465
, UseLatency((Config & MASK_LATENCY) != 0)
445466
, UseSethiUllman((Config & MASK_SETHI_ULLMAN) != 0)
446467
, DoClustering((Config & MASK_CLUSTTERING) != 0)
468+
, UseHoldList((Config & MASK_HOLD_LIST) != 0)
447469
{
448470
}
449471
};
@@ -1275,7 +1297,11 @@ class LatencyQueue : public QueueBase {
12751297
const LatencyTable &LT;
12761298

12771299
// TODO: Try to apply priority queue to SethiUllmanQueue as well.
1300+
1301+
// nodes with all predecessors scheduled and ready-cycle <= current-cycle for topdown scheduling
12781302
std::priority_queue<preNode*, std::vector<preNode*>, std::function<bool(preNode*, preNode*)>> ReadyList;
1303+
// nodes with all predecessors scheduled and ready-cycle > current-cycle for topdown scheduling
1304+
std::priority_queue<preNode*, std::vector<preNode*>, std::function<bool(preNode*, preNode*)>> HoldList;
12791305
// The register-pressure limit we use to decide sub-blocking
12801306
unsigned GroupingPressureLimit;
12811307

@@ -1284,17 +1310,20 @@ class LatencyQueue : public QueueBase {
12841310
const LatencyTable& LT, unsigned GroupingThreshold)
12851311
: QueueBase(ddd, rp, config)
12861312
, LT(LT)
1287-
, ReadyList([this](preNode* a, preNode* b){ return compare(a, b);})
1313+
, ReadyList([this](preNode* a, preNode* b){ return compareReady(a, b);})
1314+
, HoldList([this](preNode* a, preNode* b) { return compareHold(a, b); })
12881315
, GroupingPressureLimit(GroupingThreshold)
12891316
{
12901317
init();
12911318
}
12921319

1293-
// Add a new ready node.
1320+
// Add a new node to queue.
12941321
// Insert node N into the container that matches its kind and the
// current configuration.
void push(preNode* N) override
{
    // Pseudo kills are collected separately from the scheduling queues.
    if (N->getInst() && N->getInst()->isPseudoKill()) {
        pseudoKills.push_back(N);
        return;
    }

    // With the hold-list enabled every other node starts on the hold-list;
    // otherwise it goes directly to the ready-list.
    auto& Target = config.UseHoldList ? HoldList : ReadyList;
    Target.push(N);
}
@@ -1319,13 +1348,50 @@ class LatencyQueue : public QueueBase {
13191348
return pseudoKills.empty() && ReadyList.empty();
13201349
}
13211350

1351+
// moving instruction from HoldList to ReadyList
1352+
void advance(unsigned &CurCycle, unsigned& CurGroup)
1353+
{
1354+
if (!config.UseHoldList) {
1355+
assert(HoldList.empty());
1356+
return;
1357+
}
1358+
GroupInfo[nullptr] = CurGroup;
1359+
while (!HoldList.empty()) {
1360+
preNode* N = HoldList.top();
1361+
if (GroupInfo[N->getInst()] <= CurGroup &&
1362+
N->getReadyCycle() <= CurCycle) {
1363+
HoldList.pop();
1364+
ReadyList.push(N);
1365+
}
1366+
else
1367+
break;
1368+
}
1369+
if (ReadyList.empty() && !HoldList.empty()) {
1370+
preNode* N = HoldList.top();
1371+
CurCycle = std::max(CurCycle, N->getReadyCycle());
1372+
CurGroup = std::max(CurGroup, GroupInfo[N->getInst()]);
1373+
do {
1374+
preNode* N = HoldList.top();
1375+
if (GroupInfo[N->getInst()] <= CurGroup &&
1376+
N->getReadyCycle() <= CurCycle) {
1377+
HoldList.pop();
1378+
ReadyList.push(N);
1379+
}
1380+
else
1381+
break;
1382+
} while (!HoldList.empty());
1383+
}
1384+
}
1385+
13221386
private:
13231387
void init();
13241388
unsigned calculatePriority(preNode *N);
13251389

13261390
// Compare two ready nodes and decide which one should be scheduled first.
13271391
// Return true if N2 has a higher priority than N1, false otherwise.
1328-
bool compare(preNode* N1, preNode* N2);
1392+
bool compareReady(preNode* N1, preNode* N2);
1393+
1394+
bool compareHold(preNode* N1, preNode* N2);
13291395

13301396
// The ready pseudo kills.
13311397
std::vector<preNode *> pseudoKills;
@@ -1397,21 +1463,32 @@ void BB_Scheduler::LatencyScheduling(unsigned GroupingThreshold)
13971463
LatencyQueue Q(ddd, rp, config, LT, GroupingThreshold);
13981464
Q.push(ddd.getEntryNode());
13991465

1466+
unsigned CurrentCycle = 0;
1467+
unsigned CurrentGroup = 0;
1468+
Q.advance(CurrentCycle, CurrentGroup);
14001469
while (!Q.empty()) {
14011470
preNode *N = Q.pop();
14021471
assert(N->NumPredsLeft == 0);
1472+
unsigned NextCycle = CurrentCycle;
14031473
if (N->getInst() != nullptr) {
14041474
schedule.push_back(N->getInst());
1405-
N->isScheduled = true;
1475+
NextCycle += LT.getOccupancy(N->getInst());
14061476
}
1407-
1477+
N->isScheduled = true;
14081478
for (auto I = N->succ_begin(), E = N->succ_end(); I != E; ++I) {
14091479
preNode *Node = I->getNode();
14101480
assert(!Node->isScheduled && Node->NumPredsLeft);
1481+
int L = (*I).getLatency();
1482+
assert(L >= 0);
1483+
if (Node->getReadyCycle() < CurrentCycle + (unsigned)L)
1484+
Node->setReadyCycle(CurrentCycle + (unsigned)L);
14111485
--Node->NumPredsLeft;
1412-
if (Node->NumPredsLeft == 0)
1486+
if (Node->NumPredsLeft == 0) {
14131487
Q.push(Node);
1488+
}
14141489
}
1490+
CurrentCycle = NextCycle;
1491+
Q.advance(CurrentCycle, CurrentGroup);
14151492
}
14161493

14171494
relocatePseudoKills();
@@ -1640,6 +1717,7 @@ unsigned LatencyQueue::calculatePriority(preNode* N)
16401717
break;
16411718
}
16421719
}
1720+
Edge.setLatency(Latency);
16431721
Priority = std::max(Priority, SuccPriority + Latency);
16441722
}
16451723

@@ -1648,7 +1726,7 @@ unsigned LatencyQueue::calculatePriority(preNode* N)
16481726

16491727
// Compare two ready nodes and decide which one should be scheduled first.
16501728
// Return true if N2 has a higher priority than N1, false otherwise.
1651-
bool LatencyQueue::compare(preNode* N1, preNode* N2)
1729+
bool LatencyQueue::compareReady(preNode* N1, preNode* N2)
16521730
{
16531731
assert(N1->getID() != N2->getID());
16541732
assert(N1->getInst() && N2->getInst());
@@ -1697,6 +1775,37 @@ bool LatencyQueue::compare(preNode* N1, preNode* N2)
16971775
return N2->getID() > N1->getID();
16981776
}
16991777

1778+
// hold-list is sorted by nodes' ready cycle
1779+
// Ordering predicate for the hold-list.
// Returns true if N2 has a higher priority than N1, false otherwise.
// Keys, in order: smaller group ID first, then smaller ready cycle,
// then larger node ID.
bool LatencyQueue::compareHold(preNode* N1, preNode* N2)
{
    assert(N1->getID() != N2->getID());
    assert(N1->getInst() && N2->getInst());
    assert(!N1->getInst()->isPseudoKill() &&
           !N2->getInst()->isPseudoKill());

    // Group ID dominates: the node in the smaller group wins.
    const unsigned Group1 = GroupInfo[N1->getInst()];
    const unsigned Group2 = GroupInfo[N2->getInst()];
    if (Group1 != Group2)
        return Group1 > Group2;

    // Within a group, the node with the smaller ready cycle wins.
    const unsigned Ready1 = N1->getReadyCycle();
    const unsigned Ready2 = N2->getReadyCycle();
    if (Ready1 != Ready2)
        return Ready1 > Ready2;

    // Final tie-break on node ID: the larger ID wins.
    return N1->getID() < N2->getID();
}
1808+
17001809
// Find the edge with smallest ID.
17011810
static preNode* minElt(const std::vector<preEdge>& Elts)
17021811
{
@@ -2363,19 +2472,22 @@ void preDDD::reset(bool ReassignNodeID)
23632472
N->NumPredsLeft = unsigned(N->Preds.size());
23642473
N->NumSuccsLeft = unsigned(N->Succs.size());
23652474
N->isScheduled = false;
2475+
N->setReadyCycle(0);
23662476
N->isClustered = false;
23672477
N->isClusterLead = false;
23682478
}
23692479

23702480
EntryNode.NumPredsLeft = 0;
23712481
EntryNode.NumSuccsLeft = unsigned(EntryNode.Succs.size());
23722482
EntryNode.isScheduled = false;
2483+
EntryNode.setReadyCycle(0);
23732484
EntryNode.isClustered = false;
23742485
EntryNode.isClusterLead = false;
23752486

23762487
ExitNode.NumPredsLeft = unsigned(ExitNode.Preds.size());
23772488
ExitNode.NumSuccsLeft = 0;
23782489
ExitNode.isScheduled = false;
2490+
ExitNode.setReadyCycle(0);
23792491
ExitNode.isClustered = false;
23802492
ExitNode.isClusterLead = false;
23812493
}

0 commit comments

Comments
 (0)