@@ -38,6 +38,7 @@ class preEdge {
     preEdge(preNode* N, DepType Ty)
         : mNode(N)
         , mType(Ty)
+        , mLatency(-1)
     {
     }

@@ -59,12 +60,23 @@ class preEdge {
         return false;
     }

+    void setLatency(int L) { mLatency = L; }
+    int getLatency()
+    {
+        return isDataDep() ? mLatency : 0;
+    }
+
 private:
     // Node at the end of this edge.
     preNode* mNode;

     // Type of dependence (RAW, WAW, WAR, etc.).
     DepType mType;
+
+    // Data-dependence latency used in latency scheduling.
+    // Only valid (i.e. >= 0) on successor edges during latency scheduling;
+    // set in LatencyQueue::calculatePriority.
+    int mLatency;
 };

 class preNode {
@@ -139,9 +151,13 @@ class preNode {
             return TupleParts;
         return TupleLead->TupleParts;
     }
-
+    // Used in latency scheduling.
+    void setReadyCycle(unsigned cyc) { ReadyCycle = cyc; }
+    unsigned getReadyCycle() { return ReadyCycle; }
+    // Used in ACC scheduling.
     void setACCCandidate() { ACCCandidate = true; }
     bool isACCCandidate() { return ACCCandidate; }
+
     void print(std::ostream& os) const;
     void dump() const;
@@ -171,6 +187,9 @@ class preNode {
     // # of succs not scheduled.
     unsigned NumSuccsLeft = 0;

+    // The earliest cycle at which this node can be scheduled; used in latency scheduling.
+    unsigned ReadyCycle = 0;
+
     // True once scheduled.
     bool isScheduled = false;
     bool isClustered = false;
@@ -433,17 +452,20 @@ struct SchedConfig
         MASK_LATENCY = 1U << 1,
         MASK_SETHI_ULLMAN = 1U << 2,
         MASK_CLUSTTERING = 1U << 3,
+        MASK_HOLD_LIST = 1U << 4,
     };
     unsigned Dump : 1;
     unsigned UseLatency : 1;
     unsigned UseSethiUllman : 1;
     unsigned DoClustering : 1;
+    unsigned UseHoldList : 1;

     explicit SchedConfig(unsigned Config)
         : Dump((Config & MASK_DUMP) != 0)
         , UseLatency((Config & MASK_LATENCY) != 0)
         , UseSethiUllman((Config & MASK_SETHI_ULLMAN) != 0)
         , DoClustering((Config & MASK_CLUSTTERING) != 0)
+        , UseHoldList((Config & MASK_HOLD_LIST) != 0)
     {
     }
 };
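For reference, a rough sketch of how the new mask bit composes with the existing ones when building a SchedConfig. How the mask is actually assembled by the scheduling pass is not shown in this diff, so treat the call below as illustrative only.

    // Illustrative only: enable latency scheduling together with the new hold list.
    // MASK_LATENCY and MASK_HOLD_LIST come from the enum above; all other bits stay clear.
    SchedConfig config(SchedConfig::MASK_LATENCY | SchedConfig::MASK_HOLD_LIST);
    // config.UseLatency == 1 and config.UseHoldList == 1;
    // Dump, UseSethiUllman and DoClustering remain 0.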
@@ -1275,7 +1297,11 @@ class LatencyQueue : public QueueBase {
     const LatencyTable& LT;

     // TODO: Try to apply priority queue to SethiUllmanQueue as well.
+
+    // Nodes with all predecessors scheduled and ready-cycle <= current-cycle, for top-down scheduling.
     std::priority_queue<preNode*, std::vector<preNode*>, std::function<bool(preNode*, preNode*)>> ReadyList;
+    // Nodes with all predecessors scheduled and ready-cycle > current-cycle, for top-down scheduling.
+    std::priority_queue<preNode*, std::vector<preNode*>, std::function<bool(preNode*, preNode*)>> HoldList;
     // The register-pressure limit we use to decide sub-blocking.
     unsigned GroupingPressureLimit;

@@ -1284,17 +1310,20 @@ class LatencyQueue : public QueueBase {
                  const LatencyTable& LT, unsigned GroupingThreshold)
         : QueueBase(ddd, rp, config)
         , LT(LT)
-        , ReadyList([this](preNode* a, preNode* b) { return compare(a, b); })
+        , ReadyList([this](preNode* a, preNode* b) { return compareReady(a, b); })
+        , HoldList([this](preNode* a, preNode* b) { return compareHold(a, b); })
         , GroupingPressureLimit(GroupingThreshold)
     {
         init();
     }

-    // Add a new ready node.
+    // Add a new node to the queue.
     void push(preNode* N) override
     {
         if (N->getInst() && N->getInst()->isPseudoKill())
             pseudoKills.push_back(N);
+        else if (config.UseHoldList)
+            HoldList.push(N);
         else
             ReadyList.push(N);
     }
@@ -1319,13 +1348,50 @@ class LatencyQueue : public QueueBase {
         return pseudoKills.empty() && ReadyList.empty();
     }

+    // Move instructions from the HoldList to the ReadyList.
+    void advance(unsigned& CurCycle, unsigned& CurGroup)
+    {
+        if (!config.UseHoldList) {
+            assert(HoldList.empty());
+            return;
+        }
+        GroupInfo[nullptr] = CurGroup;
+        while (!HoldList.empty()) {
+            preNode* N = HoldList.top();
+            if (GroupInfo[N->getInst()] <= CurGroup &&
+                N->getReadyCycle() <= CurCycle) {
+                HoldList.pop();
+                ReadyList.push(N);
+            }
+            else
+                break;
+        }
+        if (ReadyList.empty() && !HoldList.empty()) {
+            preNode* N = HoldList.top();
+            CurCycle = std::max(CurCycle, N->getReadyCycle());
+            CurGroup = std::max(CurGroup, GroupInfo[N->getInst()]);
+            do {
+                preNode* N = HoldList.top();
+                if (GroupInfo[N->getInst()] <= CurGroup &&
+                    N->getReadyCycle() <= CurCycle) {
+                    HoldList.pop();
+                    ReadyList.push(N);
+                }
+                else
+                    break;
+            } while (!HoldList.empty());
+        }
+    }
+
 private:
     void init();
     unsigned calculatePriority(preNode *N);

     // Compare two ready nodes and decide which one should be scheduled first.
     // Return true if N2 has a higher priority than N1, false otherwise.
-    bool compare(preNode* N1, preNode* N2);
+    bool compareReady(preNode* N1, preNode* N2);
+
+    bool compareHold(preNode* N1, preNode* N2);

     // The ready pseudo kills.
     std::vector<preNode *> pseudoKills;
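As a side note, the two-phase drain in advance() can be read as: first release every held node whose group and ready cycle have been reached; if nothing became ready, jump the current cycle (and group) forward to the earliest held node and release again. A minimal self-contained C++ sketch of that idea, using plain ready cycles instead of preNode/GroupInfo (toy values, not IGC code):

    #include <algorithm>
    #include <cstdio>
    #include <functional>
    #include <queue>
    #include <vector>

    int main() {
        // Toy hold list: a min-heap of ready cycles (the real code orders preNode*
        // by group ID, then ready cycle, via compareHold).
        std::priority_queue<unsigned, std::vector<unsigned>, std::greater<unsigned>> hold;
        std::vector<unsigned> ready;
        for (unsigned cyc : {5u, 2u, 9u})
            hold.push(cyc);

        unsigned curCycle = 0;
        // Phase 1: release everything whose ready cycle has already been reached.
        while (!hold.empty() && hold.top() <= curCycle) {
            ready.push_back(hold.top());
            hold.pop();
        }
        // Phase 2: nothing became ready, so advance the cycle to the earliest held node.
        if (ready.empty() && !hold.empty()) {
            curCycle = std::max(curCycle, hold.top());
            while (!hold.empty() && hold.top() <= curCycle) {
                ready.push_back(hold.top());
                hold.pop();
            }
        }
        std::printf("cycle=%u ready=%zu held=%zu\n", curCycle, ready.size(), hold.size());
        return 0;
    }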
@@ -1397,21 +1463,32 @@ void BB_Scheduler::LatencyScheduling(unsigned GroupingThreshold)
     LatencyQueue Q(ddd, rp, config, LT, GroupingThreshold);
     Q.push(ddd.getEntryNode());

+    unsigned CurrentCycle = 0;
+    unsigned CurrentGroup = 0;
+    Q.advance(CurrentCycle, CurrentGroup);
     while (!Q.empty()) {
         preNode *N = Q.pop();
         assert(N->NumPredsLeft == 0);
+        unsigned NextCycle = CurrentCycle;
         if (N->getInst() != nullptr) {
             schedule.push_back(N->getInst());
-            N->isScheduled = true;
+            NextCycle += LT.getOccupancy(N->getInst());
         }
-
+        N->isScheduled = true;
         for (auto I = N->succ_begin(), E = N->succ_end(); I != E; ++I) {
             preNode *Node = I->getNode();
             assert(!Node->isScheduled && Node->NumPredsLeft);
+            int L = (*I).getLatency();
+            assert(L >= 0);
+            if (Node->getReadyCycle() < CurrentCycle + (unsigned)L)
+                Node->setReadyCycle(CurrentCycle + (unsigned)L);
             --Node->NumPredsLeft;
-            if (Node->NumPredsLeft == 0)
+            if (Node->NumPredsLeft == 0) {
                 Q.push(Node);
+            }
         }
+        CurrentCycle = NextCycle;
+        Q.advance(CurrentCycle, CurrentGroup);
     }

     relocatePseudoKills();
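The successor update above is the standard list-scheduling rule: a successor's ready cycle is raised to at least the producer's issue cycle plus the edge latency, while the scheduler's own cycle advances by the instruction's occupancy from the LatencyTable. A standalone illustration with made-up numbers (not values from the LatencyTable):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
        unsigned currentCycle = 0;                   // cycle at which the node issues
        unsigned occupancy = 2;                      // assumed occupancy of the instruction
        std::vector<unsigned> readyCycle = {0, 0};   // successors' current ready cycles
        std::vector<unsigned> edgeLatency = {4, 1};  // assumed per-edge latencies

        // Raise each successor's ready cycle to at least issue cycle + edge latency.
        for (size_t i = 0; i < readyCycle.size(); ++i)
            readyCycle[i] = std::max(readyCycle[i], currentCycle + edgeLatency[i]);
        currentCycle += occupancy;                   // earliest cycle for the next instruction

        std::printf("next issue cycle=%u, succ ready cycles=%u,%u\n",
                    currentCycle, readyCycle[0], readyCycle[1]);
        return 0;
    }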
@@ -1640,6 +1717,7 @@ unsigned LatencyQueue::calculatePriority(preNode* N)
                 break;
             }
         }
+        Edge.setLatency(Latency);
         Priority = std::max(Priority, SuccPriority + Latency);
     }

@@ -1648,7 +1726,7 @@ unsigned LatencyQueue::calculatePriority(preNode* N)

 // Compare two ready nodes and decide which one should be scheduled first.
 // Return true if N2 has a higher priority than N1, false otherwise.
-bool LatencyQueue::compare(preNode* N1, preNode* N2)
+bool LatencyQueue::compareReady(preNode* N1, preNode* N2)
 {
     assert(N1->getID() != N2->getID());
     assert(N1->getInst() && N2->getInst());
@@ -1697,6 +1775,37 @@ bool LatencyQueue::compare(preNode* N1, preNode* N2)
     return N2->getID() > N1->getID();
 }

+// The hold list is sorted by group ID first, then by the nodes' ready cycle.
+bool LatencyQueue::compareHold(preNode* N1, preNode* N2)
+{
+    assert(N1->getID() != N2->getID());
+    assert(N1->getInst() && N2->getInst());
+    assert(!N1->getInst()->isPseudoKill() &&
+           !N2->getInst()->isPseudoKill());
+    G4_INST* Inst1 = N1->getInst();
+    G4_INST* Inst2 = N2->getInst();
+
+    // Group ID takes precedence: a smaller group ID means higher priority.
+    unsigned GID1 = GroupInfo[Inst1];
+    unsigned GID2 = GroupInfo[Inst2];
+    if (GID1 > GID2)
+        return true;
+    if (GID1 < GID2)
+        return false;
+
+    // Then compare ready cycles: a smaller ready cycle means higher priority.
+    unsigned cyc1 = N1->getReadyCycle();
+    unsigned cyc2 = N2->getReadyCycle();
+    if (cyc1 > cyc2)
+        return true;
+    if (cyc1 < cyc2)
+        return false;
+
+    // Otherwise, break the tie on ID.
+    // A larger ID means higher priority.
+    return N2->getID() > N1->getID();
+}
+
 // Find the edge with smallest ID.
 static preNode* minElt(const std::vector<preEdge>& Elts)
 {
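Worth noting the comparator convention: std::priority_queue pops its "largest" element, so compareHold returning true when N1 has the larger group ID or the later ready cycle means the node with the smallest group ID and earliest ready cycle surfaces first. A tiny standalone check of that convention (generic C++, not IGC code):

    #include <cstdio>
    #include <queue>
    #include <vector>

    int main() {
        // Returning true means "a has lower priority than b", mirroring compareHold.
        auto lowerPriority = [](unsigned a, unsigned b) { return a > b; };
        std::priority_queue<unsigned, std::vector<unsigned>,
                            decltype(lowerPriority)> holdLike(lowerPriority);
        for (unsigned readyCycle : {7u, 3u, 9u})
            holdLike.push(readyCycle);
        std::printf("%u\n", holdLike.top());  // prints 3: earliest ready cycle pops first
        return 0;
    }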
@@ -2363,19 +2472,22 @@ void preDDD::reset(bool ReassignNodeID)
         N->NumPredsLeft = unsigned(N->Preds.size());
         N->NumSuccsLeft = unsigned(N->Succs.size());
         N->isScheduled = false;
+        N->setReadyCycle(0);
         N->isClustered = false;
         N->isClusterLead = false;
     }

     EntryNode.NumPredsLeft = 0;
     EntryNode.NumSuccsLeft = unsigned(EntryNode.Succs.size());
     EntryNode.isScheduled = false;
+    EntryNode.setReadyCycle(0);
     EntryNode.isClustered = false;
     EntryNode.isClusterLead = false;

     ExitNode.NumPredsLeft = unsigned(ExitNode.Preds.size());
     ExitNode.NumSuccsLeft = 0;
     ExitNode.isScheduled = false;
+    ExitNode.setReadyCycle(0);
     ExitNode.isClustered = false;
     ExitNode.isClusterLead = false;
 }