@@ -296,11 +296,6 @@ void LocalScheduler::localScheduling()
296
296
jitInfo->BBNum = i;
297
297
}
298
298
299
- void G4_BB_Schedule::setOptimumConsecutiveSends ()
300
- {
301
- optimumConsecutiveSends = m_options->getuInt32Option (vISA_NumPackedSends);
302
- }
303
-
304
299
void G4_BB_Schedule::dumpSchedule (G4_BB *bb)
305
300
{
306
301
const char *asmName = nullptr ;
@@ -390,12 +385,10 @@ void G4_BB_Schedule::dumpSchedule(G4_BB *bb)
390
385
G4_BB_Schedule::G4_BB_Schedule (G4_Kernel* k, Mem_Manager &m, G4_BB *block,
391
386
int dddTimer, int schTimer, uint32_t & totalCycle,
392
387
const Options *options, const LatencyTable <)
393
- : kernel(k), mem(m), bb(block), curINum( 0 ),
388
+ : kernel(k), mem(m), bb(block),
394
389
lastCycle(0 ), sendStallCycle(0 ),
395
390
sequentialCycle(0 ), m_options(options)
396
391
{
397
- setOptimumConsecutiveSends ();
398
-
399
392
// we use local id in the scheduler for determining two instructions' original ordering
400
393
bb->resetLocalId ();
401
394
@@ -995,12 +988,10 @@ DDD::DDD(Mem_Manager &m, G4_BB* bb, const Options *options,
995
988
: mem(m), m_options(options), LT(lt), kernel(k)
996
989
{
997
990
Node* lastBarrier = NULL ;
998
- numOfPairs = 0 ;
999
- numSendsScheduled = 0 ;
1000
991
totalGRFNum = m_options->getuInt32Option (vISA_TotalGRFNum);
1001
992
HWthreadsPerEU = m_options->getuInt32Option (vISA_HWThreadNumberPerEU);
1002
-
1003
993
useMTLatencies = m_options->getOption (vISA_useMultiThreadedLatencies);
994
+ bool BTIIsRestrict = m_options->getOption (vISA_ReorderDPSendToDifferentBti);
1004
995
1005
996
GRF_BUCKET = 0 ;
1006
997
ACC_BUCKET = GRF_BUCKET + totalGRFNum;
@@ -1019,20 +1010,16 @@ DDD::DDD(Mem_Manager &m, G4_BB* bb, const Options *options,
1019
1010
std::list<G4_INST*>::reverse_iterator iInst (bb->rbegin ()), iInstEnd (bb->rend ());
1020
1011
std::vector<BucketDescr> BDvec;
1021
1012
1022
- bool BTIIsRestrict = m_options->getOption (vISA_ReorderDPSendToDifferentBti);
1023
1013
1024
1014
for (int nodeId = (int )(bb->size () - 1 ); iInst != iInstEnd; ++iInst, nodeId--)
1025
1015
{
1026
1016
Node *node = nullptr ;
1027
1017
// If we have a pair of instructions to be mapped on a single DAG node:
1028
1018
node = new (mem)Node (nodeId, *iInst, depEdgeAllocator, LT);
1029
1019
allNodes.push_back (node);
1030
-
1031
- assert (node->getInstructions ()->size () == 1 );
1032
- G4_INST *curInst = *node->getInstructions ()->begin ();
1020
+ G4_INST *curInst = node->getInstructions ()->front ();
1033
1021
bool hasIndir = false ;
1034
1022
BDvec.clear ();
1035
- unsigned NumRegs = m_options->getuInt32Option (vISA_TotalGRFNum);
1036
1023
1037
1024
1038
1025
// Get buckets for all physical registers assigned in curInst
@@ -1058,19 +1045,14 @@ DDD::DDD(Mem_Manager &m, G4_BB* bb, const Options *options,
1058
1045
for (auto it = LB.begin (), ite = LB.end (); it != ite; ++it) {
1059
1046
BucketNode *BNode = *it;
1060
1047
Node* liveNode = BNode->node ;
1061
- if (! liveNode->hasPreds ())
1048
+ if (liveNode->preds . empty ())
1062
1049
{
1063
1050
createAddEdge (node, liveNode, depType);
1064
1051
}
1065
1052
}
1066
1053
LB.clearAllLive ();
1067
-
1068
- if (depType == DEP_LABEL)
1054
+ if (lastBarrier)
1069
1055
{
1070
- Roots.push_back (node);
1071
- }
1072
-
1073
- if (lastBarrier) {
1074
1056
createAddEdge (node, lastBarrier, lastBarrier->isBarrier ());
1075
1057
}
1076
1058
@@ -1174,38 +1156,6 @@ DDD::DDD(Mem_Manager &m, G4_BB* bb, const Options *options,
1174
1156
// Insert this node into the graph.
1175
1157
InsertNode (node);
1176
1158
}
1177
-
1178
- // We have no label in this block. Need to initialize roots to traverse the DAG correctly.
1179
- if (Roots.size () == 0 )
1180
- {
1181
- // Iterate over all buckets and push all live instructions
1182
- // in to Root list
1183
- for (auto it = LB.begin (), ite = LB.end (); it != ite; ++it)
1184
- {
1185
- Node *curLiveNode = (*it)->node ;
1186
- if (!curLiveNode->hasPreds ())
1187
- {
1188
- if (std::find (Roots.begin (), Roots.end (), curLiveNode) == Roots.end ())
1189
- {
1190
- // Insert Root node only if it hasn't yet
1191
- // been inserted to Root list.
1192
- Roots.push_back (curLiveNode);
1193
- }
1194
- }
1195
- }
1196
-
1197
- // It is possible that first inst of a BB is a barrier
1198
- // If the inst does not have any operands then it will not be present in
1199
- // any bucket. Also since it is a barrier, all other buckets will have been
1200
- // emptied. So previous loop will not find any Roots. This will cause
1201
- // list scheduler to have 0-size ready list. The fix is to check whether
1202
- // size of Roots is zero and inserting barrier in to Roots if it is.
1203
- if (Roots.size () == 0 ) {
1204
- MUST_BE_TRUE (lastBarrier != NULL ,
1205
- " Size of Roots list was 0 and no barrier was found" );
1206
- Roots.push_back (lastBarrier);
1207
- }
1208
- }
1209
1159
}
1210
1160
1211
1161
// Return TRUE if there is a dependency fromNode->toNode
@@ -1486,7 +1436,6 @@ void DDD::pairTypedWriteOrURBWriteNodes(G4_BB *bb) {
1486
1436
}
1487
1437
1488
1438
// 2. Join nodes that need pairing
1489
- uint32_t cntPairs = 0 ;
1490
1439
for (auto && pair : instrPairs) {
1491
1440
Node *firstNode = pair.first ;
1492
1441
Node *secondNode = pair.second ;
@@ -1500,15 +1449,6 @@ void DDD::pairTypedWriteOrURBWriteNodes(G4_BB *bb) {
1500
1449
{
1501
1450
// A. move the deps of the second node to the first.
1502
1451
moveDeps (secondNode, firstNode);
1503
- secondNode->setDead ();
1504
-
1505
- // if second node is not root, first node may not be either
1506
- // as it has inherited second node's predecessors
1507
- auto result2 = std::find (Roots.begin (), Roots.end (), secondNode);
1508
- if (result2 == std::end (Roots))
1509
- {
1510
- Roots.remove (firstNode);
1511
- }
1512
1452
1513
1453
// B. We add the second instruction to the first node.
1514
1454
assert (firstNode->getInstructions ()->size () == 1 );
@@ -1518,10 +1458,21 @@ void DDD::pairTypedWriteOrURBWriteNodes(G4_BB *bb) {
1518
1458
{
1519
1459
firstInstr->setOptionOn (InstOpt_Atomic);
1520
1460
}
1521
- cntPairs++;
1461
+
1462
+ // C. Cleanup the paired node.
1463
+ secondNode->clear ();
1522
1464
}
1523
1465
}
1524
- numOfPairs = cntPairs;
1466
+ }
1467
+
1468
// Rebuilds the Roots list from the current state of the dependence graph.
// Roots is emptied first, then every node in allNodes that has no
// predecessors (preds is empty) and still holds at least one instruction
// is appended, in allNodes order.
// listSchedule() calls this immediately before seeding its preReadyQueue
// from Roots.
// NOTE(review): the non-empty-instructions check presumably skips nodes
// that pairTypedWriteOrURBWriteNodes() merged into their partner and then
// reset via secondNode->clear() -- confirm that Node::clear() empties the
// instruction list.
+ void DDD::collectRoots ()
1469
+ {
1470
// Drop any previously collected roots so the list is recomputed in full.
+ Roots.clear ();
1471
+ for (auto N : allNodes) {
1472
// A root has no incoming dependence edges and still carries instructions.
+ if (N->preds .empty () && !N->getInstructions ()->empty ()) {
1473
+ Roots.push_back (N);
1474
+ }
1475
+ }
1525
1476
}
1526
1477
1527
1478
void DDD::setPriority (Node *pred, const Edge &edge)
@@ -1795,7 +1746,7 @@ struct criticalCmp
1795
1746
else
1796
1747
{
1797
1748
return (*n1->getInstructions ())[0 ]->getLocalId ()
1798
- > (*n2->getInstructions ())[0 ]->getLocalId ();
1749
+ > (*n2->getInstructions ())[0 ]->getLocalId ();
1799
1750
}
1800
1751
}
1801
1752
}
@@ -1822,11 +1773,9 @@ uint32_t DDD::listSchedule(G4_BB_Schedule *schedule)
1822
1773
// that is their earliest cycle is >= than the current schedule cycle.
1823
1774
std::priority_queue<Node *, std::vector<Node *>, earlyCmp> preReadyQueue (SS);
1824
1775
1825
- for (NODE_LIST_ITER node_it = Roots.begin ();
1826
- node_it != Roots.end ();
1827
- node_it++)
1828
- {
1829
- preReadyQueue.push (*node_it);
1776
+ collectRoots ();
1777
+ for (auto N : Roots) {
1778
+ preReadyQueue.push (N);
1830
1779
}
1831
1780
1832
1781
// The scheduler's clock.
@@ -1869,7 +1818,7 @@ uint32_t DDD::listSchedule(G4_BB_Schedule *schedule)
1869
1818
// Pointer to node to be scheduled.
1870
1819
Node *scheduled = readyList.top ();
1871
1820
readyList.pop ();
1872
-
1821
+
1873
1822
// try to avoid b2b math if possible as there are pipeline stalls
1874
1823
if (scheduled->getInstructions ()->front ()->isMath () &&
1875
1824
lastScheduled && lastScheduled->getInstructions ()->front ()->isMath ())
@@ -1996,7 +1945,6 @@ uint32_t DDD::listSchedule(G4_BB_Schedule *schedule)
1996
1945
preReadyQueue.push (succ);
1997
1946
}
1998
1947
}
1999
- schedule->curINum ++;
2000
1948
2001
1949
// Increment the scheduler's clock after each scheduled node
2002
1950
currCycle += scheduled->getOccupancy ();
0 commit comments