@@ -4793,7 +4793,10 @@ void Augmentation::augmentIntfGraph()
4793
4793
!liveAnalysis.livenessClass (G4_ADDRESS) &&
4794
4794
kernel.fg .size () > 2 ))
4795
4795
{
4796
- return ;
4796
+ if (!kernel.getOption (vISA_DumpRegChart))
4797
+ {
4798
+ return ;
4799
+ }
4797
4800
}
4798
4801
}
4799
4802
@@ -4813,6 +4816,12 @@ void Augmentation::augmentIntfGraph()
4813
4816
// Sort live-intervals based on their start
4814
4817
sortLiveIntervals ();
4815
4818
4819
+ if (kernel.getOption (vISA_DumpRegChart))
4820
+ {
4821
+ gra.regChart = new RegChartDump (gra);
4822
+ gra.regChart ->recordLiveIntervals (sortedIntervals);
4823
+ }
4824
+
4816
4825
if (gra.verifyAugmentation )
4817
4826
{
4818
4827
gra.verifyAugmentation ->loadAugData (sortedIntervals, lrs, intf.liveAnalysis ->getNumSelectedVar (), &intf, gra);
@@ -5690,7 +5699,7 @@ void PhyRegUsage::updateRegUsage(LiveRange* lr)
5690
5699
}
5691
5700
}
5692
5701
5693
- bool GraphColor::assignColors (ColorHeuristic colorHeuristicGRF, bool doBankConflict, bool highInternalConflict)
5702
+ bool GraphColor::assignColors (ColorHeuristic colorHeuristicGRF, bool doBankConflict, bool highInternalConflict, bool honorHints )
5694
5703
{
5695
5704
if (builder.getOption (vISA_RATrace))
5696
5705
{
@@ -5758,7 +5767,7 @@ bool GraphColor::assignColors(ColorHeuristic colorHeuristicGRF, bool doBankConfl
5758
5767
bool skipParentIntf = false ;
5759
5768
if (lr->hasAllocHint ())
5760
5769
{
5761
- parms.startGRFReg = lr->getAllocHint ();
5770
+ parms.startGRFReg = ( lr->getAllocHint () >= maxGRFCanBeUsed ? 0 : lr-> getAllocHint () );
5762
5771
if (varSplitPass.isPartialDcl (lr->getDcl ()))
5763
5772
{
5764
5773
parentDcl = varSplitPass.getParentDcl (lr->getDcl ());
@@ -5966,7 +5975,19 @@ bool GraphColor::assignColors(ColorHeuristic colorHeuristicGRF, bool doBankConfl
5966
5975
if (!ret)
5967
5976
return false ;
5968
5977
5969
- if (gra.getIterNo () < 3 )
5978
+ if (lr->getSpillCost () == MAXSPILLCOST &&
5979
+ !lr->getPhyReg () &&
5980
+ honorHints)
5981
+ {
5982
+ // infinite spill cost range spilled
5983
+ // undo all allocations done to split vars
5984
+ // and skip adhering to hints for preserving
5985
+ // correctness.
5986
+ resetTemporaryRegisterAssignments ();
5987
+ return assignColors (colorHeuristicGRF, doBankConflict, highInternalConflict, false );
5988
+ }
5989
+
5990
+ if (honorHints && gra.getIterNo () < 3 )
5970
5991
{
5971
5992
if (varSplitPass.isSplitDcl (lr->getDcl ()))
5972
5993
{
@@ -5982,6 +6003,25 @@ bool GraphColor::assignColors(ColorHeuristic colorHeuristicGRF, bool doBankConfl
5982
6003
{
5983
6004
auto isChildSpilled = childLR->isSpilled ();
5984
6005
assignColor (childLR, false , !isChildSpilled);
6006
+ // if allocated GRF is different than hint, then
6007
+ // undo allocation and let coloring take its course.
6008
+ // this can be done only if the childLR wasnt
6009
+ // already processed in colorOrder.
6010
+ if (!isChildSpilled && childLR->getPhyReg ())
6011
+ {
6012
+ auto hint = childLR->getAllocHint ();
6013
+ if (childLR->getPhyReg ()->asGreg ()->getRegNum () != hint)
6014
+ {
6015
+ // this is executed only if childLR is guaranteed to be
6016
+ // processed later on in colorOrder.
6017
+ childLR->resetPhyReg ();
6018
+ }
6019
+ }
6020
+ else if (isChildSpilled && childLR->getPhyReg ())
6021
+ {
6022
+ // was spilled earlier, got allocation now
6023
+ spilledLRs.remove (childLR);
6024
+ }
5985
6025
}
5986
6026
else
5987
6027
{
@@ -6070,6 +6110,12 @@ bool GraphColor::assignColors(ColorHeuristic colorHeuristicGRF, bool doBankConfl
6070
6110
{
6071
6111
MUST_BE_TRUE (lr->isSpilled (), " LR not marked as spilled, but inserted in spilledLRs list" );
6072
6112
}
6113
+
6114
+ // Verify if all LRs have either an allocation or are spilled
6115
+ for (auto lr : colorOrder)
6116
+ {
6117
+ MUST_BE_TRUE (lr->isSpilled () || lr->getPhyReg () || lr->getDcl ()->isSpilled (), " Range without allocation and not spilled" );
6118
+ }
6073
6119
#endif
6074
6120
6075
6121
return true ;
@@ -6522,8 +6568,10 @@ void GraphColor::resetTemporaryRegisterAssignments()
6522
6568
if (lrs[i]->getVar ()->getPhyReg () == NULL ) {
6523
6569
lrs[i]->resetPhyReg ();
6524
6570
lrs[i]->resetAllocHint ();
6571
+ lrs[i]->setSpilled (false );
6525
6572
}
6526
6573
}
6574
+ spilledLRs.clear ();
6527
6575
}
6528
6576
6529
6577
void GraphColor::cleanupRedundantARFFillCode ()
@@ -9524,6 +9572,15 @@ int GlobalRA::coloringRegAlloc()
9524
9572
coloring.addSaveRestoreCode (localSpillAreaOwordSize);
9525
9573
}
9526
9574
9575
+ if (kernel.getOption (vISA_DumpRegChart))
9576
+ {
9577
+ assignRegForAliasDcl ();
9578
+ computePhyReg ();
9579
+ // invoke before expanding spill/fill since
9580
+ // it modifies IR
9581
+ regChart->dumpRegChart (std::cerr);
9582
+ }
9583
+
9527
9584
expandSpillFillIntrinsics ();
9528
9585
9529
9586
if (builder.getOption (vISA_OptReport))
@@ -12311,3 +12368,158 @@ void LiveRange::setAllocHint(unsigned int h)
12311
12368
if ((h + dcl->getNumRows ()) <= gra.kernel .getNumRegTotal ())
12312
12369
allocHint = h;
12313
12370
}
12371
+
12372
+ // sortedIntervals comes from augmentation.
12373
+ // This can be invoked either post RA where phy regs are assigned to dcls,
12374
+ // or after assignColors with lrs and numLRs passed which makes this function
12375
+ // use temp allocations from lrs. Doesnt handle sub-routines yet.
12376
+ void RegChartDump::dumpRegChart (std::ostream& os, LiveRange** lrs, unsigned int numLRs)
12377
+ {
12378
+ constexpr unsigned int N = 128 ;
12379
+ std::unordered_map<G4_INST*, std::bitset<N>> busyGRFPerInst;
12380
+ bool dumpHex = false ;
12381
+
12382
+ auto getPhyReg = [&](G4_Declare* dcl)
12383
+ {
12384
+ auto preg = dcl->getRegVar ()->getPhyReg ();
12385
+ if (preg)
12386
+ return preg;
12387
+
12388
+ for (unsigned int i = 0 ; i != numLRs; i++)
12389
+ {
12390
+ LiveRange* lr = lrs[i];
12391
+ if (lr->getDcl () == dcl)
12392
+ {
12393
+ preg = lr->getPhyReg ();
12394
+ break ;
12395
+ }
12396
+ }
12397
+
12398
+ return preg;
12399
+ };
12400
+
12401
+ for (auto dcl : sortedLiveIntervals)
12402
+ {
12403
+ if (dcl->getRegFile () != G4_RegFileKind::G4_GRF &&
12404
+ dcl->getRegFile () != G4_RegFileKind::G4_INPUT)
12405
+ continue ;
12406
+
12407
+ auto phyReg = getPhyReg (dcl);
12408
+ if (!phyReg)
12409
+ continue ;
12410
+
12411
+ if (!phyReg->isGreg ())
12412
+ continue ;
12413
+
12414
+ auto GRFStart = phyReg->asGreg ()->getRegNum ();
12415
+ auto numRows = dcl->getNumRows ();
12416
+
12417
+ auto startInst = startEnd[dcl].first ;
12418
+ auto endInst = startEnd[dcl].second ;
12419
+
12420
+ bool start = (dcl->getRegFile () == G4_RegFileKind::G4_INPUT);
12421
+ bool done = false ;
12422
+ for (auto bb : gra.kernel .fg .getBBList ())
12423
+ {
12424
+ for (auto inst : bb->getInstList ())
12425
+ {
12426
+ if (inst == startInst)
12427
+ {
12428
+ start = true ;
12429
+ continue ;
12430
+ }
12431
+
12432
+ if (!start)
12433
+ continue ;
12434
+
12435
+ for (unsigned int i = GRFStart; i != (GRFStart + numRows); i++)
12436
+ {
12437
+ busyGRFPerInst[inst].set (i, true );
12438
+ }
12439
+
12440
+ if (inst == endInst)
12441
+ {
12442
+ done = true ;
12443
+ break ;
12444
+ }
12445
+ }
12446
+
12447
+ if (done)
12448
+ break ;
12449
+ }
12450
+ }
12451
+
12452
+ // Now emit instructions with GRFs
12453
+ for (auto bb : gra.kernel .fg .getBBList ())
12454
+ {
12455
+ for (auto inst : bb->getInstList ())
12456
+ {
12457
+ constexpr unsigned int maxInstLen = 80 ;
12458
+ auto item = busyGRFPerInst[inst];
12459
+ std::stringstream ss;
12460
+ inst->emit (ss);
12461
+ auto len = ss.str ().length ();
12462
+
12463
+ if (len <= maxInstLen)
12464
+ {
12465
+ os << ss.str ();
12466
+ for (unsigned int i = 0 ; i != maxInstLen - ss.str ().length (); i++)
12467
+ os << " " ;
12468
+ }
12469
+ else
12470
+ {
12471
+ auto tmpStr = ss.str ();
12472
+ auto limitedStr = tmpStr.substr (0 , maxInstLen);
12473
+ os << std::string (limitedStr);
12474
+ }
12475
+
12476
+ os << " " ;
12477
+
12478
+ if (!dumpHex)
12479
+ {
12480
+ // dump GRFs | - busy, * - free
12481
+ for (unsigned int i = 0 ; i != N; i++)
12482
+ {
12483
+ // emit in groups of 10 GRFs
12484
+ if (i > 0 && (i % 10 ) == 0 )
12485
+ os << " " ;
12486
+
12487
+ if (item[i] == true )
12488
+ os << " |" ; // busy
12489
+ else
12490
+ os << " *" ; // free
12491
+ }
12492
+ }
12493
+ else
12494
+ {
12495
+ for (unsigned int i = 0 ; i != N; i+=sizeof (unsigned short )*8 )
12496
+ {
12497
+ unsigned short busyGRFs = 0 ;
12498
+ for (unsigned int j = 0 ; j != sizeof (unsigned short )*8 ; j++)
12499
+ {
12500
+ auto offset = i + j;
12501
+ if (offset < N)
12502
+ {
12503
+ if (item[offset])
12504
+ busyGRFs |= (1 << j);
12505
+ }
12506
+ }
12507
+ printf (" r%d:%4x " , i, busyGRFs);
12508
+ }
12509
+ }
12510
+ os << std::endl;
12511
+ }
12512
+ os << std::endl;
12513
+ }
12514
+ }
12515
+
12516
+ void RegChartDump::recordLiveIntervals (std::vector<G4_Declare*>& dcls)
12517
+ {
12518
+ sortedLiveIntervals = dcls;
12519
+ for (auto dcl : dcls)
12520
+ {
12521
+ auto start = gra.getStartInterval (dcl);
12522
+ auto end = gra.getEndInterval (dcl);
12523
+ startEnd.insert (std::make_pair (dcl, std::make_pair (start, end)));
12524
+ }
12525
+ }
0 commit comments