@@ -2650,12 +2650,9 @@ void GlobalRA::updateSubRegAlignment(G4_SubReg_Align subAlign) {
2650
2650
}
2651
2651
}
2652
2652
2653
- int GlobalRA::getAlignFromAugBucket (G4_Declare *dcl) {
2653
+ bool GlobalRA::evenAlignNeeded (G4_Declare *dcl) {
2654
2654
if (GlobalRA::useGenericAugAlign (builder.getPlatformGeneration ())) {
2655
- // Return 0 if no special alignment is needed
2656
- // Return 2 if even alignment is needed
2657
- // Return 4 if quad alignment is needed
2658
-
2655
+ // Return true if even alignment is needed
2659
2656
// Even align needed if for given SIMD size and elem type,
2660
2657
// a complete def uses between 1-2 GRFs.
2661
2658
auto kernelSimdSizeToUse = kernel.getSimdSizeWithSlicing ();
@@ -2673,41 +2670,14 @@ int GlobalRA::getAlignFromAugBucket(G4_Declare *dcl) {
2673
2670
topdclAugMask == AugmentationMasks::Default64Bit)
2674
2671
elemSizeToUse = 8 ;
2675
2672
2676
- auto totalByteSize = elemSizeToUse * kernelSimdSizeToUse;
2677
- auto bucketSpans2GRFs = [&]() {
2678
- return totalByteSize > (unsigned )kernel.numEltPerGRF <Type_UB>() &&
2679
- totalByteSize <= (unsigned )(2 * kernel.numEltPerGRF <Type_UB>());
2680
- };
2681
-
2682
- if (!(!builder.canReadR0 () && dcl == kernel.fg .builder ->getBuiltinR0 ())) {
2683
- if (use4GRFAlign) {
2684
- // The only time it's safe to do 2GRF align is when augmentation
2685
- // bucket is known to be Default32Bit, otherwise we need to align
2686
- // 4GRF. It isn't enough to simply check elemSize * GRF size to
2687
- // decide alignment.
2688
- if (topdclAugMask == AugmentationMasks::Default32Bit) {
2689
- if (bucketSpans2GRFs ())
2690
- return 2 ;
2691
- } else if (topdclAugMask == AugmentationMasks::Default64Bit) {
2692
- if (bucketSpans2GRFs ())
2693
- // :df SIMD16
2694
- return 2 ;
2695
-
2696
- // :df SIMD32
2697
- return 4 ;
2698
- } else {
2699
- // Local RA will take this path as augmentation buckets are set
2700
- // to Undetermined. Although this is conservative, hybrid RA
2701
- // will run augmentation and compute buckets to fill in "holes".
2702
- // For eg, mov (32|M0) V10<2>:f should use 4GRF alignment as
2703
- // it's Default64Bit variable, although elem size is :f.
2704
- return 4 ;
2705
- }
2706
- } else {
2707
- // Even align if size is between 1-2 GRFs, for >2GRF sizes.
2708
- if (bucketSpans2GRFs ())
2709
- return 2 ;
2710
- }
2673
+ if ( // Even align if size is between 1-2 GRFs, for >2GRF sizes use weak
2674
+ // edges
2675
+ (elemSizeToUse * kernelSimdSizeToUse) >
2676
+ (unsigned )kernel.numEltPerGRF <Type_UB>() &&
2677
+ (elemSizeToUse * kernelSimdSizeToUse) <=
2678
+ (unsigned )(2 * kernel.numEltPerGRF <Type_UB>()) &&
2679
+ !(!builder.canReadR0 () && dcl == kernel.fg .builder ->getBuiltinR0 ())) {
2680
+ return true ;
2711
2681
}
2712
2682
}
2713
2683
} else {
@@ -2723,28 +2693,21 @@ int GlobalRA::getAlignFromAugBucket(G4_Declare *dcl) {
2723
2693
topdcl->getByteSize () >= kernel.numEltPerGRF <Type_UB>() &&
2724
2694
!(!builder.canReadR0 () &&
2725
2695
dcl == kernel.fg .builder ->getBuiltinR0 ())) {
2726
- return 2 ;
2696
+ return true ;
2727
2697
}
2728
2698
}
2729
2699
}
2730
2700
}
2731
2701
2732
- return 0 ;
2702
+ return false ;
2733
2703
}
2734
2704
2735
- void GlobalRA::augAlign () {
2736
- // Update alignment of all GRF declares based on
2737
- // augmentation bucket and platform.
2705
+ // This function can be invoked before local RA or after augmentation.
2706
+ void GlobalRA::evenAlign () {
2707
+ // Update alignment of all GRF declares that need even alignment
2738
2708
for (auto dcl : kernel.Declares ) {
2739
2709
if (dcl->getRegFile () & G4_GRF) {
2740
- unsigned int align = getAlignFromAugBucket (dcl);
2741
- if (align == 4 ) {
2742
- if (!isQuadAligned (dcl)) {
2743
- incRA.evenAlignUpdate (dcl);
2744
- }
2745
- forceQuadAlign (dcl);
2746
- }
2747
- else if (align == 2 ) {
2710
+ if (evenAlignNeeded (dcl)) {
2748
2711
if (!isEvenAligned (dcl)) {
2749
2712
incRA.evenAlignUpdate (dcl);
2750
2713
}
@@ -3508,8 +3471,8 @@ bool Augmentation::markNonDefaultMaskDef() {
3508
3471
3509
3472
bool checkLRAAlign = false ;
3510
3473
if (liveAnalysis.livenessClass (G4_GRF)) {
3511
- if (GlobalRA::useGenericAugAlign (kernel.getPlatformGeneration ()) &&
3512
- gra.getAlignFromAugBucket (dcl) > 0 )
3474
+ if (( GlobalRA::useGenericAugAlign (kernel.getPlatformGeneration ()) &&
3475
+ gra.evenAlignNeeded (dcl)) )
3513
3476
checkLRAAlign = true ;
3514
3477
else if (gra.getAugmentationMask (dcl) ==
3515
3478
AugmentationMasks::Default32Bit &&
@@ -3522,16 +3485,10 @@ bool Augmentation::markNonDefaultMaskDef() {
3522
3485
if (dclLR) {
3523
3486
int s;
3524
3487
auto phyReg = dclLR->getPhyReg (s);
3525
- unsigned int maxAlign = 2 ;
3526
- if (gra.use4GRFAlign && gra.getAugmentationMask (dcl) == AugmentationMasks::Default64Bit) {
3527
- maxAlign = 4 ;
3528
- }
3529
- if (phyReg && phyReg->asGreg ()->getRegNum () % maxAlign != 0 ) {
3530
- // If LRA assignment is not aligned as expected then
3488
+ if (phyReg && phyReg->asGreg ()->getRegNum () % 2 != 0 ) {
3489
+ // If LRA assignment is not 2GRF aligned then
3531
3490
// mark it as non-default. GRA candidates cannot fully
3532
3491
// overlap with such ranges. Partial overlap is illegal.
3533
- vISA_ASSERT (!gra.use4GRFAlign ,
3534
- " expecting LRA allocation to be 4GRF aligned" );
3535
3492
gra.setAugmentationMask (dcl, AugmentationMasks::NonDefault);
3536
3493
nonDefaultMaskDefFound = true ;
3537
3494
}
@@ -4238,8 +4195,6 @@ bool Interference::isStrongEdgeBetween(const G4_Declare *dcl1,
4238
4195
4239
4196
bool Augmentation::weakEdgeNeeded (AugmentationMasks defaultDclMask,
4240
4197
AugmentationMasks newDclMask) {
4241
- if (gra.use4GRFAlign )
4242
- return false ;
4243
4198
if (useGenericAugAlign) {
4244
4199
// Weak edge needed in case #GRF exceeds 2
4245
4200
if (newDclMask == AugmentationMasks::Default64Bit)
@@ -4791,9 +4746,9 @@ void Augmentation::augmentIntfGraph() {
4791
4746
// to 2GRF except for NoMask variables
4792
4747
VISA_DEBUG_VERBOSE (std::cout
4793
4748
<< " Kernel size is SIMD" << kernel.getSimdSize ()
4794
- << " so updating all GRFs to aug align "
4749
+ << " so updating all GRFs to be 2GRF aligned "
4795
4750
<< " \n " );
4796
- gra.augAlign ();
4751
+ gra.evenAlign ();
4797
4752
}
4798
4753
gra.updateSubRegAlignment (kernel.getGRFAlign ());
4799
4754
}
@@ -5099,7 +5054,6 @@ void GraphColor::computeDegreeForGRF() {
5099
5054
// consider weak edges in degree computation
5100
5055
auto *weakEdges = intf.getCompatibleSparseIntf (lrs[i]->getDcl ());
5101
5056
if (weakEdges) {
5102
- vISA_ASSERT (!gra.use4GRFAlign , " not expecting weak edges" );
5103
5057
for (auto weakNeighbor : *weakEdges) {
5104
5058
if (!weakNeighbor->getRegVar ()->isRegAllocPartaker ())
5105
5059
continue ;
@@ -5413,22 +5367,16 @@ void GraphColor::relaxNeighborDegreeGRF(LiveRange *lr) {
5413
5367
if (!(lr->getIsPseudoNode ()) && !(lr->getIsPartialDcl ())) {
5414
5368
unsigned lr_id = lr->getVar ()->getId ();
5415
5369
bool lr2EvenAlign = gra.isEvenAligned (lr->getDcl ());
5416
- unsigned int lr2AugAlign = gra.getAugAlign (lr->getDcl ());
5417
5370
unsigned lr2_nreg = lr->getNumRegNeeded ();
5418
5371
5419
5372
// relax degree between 2 nodes
5420
5373
auto relaxDegree = [&](LiveRange *lr1) {
5421
5374
if (lr1->getActive () && !lr1->getIsPseudoNode () &&
5422
5375
!(lr1->getIsPartialDcl ())) {
5376
+ bool lr1EvenAlign = gra.isEvenAligned (lr1->getDcl ());
5423
5377
unsigned lr1_nreg = lr1->getNumRegNeeded ();
5424
- unsigned w = 0 ;
5425
- if (gra.use4GRFAlign ) {
5426
- unsigned int lr1AugAlign = gra.getAugAlign (lr1->getDcl ());
5427
- w = edgeWeightWith4GRF (lr1AugAlign, lr2AugAlign, lr1_nreg, lr2_nreg);
5428
- } else {
5429
- bool lr1EvenAlign = gra.isEvenAligned (lr1->getDcl ());
5430
- w = edgeWeightGRF (lr1EvenAlign, lr2EvenAlign, lr1_nreg, lr2_nreg);
5431
- }
5378
+ unsigned w =
5379
+ edgeWeightGRF (lr1EvenAlign, lr2EvenAlign, lr1_nreg, lr2_nreg);
5432
5380
VISA_DEBUG_VERBOSE ({
5433
5381
std::cout << " \t relax " ;
5434
5382
lr1->dump ();
@@ -5834,15 +5782,9 @@ bool GraphColor::assignColors(ColorHeuristic colorHeuristicGRF,
5834
5782
if (!failed_alloc) {
5835
5783
// When evenAlignNeeded is true, it is binding for correctness
5836
5784
bool evenAlignNeeded = gra.isEvenAligned (lrVar->getDeclare ());
5837
- bool quadAlignNeeded = gra.isQuadAligned (lrVar->getDeclare ());
5838
- BankAlign align = BankAlign::Either;
5839
- if (quadAlignNeeded)
5840
- align = BankAlign::QuadGRF;
5841
- else if (evenAlignNeeded)
5842
- align = BankAlign::Even;
5843
-
5785
+ BankAlign align = evenAlignNeeded ? BankAlign::Even : BankAlign::Either;
5844
5786
if (allocFromBanks) {
5845
- vISA_ASSERT (align != BankAlign::QuadGRF, " unexpected value " );
5787
+
5846
5788
if (!isHybrid && oneGRFBankDivision &&
5847
5789
(!evenAlignNeeded ||
5848
5790
builder.getPlatformGeneration () == PlatformGen::GEN9)) {
@@ -10934,20 +10876,12 @@ void GlobalRA::insertRestoreAddr(G4_BB *bb) {
10934
10876
// correctness.
10935
10877
//
10936
10878
unsigned GraphColor::edgeWeightGRF (const LiveRange *lr1, const LiveRange *lr2) {
// On the added (+) side of this hunk: reads each live range's even-alignment
// flag via gra.isEvenAligned() and its register count via getNumRegNeeded(),
// then returns the result of the four-argument edgeWeightGRF overload.
// The removed (-) lines show the earlier form, which branched on
// gra.use4GRFAlign and called edgeWeightWith4GRF on that path.
10879
+ bool lr1EvenAlign = gra.isEvenAligned (lr1->getDcl ());
10880
+ bool lr2EvenAlign = gra.isEvenAligned (lr2->getDcl ());
10937
10881
unsigned lr1_nreg = lr1->getNumRegNeeded ();
10938
10882
unsigned lr2_nreg = lr2->getNumRegNeeded ();
10939
10883
10940
- if (gra.use4GRFAlign ) {
10941
- auto lr1Align = gra.getAugAlign (lr1->getDcl ());
10942
- auto lr2Align = gra.getAugAlign (lr2->getDcl ());
10943
-
10944
- return edgeWeightWith4GRF (lr1Align, lr2Align, lr1_nreg, lr2_nreg);
10945
- } else {
10946
- bool lr1EvenAlign = gra.isEvenAligned (lr1->getDcl ());
10947
- bool lr2EvenAlign = gra.isEvenAligned (lr2->getDcl ());
10948
-
10949
- return edgeWeightGRF (lr1EvenAlign, lr2EvenAlign, lr1_nreg, lr2_nreg);
10950
- }
10884
+ return edgeWeightGRF (lr1EvenAlign, lr2EvenAlign, lr1_nreg, lr2_nreg);
10951
10885
}
10952
10886
10953
10887
unsigned GraphColor::edgeWeightARF (const LiveRange *lr1, const LiveRange *lr2) {
0 commit comments