@@ -2228,48 +2228,34 @@ void GlobalRA::updateSubRegAlignment(G4_SubReg_Align subAlign)
2228
2228
}
2229
2229
}
2230
2230
2231
// evenAlignNeeded: decides whether `dcl` should be given even GRF alignment.
// The decision is made on the root declare (dcl->getRootDeclare()):
// areAllDefsNoMask(topdcl) must be false, the root must not be a partial dcl,
// and its augmentation mask must not be NonDefault. The element size is raised
// to 4 when it is smaller than 4 and the mask is Default32Bit, or to 8 when it
// is smaller than 8 and the mask is Default64Bit, and is then multiplied by
// kernel.getSimdSizeWithSlicing(). Returns true only when that product is
// greater than GENX_GRF_REG_SIZ and at most 2 * GENX_GRF_REG_SIZ, unless
// vISA_enablePreemption is set and `dcl` is the builtin R0 declare; returns
// false in every other case.
// NOTE(review): the builtin R0 comparison uses `dcl`, not the root declare -
// confirm that this is intended.
- bool GlobalRA::evenAlignNeeded (G4_Declare* dcl)
2232
- {
2233
- // Return true if even alignment is needed
2234
- // Even align needed if for given SIMD size and elem type,
2235
- // a complete def uses between 1-2 GRFs.
2236
- auto kernelSimdSizeToUse = kernel.getSimdSizeWithSlicing ();
2237
- G4_Declare* topdcl = dcl->getRootDeclare ();
2238
- auto topdclAugMask = getAugmentationMask (topdcl);
2239
-
2240
- if (!areAllDefsNoMask (topdcl) && !topdcl->getIsPartialDcl () &&
2241
- topdclAugMask != AugmentationMasks::NonDefault)
2242
- {
2243
- auto elemSizeToUse = topdcl->getElemSize ();
2244
- if (elemSizeToUse < 4 && topdclAugMask == AugmentationMasks::Default32Bit)
2245
- // :uw with hstride 2 can also be Default32Bit and hence needs even alignment
2246
- elemSizeToUse = 4 ;
2247
- else if (elemSizeToUse < 8 && topdclAugMask == AugmentationMasks::Default64Bit)
2248
- elemSizeToUse = 8 ;
2249
-
2250
- if (// Even align if size is between 1-2 GRFs, for >2GRF sizes use weak edges
2251
- (elemSizeToUse * kernelSimdSizeToUse) > (unsigned int )GENX_GRF_REG_SIZ &&
2252
- (elemSizeToUse * kernelSimdSizeToUse) <= (unsigned int )(2 * GENX_GRF_REG_SIZ) &&
2253
- !(kernel.fg .builder ->getOption (vISA_enablePreemption) &&
2254
- dcl == kernel.fg .builder ->getBuiltinR0 ()))
2255
- {
2256
- return true ;
2257
- }
2258
- }
2259
- return false ;
2260
- }
2261
-
2262
2231
// This function can be invoked before local RA or after augmentation.
2232
+ // When invoked before local RA, it sets all vars to be Even aligned,
2233
+ // including NoMask ones. This is safe, but conservative. Post
2234
+ // augmentation, dcl masks are available so only non-NoMask vars will
2235
+ // be Even aligned. Others will be Either aligned. There is no need
2236
+ // to store old value of align because HW has no restriction on
2237
+ // even/odd alignment that HW conformity computes.
2263
2238
void GlobalRA::evenAlign ()
2264
2239
{
2265
2240
// Update alignment of all GRF declares to align
2266
2241
for (auto dcl : kernel.Declares )
2267
2242
{
2268
2243
if (dcl->getRegFile () & G4_GRF)
2269
2244
{
2270
- if (evenAlignNeeded (dcl))
2245
+ G4_Declare* topdcl = dcl->getRootDeclare ();
2246
+ auto topdclAugMask = getAugmentationMask (topdcl);
2247
+
2248
+ if (!areAllDefsNoMask (topdcl) && !topdcl->getIsPartialDcl () &&
2249
+ topdclAugMask != AugmentationMasks::NonDefault &&
2250
+ topdclAugMask != AugmentationMasks::Default64Bit)
2271
2251
{
2272
- setEvenAligned (dcl, true );
2252
+ if ((topdcl->getElemSize () >= 4 || topdclAugMask == AugmentationMasks::Default32Bit) &&
2253
+ topdcl->getByteSize () >= GENX_GRF_REG_SIZ &&
2254
+ !(kernel.fg .builder ->getOption (vISA_enablePreemption) &&
2255
+ dcl == kernel.fg .builder ->getBuiltinR0 ()))
2256
+ {
2257
+ setEvenAligned (dcl, true );
2258
+ }
2273
2259
}
2274
2260
}
2275
2261
}
@@ -3127,7 +3113,9 @@ bool Augmentation::markNonDefaultMaskDef()
3127
3113
prevAugMask = gra.getAugmentationMask (dcl);
3128
3114
}
3129
3115
3130
- if (gra.evenAlignNeeded (dcl))
3116
+ if (liveAnalysis.livenessClass (G4_GRF) &&
3117
+ gra.getAugmentationMask (dcl) == AugmentationMasks::Default32Bit &&
3118
+ kernel.getSimdSize () > NUM_DWORDS_PER_GRF)
3131
3119
{
3132
3120
auto dclLR = gra.getLocalLR (dcl);
3133
3121
if (dclLR)
@@ -3136,7 +3124,7 @@ bool Augmentation::markNonDefaultMaskDef()
3136
3124
auto phyReg = dclLR->getPhyReg (s);
3137
3125
if (phyReg && phyReg->asGreg ()->getRegNum () % 2 != 0 )
3138
3126
{
3139
- // If LRA assignment is not 2GRF aligned for then
3127
+ // If LRA assignment is not 2GRF aligned for SIMD16 then
3140
3128
// mark it as non-default. GRA candidates cannot fully
3141
3129
// overlap with such ranges. Partial overlap is illegal.
3142
3130
gra.setAugmentationMask (dcl, AugmentationMasks::NonDefault);
@@ -4178,22 +4166,6 @@ bool Interference::isStrongEdgeBetween(G4_Declare* dcl1, G4_Declare* dcl2)
4178
4166
return false ;
4179
4167
}
4180
4168
4181
// weakEdgeNeeded: returns true when the augmentation mask `m` is Default64Bit
// or Default32Bit and the byte size of Type_Q (Default64Bit case) or Type_D
// (Default32Bit case), multiplied by kernel.getSimdSizeWithSlicing(), exceeds
// 2 * GENX_GRF_REG_SIZ. Any other mask value returns false.
- bool Augmentation::weakEdgeNeeded (AugmentationMasks m)
4182
- {
4183
- // Weak edge needed in case #GRF exceeds 2
4184
-
4185
- if (m == AugmentationMasks::Default64Bit)
4186
- return (G4_Type_Table[Type_Q].byteSize *kernel.getSimdSizeWithSlicing ()) > (unsigned int )(2 * GENX_GRF_REG_SIZ);
4187
-
4188
- if (m == AugmentationMasks::Default32Bit)
4189
- {
4190
- // Even align up to 2 GRFs size variable, use weak edges beyond
4191
- return (G4_Type_Table[Type_D].byteSize *kernel.getSimdSizeWithSlicing ()) > (unsigned int )(2 * GENX_GRF_REG_SIZ);
4192
- }
4193
-
4194
- return false ;
4195
- }
4196
-
4197
4169
//
4198
4170
// Mark interference between newDcl and other incompatible dcls in current active lists.
4199
4171
//
@@ -4211,8 +4183,10 @@ void Augmentation::buildSIMDIntfDcl(G4_Declare* newDcl, bool isCall)
4211
4183
{
4212
4184
if (liveAnalysis.livenessClass (G4_GRF) &&
4213
4185
// Populate compatible sparse intf data structure
4214
- // only for weak edges.
4215
- weakEdgeNeeded (newDclAugMask))
4186
+ // only for 64-bit types since others can be
4187
+ // handled using Even align.
4188
+ gra.getAugmentationMask (defaultDcl) == AugmentationMasks::Default64Bit &&
4189
+ newDclAugMask == AugmentationMasks::Default64Bit)
4216
4190
{
4217
4191
if (defaultDcl->getRegVar ()->isPhyRegAssigned () &&
4218
4192
newDcl->getRegVar ()->isPhyRegAssigned ())
@@ -4428,7 +4402,7 @@ void Augmentation::augmentIntfGraph()
4428
4402
4429
4403
if (liveAnalysis.livenessClass (G4_GRF))
4430
4404
{
4431
- if (kernel.getSimdSize () >= NUM_DWORDS_PER_GRF)
4405
+ if (kernel.getSimdSize () > NUM_DWORDS_PER_GRF)
4432
4406
{
4433
4407
// Set alignment of all GRF candidates
4434
4408
// to 2GRF except for NoMask variables
@@ -10570,9 +10544,9 @@ void VerifyAugmentation::verifyAlign(G4_Declare* dcl)
10570
10544
if (it == masks.end ())
10571
10545
return ;
10572
10546
10573
- if (dcl-> getByteSize () >= NUM_DWORDS_PER_GRF * G4_Type_Table[Type_UD]. byteSize &&
10574
- dcl-> getByteSize () <= 2 * NUM_DWORDS_PER_GRF * G4_Type_Table[Type_UD]. byteSize &&
10575
- kernel-> getSimdSize () > NUM_DWORDS_PER_GRF )
10547
+ auto dclMask = std::get< 1 >((*it). second );
10548
+
10549
+ if (dclMask == AugmentationMasks::Default32Bit )
10576
10550
{
10577
10551
auto assignment = dcl->getRegVar ()->getPhyReg ();
10578
10552
if (assignment && assignment->isGreg ())
@@ -10668,14 +10642,6 @@ void VerifyAugmentation::labelBBs()
10668
10642
#endif
10669
10643
}
10670
10644
10671
// getGRFBaseOffset: returns regNum * G4_GRF_REG_NBYTES + regOff * getTypeSize(type),
// where regNum and regOff come from the physical register and physical register
// offset of `dcl`'s RegVar, and type is dcl->getElemType().
// getPhyReg() and asGreg() are dereferenced without a null check, so this
// presumably expects `dcl` to already have a GRF assignment - confirm at call sites.
- unsigned int getGRFBaseOffset (G4_Declare* dcl)
10672
- {
10673
- unsigned int regNum = dcl->getRegVar ()->getPhyReg ()->asGreg ()->getRegNum ();
10674
- unsigned int regOff = dcl->getRegVar ()->getPhyRegOff ();
10675
- auto type = dcl->getElemType ();
10676
- return (regNum * G4_GRF_REG_NBYTES) + (regOff * getTypeSize (type));
10677
- }
10678
-
10679
10645
bool VerifyAugmentation::interfereBetween (G4_Declare* dcl1, G4_Declare* dcl2)
10680
10646
{
10681
10647
bool interferes = true ;
@@ -10744,8 +10710,8 @@ bool VerifyAugmentation::interfereBetween(G4_Declare* dcl1, G4_Declare* dcl2)
10744
10710
10745
10711
if (lr1->getAssigned () && lr2->getAssigned ())
10746
10712
{
10747
- auto preg1Start = getGRFBaseOffset (dcl1 );
10748
- auto preg2Start = getGRFBaseOffset (dcl2 );
10713
+ auto preg1Start = dcl1-> getGRFBaseOffset ();
10714
+ auto preg2Start = dcl2-> getGRFBaseOffset ();
10749
10715
auto preg1End = preg1Start + dcl1->getByteSize ();
10750
10716
auto preg2End = preg2Start + dcl2->getByteSize ();
10751
10717
@@ -10790,8 +10756,8 @@ void VerifyAugmentation::verify()
10790
10756
{
10791
10757
if (dcl1->getRegFile () == G4_RegFileKind::G4_GRF && dcl2->getRegFile () == G4_RegFileKind::G4_GRF)
10792
10758
{
10793
- auto preg1Start = getGRFBaseOffset (dcl1 );
10794
- auto preg2Start = getGRFBaseOffset (dcl2 );
10759
+ auto preg1Start = dcl1-> getGRFBaseOffset ();
10760
+ auto preg2Start = dcl2-> getGRFBaseOffset ();
10795
10761
auto preg1End = preg1Start + dcl1->getByteSize ();
10796
10762
auto preg2End = preg2Start + dcl2->getByteSize ();
10797
10763
@@ -10857,9 +10823,6 @@ void VerifyAugmentation::verify()
10857
10823
{
10858
10824
bool interfere = interfereBetween (activeDcl, dcl);
10859
10825
10860
- if (activeDcl->getIsPartialDcl () || dcl->getIsPartialDcl ())
10861
- continue ;
10862
-
10863
10826
if (!interfere)
10864
10827
{
10865
10828
std::cerr << dcl->getRegVar ()->getName () << " (" << getStr (dclMask) << " ) and " << activeDcl->getRegVar ()->getName () << " (" <<
0 commit comments