@@ -2228,34 +2228,48 @@ void GlobalRA::updateSubRegAlignment(G4_SubReg_Align subAlign)
2228
2228
}
2229
2229
}
2230
2230
2231
// Decides whether dcl should be even aligned.
// The checks on defs, partial-dcl status and augmentation mask are made on
// the root declare of dcl; the builtin R0 exclusion compares dcl itself.
// Returns true only when all of the following hold:
//   - areAllDefsNoMask() is false for the root declare,
//   - the root declare is not a partial dcl,
//   - the root declare's augmentation mask is not NonDefault,
//   - (effective element size * SIMD size with slicing) is strictly larger
//     than GENX_GRF_REG_SIZ and at most 2 * GENX_GRF_REG_SIZ,
//   - dcl is not the builtin R0 while vISA_enablePreemption is set.
// Returns false otherwise.
+ bool GlobalRA::evenAlignNeeded (G4_Declare* dcl)
2232
+ {
2233
+ // Return true if even alignment is needed.
2234
+ // Even alignment is needed if, for the given SIMD size and element type,
2235
+ // a complete def uses more than 1 GRF and at most 2 GRFs.
2236
+ auto kernelSimdSizeToUse = kernel.getSimdSizeWithSlicing ();
2237
+ G4_Declare* topdcl = dcl->getRootDeclare ();
2238
+ auto topdclAugMask = getAugmentationMask (topdcl);
2239
+
2240
+ if (!areAllDefsNoMask (topdcl) && !topdcl->getIsPartialDcl () &&
2241
+ topdclAugMask != AugmentationMasks::NonDefault)
2242
+ {
2243
+ auto elemSizeToUse = topdcl->getElemSize ();
2244
+ if (elemSizeToUse < 4 && topdclAugMask == AugmentationMasks::Default32Bit)
2245
+ // :uw with hstride 2 can also be Default32Bit and hence needs even alignment, so size it as a 4-byte element
2246
+ elemSizeToUse = 4 ;
2247
+ else if (elemSizeToUse < 8 && topdclAugMask == AugmentationMasks::Default64Bit)
// Same widening for the Default64Bit mask: size it as an 8-byte element.
2248
+ elemSizeToUse = 8 ;
2249
+
2250
+ if (// Even align if size is > 1 GRF and <= 2 GRFs; for > 2 GRF sizes use weak edges
2251
+ (elemSizeToUse * kernelSimdSizeToUse) > (unsigned int )GENX_GRF_REG_SIZ &&
2252
+ (elemSizeToUse * kernelSimdSizeToUse) <= (unsigned int )(2 * GENX_GRF_REG_SIZ) &&
2253
+ !(kernel.fg .builder ->getOption (vISA_enablePreemption) &&
2254
+ dcl == kernel.fg .builder ->getBuiltinR0 ()))
2255
+ {
2256
+ return true ;
2257
+ }
2258
+ }
2259
+ return false ;
2260
+ }
2261
+
2231
2262
// This function can be invoked before local RA or after augmentation.
2232
- // When invoked before local RA, it sets all vars to be Even aligned,
2233
- // including NoMask ones. This is safe, but conservative. Post
2234
- // augmentation, dcl masks are available so only non-NoMask vars will
2235
- // be Even aligned. Others will be Either aligned. There is no need
2236
- // to store old value of align because HW has no restriction on
2237
- // even/odd alignment that HW conformity computes.
2238
2263
void GlobalRA::evenAlign ()
2239
2264
{
2240
2265
// Update alignment of all GRF declares to align
2241
2266
for (auto dcl : kernel.Declares )
2242
2267
{
2243
2268
if (dcl->getRegFile () & G4_GRF)
2244
2269
{
2245
- G4_Declare* topdcl = dcl->getRootDeclare ();
2246
- auto topdclAugMask = getAugmentationMask (topdcl);
2247
-
2248
- if (!areAllDefsNoMask (topdcl) && !topdcl->getIsPartialDcl () &&
2249
- topdclAugMask != AugmentationMasks::NonDefault &&
2250
- topdclAugMask != AugmentationMasks::Default64Bit)
2270
+ if (evenAlignNeeded (dcl))
2251
2271
{
2252
- if ((topdcl->getElemSize () >= 4 || topdclAugMask == AugmentationMasks::Default32Bit) &&
2253
- topdcl->getByteSize () >= GENX_GRF_REG_SIZ &&
2254
- !(kernel.fg .builder ->getOption (vISA_enablePreemption) &&
2255
- dcl == kernel.fg .builder ->getBuiltinR0 ()))
2256
- {
2257
- setEvenAligned (dcl, true );
2258
- }
2272
+ setEvenAligned (dcl, true );
2259
2273
}
2260
2274
}
2261
2275
}
@@ -3113,9 +3127,7 @@ bool Augmentation::markNonDefaultMaskDef()
3113
3127
prevAugMask = gra.getAugmentationMask (dcl);
3114
3128
}
3115
3129
3116
- if (liveAnalysis.livenessClass (G4_GRF) &&
3117
- gra.getAugmentationMask (dcl) == AugmentationMasks::Default32Bit &&
3118
- kernel.getSimdSize () > NUM_DWORDS_PER_GRF)
3130
+ if (gra.evenAlignNeeded (dcl))
3119
3131
{
3120
3132
auto dclLR = gra.getLocalLR (dcl);
3121
3133
if (dclLR)
@@ -3124,7 +3136,7 @@ bool Augmentation::markNonDefaultMaskDef()
3124
3136
auto phyReg = dclLR->getPhyReg (s);
3125
3137
if (phyReg && phyReg->asGreg ()->getRegNum () % 2 != 0 )
3126
3138
{
3127
- // If LRA assignment is not 2GRF aligned for SIMD16 then
3139
+ // If LRA assignment is not 2GRF aligned then
3128
3140
// mark it as non-default. GRA candidates cannot fully
3129
3141
// overlap with such ranges. Partial overlap is illegal.
3130
3142
gra.setAugmentationMask (dcl, AugmentationMasks::NonDefault);
@@ -4166,6 +4178,22 @@ bool Interference::isStrongEdgeBetween(G4_Declare* dcl1, G4_Declare* dcl2)
4166
4178
return false ;
4167
4179
}
4168
4180
4181
// Tells whether a variable with augmentation mask m needs weak edges.
// For Default64Bit the test uses the byte size of Type_Q, for Default32Bit
// the byte size of Type_D; in both cases the result is true when
// (that byte size * SIMD size with slicing) is strictly greater than
// 2 * GENX_GRF_REG_SIZ. Any other mask value returns false.
+ bool Augmentation::weakEdgeNeeded (AugmentationMasks m)
4182
+ {
4183
+ // Weak edge needed in case the number of GRFs exceeds 2
4184
+
4185
+ if (m == AugmentationMasks::Default64Bit)
4186
+ return (G4_Type_Table[Type_Q].byteSize *kernel.getSimdSizeWithSlicing ()) > (unsigned int )(2 * GENX_GRF_REG_SIZ);
4187
+
4188
+ if (m == AugmentationMasks::Default32Bit)
4189
+ {
4190
+ // Variables of up to 2 GRFs are even aligned instead; weak edges are used only beyond that size
4191
+ return (G4_Type_Table[Type_D].byteSize *kernel.getSimdSizeWithSlicing ()) > (unsigned int )(2 * GENX_GRF_REG_SIZ);
4192
+ }
4193
+
4194
+ return false ;
4195
+ }
4196
+
4169
4197
//
4170
4198
// Mark interference between newDcl and other incompatible dcls in current active lists.
4171
4199
//
@@ -4183,10 +4211,8 @@ void Augmentation::buildSIMDIntfDcl(G4_Declare* newDcl, bool isCall)
4183
4211
{
4184
4212
if (liveAnalysis.livenessClass (G4_GRF) &&
4185
4213
// Populate compatible sparse intf data structure
4186
- // only for 64-bit bit types since others can be
4187
- // handled using Even align.
4188
- gra.getAugmentationMask (defaultDcl) == AugmentationMasks::Default64Bit &&
4189
- newDclAugMask == AugmentationMasks::Default64Bit)
4214
+ // only for weak edges.
4215
+ weakEdgeNeeded (newDclAugMask))
4190
4216
{
4191
4217
if (defaultDcl->getRegVar ()->isPhyRegAssigned () &&
4192
4218
newDcl->getRegVar ()->isPhyRegAssigned ())
@@ -4402,7 +4428,7 @@ void Augmentation::augmentIntfGraph()
4402
4428
4403
4429
if (liveAnalysis.livenessClass (G4_GRF))
4404
4430
{
4405
- if (kernel.getSimdSize () > NUM_DWORDS_PER_GRF)
4431
+ if (kernel.getSimdSize () >= NUM_DWORDS_PER_GRF)
4406
4432
{
4407
4433
// Set alignment of all GRF candidates
4408
4434
// to 2GRF except for NoMask variables
@@ -10544,9 +10570,9 @@ void VerifyAugmentation::verifyAlign(G4_Declare* dcl)
10544
10570
if (it == masks.end ())
10545
10571
return ;
10546
10572
10547
- auto dclMask = std::get< 1 >((*it). second );
10548
-
10549
- if (dclMask == AugmentationMasks::Default32Bit )
10573
+ if (dcl-> getByteSize () >= NUM_DWORDS_PER_GRF * G4_Type_Table[Type_UD]. byteSize &&
10574
+ dcl-> getByteSize () <= 2 * NUM_DWORDS_PER_GRF * G4_Type_Table[Type_UD]. byteSize &&
10575
+ kernel-> getSimdSize () > NUM_DWORDS_PER_GRF )
10550
10576
{
10551
10577
auto assignment = dcl->getRegVar ()->getPhyReg ();
10552
10578
if (assignment && assignment->isGreg ())
@@ -10642,6 +10668,14 @@ void VerifyAugmentation::labelBBs()
10642
10668
#endif
10643
10669
}
10644
10670
10671
// Computes the start offset of dcl's physical register assignment as
// (GRF register number * G4_GRF_REG_NBYTES) + (physical register sub-offset * element type size).
// Callers in this file add dcl->getByteSize() to the result to form an end
// offset, so the value is presumably in bytes — TODO confirm the unit of getPhyRegOff().
// NOTE(review): getPhyReg() and asGreg() are dereferenced without any check;
// presumably callers only pass declares that already have a GRF assignment — confirm.
+ unsigned int getGRFBaseOffset (G4_Declare* dcl)
10672
+ {
10673
+ unsigned int regNum = dcl->getRegVar ()->getPhyReg ()->asGreg ()->getRegNum ();
10674
+ unsigned int regOff = dcl->getRegVar ()->getPhyRegOff ();
10675
+ auto type = dcl->getElemType ();
// Register part scaled by G4_GRF_REG_NBYTES, sub-register part scaled by the element type size.
10676
+ return (regNum * G4_GRF_REG_NBYTES) + (regOff * getTypeSize (type));
10677
+ }
10678
+
10645
10679
bool VerifyAugmentation::interfereBetween (G4_Declare* dcl1, G4_Declare* dcl2)
10646
10680
{
10647
10681
bool interferes = true ;
@@ -10710,8 +10744,8 @@ bool VerifyAugmentation::interfereBetween(G4_Declare* dcl1, G4_Declare* dcl2)
10710
10744
10711
10745
if (lr1->getAssigned () && lr2->getAssigned ())
10712
10746
{
10713
- auto preg1Start = dcl1-> getGRFBaseOffset ();
10714
- auto preg2Start = dcl2-> getGRFBaseOffset ();
10747
+ auto preg1Start = getGRFBaseOffset (dcl1 );
10748
+ auto preg2Start = getGRFBaseOffset (dcl2 );
10715
10749
auto preg1End = preg1Start + dcl1->getByteSize ();
10716
10750
auto preg2End = preg2Start + dcl2->getByteSize ();
10717
10751
@@ -10756,8 +10790,8 @@ void VerifyAugmentation::verify()
10756
10790
{
10757
10791
if (dcl1->getRegFile () == G4_RegFileKind::G4_GRF && dcl2->getRegFile () == G4_RegFileKind::G4_GRF)
10758
10792
{
10759
- auto preg1Start = dcl1-> getGRFBaseOffset ();
10760
- auto preg2Start = dcl2-> getGRFBaseOffset ();
10793
+ auto preg1Start = getGRFBaseOffset (dcl1 );
10794
+ auto preg2Start = getGRFBaseOffset (dcl2 );
10761
10795
auto preg1End = preg1Start + dcl1->getByteSize ();
10762
10796
auto preg2End = preg2Start + dcl2->getByteSize ();
10763
10797
@@ -10823,6 +10857,9 @@ void VerifyAugmentation::verify()
10823
10857
{
10824
10858
bool interfere = interfereBetween (activeDcl, dcl);
10825
10859
10860
+ if (activeDcl->getIsPartialDcl () || dcl->getIsPartialDcl ())
10861
+ continue ;
10862
+
10826
10863
if (!interfere)
10827
10864
{
10828
10865
std::cerr << dcl->getRegVar ()->getName () << " (" << getStr (dclMask) << " ) and " << activeDcl->getRegVar ()->getName () << " (" <<
0 commit comments