@@ -6400,16 +6400,33 @@ bool GraphColor::assignColors(ColorHeuristic colorHeuristicGRF, bool doBankConfl
6400
6400
}
6401
6401
}
6402
6402
6403
- // For now it is assumed only 8-byte types will appear
6404
- // here. If other sized types will also appear then
6405
- // augmentation mask also needs to be sent in
6406
- // weak edge data structure below.
6403
+ // If 2 variables have a weak edge, they can either fully overlap
6404
+ // or have no overlap at all. However, if 1 variable exceeds
6405
+ // 1 value per channel, then partial overlap can be allowed.
6406
+ // Assume var1 has a weak edge with var2. This means both var1
6407
+ // and var2 are either Default64Bit or Default32Bit and they
6408
+ // are > 2 GRFs in size. Assume they're 64-bits in size. By definition
6409
+ // of AugmentationMask::Default64Bit it means the program defines
6410
+ // var1 in blocks of N GRFs, where N is #GRFs needed to define all
6411
+ // channels of 1 variable of the type. In case of SIMD16, GRF row
6412
+ // size = 32 bytes, N = 4 GRFs. This means 4 GRFs are needed to
6413
+ // define all 16 channels of a 64-bit variable.
6414
+ //
6415
+ // Every block of N GRFs is defined using mask M0, i.e. for row4, row8, row12,
6416
+ // etc. the mask offset used is reset. This means if var1 is 16 GRFs and
6417
+ // var2 is 4 GRFs, it is safe to overlap var2 every 4 rows of var1.
6418
+
6419
+
6420
+ auto simdSize = kernel.getSimdSize ();
6421
+ auto numElemsPerGRF = numEltPerGRF<Type_UQ>();
6422
+ auto numGRFPerBlock = simdSize / numElemsPerGRF;
6423
+
6407
6424
for (unsigned r = pvar; r < (pvar + numRegs); r++)
6408
6425
{
6409
6426
auto use = regUsage.getWeakEdgeUse (r);
6410
- if (use == 0 || use == (r - pvar + 1 ))
6427
+ if (use == 0 || use == (( r - pvar + 1 ) % numGRFPerBlock ))
6411
6428
{
6412
- regUsage.setWeakEdgeUse (r, r - pvar + 1 );
6429
+ regUsage.setWeakEdgeUse (r, ( r - pvar + 1 ) % numGRFPerBlock );
6413
6430
}
6414
6431
else
6415
6432
{
@@ -10120,7 +10137,6 @@ int GlobalRA::coloringRegAlloc()
10120
10137
Rematerialization remat (kernel, liveAnalysis, coloring, rpe, *this );
10121
10138
remat.run ();
10122
10139
rematDone = true ;
10123
-
10124
10140
// Re-run GRA loop only if remat caused changes to IR
10125
10141
rerunGRA |= remat.getChangesMade ();
10126
10142
}
0 commit comments