@@ -6400,33 +6400,16 @@ bool GraphColor::assignColors(ColorHeuristic colorHeuristicGRF, bool doBankConfl
6400
6400
}
6401
6401
}
6402
6402
6403
- // If 2 variables have weak edge, they can either fully overlap
6404
- // or have no overlap at all. However, if 1 variable exceeds
6405
- // 1 value per channel, then partial overlap can be allowed.
6406
- // Assume var1 has weak edge with var2. This means both var1
6407
- // and var2 are either Default64Bit or Default32Bit and they
6408
- // are > 2 GRFs in size. Assume they're 64-bits in size. By definition
6409
- // of AugmentationMask::Default64Bit it means the program defines
6410
- // var1 in blocks of N GRFs, where N is #GRFs needed to define all
6411
- // channels of 1 variable of the type. In case of SIMD16, GRF row
6412
- // size = 32 bytes, N = 4 GRFs. This means 4 GRFs are needed to
6413
- // define all 16 channels of a 64-bit variable.
6414
- //
6415
- // Every block of N GRFs is defined using mask M0, ie for row4, row8, row12,
6416
- // etc. mask offset used is reset. This means if var1 is 16 GRFs and
6417
- // var2 is 4 GRFs, it is safe to overlap var2 every 4 rows of var1.
6418
-
6419
-
6420
- auto simdSize = kernel.getSimdSize ();
6421
- auto numElemsPerGRF = numEltPerGRF<Type_UQ>();
6422
- auto numGRFPerBlock = simdSize / numElemsPerGRF;
6423
-
6403
+ // For now it is assumed that only 8-byte types appear
6404
+ // here. If types of other sizes can also appear, the
6405
+ // augmentation mask must also be passed in the
6406
+ // weak edge data structure below.
6424
6407
for (unsigned r = pvar; r < (pvar + numRegs); r++)
6425
6408
{
6426
6409
auto use = regUsage.getWeakEdgeUse (r);
6427
- if (use == 0 || use == (( r - pvar + 1 ) % numGRFPerBlock ))
6410
+ if (use == 0 || use == (r - pvar + 1 ))
6428
6411
{
6429
- regUsage.setWeakEdgeUse (r, ( r - pvar + 1 ) % numGRFPerBlock );
6412
+ regUsage.setWeakEdgeUse (r, r - pvar + 1 );
6430
6413
}
6431
6414
else
6432
6415
{
@@ -10137,6 +10120,7 @@ int GlobalRA::coloringRegAlloc()
10137
10120
Rematerialization remat (kernel, liveAnalysis, coloring, rpe, *this );
10138
10121
remat.run ();
10139
10122
rematDone = true ;
10123
+
10140
10124
// Re-run GRA loop only if remat caused changes to IR
10141
10125
rerunGRA |= remat.getChangesMade ();
10142
10126
}
0 commit comments