@@ -1257,10 +1257,10 @@ bool LoopVarSplit::split(G4_Declare* dcl, Loop& loop)
1257
1257
1258
1258
// replace all occurrences of dcl in loop with TMP
1259
1259
for (auto src : srcs)
1260
- replaceSrc (src, splitDcl, loop );
1260
+ replaceSrc (src, splitDcl);
1261
1261
1262
1262
for (auto dst : dsts)
1263
- replaceDst (dst, splitDcl, loop );
1263
+ replaceDst (dst, splitDcl);
1264
1264
1265
1265
splitResults[dcl].push_back (std::make_pair (splitDcl, &loop));
1266
1266
@@ -1328,17 +1328,14 @@ void LoopVarSplit::copy(G4_BB* bb, G4_Declare* dst, G4_Declare* src, SplitResult
1328
1328
const RegionDesc* rd = kernel.fg .builder ->getRegionStride1 ();
1329
1329
G4_ExecSize execSize{ kernel.numEltPerGRF <Type_UD>() };
1330
1330
1331
- // copy 2 GRFs at a time if byte size permits
1332
- if (bytesRemaining >= kernel.numEltPerGRF <Type_UB>() * 2 )
1331
+ if ((i + 1 ) < numRows)
1333
1332
execSize = G4_ExecSize (kernel.numEltPerGRF <Type_UD>() * 2 );
1334
1333
1335
1334
auto dstRgn = kernel.fg .builder ->createDst (dst->getRegVar (), (short )i, 0 , 1 , Type_F);
1336
1335
auto srcRgn = kernel.fg .builder ->createSrc (src->getRegVar (), (short )i, 0 , rd, Type_F);
1337
1336
auto inst = kernel.fg .builder ->createMov (execSize, dstRgn, srcRgn, instOption, false );
1338
1337
1339
1338
insertCopy (inst);
1340
- MUST_BE_TRUE (bytesRemaining >= (unsigned int )(execSize.value * G4_Type_Table[Type_F].byteSize ),
1341
- " Invalid copy exec size" );
1342
1339
bytesRemaining -= (execSize.value * G4_Type_Table[Type_F].byteSize );
1343
1340
1344
1341
if (bytesRemaining < kernel.numEltPerGRF <Type_UB>())
@@ -1389,16 +1386,14 @@ void LoopVarSplit::copy(G4_BB* bb, G4_Declare* dst, G4_Declare* src, SplitResult
1389
1386
1390
1387
insertCopy (inst);
1391
1388
1392
- MUST_BE_TRUE (bytesRemaining >= (execSize.value * (unsigned int )G4_Type_Table[type].byteSize ),
1393
- " Invalid copy exec size" );
1394
1389
bytesRemaining -= (execSize.value * G4_Type_Table[type].byteSize );
1395
1390
};
1396
1391
}
1397
1392
1398
- void LoopVarSplit::replaceSrc (G4_SrcRegRegion* src, G4_Declare* dcl, const Loop& loop )
1393
+ void LoopVarSplit::replaceSrc (G4_SrcRegRegion* src, G4_Declare* dcl)
1399
1394
{
1400
1395
auto srcDcl = src->getBase ()->asRegVar ()->getDeclare ();
1401
- dcl = getNewDcl (srcDcl, dcl, loop );
1396
+ dcl = getNewDcl (srcDcl, dcl);
1402
1397
1403
1398
auto newSrcRgn = kernel.fg .builder ->createSrc (dcl->getRegVar (), src->getRegOff (),
1404
1399
src->getSubRegOff (), src->getRegion (), src->getType (), src->getAccRegSel ());
@@ -1414,10 +1409,10 @@ void LoopVarSplit::replaceSrc(G4_SrcRegRegion* src, G4_Declare* dcl, const Loop&
1414
1409
}
1415
1410
}
1416
1411
1417
- void LoopVarSplit::replaceDst (G4_DstRegRegion* dst, G4_Declare* dcl, const Loop& loop )
1412
+ void LoopVarSplit::replaceDst (G4_DstRegRegion* dst, G4_Declare* dcl)
1418
1413
{
1419
1414
auto dstDcl = dst->getBase ()->asRegVar ()->getDeclare ();
1420
- dcl = getNewDcl (dstDcl, dcl, loop );
1415
+ dcl = getNewDcl (dstDcl, dcl);
1421
1416
1422
1417
auto newDstRgn = kernel.fg .builder ->createDst (dcl->getRegVar (), dst->getRegOff (),
1423
1418
dst->getSubRegOff (), dst->getHorzStride (), dst->getType (), dst->getAccRegSel ());
@@ -1426,35 +1421,22 @@ void LoopVarSplit::replaceDst(G4_DstRegRegion* dst, G4_Declare* dcl, const Loop&
1426
1421
inst->setDest (newDstRgn);
1427
1422
}
1428
1423
1429
- G4_Declare* LoopVarSplit::getNewDcl (G4_Declare* dcl1, G4_Declare* dcl2, const Loop& loop )
1424
+ G4_Declare* LoopVarSplit::getNewDcl (G4_Declare* dcl1, G4_Declare* dcl2)
1430
1425
{
1431
1426
// this method gets args dcl1, dcl2. this method is invoked
1432
1427
// when the transformation replaces existing src/dst rgn with
1433
- // equivalent one but using split variable. for eg,
1434
- //
1435
- // op ... V10(0,5) ... <-- assume V10 is alias of V9
1436
- //
1437
- // assume V9 gets split so V10 src rgn above has to be replaced.
1438
- // say V9's split dcl is called LOOP_SPLIT_V9.
1439
- // so in this function we create a new dcl, LOOP_SPLIT_V10 that
1440
- // aliases LOOP_SPLIT_V9 exactly like V10 aliases V9. this
1441
- // way we dont need any complicated logic to flatten V10.
1428
+ // equivalent one but using split variable.
1442
1429
//
1443
1430
// dcl1 is a dcl used to construct some src or dst rgn.
1444
1431
// dcl2 is a new dcl that splits dcl1. dcl2 is always root dcl.
1445
- // dcl1 may or may not be alias of another dcl.
1432
+ // dcl1 may or may not be an alias of another dcl.
1446
1433
// if dcl1 is also root dcl, then return dcl2.
1447
1434
// if dcl1 is an alias dcl, then construct new dcl that aliases
1448
1435
// dcl2 at similar offset.
1449
1436
// mapping from old dcl to new dcl is stored for future invocations.
1450
- // this mapping is done per loop as a single spilled variable could
1451
- // be split in multiple loops and each split instance would use a
1452
- // different loop split variable.
1453
1437
1454
1438
MUST_BE_TRUE (!dcl2->getAliasDeclare (), " Expecting to see root dcl for dcl2" );
1455
1439
1456
- auto & oldNewDcl = oldNewDclPerLoop[&loop];
1457
-
1458
1440
auto it = oldNewDcl.find (dcl1);
1459
1441
if (it != oldNewDcl.end ())
1460
1442
return (*it).second ;
@@ -1467,7 +1449,7 @@ G4_Declare* LoopVarSplit::getNewDcl(G4_Declare* dcl1, G4_Declare* dcl2, const Lo
1467
1449
1468
1450
auto newDcl = kernel.fg .builder ->createTempVar (dcl1->getTotalElems (), dcl1->getElemType (),
1469
1451
dcl1->getSubRegAlign ());
1470
- newDcl->setAliasDeclare (getNewDcl (dcl1->getRootDeclare (), dcl2, loop ), dcl1->getOffsetFromBase ());
1452
+ newDcl->setAliasDeclare (getNewDcl (dcl1->getRootDeclare (), dcl2), dcl1->getOffsetFromBase ());
1471
1453
1472
1454
oldNewDcl[dcl1] = newDcl;
1473
1455
0 commit comments