Skip to content

Commit ab5a7cf

Browse files
pratikasharigcbot
authored andcommitted
Bug fixes to loop variable splitting
* When a variable is split in multiple loops, fix problem where split pass reused stale LOOP_SPLIT variable across loops. * Fix logic to copy accurately across dcl sizes.
1 parent 714f220 commit ab5a7cf

File tree

2 files changed

+33
-15
lines changed

2 files changed

+33
-15
lines changed

visa/VarSplit.cpp

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1257,10 +1257,10 @@ bool LoopVarSplit::split(G4_Declare* dcl, Loop& loop)
12571257

12581258
// replace all occurences of dcl in loop with TMP
12591259
for (auto src : srcs)
1260-
replaceSrc(src, splitDcl);
1260+
replaceSrc(src, splitDcl, loop);
12611261

12621262
for (auto dst : dsts)
1263-
replaceDst(dst, splitDcl);
1263+
replaceDst(dst, splitDcl, loop);
12641264

12651265
splitResults[dcl].push_back(std::make_pair(splitDcl, &loop));
12661266

@@ -1328,14 +1328,17 @@ void LoopVarSplit::copy(G4_BB* bb, G4_Declare* dst, G4_Declare* src, SplitResult
13281328
const RegionDesc* rd = kernel.fg.builder->getRegionStride1();
13291329
G4_ExecSize execSize{ kernel.numEltPerGRF<Type_UD>() };
13301330

1331-
if ((i + 1) < numRows)
1331+
// copy 2 GRFs at a time if byte size permits
1332+
if (bytesRemaining >= kernel.numEltPerGRF<Type_UB>() * 2)
13321333
execSize = G4_ExecSize(kernel.numEltPerGRF<Type_UD>() * 2);
13331334

13341335
auto dstRgn = kernel.fg.builder->createDst(dst->getRegVar(), (short)i, 0, 1, Type_F);
13351336
auto srcRgn = kernel.fg.builder->createSrc(src->getRegVar(), (short)i, 0, rd, Type_F);
13361337
auto inst = kernel.fg.builder->createMov(execSize, dstRgn, srcRgn, instOption, false);
13371338

13381339
insertCopy(inst);
1340+
MUST_BE_TRUE(bytesRemaining >= (unsigned int)(execSize.value * G4_Type_Table[Type_F].byteSize),
1341+
"Invalid copy exec size");
13391342
bytesRemaining -= (execSize.value * G4_Type_Table[Type_F].byteSize);
13401343

13411344
if (bytesRemaining < kernel.numEltPerGRF<Type_UB>())
@@ -1386,14 +1389,16 @@ void LoopVarSplit::copy(G4_BB* bb, G4_Declare* dst, G4_Declare* src, SplitResult
13861389

13871390
insertCopy(inst);
13881391

1392+
MUST_BE_TRUE(bytesRemaining >= (execSize.value * (unsigned int)G4_Type_Table[type].byteSize),
1393+
"Invalid copy exec size");
13891394
bytesRemaining -= (execSize.value * G4_Type_Table[type].byteSize);
13901395
};
13911396
}
13921397

1393-
void LoopVarSplit::replaceSrc(G4_SrcRegRegion* src, G4_Declare* dcl)
1398+
void LoopVarSplit::replaceSrc(G4_SrcRegRegion* src, G4_Declare* dcl, const Loop& loop)
13941399
{
13951400
auto srcDcl = src->getBase()->asRegVar()->getDeclare();
1396-
dcl = getNewDcl(srcDcl, dcl);
1401+
dcl = getNewDcl(srcDcl, dcl, loop);
13971402

13981403
auto newSrcRgn = kernel.fg.builder->createSrc(dcl->getRegVar(), src->getRegOff(),
13991404
src->getSubRegOff(), src->getRegion(), src->getType(), src->getAccRegSel());
@@ -1409,10 +1414,10 @@ void LoopVarSplit::replaceSrc(G4_SrcRegRegion* src, G4_Declare* dcl)
14091414
}
14101415
}
14111416

1412-
void LoopVarSplit::replaceDst(G4_DstRegRegion* dst, G4_Declare* dcl)
1417+
void LoopVarSplit::replaceDst(G4_DstRegRegion* dst, G4_Declare* dcl, const Loop& loop)
14131418
{
14141419
auto dstDcl = dst->getBase()->asRegVar()->getDeclare();
1415-
dcl = getNewDcl(dstDcl, dcl);
1420+
dcl = getNewDcl(dstDcl, dcl, loop);
14161421

14171422
auto newDstRgn = kernel.fg.builder->createDst(dcl->getRegVar(), dst->getRegOff(),
14181423
dst->getSubRegOff(), dst->getHorzStride(), dst->getType(), dst->getAccRegSel());
@@ -1421,22 +1426,35 @@ void LoopVarSplit::replaceDst(G4_DstRegRegion* dst, G4_Declare* dcl)
14211426
inst->setDest(newDstRgn);
14221427
}
14231428

1424-
G4_Declare* LoopVarSplit::getNewDcl(G4_Declare* dcl1, G4_Declare* dcl2)
1429+
G4_Declare* LoopVarSplit::getNewDcl(G4_Declare* dcl1, G4_Declare* dcl2, const Loop& loop)
14251430
{
14261431
// this method gets args dcl1, dcl2. this method is invoked
14271432
// when the transformation replaces existing src/dst rgn with
1428-
// equivalent one but using split variable.
1433+
// equivalent one but using split variable. for eg,
1434+
//
1435+
// op ... V10(0,5) ... <-- assume V10 is alias of V9
1436+
//
1437+
// assume V9 gets split so V10 src rgn above has to be replaced.
1438+
// say V9's split dcl is called LOOP_SPLIT_V9.
1439+
// so in this function we create a new dcl, LOOP_SPLIT_V10 that
1440+
// aliases LOOP_SPLIT_V9 exactly like V10 aliases V9. this
1441+
// way we dont need any complicated logic to flatten V10.
14291442
//
14301443
// dcl1 is a dcl used to construct some src or dst rgn.
14311444
// dcl2 is a new dcl that splits dcl1. dcl2 is always root dcl.
1432-
// dcl1 may or may not be aliased of another dcl.
1445+
// dcl1 may or may not be alias of another dcl.
14331446
// if dcl1 is also root dcl, then return dcl2.
14341447
// if dcl1 is an alias dcl, then construct new dcl that aliases
14351448
// dcl2 at similar offset.
14361449
// mapping from old dcl to new dcl is stored for future invocations.
1450+
// this mapping is done per loop as a single spilled variable could
1451+
// be split in multiple loops and each split instance would use a
1452+
// different loop split variable.
14371453

14381454
MUST_BE_TRUE(!dcl2->getAliasDeclare(), "Expecting to see root dcl for dcl2");
14391455

1456+
auto& oldNewDcl = oldNewDclPerLoop[&loop];
1457+
14401458
auto it = oldNewDcl.find(dcl1);
14411459
if (it != oldNewDcl.end())
14421460
return (*it).second;
@@ -1449,7 +1467,7 @@ G4_Declare* LoopVarSplit::getNewDcl(G4_Declare* dcl1, G4_Declare* dcl2)
14491467

14501468
auto newDcl = kernel.fg.builder->createTempVar(dcl1->getTotalElems(), dcl1->getElemType(),
14511469
dcl1->getSubRegAlign());
1452-
newDcl->setAliasDeclare(getNewDcl(dcl1->getRootDeclare(), dcl2), dcl1->getOffsetFromBase());
1470+
newDcl->setAliasDeclare(getNewDcl(dcl1->getRootDeclare(), dcl2, loop), dcl1->getOffsetFromBase());
14531471

14541472
oldNewDcl[dcl1] = newDcl;
14551473

visa/VarSplit.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,9 @@ class LoopVarSplit
6666
private:
6767
bool split(G4_Declare* dcl, Loop& loop);
6868
void copy(G4_BB* bb, G4_Declare* dst, G4_Declare* src, SplitResults* splitData, AugmentationMasks augMask, bool pushBack = true);
69-
void replaceSrc(G4_SrcRegRegion* src, G4_Declare* dcl);
70-
void replaceDst(G4_DstRegRegion* dst, G4_Declare* dcl);
71-
G4_Declare* getNewDcl(G4_Declare* dcl1, G4_Declare* dcl2);
69+
void replaceSrc(G4_SrcRegRegion* src, G4_Declare* dcl, const Loop& loop);
70+
void replaceDst(G4_DstRegRegion* dst, G4_Declare* dcl, const Loop& loop);
71+
G4_Declare* getNewDcl(G4_Declare* dcl1, G4_Declare* dcl2, const Loop& loop);
7272
std::vector<Loop*> getLoopsToSplitAround(G4_Declare* dcl);
7373
void adjustLoopMaxPressure(Loop& loop, unsigned int numRows);
7474

@@ -83,7 +83,7 @@ class LoopVarSplit
8383
// store spill cost for each dcl
8484
std::map<G4_Declare*, float, CComparator> dclSpillCost;
8585

86-
std::unordered_map<G4_Declare*, G4_Declare*> oldNewDcl;
86+
std::unordered_map<const Loop*, std::unordered_map<G4_Declare*, G4_Declare*>> oldNewDclPerLoop;
8787

8888
std::unordered_map<Loop*, std::unordered_set<G4_Declare*>> splitsPerLoop;
8989

0 commit comments

Comments
 (0)