Skip to content

Commit 64fbde1

Browse files
pratikasharigcbot
authored andcommitted
[Autobackout][FuncReg]Revert of change: 6243a1a
Disable split on spill pass Disable split on spill pass
1 parent 046b766 commit 64fbde1

File tree

2 files changed

+15
-33
lines changed

2 files changed

+15
-33
lines changed

visa/VarSplit.cpp

Lines changed: 11 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1257,10 +1257,10 @@ bool LoopVarSplit::split(G4_Declare* dcl, Loop& loop)
12571257

12581258
// replace all occurences of dcl in loop with TMP
12591259
for (auto src : srcs)
1260-
replaceSrc(src, splitDcl, loop);
1260+
replaceSrc(src, splitDcl);
12611261

12621262
for (auto dst : dsts)
1263-
replaceDst(dst, splitDcl, loop);
1263+
replaceDst(dst, splitDcl);
12641264

12651265
splitResults[dcl].push_back(std::make_pair(splitDcl, &loop));
12661266

@@ -1328,17 +1328,14 @@ void LoopVarSplit::copy(G4_BB* bb, G4_Declare* dst, G4_Declare* src, SplitResult
13281328
const RegionDesc* rd = kernel.fg.builder->getRegionStride1();
13291329
G4_ExecSize execSize{ kernel.numEltPerGRF<Type_UD>() };
13301330

1331-
// copy 2 GRFs at a time if byte size permits
1332-
if (bytesRemaining >= kernel.numEltPerGRF<Type_UB>() * 2)
1331+
if ((i + 1) < numRows)
13331332
execSize = G4_ExecSize(kernel.numEltPerGRF<Type_UD>() * 2);
13341333

13351334
auto dstRgn = kernel.fg.builder->createDst(dst->getRegVar(), (short)i, 0, 1, Type_F);
13361335
auto srcRgn = kernel.fg.builder->createSrc(src->getRegVar(), (short)i, 0, rd, Type_F);
13371336
auto inst = kernel.fg.builder->createMov(execSize, dstRgn, srcRgn, instOption, false);
13381337

13391338
insertCopy(inst);
1340-
MUST_BE_TRUE(bytesRemaining >= (unsigned int)(execSize.value * G4_Type_Table[Type_F].byteSize),
1341-
"Invalid copy exec size");
13421339
bytesRemaining -= (execSize.value * G4_Type_Table[Type_F].byteSize);
13431340

13441341
if (bytesRemaining < kernel.numEltPerGRF<Type_UB>())
@@ -1389,16 +1386,14 @@ void LoopVarSplit::copy(G4_BB* bb, G4_Declare* dst, G4_Declare* src, SplitResult
13891386

13901387
insertCopy(inst);
13911388

1392-
MUST_BE_TRUE(bytesRemaining >= (execSize.value * (unsigned int)G4_Type_Table[type].byteSize),
1393-
"Invalid copy exec size");
13941389
bytesRemaining -= (execSize.value * G4_Type_Table[type].byteSize);
13951390
};
13961391
}
13971392

1398-
void LoopVarSplit::replaceSrc(G4_SrcRegRegion* src, G4_Declare* dcl, const Loop& loop)
1393+
void LoopVarSplit::replaceSrc(G4_SrcRegRegion* src, G4_Declare* dcl)
13991394
{
14001395
auto srcDcl = src->getBase()->asRegVar()->getDeclare();
1401-
dcl = getNewDcl(srcDcl, dcl, loop);
1396+
dcl = getNewDcl(srcDcl, dcl);
14021397

14031398
auto newSrcRgn = kernel.fg.builder->createSrc(dcl->getRegVar(), src->getRegOff(),
14041399
src->getSubRegOff(), src->getRegion(), src->getType(), src->getAccRegSel());
@@ -1414,10 +1409,10 @@ void LoopVarSplit::replaceSrc(G4_SrcRegRegion* src, G4_Declare* dcl, const Loop&
14141409
}
14151410
}
14161411

1417-
void LoopVarSplit::replaceDst(G4_DstRegRegion* dst, G4_Declare* dcl, const Loop& loop)
1412+
void LoopVarSplit::replaceDst(G4_DstRegRegion* dst, G4_Declare* dcl)
14181413
{
14191414
auto dstDcl = dst->getBase()->asRegVar()->getDeclare();
1420-
dcl = getNewDcl(dstDcl, dcl, loop);
1415+
dcl = getNewDcl(dstDcl, dcl);
14211416

14221417
auto newDstRgn = kernel.fg.builder->createDst(dcl->getRegVar(), dst->getRegOff(),
14231418
dst->getSubRegOff(), dst->getHorzStride(), dst->getType(), dst->getAccRegSel());
@@ -1426,35 +1421,22 @@ void LoopVarSplit::replaceDst(G4_DstRegRegion* dst, G4_Declare* dcl, const Loop&
14261421
inst->setDest(newDstRgn);
14271422
}
14281423

1429-
G4_Declare* LoopVarSplit::getNewDcl(G4_Declare* dcl1, G4_Declare* dcl2, const Loop& loop)
1424+
G4_Declare* LoopVarSplit::getNewDcl(G4_Declare* dcl1, G4_Declare* dcl2)
14301425
{
14311426
// this method gets args dcl1, dcl2. this method is invoked
14321427
// when the transformation replaces existing src/dst rgn with
1433-
// equivalent one but using split variable. for eg,
1434-
//
1435-
// op ... V10(0,5) ... <-- assume V10 is alias of V9
1436-
//
1437-
// assume V9 gets split so V10 src rgn above has to be replaced.
1438-
// say V9's split dcl is called LOOP_SPLIT_V9.
1439-
// so in this function we create a new dcl, LOOP_SPLIT_V10 that
1440-
// aliases LOOP_SPLIT_V9 exactly like V10 aliases V9. this
1441-
// way we dont need any complicated logic to flatten V10.
1428+
// equivalent one but using split variable.
14421429
//
14431430
// dcl1 is a dcl used to construct some src or dst rgn.
14441431
// dcl2 is a new dcl that splits dcl1. dcl2 is always root dcl.
1445-
// dcl1 may or may not be alias of another dcl.
1432+
// dcl1 may or may not be aliased of another dcl.
14461433
// if dcl1 is also root dcl, then return dcl2.
14471434
// if dcl1 is an alias dcl, then construct new dcl that aliases
14481435
// dcl2 at similar offset.
14491436
// mapping from old dcl to new dcl is stored for future invocations.
1450-
// this mapping is done per loop as a single spilled variable could
1451-
// be split in multiple loops and each split instance would use a
1452-
// different loop split variable.
14531437

14541438
MUST_BE_TRUE(!dcl2->getAliasDeclare(), "Expecting to see root dcl for dcl2");
14551439

1456-
auto& oldNewDcl = oldNewDclPerLoop[&loop];
1457-
14581440
auto it = oldNewDcl.find(dcl1);
14591441
if (it != oldNewDcl.end())
14601442
return (*it).second;
@@ -1467,7 +1449,7 @@ G4_Declare* LoopVarSplit::getNewDcl(G4_Declare* dcl1, G4_Declare* dcl2, const Lo
14671449

14681450
auto newDcl = kernel.fg.builder->createTempVar(dcl1->getTotalElems(), dcl1->getElemType(),
14691451
dcl1->getSubRegAlign());
1470-
newDcl->setAliasDeclare(getNewDcl(dcl1->getRootDeclare(), dcl2, loop), dcl1->getOffsetFromBase());
1452+
newDcl->setAliasDeclare(getNewDcl(dcl1->getRootDeclare(), dcl2), dcl1->getOffsetFromBase());
14711453

14721454
oldNewDcl[dcl1] = newDcl;
14731455

visa/VarSplit.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,9 @@ class LoopVarSplit
6666
private:
6767
bool split(G4_Declare* dcl, Loop& loop);
6868
void copy(G4_BB* bb, G4_Declare* dst, G4_Declare* src, SplitResults* splitData, AugmentationMasks augMask, bool pushBack = true);
69-
void replaceSrc(G4_SrcRegRegion* src, G4_Declare* dcl, const Loop& loop);
70-
void replaceDst(G4_DstRegRegion* dst, G4_Declare* dcl, const Loop& loop);
71-
G4_Declare* getNewDcl(G4_Declare* dcl1, G4_Declare* dcl2, const Loop& loop);
69+
void replaceSrc(G4_SrcRegRegion* src, G4_Declare* dcl);
70+
void replaceDst(G4_DstRegRegion* dst, G4_Declare* dcl);
71+
G4_Declare* getNewDcl(G4_Declare* dcl1, G4_Declare* dcl2);
7272
std::vector<Loop*> getLoopsToSplitAround(G4_Declare* dcl);
7373
void adjustLoopMaxPressure(Loop& loop, unsigned int numRows);
7474

@@ -83,7 +83,7 @@ class LoopVarSplit
8383
// store spill cost for each dcl
8484
std::map<G4_Declare*, float, CComparator> dclSpillCost;
8585

86-
std::unordered_map<const Loop*, std::unordered_map<G4_Declare*, G4_Declare*>> oldNewDclPerLoop;
86+
std::unordered_map<G4_Declare*, G4_Declare*> oldNewDcl;
8787

8888
std::unordered_map<Loop*, std::unordered_set<G4_Declare*>> splitsPerLoop;
8989

0 commit comments

Comments
 (0)