Skip to content

Commit 1403698

Browse files
bcheng0127 authored and gfxbot committed
Internal feature
Change-Id: Ieb32090c431b55064f72055dff98f3f43dc5250b
1 parent 966f6c8 commit 1403698

File tree

6 files changed

+127
-33
lines changed

6 files changed

+127
-33
lines changed

visa/GraphColor.h

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -789,6 +789,8 @@ namespace vISA
789789
AugmentationMasks maskType = AugmentationMasks::Undetermined;
790790
std::vector<G4_Declare*> subDclList;
791791
unsigned int subOff = 0;
792+
std::vector<G4_Declare*> bundleConflictDcls;
793+
std::vector<int> bundleConflictoffsets;
792794
};
793795

794796
class VerifyAugmentation
@@ -1155,6 +1157,37 @@ namespace vISA
11551157
return (getAugmentationMask(dcl) == AugmentationMasks::NonDefault);
11561158
}
11571159

1160+
// Records subDcl, together with its offset, as a bundle-conflict partner of
// dcl. The declare and the offset are appended to two parallel per-variable
// lists, so entry k of one list always pairs with entry k of the other.
void addBundleConflictDcl(G4_Declare *dcl, G4_Declare* subDcl, int offset)
{
    const auto id = dcl->getDeclId();
    // resize() is invoked before vars is indexed; presumably it guarantees
    // that slot `id` exists -- confirm against resize().
    resize(id);
    auto& info = vars[id];
    info.bundleConflictDcls.push_back(subDcl);
    info.bundleConflictoffsets.push_back(offset);
}
1167+
1168+
// Drops every bundle-conflict entry recorded for dcl by emptying both the
// declare list and the parallel offset list.
void clearBundleConflictDcl(G4_Declare* dcl)
{
    const auto id = dcl->getDeclId();
    // resize() is invoked before vars is indexed; presumably it guarantees
    // that slot `id` exists -- confirm against resize().
    resize(id);
    auto& info = vars[id];
    info.bundleConflictDcls.clear();
    info.bundleConflictoffsets.clear();
}
1175+
1176+
// Returns the i-th bundle-conflict declare recorded for dcl and writes the
// offset stored alongside it into `offset`.
// No range check is performed on i: callers are expected to keep it below
// getBundleConflictDclSize(dcl).
G4_Declare* getBundleConflictDcl(G4_Declare* dcl, unsigned i, int &offset)
{
    const auto id = dcl->getDeclId();
    // resize() is invoked before vars is indexed; presumably it guarantees
    // that slot `id` exists -- confirm against resize().
    resize(id);
    auto& info = vars[id];
    offset = info.bundleConflictoffsets[i];
    return info.bundleConflictDcls[i];
}
1183+
1184+
// Returns how many bundle-conflict declares are currently recorded for dcl.
unsigned getBundleConflictDclSize(G4_Declare* dcl)
{
    const auto id = dcl->getDeclId();
    // resize() is invoked before vars is indexed; presumably it guarantees
    // that slot `id` exists -- confirm against resize().
    resize(id);
    const auto count = vars[id].bundleConflictDcls.size();
    return static_cast<unsigned>(count);
}
1190+
11581191
void addSubDcl(G4_Declare *dcl, G4_Declare* subDcl)
11591192
{
11601193
auto dclid = dcl->getDeclId();

visa/LocalRA.cpp

Lines changed: 62 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ using namespace vISA;
4444
#define SPLIT_USE_CNT_THRESHOLD 2
4545
#define SPLIT_USE_DISTANCE_THRESHOLD 100
4646

47+
// Maps register number r plus offset o to a bundle index: the sum is wrapped
// modulo 64 and divided by 4, giving a value in [0, 15] for non-negative
// inputs. Each argument is parenthesized so that expressions containing
// lower-precedence operators (|, <<, ?:) are added as a whole.
#define GET_BUNDLE(r, o) ((((r) + (o)) % 64) / 4)
48+
4749
extern unsigned int getStackCallRegSize(bool reserveStackCallRegs);
4850
extern void getForbiddenGRFs(vector<unsigned int>& regNum, const Options *opt, unsigned stackCallRegSize, unsigned reserveSpillSize, unsigned reservedRegNum);
4951
extern void getCallerSaveGRF(vector<unsigned int>& regNum, G4_Kernel* kernel);
@@ -825,13 +827,25 @@ bool LocalRA::assignUniqueRegisters(bool twoBanksRA, bool twoDirectionsAssign)
825827

826828
if (assignFromFront)
827829
{
830+
unsigned short occupiedBundles = 0;
831+
for (size_t i = 0; i < gra.getBundleConflictDclSize(dcl); i++)
832+
{
833+
int offset = 0;
834+
G4_Declare *bDcl = gra.getBundleConflictDcl(dcl, i, offset);
835+
if (bDcl->getRegVar()->isPhyRegAssigned())
836+
{
837+
unsigned int reg = bDcl->getRegVar()->getPhyReg()->asGreg()->getRegNum();
838+
unsigned int bundle = GET_BUNDLE(reg, offset);
839+
occupiedBundles |= (unsigned short)1 << bundle;
840+
}
841+
}
828842
nrows = phyRegMgr.findFreeRegs(sizeInWords, (bankAlign != Either) ? bankAlign : align, subAlign,
829-
regNum, subregNum, 0, numRegLRA - 1, 0, false);
843+
regNum, subregNum, 0, numRegLRA - 1, occupiedBundles, 0, false);
830844
}
831845
else
832846
{
833847
nrows = phyRegMgr.findFreeRegs(sizeInWords, (bankAlign != Either) ? bankAlign : align, subAlign,
834-
regNum, subregNum, numRegLRA - 1, 0, 0, false);
848+
regNum, subregNum, numRegLRA - 1, 0, 0, 0, false);
835849
}
836850

837851
if (nrows)
@@ -1970,7 +1984,7 @@ inline bool PhyRegsLocalRA::isWordBusy(int whichgrf, int word, int howmany)
19701984
return retval;
19711985
}
19721986

1973-
bool PhyRegsLocalRA::findFreeMultipleRegsForward(int regIdx, G4_Align align, int &regnum, int nrows, int lastRowSize, int endReg, int instID, bool isHybridAlloc)
1987+
bool PhyRegsLocalRA::findFreeMultipleRegsForward(int regIdx, G4_Align align, int &regnum, int nrows, int lastRowSize, int endReg, unsigned short occupiedBundles, int instID, bool isHybridAlloc)
19741988
{
19751989
int foundItem = 0;
19761990
int startReg = 0;
@@ -1989,8 +2003,13 @@ bool PhyRegsLocalRA::findFreeMultipleRegsForward(int regIdx, G4_Align align, int
19892003

19902004
LocalRA::findRegisterCandiateWithAlignForward(i, align, multiSteps);
19912005

1992-
startReg = i;
2006+
while (occupiedBundles & (1 << GET_BUNDLE(i, 0)))
2007+
{
2008+
i++;
2009+
LocalRA::findRegisterCandiateWithAlignForward(i, align, multiSteps);
2010+
}
19932011

2012+
startReg = i;
19942013
while (i <= endReg + nrows - 1)
19952014
{
19962015
if (isGRFAvailable(i) &&
@@ -2004,6 +2023,11 @@ bool PhyRegsLocalRA::findFreeMultipleRegsForward(int regIdx, G4_Align align, int
20042023
foundItem = 0;
20052024
i++;
20062025
LocalRA::findRegisterCandiateWithAlignForward(i, align, multiSteps);
2026+
while (occupiedBundles & (1 << GET_BUNDLE(i, 0)))
2027+
{
2028+
i++;
2029+
LocalRA::findRegisterCandiateWithAlignForward(i, align, multiSteps);
2030+
}
20072031
startReg = i;
20082032
continue;
20092033
}
@@ -2030,6 +2054,11 @@ bool PhyRegsLocalRA::findFreeMultipleRegsForward(int regIdx, G4_Align align, int
20302054
foundItem = 0;
20312055
i++;
20322056
LocalRA::findRegisterCandiateWithAlignForward(i, align, multiSteps);
2057+
while (occupiedBundles & (1 << GET_BUNDLE(i, 0)))
2058+
{
2059+
i++;
2060+
LocalRA::findRegisterCandiateWithAlignForward(i, align, multiSteps);
2061+
}
20332062
startReg = i;
20342063
continue;
20352064
}
@@ -2305,7 +2334,7 @@ bool PhyRegsLocalRA::findFreeSingleReg(int regIdx, G4_SubReg_Align subalign, int
23052334
}
23062335

23072336
int PhyRegsManager::findFreeRegs(int size, G4_Align align, G4_SubReg_Align subalign, int& regnum, int& subregnum,
2308-
int startRegNum, int endRegNum, unsigned int instID, bool isHybridAlloc)
2337+
int startRegNum, int endRegNum, unsigned short occupiedBundles, unsigned int instID, bool isHybridAlloc)
23092338
{
23102339
int nrows = 0;
23112340
int lastRowSize = 0;
@@ -2321,7 +2350,7 @@ int PhyRegsManager::findFreeRegs(int size, G4_Align align, G4_SubReg_Align subal
23212350
{
23222351
if (forward)
23232352
{
2324-
found = availableRegs.findFreeMultipleRegsForward(startReg, align, regnum, nrows, lastRowSize, endReg, instID, isHybridAlloc);
2353+
found = availableRegs.findFreeMultipleRegsForward(startReg, align, regnum, nrows, lastRowSize, endReg, occupiedBundles, instID, isHybridAlloc);
23252354
}
23262355
else
23272356
{
@@ -2577,6 +2606,7 @@ void LinearScan::expireInputRanges(unsigned int global_idx, unsigned int local_i
25772606
}
25782607
}
25792608

2609+
25802610
// Allocate registers to live range. It makes a decision whether to spill
25812611
// a currently active range or the range passed as parameter. The range
25822612
// that has larger size and is longer is the spill candidate.
@@ -2595,9 +2625,23 @@ bool LinearScan::allocateRegs(LocalLiveRange* lr, G4_BB* bb, IR_Builder& builder
25952625
// spill cost.
25962626
int nrows = 0;
25972627
int size = lr->getSizeInWords();
2598-
G4_Align align = lr->getTopDcl()->getRegVar()->getAlignment();
2599-
G4_SubReg_Align subalign = lr->getTopDcl()->getRegVar()->getSubRegAlignment();
2628+
G4_Declare *dcl = lr->getTopDcl();
2629+
G4_Align align = dcl->getRegVar()->getAlignment();
2630+
G4_SubReg_Align subalign = dcl->getRegVar()->getSubRegAlignment();
26002631
G4_Align bankAlign = Either;
2632+
unsigned short occupiedBundles = 0;
2633+
2634+
for (size_t i = 0; i < gra.getBundleConflictDclSize(dcl); i++)
2635+
{
2636+
int offset = 0;
2637+
G4_Declare *bDcl = gra.getBundleConflictDcl(dcl, i, offset);
2638+
if (bDcl->getRegVar()->isPhyRegAssigned())
2639+
{
2640+
unsigned int reg = bDcl->getRegVar()->getPhyReg()->asGreg()->getRegNum();
2641+
unsigned int bundle = GET_BUNDLE(reg, offset);
2642+
occupiedBundles |= (unsigned short)1 << bundle;
2643+
}
2644+
}
26012645

26022646
localRABound = numRegLRA - globalLRSize - 1; //-1, localRABound will be counted in findFreeRegs()
26032647

@@ -2616,6 +2660,7 @@ bool LinearScan::allocateRegs(LocalLiveRange* lr, G4_BB* bb, IR_Builder& builder
26162660
subregnum,
26172661
*startGRFReg,
26182662
localRABound,
2663+
occupiedBundles,
26192664
instID,
26202665
false);
26212666
}
@@ -2628,6 +2673,7 @@ bool LinearScan::allocateRegs(LocalLiveRange* lr, G4_BB* bb, IR_Builder& builder
26282673
subregnum,
26292674
*startGRFReg,
26302675
localRABound,
2676+
occupiedBundles,
26312677
instID,
26322678
true);
26332679

@@ -2640,6 +2686,7 @@ bool LinearScan::allocateRegs(LocalLiveRange* lr, G4_BB* bb, IR_Builder& builder
26402686
subregnum,
26412687
*startGRFReg,
26422688
localRABound,
2689+
occupiedBundles,
26432690
instID,
26442691
false);
26452692
}
@@ -2663,6 +2710,7 @@ bool LinearScan::allocateRegs(LocalLiveRange* lr, G4_BB* bb, IR_Builder& builder
26632710
subregnum,
26642711
0,
26652712
endGRFReg,
2713+
occupiedBundles,
26662714
instID,
26672715
false);
26682716

@@ -2994,6 +3042,7 @@ bool LinearScan::allocateRegsFromBanks(LocalLiveRange* lr)
29943042
subregnum,
29953043
*startGRFReg,
29963044
tmpLocalRABound,
3045+
0,
29973046
instID,
29983047
false);
29993048

@@ -3042,6 +3091,7 @@ bool LinearScan::allocateRegsFromBanks(LocalLiveRange* lr)
30423091
subregnum,
30433092
*startGRFReg,
30443093
tmpLocalRABound,
3094+
0,
30453095
instID,
30463096
false);
30473097

@@ -3067,6 +3117,7 @@ bool LinearScan::allocateRegsFromBanks(LocalLiveRange* lr)
30673117
subregnum,
30683118
*startGRFReg,
30693119
tmpLocalRABound,
3120+
0,
30703121
instID,
30713122
false);
30723123
}
@@ -3081,6 +3132,7 @@ bool LinearScan::allocateRegsFromBanks(LocalLiveRange* lr)
30813132
subregnum,
30823133
*startGRFReg,
30833134
tmpLocalRABound,
3135+
0,
30843136
instID,
30853137
true);
30863138

@@ -3093,6 +3145,7 @@ bool LinearScan::allocateRegsFromBanks(LocalLiveRange* lr)
30933145
subregnum,
30943146
*startGRFReg,
30953147
tmpLocalRABound,
3148+
0,
30963149
instID,
30973150
false);
30983151
}
@@ -3118,6 +3171,7 @@ bool LinearScan::allocateRegsFromBanks(LocalLiveRange* lr)
31183171
subregnum,
31193172
*startGRFReg,
31203173
tmpLocalRABound,
3174+
0,
31213175
instID,
31223176
false);
31233177
}

visa/LocalRA.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,8 @@ class PhyRegsLocalRA
334334
inline bool isWordBusy( int whichgrf, int word );
335335
inline bool isWordBusy( int whichgrf, int word, int howmany );
336336

337+
// Forward search for a run of nrows free registers.
// occupiedBundles is a 16-bit mask indexed by bundle number (GET_BUNDLE);
// in the definition, candidate start registers whose bundle bit is set are
// stepped over before a run is attempted.
bool findFreeMultipleRegsForward(int regIdx, G4_Align align, int & regnum, int nrows, int lastRowSize, int endReg, unsigned short occupiedBundles, int instID, bool isHybridAlloc);
338+
337339
void markPhyRegs( G4_Declare* topdcl );
338340

339341
// Available/unavailable is different from busy/free
@@ -420,7 +422,6 @@ class PhyRegsLocalRA
420422
void setSimpleGRFAvailable(bool simple) {simpleGRFAvailable = simple; }
421423
void setR0Forbidden() {r0Forbidden = true;}
422424
void setR1Forbidden() {r1Forbidden = true;}
423-
bool findFreeMultipleRegsForward(int regIdx, G4_Align align, int &regnum, int nrows, int lastRowSize, int endReg, int instID, bool isHybridAlloc);
424425
bool findFreeMultipleRegsBackward(int regIdx, G4_Align align, int &regnum, int nrows, int lastRowSize, int endReg, int instID, bool isHybridAlloc);
425426
bool findFreeSingleReg( int regIdx, G4_SubReg_Align subalign, int &regnum, int &subregnum, int size);
426427
bool findFreeSingleReg(int regIdx, int size, G4_Align align, G4_SubReg_Align subalign, int &regnum, int &subregnum, int endReg, int instID, bool isHybridAlloc, bool forward);
@@ -439,10 +440,9 @@ class PhyRegsManager
439440
availableRegs.setTwoBanksRA(_twoBanksRA);
440441
}
441442

442-
int findFreeRegs( int numwords, G4_Align align, G4_SubReg_Align subalign, int& regnum, int& subregnum,
443-
int startRegNum, int endRegNum, unsigned int instID, bool isHybridAlloc);
443+
// Searches for free registers between startRegNum and endRegNum.
// occupiedBundles is a bitmask of bundles to avoid (one bit per bundle
// index); pass 0 to apply no bundle restriction. In the visible definition
// it is forwarded to findFreeMultipleRegsForward on the forward
// multi-register path.
// NOTE(review): other search paths do not appear to take the mask -- confirm.
int findFreeRegs(int size, G4_Align align, G4_SubReg_Align subalign, int & regnum, int & subregnum, int startRegNum, int endRegNum, unsigned short occupiedBundles, unsigned int instID, bool isHybridAlloc);
444444

445-
void freeRegs( int regnum, int subregnum, int numwords, int instID);
445+
void freeRegs( int regnum, int subregnum, int numwords, int instID);
446446
PhyRegsLocalRA * getAvaialableRegs() { return &availableRegs; }
447447
};
448448

visa/PhyRegUsage.cpp

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,7 @@ int PhyRegUsage::findContiguousWords(
261261
//
262262
bool PhyRegUsage::findContiguousGRF(bool availRegs[],
263263
const bool forbidden[],
264+
unsigned occupiedBundles,
264265
G4_Align align,
265266
unsigned numRegNeeded,
266267
unsigned maxRegs,
@@ -282,7 +283,7 @@ bool PhyRegUsage::findContiguousGRF(bool availRegs[],
282283
}
283284
bool found =
284285
findContiguousNoWrapGRF(
285-
availRegs, forbidden, align, numRegNeeded, startPosRunOne, endPosRunOne, idx);
286+
availRegs, forbidden, occupiedBundles, align, numRegNeeded, startPosRunOne, endPosRunOne, idx);
286287

287288
if (startPosRunOne > 0 && found == false && !isEOTSrc && !isCalleeSaveBias)
288289
{
@@ -292,7 +293,7 @@ bool PhyRegUsage::findContiguousGRF(bool availRegs[],
292293
MUST_BE_TRUE(endPosRunTwo > 0 && endPosRunTwo <= maxRegs, ERROR_UNKNOWN);
293294
found =
294295
findContiguousNoWrapGRF(
295-
availRegs, forbidden, align, numRegNeeded, startPosRunTwo, endPosRunTwo, idx);
296+
availRegs, forbidden, occupiedBundles, align, numRegNeeded, startPosRunTwo, endPosRunTwo, idx);
296297
}
297298

298299
if (found)
@@ -495,11 +496,14 @@ bool PhyRegUsage::isOverlapValid(unsigned int reg, unsigned int numRegs)
495496
return true;
496497
}
497498

499+
// Maps register number r plus offset o to a bundle index: the sum is wrapped
// modulo 64 and divided by 4, giving a value in [0, 15] for non-negative
// inputs. Each argument is parenthesized so that expressions containing
// lower-precedence operators (|, <<, ?:) are added as a whole.
#define GET_BUNDLE(r, o) ((((r) + (o)) % 64) / 4)
500+
498501
//
499502
// look for contiguous available regs from startPos to maxRegs
500503
//
501504
bool PhyRegUsage::findContiguousNoWrapGRF(bool availRegs[],
502505
const bool forbidden[],
506+
unsigned short occupiedBundles,
503507
G4_Align align,
504508
unsigned numRegNeeded,
505509
unsigned startPos,
@@ -542,6 +546,10 @@ bool PhyRegUsage::findContiguousNoWrapGRF(bool availRegs[],
542546
{
543547
i++;
544548
}
549+
else if (occupiedBundles & (1 << GET_BUNDLE(i, 0)))
550+
{
551+
i++;
552+
}
545553
else
546554
{
547555
for (; j < i + numRegNeeded && availRegs[j] && (forbidden == NULL || !forbidden[j]); j++);
@@ -977,7 +985,7 @@ bool PhyRegUsage::assignGRFRegsFromBanks(LiveRange* varBasis,
977985
if (varBasis->getEOTSrc() && builder.hasEOTGRFBinding())
978986
{
979987
startGRFReg = totalGRFNum - 16;
980-
success = findContiguousGRF(availableGregs, forbidden, align, decl->getNumRows(), maxGRFCanBeUsed,
988+
success = findContiguousGRF(availableGregs, forbidden, 0, align, decl->getNumRows(), maxGRFCanBeUsed,
981989
startGRFReg, i, false, true);
982990
}
983991
else
@@ -1127,8 +1135,20 @@ bool PhyRegUsage::assignRegs(bool highInternalConflict,
11271135
startGRFReg = totalGRFNum - 16;
11281136
}
11291137

1138+
unsigned short occupiedBundles = 0;
1139+
for (size_t i = 0; i < gra.getBundleConflictDclSize(decl); i++)
1140+
{
1141+
int offset = 0;
1142+
G4_Declare *bDcl = gra.getBundleConflictDcl(decl, i, offset);
1143+
if (bDcl->getRegVar()->isPhyRegAssigned())
1144+
{
1145+
unsigned int reg = bDcl->getRegVar()->getPhyReg()->asGreg()->getRegNum();
1146+
unsigned int bundle = GET_BUNDLE(reg, offset);
1147+
occupiedBundles |= (unsigned short)1 << bundle;
1148+
}
1149+
}
11301150

1131-
bool success = findContiguousGRF(availableGregs, forbidden, bankAlign != Either ? bankAlign : align, decl->getNumRows(), endGRFReg,
1151+
bool success = findContiguousGRF(availableGregs, forbidden, occupiedBundles, bankAlign != Either ? bankAlign : align, decl->getNumRows(), endGRFReg,
11321152
startGRFReg, i, varBasis->getCalleeSaveBias(), varBasis->getEOTSrc());
11331153
if (success) {
11341154
varBasis->setPhyReg(regPool.getGreg(i), 0);

0 commit comments

Comments
 (0)