Skip to content

Commit ab205c8

Browse files
pratikashar and igcbot
authored and committed
Remove condition from hot path
When 4GRF alignment is supported, a different edge-weight computation function is used. This change reduces the cost of dynamically checking that flag in boilerplate code.
1 parent 93247ff commit ab205c8

File tree

2 files changed

+80
-51
lines changed

2 files changed

+80
-51
lines changed

visa/GraphColor.cpp

Lines changed: 53 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -5940,6 +5940,7 @@ void GraphColor::createLiveRanges() {
59405940
}
59415941
}
59425942

5943+
template<bool Support4GRFAlign>
59435944
void GraphColor::computeDegreeForGRF() {
59445945
for (unsigned i = 0; i < numVar; i++) {
59455946
unsigned degree = 0;
@@ -5954,7 +5955,7 @@ void GraphColor::computeDegreeForGRF() {
59545955

59555956
auto computeDegree = [&](LiveRange *lr1) {
59565957
if (!lr1->getIsPartialDcl()) {
5957-
unsigned edgeDegree = edgeWeightGRF(lrs[i], lr1);
5958+
unsigned edgeDegree = edgeWeightGRF<Support4GRFAlign>(lrs[i], lr1);
59585959

59595960
degree += edgeDegree;
59605961

@@ -6286,60 +6287,61 @@ void GraphColor::computeSpillCosts(bool useSplitLLRHeuristic, const RPE *rpe) {
62866287
// subtract lr's neighbors that are still in work list
62876288
//
62886289
void GraphColor::relaxNeighborDegreeGRF(LiveRange *lr) {
6289-
if (!(lr->getIsPseudoNode()) && !(lr->getIsPartialDcl())) {
6290-
unsigned lr_id = lr->getVar()->getId();
6291-
bool lr2EvenAlign = gra.isEvenAligned(lr->getDcl());
6292-
unsigned int lr2AugAlign = gra.getAugAlign(lr->getDcl());
6293-
unsigned lr2_nreg = lr->getNumRegNeeded();
6290+
if (lr->getIsPseudoNode() || lr->getIsPartialDcl())
6291+
return;
62946292

6295-
// relax degree between 2 nodes
6296-
auto relaxDegree = [&](LiveRange *lr1) {
6293+
unsigned lr_id = lr->getVar()->getId();
6294+
unsigned lr2_nreg = lr->getNumRegNeeded();
6295+
6296+
const std::vector<unsigned> &intfs = intf.getSparseIntfForVar(lr_id);
6297+
if (gra.use4GRFAlign) {
6298+
unsigned int lr2AugAlign = gra.getAugAlign(lr->getDcl());
6299+
for (auto it : intfs) {
6300+
LiveRange *lr1 = lrs[it];
62976301
if (lr1->getActive() && !lr1->getIsPseudoNode() &&
62986302
!(lr1->getIsPartialDcl())) {
62996303
unsigned lr1_nreg = lr1->getNumRegNeeded();
6300-
unsigned w = 0;
6301-
if (gra.use4GRFAlign) {
6302-
unsigned int lr1AugAlign = gra.getAugAlign(lr1->getDcl());
6303-
w = edgeWeightWith4GRF(lr1AugAlign, lr2AugAlign, lr1_nreg, lr2_nreg);
6304-
} else {
6305-
bool lr1EvenAlign = gra.isEvenAligned(lr1->getDcl());
6306-
w = edgeWeightGRF(lr1EvenAlign, lr2EvenAlign, lr1_nreg, lr2_nreg);
6307-
}
6308-
VISA_DEBUG_VERBOSE({
6309-
std::cout << "\t relax ";
6310-
lr1->dump();
6311-
std::cout << " degree(" << lr1->getDegree() << ") - " << w << "\n";
6312-
});
6313-
lr1->subtractDegree(w);
6314-
6315-
unsigned availColor = numColor;
6316-
availColor = numColor - lr1->getNumForbidden();
6317-
6318-
if (lr1->getDegree() + lr1->getNumRegNeeded() <= availColor) {
6319-
unconstrainedWorklist.push_back(lr1);
6320-
lr1->setActive(false);
6321-
}
6304+
unsigned int lr1AugAlign = gra.getAugAlign(lr1->getDcl());
6305+
auto w =
6306+
edgeWeightWith4GRF(lr1AugAlign, lr2AugAlign, lr1_nreg, lr2_nreg);
6307+
relax(lr1, w);
63226308
}
6323-
};
6324-
6325-
const std::vector<unsigned> &intfs = intf.getSparseIntfForVar(lr_id);
6326-
for (auto it : intfs) {
6327-
LiveRange *lrs_it = lrs[it];
6309+
}
6310+
return;
6311+
}
63286312

6329-
relaxDegree(lrs_it);
6313+
// Handle case where 4GRF align is unsupported
6314+
bool lr2EvenAlign = gra.isEvenAligned(lr->getDcl());
6315+
for (auto it : intfs) {
6316+
LiveRange *lr1 = lrs[it];
6317+
if (lr1->getActive() && !lr1->getIsPseudoNode() &&
6318+
!(lr1->getIsPartialDcl())) {
6319+
unsigned lr1_nreg = lr1->getNumRegNeeded();
6320+
unsigned w = 0;
6321+
bool lr1EvenAlign = gra.isEvenAligned(lr1->getDcl());
6322+
w = edgeWeightGRF(lr1EvenAlign, lr2EvenAlign, lr1_nreg, lr2_nreg);
6323+
relax(lr1, w);
63306324
}
6325+
}
63316326

6332-
auto *weakEdges = intf.getCompatibleSparseIntf(lr->getDcl());
6333-
if (weakEdges) {
6334-
for (auto weakNeighbor : *weakEdges) {
6335-
if (!weakNeighbor->getRegVar()->isRegAllocPartaker())
6336-
continue;
6337-
auto lr1 = lrs[weakNeighbor->getRegVar()->getId()];
6338-
relaxDegree(lr1);
6327+
// Weak edges are supported only when 4GRF align is unsupported
6328+
auto *weakEdges = intf.getCompatibleSparseIntf(lr->getDcl());
6329+
if (weakEdges) {
6330+
for (auto weakNeighbor : *weakEdges) {
6331+
if (!weakNeighbor->getRegVar()->isRegAllocPartaker())
6332+
continue;
6333+
auto lr1 = lrs[weakNeighbor->getRegVar()->getId()];
6334+
if (lr1->getActive() && !lr1->getIsPseudoNode() &&
6335+
!(lr1->getIsPartialDcl())) {
6336+
unsigned lr1_nreg = lr1->getNumRegNeeded();
6337+
bool lr1EvenAlign = gra.isEvenAligned(lr1->getDcl());
6338+
auto w = edgeWeightGRF(lr1EvenAlign, lr2EvenAlign, lr1_nreg, lr2_nreg);
6339+
relax(lr1, w);
63396340
}
63406341
}
63416342
}
63426343
}
6344+
63436345
void GraphColor::relaxNeighborDegreeARF(LiveRange *lr) {
63446346
if (!(lr->getIsPseudoNode())) {
63456347
unsigned lr_id = lr->getVar()->getId();
@@ -7273,7 +7275,10 @@ bool GraphColor::regAlloc(bool doBankConflictReduction,
72737275
// compute degree and spill costs for each live range
72747276
//
72757277
if (liveAnalysis.livenessClass(G4_GRF)) {
7276-
computeDegreeForGRF();
7278+
if (gra.use4GRFAlign)
7279+
computeDegreeForGRF<true>();
7280+
else
7281+
computeDegreeForGRF<false>();
72777282
} else {
72787283
computeDegreeForARF();
72797284
}
@@ -11812,18 +11817,19 @@ void GlobalRA::insertRestoreAddr(G4_BB *bb) {
1181211817
// weight computation and later during simplification is necessary for
1181311818
// correctness.
1181411819
//
11820+
template <bool Support4GRFAlign>
1181511821
unsigned GraphColor::edgeWeightGRF(const LiveRange *lr1, const LiveRange *lr2) {
1181611822
unsigned lr1_nreg = lr1->getNumRegNeeded();
1181711823
unsigned lr2_nreg = lr2->getNumRegNeeded();
1181811824

11819-
if (gra.use4GRFAlign) {
11825+
if constexpr (Support4GRFAlign) {
1182011826
auto lr1Align = gra.getAugAlign(lr1->getDcl());
1182111827
auto lr2Align = gra.getAugAlign(lr2->getDcl());
1182211828

1182311829
return edgeWeightWith4GRF(lr1Align, lr2Align, lr1_nreg, lr2_nreg);
1182411830
} else {
11825-
bool lr1EvenAlign = gra.isEvenAligned(lr1->getDcl());
11826-
bool lr2EvenAlign = gra.isEvenAligned(lr2->getDcl());
11831+
bool lr1EvenAlign = gra.isEvenAligned<false>(lr1->getDcl());
11832+
bool lr2EvenAlign = gra.isEvenAligned<false>(lr2->getDcl());
1182711833

1182811834
return edgeWeightGRF(lr1EvenAlign, lr2EvenAlign, lr1_nreg, lr2_nreg);
1182911835
}

visa/GraphColor.h

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,15 +1036,16 @@ class GraphColor {
10361036
// Reserved GRF count for fail-safe RA
10371037
unsigned reserveSpillGRFCount = 0;
10381038

1039+
template<bool Support4GRFAlign>
10391040
unsigned edgeWeightGRF(const LiveRange *lr1, const LiveRange *lr2);
10401041
unsigned edgeWeightARF(const LiveRange *lr1, const LiveRange *lr2);
10411042
static unsigned edgeWeightGRF(bool lr1EvenAlign, bool lr2EvenAlign,
10421043
unsigned lr1_nreg, unsigned lr2_nreg) {
1044+
unsigned sum = lr1_nreg + lr2_nreg;
10431045
if (!lr1EvenAlign) {
1044-
return lr1_nreg + lr2_nreg - 1;
1046+
return sum - 1;
10451047
}
10461048

1047-
unsigned sum = lr1_nreg + lr2_nreg;
10481049
if (!lr2EvenAlign)
10491050
return sum + 1 - ((sum) % 2);
10501051

@@ -1086,6 +1087,25 @@ class GraphColor {
10861087
return edgeWeightWith4GRF(lr2Align, lr1Align, lr2_nreg, lr1_nreg);
10871088
}
10881089

1090+
void inline relax(LiveRange *lr1, unsigned int w) {
1091+
// relax degree between 2 nodes
1092+
VISA_DEBUG_VERBOSE({
1093+
std::cout << "\t relax ";
1094+
lr1->dump();
1095+
std::cout << " degree(" << lr1->getDegree() << ") - " << w << "\n";
1096+
});
1097+
lr1->subtractDegree(w);
1098+
1099+
unsigned availColor = numColor;
1100+
availColor = numColor - lr1->getNumForbidden();
1101+
1102+
if (lr1->getDegree() + lr1->getNumRegNeeded() <= availColor) {
1103+
unconstrainedWorklist.push_back(lr1);
1104+
lr1->setActive(false);
1105+
}
1106+
}
1107+
1108+
template <bool Support4GRFAlign>
10891109
void computeDegreeForGRF();
10901110
void computeDegreeForARF();
10911111
void computeSpillCosts(bool useSplitLLRHeuristic, const RPE *rpe);
@@ -1269,7 +1289,6 @@ class ForbiddenRegs {
12691289
};
12701290

12711291
class GlobalRA {
1272-
12731292
private:
12741293
std::unordered_set<G4_INST *> EUFusionCallWAInsts;
12751294
bool m_EUFusionCallWANeeded;
@@ -1843,9 +1862,13 @@ class GlobalRA {
18431862
return augAlign == 4;
18441863
}
18451864

1865+
template <bool Support4GRFAlign = true>
18461866
bool isEvenAligned(const G4_Declare* dcl) const {
18471867
auto augAlign = getAugAlign(dcl);
1848-
return augAlign > 0 && augAlign % 2 == 0;
1868+
if constexpr (Support4GRFAlign)
1869+
return augAlign > 0 && augAlign % 2 == 0;
1870+
else
1871+
return (augAlign > 0);
18491872
}
18501873

18511874
int getAugAlign(const G4_Declare *dcl) const {

0 commit comments

Comments
 (0)