Skip to content

Commit dc26dac

Browse files
pratikasharigcbot
authored andcommitted
Fix spill/fill cleanup logic in certain scenarios
For Cm, in divergent BB a variable may be defined multiple times with different EM. If such a variable is spilled then we cannot treat all spills of the instructions as same during spill/fill cleanup. Consider following case with V10 spilled: op1 (16) V10, ... {NoMask} op2 (16) V10, ... {H1} op3 (16) ..., V10 {NoMask} If above code is in divergent BB, op1 and op2 write different # of channels of V10. Fill in op3 uses NoMask. Since op2 uses H1, we cannot coalesce spill from op2 and fill from op3. This change recognizes such cases and prevents such cleanup.
1 parent 0d1c68f commit dc26dac

File tree

2 files changed

+62
-22
lines changed

2 files changed

+62
-22
lines changed

visa/SpillCleanup.cpp

Lines changed: 49 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ CoalesceSpillFills::createCoalescedSpillDcl(unsigned int payloadSize) {
108108

109109
void CoalesceSpillFills::coalesceSpills(
110110
std::list<INST_LIST_ITER> &coalesceableSpills, unsigned int min,
111-
unsigned int max, bool useNoMask, G4_InstOption mask, G4_BB *bb) {
111+
unsigned int max, bool useNoMask, G4_InstOption mask) {
112112
// Generate fill with minimum size = max-min. This should be compatible with
113113
// payload sizes supported by hardware.
114114
unsigned int payloadSize = (max - min) + 1;
@@ -163,15 +163,15 @@ void CoalesceSpillFills::coalesceSpills(
163163
for (const auto &spill : coalesceableSpills) {
164164
gra.incRA.markForIntfUpdate(
165165
(*spill)->asSpillIntrinsic()->getPayload()->getTopDcl());
166-
bb->erase(spill);
166+
curBB->erase(spill);
167167
}
168168
coalesceableSpills.clear();
169-
bb->insertBefore(f, coalescedSpillSrc->getInst());
169+
curBB->insertBefore(f, coalescedSpillSrc->getInst());
170170
}
171171

172172
void CoalesceSpillFills::coalesceFills(
173173
std::list<INST_LIST_ITER> &coalesceableFills, unsigned int min,
174-
unsigned int max, G4_BB *bb) {
174+
unsigned int max) {
175175
// Generate fill with minimum size = max-min. This should be compatible with
176176
// payload sizes supported by hardware.
177177
unsigned int payloadSize = (max - min) + 1;
@@ -235,11 +235,11 @@ void CoalesceSpillFills::coalesceFills(
235235
if (fill == f) {
236236
f++;
237237
}
238-
bb->erase(fill);
238+
curBB->erase(fill);
239239
}
240240

241241
coalesceableFills.clear();
242-
bb->insertBefore(f, newFill);
242+
curBB->insertBefore(f, newFill);
243243
}
244244

245245
// Return true if heuristic agrees to coalescing.
@@ -637,7 +637,7 @@ void CoalesceSpillFills::keepConsecutiveSpills(
637637
INST_LIST_ITER
638638
CoalesceSpillFills::analyzeSpillCoalescing(std::list<INST_LIST_ITER> &instList,
639639
INST_LIST_ITER start,
640-
INST_LIST_ITER end, G4_BB *bb) {
640+
INST_LIST_ITER end) {
641641
// Check and perform coalescing, if possible, amongst spills in instList.
642642
// Return inst iter points to either last inst+1 in instList if all spills
643643
// were coalesced. Otherwise, it points to first spill that wasnt coalesced.
@@ -657,7 +657,7 @@ CoalesceSpillFills::analyzeSpillCoalescing(std::list<INST_LIST_ITER> &instList,
657657
keepConsecutiveSpills(instList, coalesceableSpills, cMaxSpillPayloadSize, min,
658658
max, useNoMask, mask);
659659
if (coalesceableSpills.size() > 1) {
660-
coalesceSpills(coalesceableSpills, min, max, useNoMask, mask, bb);
660+
coalesceSpills(coalesceableSpills, min, max, useNoMask, mask);
661661
} else {
662662
// When coalescing is not done, we want to
663663
// move to second instruction in instList in
@@ -675,7 +675,7 @@ CoalesceSpillFills::analyzeSpillCoalescing(std::list<INST_LIST_ITER> &instList,
675675
INST_LIST_ITER
676676
CoalesceSpillFills::analyzeFillCoalescing(std::list<INST_LIST_ITER> &instList,
677677
INST_LIST_ITER start,
678-
INST_LIST_ITER end, G4_BB *bb) {
678+
INST_LIST_ITER end) {
679679
// Check and perform coalescing, if possible, amongst fills in instList.
680680
// Return inst iter points to either last inst+1 in instList if all fills
681681
// were coalesced. Otherwise, it points to first fill that wasnt coalesced.
@@ -704,7 +704,7 @@ CoalesceSpillFills::analyzeFillCoalescing(std::list<INST_LIST_ITER> &instList,
704704
}
705705

706706
if (coalesceableFills.size() > 1) {
707-
coalesceFills(coalesceableFills, min, max, bb);
707+
coalesceFills(coalesceableFills, min, max);
708708
}
709709

710710
if (instList.size() == 0) {
@@ -733,7 +733,7 @@ bool CoalesceSpillFills::overlap(G4_INST *inst1, G4_INST *inst2,
733733
if (scratchEnd1 >= scratchOffset2) {
734734
if (scratchOffset1 <= scratchOffset2 &&
735735
(scratchOffset1 + scratchSize1) >= (scratchOffset2 + scratchSize2)) {
736-
isFullOverlap = true;
736+
isFullOverlap = !isIncompatibleEMCm(inst1, inst2);
737737
}
738738

739739
return true;
@@ -744,7 +744,7 @@ bool CoalesceSpillFills::overlap(G4_INST *inst1, G4_INST *inst2,
744744
if (scratchEnd2 >= scratchOffset1) {
745745
if (scratchOffset1 <= scratchOffset2 &&
746746
(scratchOffset1 + scratchSize1) >= (scratchOffset2 + scratchSize2)) {
747-
isFullOverlap = true;
747+
isFullOverlap = !isIncompatibleEMCm(inst1, inst2);
748748
}
749749

750750
return true;
@@ -762,7 +762,6 @@ bool CoalesceSpillFills::overlap(G4_INST *inst,
762762
if (overlap(inst, spillInst, t))
763763
return true;
764764
}
765-
766765
return false;
767766
}
768767

@@ -853,6 +852,7 @@ void CoalesceSpillFills::fills() {
853852
for (auto bb : kernel.fg) {
854853
if (!gra.hasSpillCodeInBB(bb))
855854
continue;
855+
curBB = bb;
856856
auto endIter = bb->end();
857857
std::list<INST_LIST_ITER> fillsToCoalesce;
858858
std::list<INST_LIST_ITER> spills;
@@ -944,7 +944,7 @@ void CoalesceSpillFills::fills() {
944944
earlyCoalesce) {
945945
if (fillsToCoalesce.size() > 1) {
946946
auto nextInst =
947-
analyzeFillCoalescing(fillsToCoalesce, startIter, instIter, bb);
947+
analyzeFillCoalescing(fillsToCoalesce, startIter, instIter);
948948
if (earlyCoalesce)
949949
instIter++;
950950
else
@@ -1028,6 +1028,7 @@ void CoalesceSpillFills::spills() {
10281028
for (auto bb : kernel.fg) {
10291029
if (!gra.hasSpillCodeInBB(bb))
10301030
continue;
1031+
curBB = bb;
10311032
auto endIter = bb->end();
10321033
std::list<INST_LIST_ITER> spillsToCoalesce;
10331034
INST_LIST_ITER startIter = bb->begin();
@@ -1143,7 +1144,7 @@ void CoalesceSpillFills::spills() {
11431144
earlyCoalesce) {
11441145
if (spillsToCoalesce.size() > 1) {
11451146
auto nextInst =
1146-
analyzeSpillCoalescing(spillsToCoalesce, startIter, instIter, bb);
1147+
analyzeSpillCoalescing(spillsToCoalesce, startIter, instIter);
11471148
if (earlyCoalesce)
11481149
instIter++;
11491150
else
@@ -1205,6 +1206,7 @@ void CoalesceSpillFills::fixSendsSrcOverlap() {
12051206
// where V441 and V449 are both scalars of type :uq and :ud respectively
12061207
//
12071208
for (auto bb : kernel.fg) {
1209+
curBB = bb;
12081210
for (auto instIt = bb->begin(); instIt != bb->end(); instIt++) {
12091211
auto inst = (*instIt);
12101212

@@ -1522,6 +1524,7 @@ void CoalesceSpillFills::spillFillCleanup() {
15221524
for (auto bb : kernel.fg) {
15231525
if (!gra.hasSpillCodeInBB(bb))
15241526
continue;
1527+
curBB = bb;
15251528
auto startIt = bb->begin();
15261529
auto endIt = bb->end();
15271530
const auto &splitInsts = LoopVarSplit::getSplitInsts(&gra, bb);
@@ -1685,7 +1688,9 @@ void CoalesceSpillFills::spillFillCleanup() {
16851688
// Check whether writes for all rows were found
16861689
bool found = true;
16871690
for (auto row = rowStart; row <= lastRow; row++) {
1688-
if (writesPerOffset.find(row) == writesPerOffset.end()) {
1691+
auto spillIt = writesPerOffset.find(row);
1692+
if (spillIt == writesPerOffset.end() ||
1693+
isIncompatibleEMCm((*spillIt).second, inst)) {
16891694
found = false;
16901695
break;
16911696
}
@@ -1757,6 +1762,33 @@ void CoalesceSpillFills::spillFillCleanup() {
17571762
}
17581763
}
17591764

1765+
bool CoalesceSpillFills::isIncompatibleEMCm(G4_INST *inst1,
1766+
G4_INST *inst2) const {
1767+
vISA_ASSERT(curBB, "expecting valid G4_BB* containing inst1, inst2");
1768+
vISA_ASSERT(std::find(curBB->begin(), curBB->end(), inst1) != curBB->end(),
1769+
"expecting inst1 in bb");
1770+
vISA_ASSERT(std::find(curBB->begin(), curBB->end(), inst2) != curBB->end(),
1771+
"expecting inst2 in bb");
1772+
1773+
if (curBB->isDivergent() && isCm) {
1774+
// Cm program may write a variable using NoMask once and then
1775+
// write it again with default EM. Later use of the variable could
1776+
// use NoMask. For eg,
1777+
// op1 (16) V10, ... {NoMask}
1778+
// op2 (16) V10, ... {H1}
1779+
// op3 (16) ..., V10 {NoMask}
1780+
//
1781+
// If V10 is spilled in above snippet, we'll emit 2 spill operations
1782+
// and 1 fill operation. Spill for op2 doesn't use NoMask as send msg
1783+
// can directly rely on EM behavior. Since fill uses NoMask, spill
1784+
// cleanup cannot coalesce the second spill and fill away. So for Cm,
1785+
// we disallow spill cleanup between a NoMask fill and default EM
1786+
// spill in divergent BBs.
1787+
return (inst1->isWriteEnableInst() ^ inst2->isWriteEnableInst());
1788+
}
1789+
return false;
1790+
}
1791+
17601792
void CoalesceSpillFills::removeRedundantWrites() {
17611793
typedef std::list<std::pair<G4_BB *, INST_LIST_ITER>> SPILLS;
17621794
typedef std::list<std::pair<G4_BB *, INST_LIST_ITER>> FILLS;
@@ -1766,6 +1798,7 @@ void CoalesceSpillFills::removeRedundantWrites() {
17661798
// 1. Successive writes to same offset without a fill in between,
17671799
// 2. Writes in program without any fill from that slot throughout
17681800
for (auto bb : kernel.fg) {
1801+
curBB = bb;
17691802
auto endIt = bb->end();
17701803
endIt--;
17711804
// Store spill slots that are written in to alongwith emask used

visa/SpillCleanup.h

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ class CoalesceSpillFills {
2525
// Store declares spilled by sends like sampler
2626
std::set<G4_Declare *> sendDstDcl;
2727
RPE &rpe;
28+
// Spill cleanup is a per-BB optimization. Store current BB being optimized.
29+
G4_BB *curBB = nullptr;
30+
bool isCm = false;
2831

2932
// Set window size to coalesce
3033
const unsigned int cWindowSize = 10;
@@ -66,13 +69,11 @@ class CoalesceSpillFills {
6669
void fills();
6770
void spills();
6871
INST_LIST_ITER analyzeFillCoalescing(std::list<INST_LIST_ITER> &,
69-
INST_LIST_ITER, INST_LIST_ITER, G4_BB *);
72+
INST_LIST_ITER, INST_LIST_ITER);
7073
INST_LIST_ITER analyzeSpillCoalescing(std::list<INST_LIST_ITER> &,
71-
INST_LIST_ITER, INST_LIST_ITER,
72-
G4_BB *);
74+
INST_LIST_ITER, INST_LIST_ITER);
7375
void removeWARFills(std::list<INST_LIST_ITER> &, std::list<INST_LIST_ITER> &);
74-
void coalesceFills(std::list<INST_LIST_ITER> &, unsigned int, unsigned int,
75-
G4_BB *);
76+
void coalesceFills(std::list<INST_LIST_ITER> &, unsigned int, unsigned int);
7677
G4_INST *generateCoalescedFill(G4_SrcRegRegion *, unsigned int, unsigned int,
7778
unsigned int, bool);
7879
G4_SrcRegRegion *generateCoalescedSpill(G4_SrcRegRegion *, unsigned int,
@@ -84,14 +85,18 @@ class CoalesceSpillFills {
8485
bool overlap(G4_INST *, std::list<INST_LIST_ITER> &);
8586
bool overlap(G4_INST *, G4_INST *, bool &);
8687
void coalesceSpills(std::list<INST_LIST_ITER> &, unsigned int, unsigned int,
87-
bool, G4_InstOption, G4_BB *);
88+
bool, G4_InstOption);
8889
bool allSpillsSameVar(std::list<INST_LIST_ITER> &);
8990
void fixSendsSrcOverlap();
9091
void removeRedundantSplitMovs();
9192
G4_Declare *createCoalescedSpillDcl(unsigned int);
9293
void populateSendDstDcl();
9394
void spillFillCleanup();
9495
void removeRedundantWrites();
96+
// For Cm, if BB is in divergent CF and spill inst and fill inst have
97+
// mismatched WriteEnable bit then return true as cleanup may be
98+
// illegal.
99+
bool isIncompatibleEMCm(G4_INST *inst1, G4_INST *inst2) const;
95100

96101
public:
97102
CoalesceSpillFills(G4_Kernel &k, LivenessAnalysis &l, GraphColor &g,
@@ -117,6 +122,8 @@ class CoalesceSpillFills {
117122
totalInputSize += input_info->size;
118123
}
119124
totalInputSize = totalInputSize / k.numEltPerGRF<Type_UB>();
125+
126+
isCm = (kernel.getInt32KernelAttr(Attributes::ATTR_Target) == VISA_CM);
120127
}
121128

122129
void run();

0 commit comments

Comments
 (0)