@@ -108,7 +108,7 @@ CoalesceSpillFills::createCoalescedSpillDcl(unsigned int payloadSize) {
108
108
109
109
void CoalesceSpillFills::coalesceSpills (
110
110
std::list<INST_LIST_ITER> &coalesceableSpills, unsigned int min,
111
- unsigned int max, bool useNoMask, G4_InstOption mask, G4_BB *bb ) {
111
+ unsigned int max, bool useNoMask, G4_InstOption mask) {
112
112
// Generate spill with minimum size = (max-min)+1. This should be compatible with
113
113
// payload sizes supported by hardware.
114
114
unsigned int payloadSize = (max - min) + 1 ;
@@ -163,15 +163,15 @@ void CoalesceSpillFills::coalesceSpills(
163
163
for (const auto &spill : coalesceableSpills) {
164
164
gra.incRA .markForIntfUpdate (
165
165
(*spill)->asSpillIntrinsic ()->getPayload ()->getTopDcl ());
166
- bb ->erase (spill);
166
+ curBB ->erase (spill);
167
167
}
168
168
coalesceableSpills.clear ();
169
- bb ->insertBefore (f, coalescedSpillSrc->getInst ());
169
+ curBB ->insertBefore (f, coalescedSpillSrc->getInst ());
170
170
}
171
171
172
172
void CoalesceSpillFills::coalesceFills (
173
173
std::list<INST_LIST_ITER> &coalesceableFills, unsigned int min,
174
- unsigned int max, G4_BB *bb ) {
174
+ unsigned int max) {
175
175
// Generate fill with minimum size = (max-min)+1. This should be compatible with
176
176
// payload sizes supported by hardware.
177
177
unsigned int payloadSize = (max - min) + 1 ;
@@ -235,11 +235,11 @@ void CoalesceSpillFills::coalesceFills(
235
235
if (fill == f) {
236
236
f++;
237
237
}
238
- bb ->erase (fill);
238
+ curBB ->erase (fill);
239
239
}
240
240
241
241
coalesceableFills.clear ();
242
- bb ->insertBefore (f, newFill);
242
+ curBB ->insertBefore (f, newFill);
243
243
}
244
244
245
245
// Return true if heuristic agrees to coalescing.
@@ -637,7 +637,7 @@ void CoalesceSpillFills::keepConsecutiveSpills(
637
637
INST_LIST_ITER
638
638
CoalesceSpillFills::analyzeSpillCoalescing (std::list<INST_LIST_ITER> &instList,
639
639
INST_LIST_ITER start,
640
- INST_LIST_ITER end, G4_BB *bb ) {
640
+ INST_LIST_ITER end) {
641
641
// Check and perform coalescing, if possible, amongst spills in instList.
642
642
// Return inst iter points to either last inst+1 in instList if all spills
643
643
// were coalesced. Otherwise, it points to first spill that wasn't coalesced.
@@ -657,7 +657,7 @@ CoalesceSpillFills::analyzeSpillCoalescing(std::list<INST_LIST_ITER> &instList,
657
657
keepConsecutiveSpills (instList, coalesceableSpills, cMaxSpillPayloadSize, min,
658
658
max, useNoMask, mask);
659
659
if (coalesceableSpills.size () > 1 ) {
660
- coalesceSpills (coalesceableSpills, min, max, useNoMask, mask, bb );
660
+ coalesceSpills (coalesceableSpills, min, max, useNoMask, mask);
661
661
} else {
662
662
// When coalescing is not done, we want to
663
663
// move to second instruction in instList in
@@ -675,7 +675,7 @@ CoalesceSpillFills::analyzeSpillCoalescing(std::list<INST_LIST_ITER> &instList,
675
675
INST_LIST_ITER
676
676
CoalesceSpillFills::analyzeFillCoalescing (std::list<INST_LIST_ITER> &instList,
677
677
INST_LIST_ITER start,
678
- INST_LIST_ITER end, G4_BB *bb ) {
678
+ INST_LIST_ITER end) {
679
679
// Check and perform coalescing, if possible, amongst fills in instList.
680
680
// Return inst iter points to either last inst+1 in instList if all fills
681
681
// were coalesced. Otherwise, it points to first fill that wasn't coalesced.
@@ -704,7 +704,7 @@ CoalesceSpillFills::analyzeFillCoalescing(std::list<INST_LIST_ITER> &instList,
704
704
}
705
705
706
706
if (coalesceableFills.size () > 1 ) {
707
- coalesceFills (coalesceableFills, min, max, bb );
707
+ coalesceFills (coalesceableFills, min, max);
708
708
}
709
709
710
710
if (instList.size () == 0 ) {
@@ -733,7 +733,7 @@ bool CoalesceSpillFills::overlap(G4_INST *inst1, G4_INST *inst2,
733
733
if (scratchEnd1 >= scratchOffset2) {
734
734
if (scratchOffset1 <= scratchOffset2 &&
735
735
(scratchOffset1 + scratchSize1) >= (scratchOffset2 + scratchSize2)) {
736
- isFullOverlap = true ;
736
+ isFullOverlap = ! isIncompatibleEMCm (inst1, inst2) ;
737
737
}
738
738
739
739
return true ;
@@ -744,7 +744,7 @@ bool CoalesceSpillFills::overlap(G4_INST *inst1, G4_INST *inst2,
744
744
if (scratchEnd2 >= scratchOffset1) {
745
745
if (scratchOffset1 <= scratchOffset2 &&
746
746
(scratchOffset1 + scratchSize1) >= (scratchOffset2 + scratchSize2)) {
747
- isFullOverlap = true ;
747
+ isFullOverlap = ! isIncompatibleEMCm (inst1, inst2) ;
748
748
}
749
749
750
750
return true ;
@@ -762,7 +762,6 @@ bool CoalesceSpillFills::overlap(G4_INST *inst,
762
762
if (overlap (inst, spillInst, t))
763
763
return true ;
764
764
}
765
-
766
765
return false ;
767
766
}
768
767
@@ -853,6 +852,7 @@ void CoalesceSpillFills::fills() {
853
852
for (auto bb : kernel.fg ) {
854
853
if (!gra.hasSpillCodeInBB (bb))
855
854
continue ;
855
+ curBB = bb;
856
856
auto endIter = bb->end ();
857
857
std::list<INST_LIST_ITER> fillsToCoalesce;
858
858
std::list<INST_LIST_ITER> spills;
@@ -944,7 +944,7 @@ void CoalesceSpillFills::fills() {
944
944
earlyCoalesce) {
945
945
if (fillsToCoalesce.size () > 1 ) {
946
946
auto nextInst =
947
- analyzeFillCoalescing (fillsToCoalesce, startIter, instIter, bb );
947
+ analyzeFillCoalescing (fillsToCoalesce, startIter, instIter);
948
948
if (earlyCoalesce)
949
949
instIter++;
950
950
else
@@ -1028,6 +1028,7 @@ void CoalesceSpillFills::spills() {
1028
1028
for (auto bb : kernel.fg ) {
1029
1029
if (!gra.hasSpillCodeInBB (bb))
1030
1030
continue ;
1031
+ curBB = bb;
1031
1032
auto endIter = bb->end ();
1032
1033
std::list<INST_LIST_ITER> spillsToCoalesce;
1033
1034
INST_LIST_ITER startIter = bb->begin ();
@@ -1143,7 +1144,7 @@ void CoalesceSpillFills::spills() {
1143
1144
earlyCoalesce) {
1144
1145
if (spillsToCoalesce.size () > 1 ) {
1145
1146
auto nextInst =
1146
- analyzeSpillCoalescing (spillsToCoalesce, startIter, instIter, bb );
1147
+ analyzeSpillCoalescing (spillsToCoalesce, startIter, instIter);
1147
1148
if (earlyCoalesce)
1148
1149
instIter++;
1149
1150
else
@@ -1205,6 +1206,7 @@ void CoalesceSpillFills::fixSendsSrcOverlap() {
1205
1206
// where V441 and V449 are both scalars of type :uq and :ud respectively
1206
1207
//
1207
1208
for (auto bb : kernel.fg ) {
1209
+ curBB = bb;
1208
1210
for (auto instIt = bb->begin (); instIt != bb->end (); instIt++) {
1209
1211
auto inst = (*instIt);
1210
1212
@@ -1522,6 +1524,7 @@ void CoalesceSpillFills::spillFillCleanup() {
1522
1524
for (auto bb : kernel.fg ) {
1523
1525
if (!gra.hasSpillCodeInBB (bb))
1524
1526
continue ;
1527
+ curBB = bb;
1525
1528
auto startIt = bb->begin ();
1526
1529
auto endIt = bb->end ();
1527
1530
const auto &splitInsts = LoopVarSplit::getSplitInsts (&gra, bb);
@@ -1685,7 +1688,9 @@ void CoalesceSpillFills::spillFillCleanup() {
1685
1688
// Check whether writes for all rows were found
1686
1689
bool found = true ;
1687
1690
for (auto row = rowStart; row <= lastRow; row++) {
1688
- if (writesPerOffset.find (row) == writesPerOffset.end ()) {
1691
+ auto spillIt = writesPerOffset.find (row);
1692
+ if (spillIt == writesPerOffset.end () ||
1693
+ isIncompatibleEMCm ((*spillIt).second , inst)) {
1689
1694
found = false ;
1690
1695
break ;
1691
1696
}
@@ -1757,6 +1762,33 @@ void CoalesceSpillFills::spillFillCleanup() {
1757
1762
}
1758
1763
}
1759
1764
1765
// Reports whether inst1 and inst2 use execution masks that spill/fill cleanup
// must not treat as interchangeable.
// Returns true only when curBB is divergent, isCm is set, and exactly one of
// the two instructions reports isWriteEnableInst(); returns false otherwise.
// Asserts that curBB is set and that both instructions are found in curBB.
+ bool CoalesceSpillFills::isIncompatibleEMCm (G4_INST *inst1,
1766
+ G4_INST *inst2) const {
1767
+ vISA_ASSERT (curBB, " expecting valid G4_BB* containing inst1, inst2" );
1768
+ vISA_ASSERT (std::find (curBB->begin (), curBB->end (), inst1) != curBB->end (),
1769
+ " expecting inst1 in bb" );
1770
+ vISA_ASSERT (std::find (curBB->begin (), curBB->end (), inst2) != curBB->end (),
1771
+ " expecting inst2 in bb" );
1772
+
1773
+ if (curBB->isDivergent () && isCm) {
1774
+ // A Cm program may write a variable using NoMask once and then
1775
+ // write it again with default EM. A later use of the variable could
1776
+ // use NoMask. For example,
1777
+ // op1 (16) V10, ... {NoMask}
1778
+ // op2 (16) V10, ... {H1}
1779
+ // op3 (16) ..., V10 {NoMask}
1780
+ //
1781
+ // If V10 is spilled in the above snippet, we'll emit 2 spill operations
1782
+ // and 1 fill operation. The spill for op2 doesn't use NoMask as the send
1783
+ // msg can directly rely on EM behavior. Since the fill uses NoMask, spill
1784
+ // cleanup cannot coalesce the second spill and fill away. So for Cm,
1785
+ // we disallow spill cleanup between a NoMask fill and default EM
1786
+ // spill in divergent BBs.
1787
+ return (inst1->isWriteEnableInst () ^ inst2->isWriteEnableInst ()); // true iff exactly one of the two is write-enabled (presumably NoMask)
1788
+ }
1789
+ return false ; // not a divergent Cm block: no incompatibility reported
1790
+ }
1791
+
1760
1792
void CoalesceSpillFills::removeRedundantWrites () {
1761
1793
typedef std::list<std::pair<G4_BB *, INST_LIST_ITER>> SPILLS;
1762
1794
typedef std::list<std::pair<G4_BB *, INST_LIST_ITER>> FILLS;
@@ -1766,6 +1798,7 @@ void CoalesceSpillFills::removeRedundantWrites() {
1766
1798
// 1. Successive writes to same offset without a fill in between,
1767
1799
// 2. Writes in program without any fill from that slot throughout
1768
1800
for (auto bb : kernel.fg ) {
1801
+ curBB = bb;
1769
1802
auto endIt = bb->end ();
1770
1803
endIt--;
1771
1804
// Store spill slots that are written into, along with the emask used
0 commit comments