Skip to content

Commit af1e59a

Browse files
committed
[SLP] Fix PR107037: correctly track original/modified reduced values after vectorization
Reduced values with multiple uses need to be tracked correctly within the same reduction emission attempt. Otherwise, the number of reuses might be calculated incorrectly, which may cause a compiler crash. Fixes #107037
1 parent d94199c commit af1e59a

File tree

2 files changed

+85
-31
lines changed

2 files changed

+85
-31
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 38 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -17721,6 +17721,12 @@ class HorizontalReduction {
1772117721
for (Value *V : Candidates)
1772217722
TrackedVals.try_emplace(V, V);
1772317723

17724+
auto At = [](MapVector<Value *, unsigned> &MV, Value *V) -> unsigned & {
17725+
auto *It = MV.find(V);
17726+
assert(It != MV.end() && "Unable to find given key.");
17727+
return It->second;
17728+
};
17729+
1772417730
DenseMap<Value *, unsigned> VectorizedVals(ReducedVals.size());
1772517731
// List of the values that were reduced in other trees as part of gather
1772617732
// nodes and thus requiring extract if fully vectorized in other trees.
@@ -17738,7 +17744,7 @@ class HorizontalReduction {
1773817744
Candidates.reserve(2 * OrigReducedVals.size());
1773917745
DenseMap<Value *, Value *> TrackedToOrig(2 * OrigReducedVals.size());
1774017746
for (unsigned Cnt = 0, Sz = OrigReducedVals.size(); Cnt < Sz; ++Cnt) {
17741-
Value *RdxVal = TrackedVals.find(OrigReducedVals[Cnt])->second;
17747+
Value *RdxVal = TrackedVals.at(OrigReducedVals[Cnt]);
1774217748
// Check if the reduction value was not overridden by the extractelement
1774317749
// instruction because of the vectorization and exclude it, if it is not
1774417750
// compatible with other values.
@@ -17757,7 +17763,7 @@ class HorizontalReduction {
1775717763
I + 1 < E) {
1775817764
SmallVector<Value *> CommonCandidates(Candidates);
1775917765
for (Value *RV : ReducedVals[I + 1]) {
17760-
Value *RdxVal = TrackedVals.find(RV)->second;
17766+
Value *RdxVal = TrackedVals.at(RV);
1776117767
// Check if the reduction value was not overridden by the
1776217768
// extractelement instruction because of the vectorization and
1776317769
// exclude it, if it is not compatible with other values.
@@ -17778,10 +17784,12 @@ class HorizontalReduction {
1777817784
// Emit code for constant values.
1777917785
if (Candidates.size() > 1 && allConstant(Candidates)) {
1778017786
Value *Res = Candidates.front();
17781-
++VectorizedVals.try_emplace(Candidates.front(), 0).first->getSecond();
17787+
Value *OrigV = TrackedToOrig.at(Candidates.front());
17788+
++VectorizedVals.try_emplace(OrigV).first->getSecond();
1778217789
for (Value *VC : ArrayRef(Candidates).drop_front()) {
1778317790
Res = createOp(Builder, RdxKind, Res, VC, "const.rdx", ReductionOps);
17784-
++VectorizedVals.try_emplace(VC, 0).first->getSecond();
17791+
Value *OrigV = TrackedToOrig.at(VC);
17792+
++VectorizedVals.try_emplace(OrigV).first->getSecond();
1778517793
if (auto *ResI = dyn_cast<Instruction>(Res))
1778617794
V.analyzedReductionRoot(ResI);
1778717795
}
@@ -17802,8 +17810,10 @@ class HorizontalReduction {
1780217810
// Gather same values.
1780317811
MapVector<Value *, unsigned> SameValuesCounter;
1780417812
if (IsSupportedHorRdxIdentityOp)
17805-
for (Value *V : Candidates)
17806-
++SameValuesCounter.insert(std::make_pair(V, 0)).first->second;
17813+
for (Value *V : Candidates) {
17814+
Value *OrigV = TrackedToOrig.at(V);
17815+
++SameValuesCounter.try_emplace(OrigV).first->second;
17816+
}
1780717817
// Used to check if the reduced values used same number of times. In this
1780817818
// case the compiler may produce better code. E.g. if reduced values are
1780917819
// aabbccdd (8 x values), then the first node of the tree will have a node
@@ -17827,12 +17837,12 @@ class HorizontalReduction {
1782717837
});
1782817838
Candidates.resize(SameValuesCounter.size());
1782917839
transform(SameValuesCounter, Candidates.begin(),
17830-
[](const auto &P) { return P.first; });
17840+
[&](const auto &P) { return TrackedVals.at(P.first); });
1783117841
NumReducedVals = Candidates.size();
1783217842
// Have a reduction of the same element.
1783317843
if (NumReducedVals == 1) {
17834-
Value *OrigV = TrackedToOrig.find(Candidates.front())->second;
17835-
unsigned Cnt = SameValuesCounter.lookup(OrigV);
17844+
Value *OrigV = TrackedToOrig.at(Candidates.front());
17845+
unsigned Cnt = At(SameValuesCounter, OrigV);
1783617846
Value *RedVal =
1783717847
emitScaleForReusedOps(Candidates.front(), Builder, Cnt);
1783817848
VectorizedTree = GetNewVectorizedTree(VectorizedTree, RedVal);
@@ -17936,8 +17946,8 @@ class HorizontalReduction {
1793617946
if (Cnt >= Pos && Cnt < Pos + ReduxWidth)
1793717947
continue;
1793817948
Value *V = Candidates[Cnt];
17939-
Value *OrigV = TrackedToOrig.find(V)->second;
17940-
++SameValuesCounter[OrigV];
17949+
Value *OrigV = TrackedToOrig.at(V);
17950+
++SameValuesCounter.try_emplace(OrigV).first->second;
1794117951
}
1794217952
}
1794317953
SmallPtrSet<Value *, 4> VLScalars(VL.begin(), VL.end());
@@ -17955,10 +17965,10 @@ class HorizontalReduction {
1795517965
LocalExternallyUsedValues[RdxVal];
1795617966
continue;
1795717967
}
17958-
Value *OrigV = TrackedToOrig.find(RdxVal)->second;
17968+
Value *OrigV = TrackedToOrig.at(RdxVal);
1795917969
unsigned NumOps =
17960-
VectorizedVals.lookup(RdxVal) + SameValuesCounter[OrigV];
17961-
if (NumOps != ReducedValsToOps.find(OrigV)->second.size())
17970+
VectorizedVals.lookup(OrigV) + At(SameValuesCounter, OrigV);
17971+
if (NumOps != ReducedValsToOps.at(OrigV).size())
1796217972
LocalExternallyUsedValues[RdxVal];
1796317973
}
1796417974
// Do not need the list of reused scalars in regular mode anymore.
@@ -17983,9 +17993,8 @@ class HorizontalReduction {
1798317993
break;
1798417994
if (Cost >= -SLPCostThreshold) {
1798517995
V.getORE()->emit([&]() {
17986-
return OptimizationRemarkMissed(
17987-
SV_NAME, "HorSLPNotBeneficial",
17988-
ReducedValsToOps.find(VL[0])->second.front())
17996+
return OptimizationRemarkMissed(SV_NAME, "HorSLPNotBeneficial",
17997+
ReducedValsToOps.at(VL[0]).front())
1798917998
<< "Vectorizing horizontal reduction is possible "
1799017999
<< "but not beneficial with cost " << ore::NV("Cost", Cost)
1799118000
<< " and threshold "
@@ -17999,9 +18008,8 @@ class HorizontalReduction {
1799918008
LLVM_DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:"
1800018009
<< Cost << ". (HorRdx)\n");
1800118010
V.getORE()->emit([&]() {
18002-
return OptimizationRemark(
18003-
SV_NAME, "VectorizedHorizontalReduction",
18004-
ReducedValsToOps.find(VL[0])->second.front())
18011+
return OptimizationRemark(SV_NAME, "VectorizedHorizontalReduction",
18012+
ReducedValsToOps.at(VL[0]).front())
1800518013
<< "Vectorized horizontal reduction with cost "
1800618014
<< ore::NV("Cost", Cost) << " and with tree size "
1800718015
<< ore::NV("TreeSize", V.getTreeSize());
@@ -18083,12 +18091,12 @@ class HorizontalReduction {
1808318091
VectorizedTree = GetNewVectorizedTree(VectorizedTree, ReducedSubTree);
1808418092
// Count vectorized reduced values to exclude them from final reduction.
1808518093
for (Value *RdxVal : VL) {
18086-
Value *OrigV = TrackedToOrig.find(RdxVal)->second;
18094+
Value *OrigV = TrackedToOrig.at(RdxVal);
1808718095
if (IsSupportedHorRdxIdentityOp) {
18088-
VectorizedVals.try_emplace(OrigV, SameValuesCounter[RdxVal]);
18096+
VectorizedVals.try_emplace(OrigV, At(SameValuesCounter, OrigV));
1808918097
continue;
1809018098
}
18091-
++VectorizedVals.try_emplace(OrigV, 0).first->getSecond();
18099+
++VectorizedVals.try_emplace(OrigV).first->getSecond();
1809218100
if (!V.isVectorized(RdxVal))
1809318101
RequiredExtract.insert(RdxVal);
1809418102
}
@@ -18099,10 +18107,10 @@ class HorizontalReduction {
1809918107
}
1810018108
if (OptReusedScalars && !AnyVectorized) {
1810118109
for (const std::pair<Value *, unsigned> &P : SameValuesCounter) {
18102-
Value *RedVal = emitScaleForReusedOps(P.first, Builder, P.second);
18110+
Value *RdxVal = TrackedVals.at(P.first);
18111+
Value *RedVal = emitScaleForReusedOps(RdxVal, Builder, P.second);
1810318112
VectorizedTree = GetNewVectorizedTree(VectorizedTree, RedVal);
18104-
Value *OrigV = TrackedToOrig.find(P.first)->second;
18105-
VectorizedVals.try_emplace(OrigV, P.second);
18113+
VectorizedVals.try_emplace(P.first, P.second);
1810618114
}
1810718115
continue;
1810818116
}
@@ -18190,8 +18198,7 @@ class HorizontalReduction {
1819018198
continue;
1819118199
unsigned NumOps = VectorizedVals.lookup(RdxVal);
1819218200
for (Instruction *RedOp :
18193-
ArrayRef(ReducedValsToOps.find(RdxVal)->second)
18194-
.drop_back(NumOps))
18201+
ArrayRef(ReducedValsToOps.at(RdxVal)).drop_back(NumOps))
1819518202
ExtraReductions.emplace_back(RedOp, RdxVal);
1819618203
}
1819718204
}
@@ -18430,7 +18437,7 @@ class HorizontalReduction {
1843018437
// root = mul prev_root, <1, 1, n, 1>
1843118438
SmallVector<Constant *> Vals;
1843218439
for (Value *V : VL) {
18433-
unsigned Cnt = SameValuesCounter.lookup(TrackedToOrig.find(V)->second);
18440+
unsigned Cnt = SameValuesCounter.lookup(TrackedToOrig.at(V));
1843418441
Vals.push_back(ConstantInt::get(V->getType(), Cnt, /*IsSigned=*/false));
1843518442
}
1843618443
auto *Scale = ConstantVector::get(Vals);
@@ -18468,7 +18475,7 @@ class HorizontalReduction {
1846818475
bool NeedShuffle = false;
1846918476
for (unsigned I = 0, VF = VL.size(); I < VF; ++I) {
1847018477
Value *V = VL[I];
18471-
unsigned Cnt = SameValuesCounter.lookup(TrackedToOrig.find(V)->second);
18478+
unsigned Cnt = SameValuesCounter.lookup(TrackedToOrig.at(V));
1847218479
if (Cnt % 2 == 0) {
1847318480
Mask[I] = VF;
1847418481
NeedShuffle = true;
@@ -18488,7 +18495,7 @@ class HorizontalReduction {
1848818495
// root = fmul prev_root, <1.0, 1.0, n.0, 1.0>
1848918496
SmallVector<Constant *> Vals;
1849018497
for (Value *V : VL) {
18491-
unsigned Cnt = SameValuesCounter.lookup(TrackedToOrig.find(V)->second);
18498+
unsigned Cnt = SameValuesCounter.lookup(TrackedToOrig.at(V));
1849218499
Vals.push_back(ConstantFP::get(V->getType(), Cnt));
1849318500
}
1849418501
auto *Scale = ConstantVector::get(Vals);
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define i8 @test() {
5+
; CHECK-LABEL: define i8 @test() {
6+
; CHECK-NEXT: [[ENTRY:.*:]]
7+
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 0 to i8
8+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 0 to i8
9+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 0 to i8
10+
; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 0 to i8
11+
; CHECK-NEXT: [[TMP4:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> zeroinitializer)
12+
; CHECK-NEXT: [[OP_RDX:%.*]] = or i8 [[TMP4]], [[TMP0]]
13+
; CHECK-NEXT: [[OP_RDX1:%.*]] = or i8 [[OP_RDX]], [[TMP2]]
14+
; CHECK-NEXT: [[OP_RDX2:%.*]] = or i8 [[OP_RDX1]], [[TMP0]]
15+
; CHECK-NEXT: [[OP_RDX3:%.*]] = or i8 [[OP_RDX2]], [[TMP1]]
16+
; CHECK-NEXT: [[OP_RDX4:%.*]] = or i8 [[OP_RDX3]], [[TMP3]]
17+
; CHECK-NEXT: ret i8 [[OP_RDX4]]
18+
;
19+
entry:
20+
%0 = trunc i32 0 to i8
21+
%1 = add i8 %0, 0
22+
%2 = add i8 %0, 0
23+
%3 = add i8 %0, 0
24+
%4 = add i8 %0, 0
25+
%5 = trunc i32 0 to i8
26+
%6 = or i8 %5, %0
27+
%7 = or i8 %6, %2
28+
%8 = or i8 %7, %3
29+
%9 = or i8 %8, %0
30+
%10 = or i8 %9, %4
31+
%conv4 = or i8 %10, %1
32+
%11 = trunc i32 0 to i8
33+
%12 = add i8 %11, 0
34+
%conv7 = or i8 %conv4, %12
35+
%13 = add i8 %11, 0
36+
%14 = add i8 %11, 0
37+
%15 = add i8 %11, 0
38+
%16 = trunc i32 0 to i8
39+
%17 = or i8 %13, %16
40+
%18 = or i8 %17, %14
41+
%19 = or i8 %18, %11
42+
%20 = or i8 %19, %15
43+
%conv5 = or i8 %20, %conv7
44+
%21 = trunc i32 0 to i8
45+
%conv6 = or i8 %21, %conv5
46+
ret i8 %conv6
47+
}

0 commit comments

Comments
 (0)