@@ -17721,6 +17721,12 @@ class HorizontalReduction {
17721
17721
for (Value *V : Candidates)
17722
17722
TrackedVals.try_emplace(V, V);
17723
17723
17724
// Checked accessor for a MapVector<Value *, unsigned>: finds the entry keyed
// by V in MV and returns a mutable reference to its mapped unsigned value.
// Uses find(), so a missing key is never inserted by this helper.
+ auto At = [](MapVector<Value *, unsigned> &MV, Value *V) -> unsigned & {
17725
+ auto *It = MV.find(V);
17726
// The key is required to be present. This is only checked when assertions
// are enabled; otherwise the iterator is dereferenced unchecked below.
+ assert(It != MV.end() && "Unable to find given key.");
17727
+ return It->second;
17728
+ };
17729
+
17724
17730
DenseMap<Value *, unsigned> VectorizedVals(ReducedVals.size());
17725
17731
// List of the values that were reduced in other trees as part of gather
17726
17732
// nodes and thus requiring extract if fully vectorized in other trees.
@@ -17738,7 +17744,7 @@ class HorizontalReduction {
17738
17744
Candidates.reserve(2 * OrigReducedVals.size());
17739
17745
DenseMap<Value *, Value *> TrackedToOrig(2 * OrigReducedVals.size());
17740
17746
for (unsigned Cnt = 0, Sz = OrigReducedVals.size(); Cnt < Sz; ++Cnt) {
17741
- Value *RdxVal = TrackedVals.find (OrigReducedVals[Cnt])->second ;
17747
+ Value *RdxVal = TrackedVals.at (OrigReducedVals[Cnt]);
17742
17748
// Check if the reduction value was not overridden by the extractelement
17743
17749
// instruction because of the vectorization and exclude it, if it is not
17744
17750
// compatible with other values.
@@ -17757,7 +17763,7 @@ class HorizontalReduction {
17757
17763
I + 1 < E) {
17758
17764
SmallVector<Value *> CommonCandidates(Candidates);
17759
17765
for (Value *RV : ReducedVals[I + 1]) {
17760
- Value *RdxVal = TrackedVals.find (RV)->second ;
17766
+ Value *RdxVal = TrackedVals.at (RV);
17761
17767
// Check if the reduction value was not overridden by the
17762
17768
// extractelement instruction because of the vectorization and
17763
17769
// exclude it, if it is not compatible with other values.
@@ -17778,10 +17784,12 @@ class HorizontalReduction {
17778
17784
// Emit code for constant values.
17779
17785
if (Candidates.size() > 1 && allConstant(Candidates)) {
17780
17786
Value *Res = Candidates.front();
17781
- ++VectorizedVals.try_emplace(Candidates.front(), 0).first->getSecond();
17787
+ Value *OrigV = TrackedToOrig.at(Candidates.front());
17788
+ ++VectorizedVals.try_emplace(OrigV).first->getSecond();
17782
17789
for (Value *VC : ArrayRef(Candidates).drop_front()) {
17783
17790
Res = createOp(Builder, RdxKind, Res, VC, "const.rdx", ReductionOps);
17784
- ++VectorizedVals.try_emplace(VC, 0).first->getSecond();
17791
+ Value *OrigV = TrackedToOrig.at(VC);
17792
+ ++VectorizedVals.try_emplace(OrigV).first->getSecond();
17785
17793
if (auto *ResI = dyn_cast<Instruction>(Res))
17786
17794
V.analyzedReductionRoot(ResI);
17787
17795
}
@@ -17802,8 +17810,10 @@ class HorizontalReduction {
17802
17810
// Gather same values.
17803
17811
MapVector<Value *, unsigned> SameValuesCounter;
17804
17812
if (IsSupportedHorRdxIdentityOp)
17805
- for (Value *V : Candidates)
17806
- ++SameValuesCounter.insert(std::make_pair(V, 0)).first->second;
17813
+ for (Value *V : Candidates) {
17814
+ Value *OrigV = TrackedToOrig.at(V);
17815
+ ++SameValuesCounter.try_emplace(OrigV).first->second;
17816
+ }
17807
17817
// Used to check if the reduced values used same number of times. In this
17808
17818
// case the compiler may produce better code. E.g. if reduced values are
17809
17819
// aabbccdd (8 x values), then the first node of the tree will have a node
@@ -17827,12 +17837,12 @@ class HorizontalReduction {
17827
17837
});
17828
17838
Candidates.resize(SameValuesCounter.size());
17829
17839
transform(SameValuesCounter, Candidates.begin(),
17830
- [](const auto &P) { return P.first; });
17840
+ [& ](const auto &P) { return TrackedVals.at( P.first) ; });
17831
17841
NumReducedVals = Candidates.size();
17832
17842
// Have a reduction of the same element.
17833
17843
if (NumReducedVals == 1) {
17834
- Value *OrigV = TrackedToOrig.find (Candidates.front())->second ;
17835
- unsigned Cnt = SameValuesCounter.lookup( OrigV);
17844
+ Value *OrigV = TrackedToOrig.at (Candidates.front());
17845
+ unsigned Cnt = At(SameValuesCounter, OrigV);
17836
17846
Value *RedVal =
17837
17847
emitScaleForReusedOps(Candidates.front(), Builder, Cnt);
17838
17848
VectorizedTree = GetNewVectorizedTree(VectorizedTree, RedVal);
@@ -17936,8 +17946,8 @@ class HorizontalReduction {
17936
17946
if (Cnt >= Pos && Cnt < Pos + ReduxWidth)
17937
17947
continue;
17938
17948
Value *V = Candidates[Cnt];
17939
- Value *OrigV = TrackedToOrig.find (V)->second ;
17940
- ++SameValuesCounter[ OrigV] ;
17949
+ Value *OrigV = TrackedToOrig.at (V);
17950
+ ++SameValuesCounter.try_emplace( OrigV).first->second ;
17941
17951
}
17942
17952
}
17943
17953
SmallPtrSet<Value *, 4> VLScalars(VL.begin(), VL.end());
@@ -17955,10 +17965,10 @@ class HorizontalReduction {
17955
17965
LocalExternallyUsedValues[RdxVal];
17956
17966
continue;
17957
17967
}
17958
- Value *OrigV = TrackedToOrig.find (RdxVal)->second ;
17968
+ Value *OrigV = TrackedToOrig.at (RdxVal);
17959
17969
unsigned NumOps =
17960
- VectorizedVals.lookup(RdxVal ) + SameValuesCounter[ OrigV] ;
17961
- if (NumOps != ReducedValsToOps.find (OrigV)->second .size())
17970
+ VectorizedVals.lookup(OrigV ) + At( SameValuesCounter, OrigV) ;
17971
+ if (NumOps != ReducedValsToOps.at (OrigV).size())
17962
17972
LocalExternallyUsedValues[RdxVal];
17963
17973
}
17964
17974
// Do not need the list of reused scalars in regular mode anymore.
@@ -17983,9 +17993,8 @@ class HorizontalReduction {
17983
17993
break;
17984
17994
if (Cost >= -SLPCostThreshold) {
17985
17995
V.getORE()->emit([&]() {
17986
- return OptimizationRemarkMissed(
17987
- SV_NAME, "HorSLPNotBeneficial",
17988
- ReducedValsToOps.find(VL[0])->second.front())
17996
+ return OptimizationRemarkMissed(SV_NAME, "HorSLPNotBeneficial",
17997
+ ReducedValsToOps.at(VL[0]).front())
17989
17998
<< "Vectorizing horizontal reduction is possible "
17990
17999
<< "but not beneficial with cost " << ore::NV("Cost", Cost)
17991
18000
<< " and threshold "
@@ -17999,9 +18008,8 @@ class HorizontalReduction {
17999
18008
LLVM_DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:"
18000
18009
<< Cost << ". (HorRdx)\n");
18001
18010
V.getORE()->emit([&]() {
18002
- return OptimizationRemark(
18003
- SV_NAME, "VectorizedHorizontalReduction",
18004
- ReducedValsToOps.find(VL[0])->second.front())
18011
+ return OptimizationRemark(SV_NAME, "VectorizedHorizontalReduction",
18012
+ ReducedValsToOps.at(VL[0]).front())
18005
18013
<< "Vectorized horizontal reduction with cost "
18006
18014
<< ore::NV("Cost", Cost) << " and with tree size "
18007
18015
<< ore::NV("TreeSize", V.getTreeSize());
@@ -18083,12 +18091,12 @@ class HorizontalReduction {
18083
18091
VectorizedTree = GetNewVectorizedTree(VectorizedTree, ReducedSubTree);
18084
18092
// Count vectorized reduced values to exclude them from final reduction.
18085
18093
for (Value *RdxVal : VL) {
18086
- Value *OrigV = TrackedToOrig.find (RdxVal)->second ;
18094
+ Value *OrigV = TrackedToOrig.at (RdxVal);
18087
18095
if (IsSupportedHorRdxIdentityOp) {
18088
- VectorizedVals.try_emplace(OrigV, SameValuesCounter[RdxVal] );
18096
+ VectorizedVals.try_emplace(OrigV, At( SameValuesCounter, OrigV) );
18089
18097
continue;
18090
18098
}
18091
- ++VectorizedVals.try_emplace(OrigV, 0 ).first->getSecond();
18099
+ ++VectorizedVals.try_emplace(OrigV).first->getSecond();
18092
18100
if (!V.isVectorized(RdxVal))
18093
18101
RequiredExtract.insert(RdxVal);
18094
18102
}
@@ -18099,10 +18107,10 @@ class HorizontalReduction {
18099
18107
}
18100
18108
if (OptReusedScalars && !AnyVectorized) {
18101
18109
for (const std::pair<Value *, unsigned> &P : SameValuesCounter) {
18102
- Value *RedVal = emitScaleForReusedOps(P.first, Builder, P.second);
18110
+ Value *RdxVal = TrackedVals.at(P.first);
18111
+ Value *RedVal = emitScaleForReusedOps(RdxVal, Builder, P.second);
18103
18112
VectorizedTree = GetNewVectorizedTree(VectorizedTree, RedVal);
18104
- Value *OrigV = TrackedToOrig.find(P.first)->second;
18105
- VectorizedVals.try_emplace(OrigV, P.second);
18113
+ VectorizedVals.try_emplace(P.first, P.second);
18106
18114
}
18107
18115
continue;
18108
18116
}
@@ -18190,8 +18198,7 @@ class HorizontalReduction {
18190
18198
continue;
18191
18199
unsigned NumOps = VectorizedVals.lookup(RdxVal);
18192
18200
for (Instruction *RedOp :
18193
- ArrayRef(ReducedValsToOps.find(RdxVal)->second)
18194
- .drop_back(NumOps))
18201
+ ArrayRef(ReducedValsToOps.at(RdxVal)).drop_back(NumOps))
18195
18202
ExtraReductions.emplace_back(RedOp, RdxVal);
18196
18203
}
18197
18204
}
@@ -18430,7 +18437,7 @@ class HorizontalReduction {
18430
18437
// root = mul prev_root, <1, 1, n, 1>
18431
18438
SmallVector<Constant *> Vals;
18432
18439
for (Value *V : VL) {
18433
- unsigned Cnt = SameValuesCounter.lookup(TrackedToOrig.find (V)->second );
18440
+ unsigned Cnt = SameValuesCounter.lookup(TrackedToOrig.at (V));
18434
18441
Vals.push_back(ConstantInt::get(V->getType(), Cnt, /*IsSigned=*/false));
18435
18442
}
18436
18443
auto *Scale = ConstantVector::get(Vals);
@@ -18468,7 +18475,7 @@ class HorizontalReduction {
18468
18475
bool NeedShuffle = false;
18469
18476
for (unsigned I = 0, VF = VL.size(); I < VF; ++I) {
18470
18477
Value *V = VL[I];
18471
- unsigned Cnt = SameValuesCounter.lookup(TrackedToOrig.find (V)->second );
18478
+ unsigned Cnt = SameValuesCounter.lookup(TrackedToOrig.at (V));
18472
18479
if (Cnt % 2 == 0) {
18473
18480
Mask[I] = VF;
18474
18481
NeedShuffle = true;
@@ -18488,7 +18495,7 @@ class HorizontalReduction {
18488
18495
// root = fmul prev_root, <1.0, 1.0, n.0, 1.0>
18489
18496
SmallVector<Constant *> Vals;
18490
18497
for (Value *V : VL) {
18491
- unsigned Cnt = SameValuesCounter.lookup(TrackedToOrig.find (V)->second );
18498
+ unsigned Cnt = SameValuesCounter.lookup(TrackedToOrig.at (V));
18492
18499
Vals.push_back(ConstantFP::get(V->getType(), Cnt));
18493
18500
}
18494
18501
auto *Scale = ConstantVector::get(Vals);
0 commit comments