Skip to content

Commit 98bb354

Browse files
committed
[SLP]Fix PR107037: correctly track original/modified (after vectorization) reduced values
Need to correctly track reduced values with multiple uses in the same reduction emission attempt. Otherwise, the number of reuses might be calculated incorrectly, which may cause a compiler crash. Fixes #107037.
1 parent 7d3b81d commit 98bb354

File tree

2 files changed

+66
-14
lines changed

2 files changed

+66
-14
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17778,10 +17778,12 @@ class HorizontalReduction {
1777817778
// Emit code for constant values.
1777917779
if (Candidates.size() > 1 && allConstant(Candidates)) {
1778017780
Value *Res = Candidates.front();
17781-
++VectorizedVals.try_emplace(Candidates.front(), 0).first->getSecond();
17781+
Value *OrigV = TrackedToOrig.find(Candidates.front())->second;
17782+
++VectorizedVals.try_emplace(OrigV).first->getSecond();
1778217783
for (Value *VC : ArrayRef(Candidates).drop_front()) {
1778317784
Res = createOp(Builder, RdxKind, Res, VC, "const.rdx", ReductionOps);
17784-
++VectorizedVals.try_emplace(VC, 0).first->getSecond();
17785+
Value *OrigV = TrackedToOrig.find(VC)->second;
17786+
++VectorizedVals.try_emplace(OrigV).first->getSecond();
1778517787
if (auto *ResI = dyn_cast<Instruction>(Res))
1778617788
V.analyzedReductionRoot(ResI);
1778717789
}
@@ -17802,8 +17804,10 @@ class HorizontalReduction {
1780217804
// Gather same values.
1780317805
MapVector<Value *, unsigned> SameValuesCounter;
1780417806
if (IsSupportedHorRdxIdentityOp)
17805-
for (Value *V : Candidates)
17806-
++SameValuesCounter.insert(std::make_pair(V, 0)).first->second;
17807+
for (Value *V : Candidates) {
17808+
Value *OrigV = TrackedToOrig.find(V)->second;
17809+
++SameValuesCounter.try_emplace(OrigV).first->second;
17810+
}
1780717811
// Used to check if the reduced values used same number of times. In this
1780817812
// case the compiler may produce better code. E.g. if reduced values are
1780917813
// aabbccdd (8 x values), then the first node of the tree will have a node
@@ -17827,12 +17831,12 @@ class HorizontalReduction {
1782717831
});
1782817832
Candidates.resize(SameValuesCounter.size());
1782917833
transform(SameValuesCounter, Candidates.begin(),
17830-
[](const auto &P) { return P.first; });
17834+
[&](const auto &P) { return TrackedVals.at(P.first); });
1783117835
NumReducedVals = Candidates.size();
1783217836
// Have a reduction of the same element.
1783317837
if (NumReducedVals == 1) {
1783417838
Value *OrigV = TrackedToOrig.find(Candidates.front())->second;
17835-
unsigned Cnt = SameValuesCounter.lookup(OrigV);
17839+
unsigned Cnt = SameValuesCounter.find(OrigV)->second;
1783617840
Value *RedVal =
1783717841
emitScaleForReusedOps(Candidates.front(), Builder, Cnt);
1783817842
VectorizedTree = GetNewVectorizedTree(VectorizedTree, RedVal);
@@ -17937,7 +17941,7 @@ class HorizontalReduction {
1793717941
continue;
1793817942
Value *V = Candidates[Cnt];
1793917943
Value *OrigV = TrackedToOrig.find(V)->second;
17940-
++SameValuesCounter[OrigV];
17944+
++SameValuesCounter.find(OrigV)->second;
1794117945
}
1794217946
}
1794317947
SmallPtrSet<Value *, 4> VLScalars(VL.begin(), VL.end());
@@ -17956,8 +17960,8 @@ class HorizontalReduction {
1795617960
continue;
1795717961
}
1795817962
Value *OrigV = TrackedToOrig.find(RdxVal)->second;
17959-
unsigned NumOps =
17960-
VectorizedVals.lookup(RdxVal) + SameValuesCounter[OrigV];
17963+
unsigned NumOps = VectorizedVals.lookup(OrigV) +
17964+
SameValuesCounter.find(OrigV)->second;
1796117965
if (NumOps != ReducedValsToOps.find(OrigV)->second.size())
1796217966
LocalExternallyUsedValues[RdxVal];
1796317967
}
@@ -18085,10 +18089,11 @@ class HorizontalReduction {
1808518089
for (Value *RdxVal : VL) {
1808618090
Value *OrigV = TrackedToOrig.find(RdxVal)->second;
1808718091
if (IsSupportedHorRdxIdentityOp) {
18088-
VectorizedVals.try_emplace(OrigV, SameValuesCounter[RdxVal]);
18092+
VectorizedVals.try_emplace(OrigV,
18093+
SameValuesCounter.find(OrigV)->second);
1808918094
continue;
1809018095
}
18091-
++VectorizedVals.try_emplace(OrigV, 0).first->getSecond();
18096+
++VectorizedVals.try_emplace(OrigV).first->getSecond();
1809218097
if (!V.isVectorized(RdxVal))
1809318098
RequiredExtract.insert(RdxVal);
1809418099
}
@@ -18099,10 +18104,10 @@ class HorizontalReduction {
1809918104
}
1810018105
if (OptReusedScalars && !AnyVectorized) {
1810118106
for (const std::pair<Value *, unsigned> &P : SameValuesCounter) {
18102-
Value *RedVal = emitScaleForReusedOps(P.first, Builder, P.second);
18107+
Value *RdxVal = TrackedVals.find(P.first)->second;
18108+
Value *RedVal = emitScaleForReusedOps(RdxVal, Builder, P.second);
1810318109
VectorizedTree = GetNewVectorizedTree(VectorizedTree, RedVal);
18104-
Value *OrigV = TrackedToOrig.find(P.first)->second;
18105-
VectorizedVals.try_emplace(OrigV, P.second);
18110+
VectorizedVals.try_emplace(P.first, P.second);
1810618111
}
1810718112
continue;
1810818113
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define i8 @test() {
5+
; CHECK-LABEL: define i8 @test() {
6+
; CHECK-NEXT: [[ENTRY:.*:]]
7+
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 0 to i8
8+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 0 to i8
9+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 0 to i8
10+
; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 0 to i8
11+
; CHECK-NEXT: [[TMP4:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> zeroinitializer)
12+
; CHECK-NEXT: [[OP_RDX:%.*]] = or i8 [[TMP4]], [[TMP0]]
13+
; CHECK-NEXT: [[OP_RDX1:%.*]] = or i8 [[OP_RDX]], [[TMP2]]
14+
; CHECK-NEXT: [[OP_RDX2:%.*]] = or i8 [[OP_RDX1]], [[TMP0]]
15+
; CHECK-NEXT: [[OP_RDX3:%.*]] = or i8 [[OP_RDX2]], [[TMP1]]
16+
; CHECK-NEXT: [[OP_RDX4:%.*]] = or i8 [[OP_RDX3]], [[TMP3]]
17+
; CHECK-NEXT: ret i8 [[OP_RDX4]]
18+
;
19+
entry:
20+
%0 = trunc i32 0 to i8
21+
%1 = add i8 %0, 0
22+
%2 = add i8 %0, 0
23+
%3 = add i8 %0, 0
24+
%4 = add i8 %0, 0
25+
%5 = trunc i32 0 to i8
26+
%6 = or i8 %5, %0
27+
%7 = or i8 %6, %2
28+
%8 = or i8 %7, %3
29+
%9 = or i8 %8, %0
30+
%10 = or i8 %9, %4
31+
%conv4 = or i8 %10, %1
32+
%11 = trunc i32 0 to i8
33+
%12 = add i8 %11, 0
34+
%conv7 = or i8 %conv4, %12
35+
%13 = add i8 %11, 0
36+
%14 = add i8 %11, 0
37+
%15 = add i8 %11, 0
38+
%16 = trunc i32 0 to i8
39+
%17 = or i8 %13, %16
40+
%18 = or i8 %17, %14
41+
%19 = or i8 %18, %11
42+
%20 = or i8 %19, %15
43+
%conv5 = or i8 %20, %conv7
44+
%21 = trunc i32 0 to i8
45+
%conv6 = or i8 %21, %conv5
46+
ret i8 %conv6
47+
}

0 commit comments

Comments
 (0)