Skip to content

Commit e8e6795

Browse files
committed
[SLP]Fix PR88123: use vectorized operands consistently.
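When collecting the source vectors for a shuffle of extractelement instructions, compare against the vectorized operand (VecOp) rather than the original vector operand of each extractelement instruction (EI->getVectorOperand()). Mixing the two can cause an extra vector operand to be falsely detected while shuffling the extractelements.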
1 parent 4ac2721 commit e8e6795

File tree

2 files changed

+36
-4
lines changed

2 files changed

+36
-4
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11755,8 +11755,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
1175511755
VecOp = TE->VectorizedValue;
1175611756
if (!Vec1) {
1175711757
Vec1 = VecOp;
11758-
} else if (Vec1 != EI->getVectorOperand()) {
11759-
assert((!Vec2 || Vec2 == EI->getVectorOperand()) &&
11758+
} else if (Vec1 != VecOp) {
11759+
assert((!Vec2 || Vec2 == VecOp) &&
1176011760
"Expected only 1 or 2 vectors shuffle.");
1176111761
Vec2 = VecOp;
1176211762
}
@@ -11796,8 +11796,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
1179611796
VecMask.assign(VecMask.size(), PoisonMaskElem);
1179711797
copy(SubMask, std::next(VecMask.begin(), I * SliceSize));
1179811798
if (TEs.size() == 1) {
11799-
IsUsedInExpr &=
11800-
FindReusedSplat(VecMask, TEs.front()->getVectorFactor(), I, SliceSize);
11799+
IsUsedInExpr &= FindReusedSplat(
11800+
VecMask, TEs.front()->getVectorFactor(), I, SliceSize);
1180111801
ShuffleBuilder.add(*TEs.front(), VecMask);
1180211802
if (TEs.front()->VectorizedValue)
1180311803
IsNonPoisoned &=
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt --passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -mattr=+avx512vl | FileCheck %s
3+
4+
define i32 @test() {
5+
; CHECK-LABEL: define i32 @test(
6+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: br label [[TMP1:%.*]]
8+
; CHECK: 1:
9+
; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x double> [ zeroinitializer, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[TMP1]] ]
10+
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.fma.v4f64(<4 x double> zeroinitializer, <4 x double> zeroinitializer, <4 x double> [[TMP2]])
11+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
12+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x double> zeroinitializer, <8 x double> [[TMP4]], <4 x i32> <i32 0, i32 8, i32 poison, i32 8>
13+
; CHECK-NEXT: [[TMP6]] = shufflevector <4 x double> [[TMP5]], <4 x double> <double poison, double poison, double 0.000000e+00, double poison>, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
14+
; CHECK-NEXT: br label [[TMP1]]
15+
;
16+
br label %1
17+
18+
1:
19+
%.i489 = phi double [ 0.000000e+00, %0 ], [ 0.000000e+00, %1 ]
20+
%.i1102 = phi double [ 0.000000e+00, %0 ], [ %.i1110, %1 ]
21+
%.i4105 = phi double [ 0.000000e+00, %0 ], [ %.i4113, %1 ]
22+
%.i14525 = call double @llvm.fma.f64(double 0.000000e+00, double 0.000000e+00, double %.i1102)
23+
%.i24526 = call double @llvm.fma.f64(double 0.000000e+00, double 0.000000e+00, double %.i489)
24+
%.i44529 = call double @llvm.fma.f64(double 0.000000e+00, double 0.000000e+00, double %.i4105)
25+
%.upto16034 = insertelement <8 x double> zeroinitializer, double %.i14525, i64 1
26+
%.upto26035 = insertelement <8 x double> %.upto16034, double %.i24526, i64 2
27+
%.upto36036 = insertelement <8 x double> %.upto26035, double %.i14525, i64 3
28+
%.upto46037 = insertelement <8 x double> %.upto36036, double %.i44529, i64 0
29+
%.i1110 = extractelement <8 x double> %.upto46037, i64 0
30+
%.i4113 = extractelement <8 x double> zeroinitializer, i64 0
31+
br label %1
32+
}

0 commit comments

Comments
 (0)