Skip to content

Commit 6ca5a41

Browse files
committed
[SLP]Fix PR87358: broken module, Instruction does not dominate all uses.
If the first node is a gather node whose scalars are all extractelement instructions (or undef values), the vector value still needs to be placed after all of those instructions, not after the very first one.
1 parent 2bf4889 commit 6ca5a41

File tree

2 files changed

+25
-3
lines changed

2 files changed

+25
-3
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10736,9 +10736,13 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
1073610736
[](Value *V) {
1073710737
return !isa<GetElementPtrInst>(V) && isa<Instruction>(V);
1073810738
})) ||
10739-
all_of(E->Scalars, [](Value *V) {
10740-
return !isVectorLikeInstWithConstOps(V) && isUsedOutsideBlock(V);
10741-
}))
10739+
all_of(E->Scalars,
10740+
[](Value *V) {
10741+
return !isVectorLikeInstWithConstOps(V) &&
10742+
isUsedOutsideBlock(V);
10743+
}) ||
10744+
(E->State == TreeEntry::NeedToGather && E->Idx == 0 &&
10745+
all_of(E->Scalars, IsaPred<ExtractElementInst, UndefValue>)))
1074210746
Res.second = FindLastInst();
1074310747
else
1074410748
Res.second = FindFirstInst();
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define void @test() {
5+
; CHECK-LABEL: define void @test() {
6+
; CHECK-NEXT: bb:
7+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i32> zeroinitializer, i32 0
8+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> zeroinitializer, i32 0
9+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> <i32 0, i32 undef>, i32 [[TMP1]], i32 1
10+
; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i32 [[TMP0]], [[TMP1]]
11+
; CHECK-NEXT: ret void
12+
;
13+
bb:
14+
%0 = extractelement <4 x i32> zeroinitializer, i32 0
15+
%1 = extractelement <2 x i32> zeroinitializer, i32 0
16+
%icmp = icmp ult i32 %0, %1
17+
ret void
18+
}

0 commit comments

Comments
 (0)