Skip to content

Commit 8d7a6e2

Browse files
committed
[SLP] Fix a crash for a gather node with instructions from different basic
blocks when the cost threshold is very low.
1 parent 6898147 commit 8d7a6e2

File tree

2 files changed

+32
-3
lines changed

2 files changed

+32
-3
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9275,11 +9275,16 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
92759275

92769276
// Check if any of the gather nodes forms an insertelement buildvector
92779277
// somewhere.
9278-
if (any_of(VectorizableTree, [](const std::unique_ptr<TreeEntry> &TE) {
9278+
bool IsAllowedSingleBVNode =
9279+
VectorizableTree.size() > 1 ||
9280+
(VectorizableTree.size() == 1 && VectorizableTree.front()->getOpcode() &&
9281+
allSameBlock(VectorizableTree.front()->Scalars));
9282+
if (any_of(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
92799283
return TE->State == TreeEntry::NeedToGather &&
9280-
all_of(TE->Scalars, [](Value *V) {
9284+
all_of(TE->Scalars, [&](Value *V) {
92819285
return isa<ExtractElementInst, UndefValue>(V) ||
9282-
(!V->hasNUsesOrMore(UsesLimit) &&
9286+
(IsAllowedSingleBVNode &&
9287+
!V->hasNUsesOrMore(UsesLimit) &&
92839288
any_of(V->users(), [](User *U) {
92849289
return isa<InsertElementInst>(U);
92859290
}));
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux -mattr="-avx512pf,+avx512f,+avx512bw" -slp-threshold=-100 < %s | FileCheck %s
3+
4+
define i1 @foo(i32 %a) {
5+
; CHECK-LABEL: @foo(
6+
; CHECK-NEXT: entry:
7+
; CHECK-NEXT: [[TMP0:%.*]] = sub nsw i32 0, [[A:%.*]]
8+
; CHECK-NEXT: br label [[BB4:%.*]]
9+
; CHECK: bb1:
10+
; CHECK-NEXT: [[LOCAL:%.*]] = sub nsw i32 0, 0
11+
; CHECK-NEXT: [[INS1:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0
12+
; CHECK-NEXT: [[ADD:%.*]] = icmp eq i32 [[TMP0]], [[LOCAL]]
13+
; CHECK-NEXT: ret i1 [[ADD]]
14+
;
15+
entry:
16+
%0 = sub nsw i32 0, %a
17+
br label %bb1
18+
19+
bb1:
20+
%local = sub nsw i32 0, 0
21+
%ins1 = insertelement <2 x i32> poison, i32 %0, i32 0
22+
%add = icmp eq i32 %0, %local
23+
ret i1 %add
24+
}

0 commit comments

Comments
 (0)