Skip to content

Commit 6fbd1ad

Browse files
committed
Add hasState() check before calling getOpcode() on tree entries
1 parent c8e7c61 commit 6fbd1ad

File tree

2 files changed: +29 -3 lines changed

2 files changed

+29
-3
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9897,6 +9897,7 @@ void BoUpSLP::transformNodes() {
98979897
buildTree_rec(Slice, 0, EdgeInfo(&E, UINT_MAX));
98989898
if (PrevSize + 1 == VectorizableTree.size() &&
98999899
VectorizableTree[PrevSize]->isGather() &&
9900+
VectorizableTree[PrevSize]->hasState() &&
99009901
VectorizableTree[PrevSize]->getOpcode() !=
99019902
Instruction::ExtractElement &&
99029903
!isSplat(Slice)) {
@@ -13608,9 +13609,11 @@ BoUpSLP::isGatherShuffledEntry(
1360813609
if (!TE->UserTreeIndices.empty() &&
1360913610
TE->UserTreeIndices.front().UserTE->isGather() &&
1361013611
TE->UserTreeIndices.front().EdgeIdx == UINT_MAX) {
13611-
assert((TE->Idx == 0 || TE->getOpcode() == Instruction::ExtractElement ||
13612-
isSplat(TE->Scalars)) &&
13613-
"Expected splat or extractelements only node.");
13612+
assert(
13613+
(TE->Idx == 0 ||
13614+
(TE->hasState() && TE->getOpcode() == Instruction::ExtractElement) ||
13615+
isSplat(TE->Scalars)) &&
13616+
"Expected splat or extractelements only node.");
1361413617
return {};
1361513618
}
1361613619
unsigned SliceSize = getPartNumElems(VL.size(), NumParts);

llvm/test/Transforms/SLPVectorizer/InstructionsState-is-invalid-0.ll

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,29 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i6
55
target triple = "aarch64-unknown-linux-gnu"
66

77
define void @foo(ptr %0) {
8+
; CHECK-LABEL: @foo(
9+
; CHECK-NEXT: vector.scevcheck:
10+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[TMP0:%.*]], i64 4
11+
; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr null, i64 4
12+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 1
13+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[SCEVGEP]], i32 0
14+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x ptr> [[TMP2]], <4 x ptr> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
15+
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult <4 x ptr> [[TMP3]], zeroinitializer
16+
; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP4]], zeroinitializer
17+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0
18+
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> [[TMP6]], ptr [[SCEVGEP3]], i32 1
19+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x ptr> [[TMP7]], <4 x ptr> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
20+
; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <4 x ptr> [[TMP8]], zeroinitializer
21+
; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP9]], zeroinitializer
22+
; CHECK-NEXT: [[TMP11:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
23+
; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]])
24+
; CHECK-NEXT: [[OP_RDX:%.*]] = or i1 [[TMP11]], [[TMP12]]
25+
; CHECK-NEXT: br i1 [[OP_RDX]], label [[DOTLR_PH:%.*]], label [[VECTOR_PH:%.*]]
26+
; CHECK: vector.ph:
27+
; CHECK-NEXT: ret void
28+
; CHECK: .lr.ph:
29+
; CHECK-NEXT: ret void
30+
;
831
vector.scevcheck:
932
%scevgep = getelementptr i8, ptr %0, i64 4
1033
%scevgep3 = getelementptr i8, ptr null, i64 4

0 commit comments

Comments
 (0)