Skip to content

Commit e7080fd

Browse files
committed
[SLP] Extra check whether the instruction marked for removal must be replaced in reduction ops
If the instruction is vectorized and is part of the reduced values' gather/buildvector node, it must be properly replaced in the reduction operation instructions before it is removed, to avoid a compiler crash. Fixes #114371
1 parent 71cfa38 commit e7080fd

File tree

2 files changed

+57
-5
lines changed

2 files changed

+57
-5
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16127,11 +16127,13 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
1612716127
if (IE->Idx != 0 &&
1612816128
!(VectorizableTree.front()->isGather() &&
1612916129
!IE->UserTreeIndices.empty() &&
16130-
any_of(IE->UserTreeIndices,
16131-
[&](const EdgeInfo &EI) {
16132-
return EI.UserTE == VectorizableTree.front().get() &&
16133-
EI.EdgeIdx == UINT_MAX;
16134-
})) &&
16130+
(ValueToGatherNodes.lookup(I).contains(
16131+
VectorizableTree.front().get()) ||
16132+
any_of(IE->UserTreeIndices,
16133+
[&](const EdgeInfo &EI) {
16134+
return EI.UserTE == VectorizableTree.front().get() &&
16135+
EI.EdgeIdx == UINT_MAX;
16136+
}))) &&
1613516137
!(GatheredLoadsEntriesFirst.has_value() &&
1613616138
IE->Idx >= *GatheredLoadsEntriesFirst &&
1613716139
VectorizableTree.front()->isGather() &&
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define i32 @test(ptr %c, i16 %a, i16 %0) {
5+
; CHECK-LABEL: define i32 @test(
6+
; CHECK-SAME: ptr [[C:%.*]], i16 [[A:%.*]], i16 [[TMP0:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i32 0
9+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <4 x i32> zeroinitializer
10+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
11+
; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i1> [[TMP3]] to <4 x i16>
12+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <2 x i32> <i32 poison, i32 0>
13+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i16> [[TMP5]], i16 [[TMP0]], i32 0
14+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i16> [[TMP6]], <2 x i16> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
15+
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <4 x i16> [[TMP7]], [[TMP4]]
16+
; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i16 [[A]], -2
17+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 poison>
18+
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i1> [[TMP10]], i1 [[TMP9]], i32 7
19+
; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> [[TMP11]], <4 x i1> [[TMP8]], i64 0)
20+
; CHECK-NEXT: [[TMP13:%.*]] = freeze <8 x i1> [[TMP12]]
21+
; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP13]])
22+
; CHECK-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
23+
; CHECK-NEXT: store i32 [[TMP15]], ptr [[C]], align 4
24+
; CHECK-NEXT: ret i32 0
25+
;
26+
entry:
27+
%tobool = icmp ne i16 %a, 0
28+
%1 = zext i1 %tobool to i16
29+
%cmp3 = icmp ugt i16 %0, %1
30+
%2 = and i1 %tobool, %cmp3
31+
%tobool.1 = icmp ne i16 %a, 0
32+
%3 = zext i1 %tobool.1 to i16
33+
%cmp3.1 = icmp ugt i16 %0, %3
34+
%4 = and i1 %tobool.1, %cmp3.1
35+
%5 = select i1 %2, i1 %4, i1 false
36+
%tobool.2 = icmp ne i16 %a, 0
37+
%6 = zext i1 %tobool.2 to i16
38+
%cmp3.2 = icmp ugt i16 %0, %6
39+
%7 = and i1 %tobool.2, %cmp3.2
40+
%8 = select i1 %5, i1 %7, i1 false
41+
%tobool.3 = icmp ne i16 %a, 0
42+
%9 = zext i1 %tobool.3 to i16
43+
%cmp3.3 = icmp ugt i16 %a, %9
44+
%10 = icmp ult i16 %a, -2
45+
%11 = and i1 %10, %cmp3.3
46+
%12 = select i1 %8, i1 %11, i1 false
47+
%13 = zext i1 %12 to i32
48+
store i32 %13, ptr %c, align 4
49+
ret i32 0
50+
}

0 commit comments

Comments
 (0)