Skip to content

Commit 4a76f88

Browse files
alexey-bataev authored and joaosaffran committed
[SLP] Check the first instruction instead of the first scalar for subvectors
We need to check the first instruction instead of the first scalar for subvectors when trying to find a fully matched vectorized node in the graph. Fixes llvm#126909.
1 parent 1161cbe commit 4a76f88

File tree

2 files changed

+115
-6
lines changed

2 files changed

+115
-6
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9956,12 +9956,16 @@ void BoUpSLP::transformNodes() {
99569956
};
99579957
for (auto [Cnt, Sz] : Slices) {
99589958
ArrayRef<Value *> Slice = VL.slice(Cnt, Sz);
9959-
// If any instruction is vectorized already - do not try again.
9960-
if (TreeEntry *SE = getSameValuesTreeEntry(Slice.front(), Slice,
9961-
/*SameVF=*/true)) {
9962-
SE->UserTreeIndices.emplace_back(&E, UINT_MAX);
9963-
AddCombinedNode(SE->Idx, Cnt, Sz);
9964-
continue;
9959+
if (const auto *It = find_if(Slice, IsaPred<Instruction>);
9960+
It != Slice.end()) {
9961+
// If any instruction is vectorized already - do not try again.
9962+
if (TreeEntry *SE = getSameValuesTreeEntry(*It, Slice)) {
9963+
if (SE->getVectorFactor() != Sz)
9964+
continue;
9965+
SE->UserTreeIndices.emplace_back(&E, UINT_MAX);
9966+
AddCombinedNode(SE->Idx, Cnt, Sz);
9967+
continue;
9968+
}
99659969
}
99669970
unsigned PrevSize = VectorizableTree.size();
99679971
[[maybe_unused]] unsigned PrevEntriesSize =
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-10 < %s | FileCheck %s
3+
4+
define i32 @test(ptr %f, i1 %tobool.i.4, i32 %retval.0.i.219) {
5+
; CHECK-LABEL: define i32 @test(
6+
; CHECK-SAME: ptr [[F:%.*]], i1 [[TOBOOL_I_4:%.*]], i32 [[RETVAL_0_I_219:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*]]:
8+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[F]], align 4
9+
; CHECK-NEXT: br label %[[D_EXIT_3:.*]]
10+
; CHECK: [[IF_END_I_1:.*]]:
11+
; CHECK-NEXT: br label %[[D_EXIT_3]]
12+
; CHECK: [[IF_END_I_2:.*]]:
13+
; CHECK-NEXT: br i1 false, label %[[D_EXIT_3]], label %[[D_EXIT_6:.*]]
14+
; CHECK: [[D_EXIT_3]]:
15+
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ poison, %[[IF_END_I_2]] ], [ zeroinitializer, %[[ENTRY]] ], [ poison, %[[IF_END_I_1]] ]
16+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 poison, i32 1, i32 0, i32 0>, i32 [[RETVAL_0_I_219]], i32 0
17+
; CHECK-NEXT: br i1 [[TOBOOL_I_4]], label %[[D_EXIT_4:.*]], label %[[D_EXIT_6]]
18+
; CHECK: [[D_EXIT_4]]:
19+
; CHECK-NEXT: br label %[[D_EXIT_6]]
20+
; CHECK: [[IF_END_I_5:.*]]:
21+
; CHECK-NEXT: br i1 false, label %[[D_EXIT_6]], label %[[D_EXIT_7:.*]]
22+
; CHECK: [[D_EXIT_6]]:
23+
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ poison, %[[IF_END_I_5]] ], [ [[TMP1]], %[[D_EXIT_3]] ], [ poison, %[[IF_END_I_2]] ], [ [[TMP1]], %[[D_EXIT_4]] ]
24+
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i32> [ poison, %[[IF_END_I_5]] ], [ [[TMP2]], %[[D_EXIT_3]] ], [ poison, %[[IF_END_I_2]] ], [ zeroinitializer, %[[D_EXIT_4]] ]
25+
; CHECK-NEXT: br label %[[D_EXIT_7]]
26+
; CHECK: [[D_EXIT_7]]:
27+
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i32> [ [[TMP3]], %[[D_EXIT_6]] ], [ poison, %[[IF_END_I_5]] ]
28+
; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i32> [ [[TMP4]], %[[D_EXIT_6]] ], [ poison, %[[IF_END_I_5]] ]
29+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
30+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> <i32 poison, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
31+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> <i32 1, i32 poison, i32 poison, i32 1, i32 poison, i32 1, i32 1, i32 poison>, <8 x i32> <i32 8, i32 1, i32 2, i32 11, i32 poison, i32 13, i32 14, i32 poison>
32+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[TMP0]], i32 4
33+
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[RETVAL_0_I_219]], i32 7
34+
; CHECK-NEXT: [[TMP12:%.*]] = add <8 x i32> [[TMP11]], [[TMP7]]
35+
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
36+
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 1, i32 1>, i32 [[RETVAL_0_I_219]], i32 0
37+
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
38+
; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> [[TMP15]], [[TMP13]]
39+
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP12]])
40+
; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP16]])
41+
; CHECK-NEXT: [[OP_RDX4:%.*]] = or i32 [[TMP18]], [[TMP17]]
42+
; CHECK-NEXT: ret i32 [[OP_RDX4]]
43+
;
44+
entry:
45+
%0 = load i32, ptr %f, align 4
46+
br label %d.exit.3
47+
48+
if.end.i.1:
49+
br label %d.exit.3
50+
51+
if.end.i.2:
52+
br i1 false, label %d.exit.3, label %d.exit.6
53+
54+
d.exit.3:
55+
%retval.0.i.2191 = phi i32 [ 1, %if.end.i.2 ], [ 0, %entry ], [ 0, %if.end.i.1 ]
56+
%retval.0.i91117 = phi i32 [ 0, %if.end.i.2 ], [ 0, %entry ], [ 1, %if.end.i.1 ]
57+
br i1 %tobool.i.4, label %d.exit.4, label %d.exit.6
58+
59+
d.exit.4:
60+
br label %d.exit.6
61+
62+
if.end.i.5:
63+
br i1 false, label %d.exit.6, label %d.exit.7
64+
65+
d.exit.6:
66+
%retval.0.i.549 = phi i32 [ -1, %if.end.i.5 ], [ 0, %d.exit.3 ], [ 0, %if.end.i.2 ], [ 0, %d.exit.4 ]
67+
%retval.0.i.3272947 = phi i32 [ 0, %if.end.i.5 ], [ 1, %d.exit.3 ], [ 0, %if.end.i.2 ], [ 0, %d.exit.4 ]
68+
%retval.0.i.11315253145 = phi i32 [ 0, %if.end.i.5 ], [ %retval.0.i.2191, %d.exit.3 ], [ 0, %if.end.i.2 ], [ %retval.0.i.2191, %d.exit.4 ]
69+
%retval.0.i91117233343 = phi i32 [ 0, %if.end.i.5 ], [ %retval.0.i91117, %d.exit.3 ], [ 0, %if.end.i.2 ], [ %retval.0.i91117, %d.exit.4 ]
70+
%retval.0.i.219213541 = phi i32 [ 0, %if.end.i.5 ], [ %retval.0.i.219, %d.exit.3 ], [ 0, %if.end.i.2 ], [ 0, %d.exit.4 ]
71+
%retval.0.i.43739 = phi i32 [ 1, %if.end.i.5 ], [ 0, %d.exit.3 ], [ 0, %if.end.i.2 ], [ 0, %d.exit.4 ]
72+
br label %d.exit.7
73+
74+
d.exit.7:
75+
%retval.0.i.4373961 = phi i32 [ %retval.0.i.43739, %d.exit.6 ], [ 0, %if.end.i.5 ]
76+
%retval.0.i.21921354159 = phi i32 [ %retval.0.i.219213541, %d.exit.6 ], [ 0, %if.end.i.5 ]
77+
%retval.0.i9111723334357 = phi i32 [ %retval.0.i91117233343, %d.exit.6 ], [ 0, %if.end.i.5 ]
78+
%retval.0.i.1131525314555 = phi i32 [ %retval.0.i.11315253145, %d.exit.6 ], [ 0, %if.end.i.5 ]
79+
%retval.0.i.327294753 = phi i32 [ %retval.0.i.3272947, %d.exit.6 ], [ 0, %if.end.i.5 ]
80+
%retval.0.i.54951 = phi i32 [ %retval.0.i.549, %d.exit.6 ], [ 0, %if.end.i.5 ]
81+
%add.5 = add nsw i32 %retval.0.i.54951, 1
82+
%add.4 = add i32 %0, %retval.0.i.4373961
83+
%add.3 = add i32 %retval.0.i.219, %retval.0.i.327294753
84+
%add.2 = add i32 %retval.0.i.21921354159, 1
85+
%add.1 = add i32 %retval.0.i.219, %retval.0.i.1131525314555
86+
%add = add i32 %retval.0.i.219, %retval.0.i9111723334357
87+
%add1 = add nsw i32 %retval.0.i9111723334357, 1
88+
%1 = or i32 %add, %add1
89+
%2 = or i32 %add.1, %1
90+
%add1.1 = add nsw i32 %retval.0.i.1131525314555, 1
91+
%or2.1 = or i32 %2, %add1.1
92+
%3 = or i32 %add.2, %or2.1
93+
%add1.2 = add i32 %retval.0.i.219, %retval.0.i.21921354159
94+
%or2.2 = or i32 %3, %add1.2
95+
%4 = or i32 %add.3, %or2.2
96+
%add1.3 = add nsw i32 %retval.0.i.327294753, 1
97+
%or2.3 = or i32 %4, %add1.3
98+
%5 = or i32 %add.4, %or2.3
99+
%add1.4 = add nsw i32 %retval.0.i.4373961, 1
100+
%or2.4 = or i32 %5, %add1.4
101+
%6 = or i32 %add.5, %or2.4
102+
%add1.5 = add i32 %retval.0.i.219, %retval.0.i.54951
103+
%or2.5 = or i32 %6, %add1.5
104+
ret i32 %or2.5
105+
}

0 commit comments

Comments
 (0)