Skip to content

Commit af524de

Browse files
committed
[SLP]Do not include subvectors for fully matched buildvectors
If a buildvector node fully matches another node, its subvectors must be excluded when building the final shuffle; only a shuffle of the original node should be emitted. Fixes #122584
1 parent 34ba84f commit af524de

File tree

3 files changed

+102
-5
lines changed

3 files changed

+102
-5
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14935,8 +14935,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
1493514935
}
1493614936
}
1493714937
ShuffleBuilder.add(*FrontTE, Mask);
14938-
Res = ShuffleBuilder.finalize(E->getCommonMask(), SubVectors,
14939-
SubVectorsMask);
14938+
// Full matched entry found, no need to insert subvectors.
14939+
Res = ShuffleBuilder.finalize(E->getCommonMask(), {}, {});
1494014940
return Res;
1494114941
}
1494214942
if (!Resized) {

llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -259,11 +259,9 @@ define void @select_uniform_ugt_16xi8(ptr %ptr, i8 %x) {
259259
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP6]], <8 x i8> [[TMP0]], i64 0)
260260
; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP7]], <4 x i8> [[TMP3]], i64 12)
261261
; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt <16 x i8> [[TMP8]], splat (i8 -1)
262-
; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP8]], <8 x i8> [[TMP0]], i64 0)
263-
; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP10]], <4 x i8> [[TMP3]], i64 12)
264262
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x i8> poison, i8 [[X]], i32 0
265263
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> poison, <16 x i32> zeroinitializer
266-
; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP9]], <16 x i8> [[TMP11]], <16 x i8> [[TMP13]]
264+
; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP9]], <16 x i8> [[TMP8]], <16 x i8> [[TMP13]]
267265
; CHECK-NEXT: store <16 x i8> [[TMP14]], ptr [[PTR]], align 2
268266
; CHECK-NEXT: ret void
269267
;
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-9999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define i32 @test(i64 %l.549) {
5+
; CHECK-LABEL: define i32 @test(
6+
; CHECK-SAME: i64 [[L_549:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*]]:
8+
; CHECK-NEXT: [[CONV3:%.*]] = sext i32 0 to i64
9+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[CONV3]], i32 3
10+
; CHECK-NEXT: br label %[[IF_THEN19:.*]]
11+
; CHECK: [[P:.*]]:
12+
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, %[[IF_END29:.*]] ], [ [[TMP13:%.*]], %[[IF_END25:.*]] ]
13+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
14+
; CHECK-NEXT: br i1 false, label %[[S:.*]], label %[[Q:.*]]
15+
; CHECK: [[Q]]:
16+
; CHECK-NEXT: [[XOR39:%.*]] = phi i64 [ 0, %[[P]] ], [ 0, %[[LAND_LHS_TRUE:.*]] ]
17+
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[P]] ], [ zeroinitializer, %[[LAND_LHS_TRUE]] ]
18+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[XOR39]], i32 2
19+
; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> [[TMP4]], <2 x i64> [[TMP3]], i64 0)
20+
; CHECK-NEXT: br i1 false, label %[[LOR_LHS_FALSE:.*]], label %[[R:.*]]
21+
; CHECK: [[LOR_LHS_FALSE]]:
22+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
23+
; CHECK-NEXT: br i1 false, label %[[LAND_LHS_TRUE]], label %[[S]]
24+
; CHECK: [[R]]:
25+
; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i64> [ [[TMP5]], %[[Q]] ], [ [[TMP16:%.*]], %[[IF_THEN19]] ]
26+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
27+
; CHECK-NEXT: br i1 false, label %[[S]], label %[[LAND_LHS_TRUE]]
28+
; CHECK: [[LAND_LHS_TRUE]]:
29+
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i64> [ [[TMP8]], %[[R]] ], [ zeroinitializer, %[[LOR_LHS_FALSE]] ]
30+
; CHECK-NEXT: br i1 false, label %[[Q]], label %[[S]]
31+
; CHECK: [[S]]:
32+
; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i64> [ [[TMP9]], %[[LAND_LHS_TRUE]] ], [ [[TMP8]], %[[R]] ], [ [[TMP6]], %[[LOR_LHS_FALSE]] ], [ [[TMP2]], %[[P]] ]
33+
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <2 x i32> <i32 0, i32 1>
34+
; CHECK-NEXT: br label %[[IF_THEN19]]
35+
; CHECK: [[IF_THEN19]]:
36+
; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i64> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP11]], %[[S]] ]
37+
; CHECK-NEXT: [[TMP13]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
38+
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
39+
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i64> [[TMP14]], i64 [[L_549]], i32 1
40+
; CHECK-NEXT: [[TMP16]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> [[TMP15]], <2 x i64> zeroinitializer, i64 2)
41+
; CHECK-NEXT: br i1 false, label %[[R]], label %[[IF_END25]]
42+
; CHECK: [[IF_END25]]:
43+
; CHECK-NEXT: br i1 false, label %[[IF_END29]], label %[[P]]
44+
; CHECK: [[IF_END29]]:
45+
; CHECK-NEXT: br label %[[P]]
46+
;
47+
entry:
48+
%conv3 = sext i32 0 to i64
49+
br label %if.then19
50+
51+
p:
52+
%l.0 = phi i64 [ %xor, %if.end29 ], [ %l.5493, %if.end25 ]
53+
%m.0 = phi i64 [ %not21, %if.end29 ], [ %m.550, %if.end25 ]
54+
br i1 false, label %s, label %q
55+
56+
q:
57+
%xor39 = phi i64 [ 0, %p ], [ 0, %land.lhs.true ]
58+
%l.1 = phi i64 [ 0, %p ], [ 0, %land.lhs.true ]
59+
%m.1 = phi i64 [ 0, %p ], [ 0, %land.lhs.true ]
60+
br i1 false, label %lor.lhs.false, label %r
61+
62+
lor.lhs.false:
63+
br i1 false, label %land.lhs.true, label %s
64+
65+
r:
66+
%xor38 = phi i64 [ %xor39, %q ], [ %xor, %if.then19 ]
67+
%j.0 = phi i64 [ %conv3, %q ], [ %not21, %if.then19 ]
68+
%l.2 = phi i64 [ %l.1, %q ], [ %l.549, %if.then19 ]
69+
%m.2 = phi i64 [ %m.1, %q ], [ %m.550, %if.then19 ]
70+
br i1 false, label %s, label %land.lhs.true
71+
72+
land.lhs.true:
73+
%xor37 = phi i64 [ %xor38, %r ], [ 0, %lor.lhs.false ]
74+
%j.1 = phi i64 [ %j.0, %r ], [ 0, %lor.lhs.false ]
75+
%l.3 = phi i64 [ %l.2, %r ], [ 0, %lor.lhs.false ]
76+
%m.3 = phi i64 [ %m.2, %r ], [ 0, %lor.lhs.false ]
77+
br i1 false, label %q, label %s
78+
79+
s:
80+
%xor36 = phi i64 [ %xor37, %land.lhs.true ], [ %xor38, %r ], [ %xor39, %lor.lhs.false ], [ %l.0, %p ]
81+
%j.2 = phi i64 [ %j.1, %land.lhs.true ], [ %j.0, %r ], [ %conv3, %lor.lhs.false ], [ %m.0, %p ]
82+
%l.4 = phi i64 [ %l.3, %land.lhs.true ], [ %l.2, %r ], [ %l.1, %lor.lhs.false ], [ %l.0, %p ]
83+
%m.4 = phi i64 [ %m.3, %land.lhs.true ], [ %m.2, %r ], [ %m.1, %lor.lhs.false ], [ %m.0, %p ]
84+
br label %if.then19
85+
86+
if.then19:
87+
%m.550 = phi i64 [ 0, %entry ], [ %m.4, %s ]
88+
%l.5493 = phi i64 [ 0, %entry ], [ %l.4, %s ]
89+
%xor = xor i64 0, 0
90+
%not21 = xor i64 0, 0
91+
br i1 false, label %r, label %if.end25
92+
93+
if.end25:
94+
br i1 false, label %if.end29, label %p
95+
96+
if.end29:
97+
br label %p
98+
}
99+

0 commit comments

Comments
 (0)