Skip to content

Commit 7152bf3

Browse files
committed
[SLP]Do not create new vector node if scalars fully overlap with the existing one
If the list of scalars is already vectorized as part of the same vector node, there is no need to generate a vector node again; it will be handled as part of overlapping matching. Fixes #113810
1 parent ce0368e commit 7152bf3

File tree

2 files changed

+100
-2
lines changed

2 files changed

+100
-2
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7947,8 +7947,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
79477947
Nodes.insert(E);
79487948
SmallPtrSet<Value *, 8> Values(VL.begin(), VL.end());
79497949
if (any_of(Nodes, [&](const TreeEntry *E) {
7950-
return all_of(E->Scalars,
7951-
[&](Value *V) { return Values.contains(V); });
7950+
if (all_of(E->Scalars,
7951+
[&](Value *V) { return Values.contains(V); }))
7952+
return true;
7953+
SmallPtrSet<Value *, 8> EValues(E->Scalars.begin(),
7954+
E->Scalars.end());
7955+
return (
7956+
all_of(VL, [&](Value *V) { return EValues.contains(V); }));
79527957
})) {
79537958
LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
79547959
if (TryToFindDuplicates(S))
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer < %s | FileCheck %s
3+
4+
define void @test(ptr %p1, ptr %0, i32 %1, i1 %c1, ptr %p2) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: ptr [[P1:%.*]], ptr [[TMP0:%.*]], i32 [[TMP1:%.*]], i1 [[C1:%.*]], ptr [[P2:%.*]]) {
7+
; CHECK-NEXT: [[TOP:.*:]]
8+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8
9+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0
10+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer
11+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, <4 x ptr> [[TMP4]], <4 x i64> <i64 8, i64 12, i64 16, i64 20>
12+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP5]], i32 2
13+
; CHECK-NEXT: br i1 [[C1]], label %[[L42:.*]], label %[[L41:.*]]
14+
; CHECK: [[L41]]:
15+
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x ptr> [[TMP5]], zeroinitializer
16+
; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
17+
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> zeroinitializer, <4 x i32> [[TMP8]]
18+
; CHECK-NEXT: br label %[[L112:.*]]
19+
; CHECK: [[L42]]:
20+
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4
21+
; CHECK-NEXT: [[DOTNOT280:%.*]] = icmp eq i32 [[TMP10]], 0
22+
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[TMP1]], i32 2
23+
; CHECK-NEXT: br i1 [[DOTNOT280]], label %[[L112]], label %[[L47:.*]]
24+
; CHECK: [[L47]]:
25+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x ptr> [[TMP5]], i32 1
26+
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
27+
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x ptr> [[TMP5]], <4 x ptr> poison, <2 x i32> <i32 2, i32 3>
28+
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x ptr> [[TMP14]], zeroinitializer
29+
; CHECK-NEXT: [[TMP16:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4
30+
; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> zeroinitializer, <2 x i32> [[TMP16]]
31+
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>, i32 [[TMP13]], i32 1
32+
; CHECK-NEXT: [[TMP19:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP18]], <2 x i32> [[TMP17]], i64 2)
33+
; CHECK-NEXT: br label %[[L112]]
34+
; CHECK: [[L112]]:
35+
; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i32> [ [[TMP19]], %[[L47]] ], [ [[TMP9]], %[[L41]] ], [ [[TMP11]], %[[L42]] ]
36+
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP20]], i32 0
37+
; CHECK-NEXT: store i32 [[TMP21]], ptr [[P2]], align 4
38+
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP20]], i32 1
39+
; CHECK-NEXT: store i32 [[TMP22]], ptr [[P1]], align 4
40+
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP20]], i32 2
41+
; CHECK-NEXT: store i32 [[TMP23]], ptr [[P2]], align 4
42+
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP20]], i32 3
43+
; CHECK-NEXT: store i32 [[TMP24]], ptr [[P1]], align 4
44+
; CHECK-NEXT: ret void
45+
;
46+
top:
47+
%2 = getelementptr i8, ptr %0, i64 8
48+
%3 = getelementptr i8, ptr %0, i64 12
49+
%4 = getelementptr i8, ptr %0, i64 16
50+
%5 = getelementptr i8, ptr %0, i64 20
51+
br i1 %c1, label %L42, label %L41
52+
53+
L41:
54+
%.not276 = icmp eq ptr %2, null
55+
%6 = load i32, ptr %2, align 4
56+
%7 = select i1 %.not276, i32 0, i32 %6
57+
%.not277 = icmp eq ptr %3, null
58+
%8 = load i32, ptr %3, align 4
59+
%9 = select i1 %.not277, i32 0, i32 %8
60+
%.not278 = icmp eq ptr %4, null
61+
%10 = load i32, ptr %4, align 4
62+
%11 = select i1 %.not278, i32 0, i32 %10
63+
%.not279 = icmp eq ptr %5, null
64+
%12 = load i32, ptr %5, align 4
65+
%13 = select i1 %.not279, i32 0, i32 %12
66+
br label %L112
67+
68+
L42:
69+
%14 = load i32, ptr %2, align 4
70+
%.not280 = icmp eq i32 %14, 0
71+
br i1 %.not280, label %L112, label %L47
72+
73+
L47:
74+
%15 = load i32, ptr %3, align 4
75+
%.not282 = icmp eq ptr %4, null
76+
%16 = load i32, ptr %4, align 4
77+
%17 = select i1 %.not282, i32 0, i32 %16
78+
%.not283 = icmp eq ptr %5, null
79+
%18 = load i32, ptr %5, align 4
80+
%19 = select i1 %.not283, i32 0, i32 %18
81+
br label %L112
82+
83+
L112:
84+
%value_phi13336 = phi i32 [ %19, %L47 ], [ %13, %L41 ], [ 0, %L42 ]
85+
%value_phi12335 = phi i32 [ %17, %L47 ], [ %11, %L41 ], [ %1, %L42 ]
86+
%value_phi11334 = phi i32 [ %15, %L47 ], [ %9, %L41 ], [ 0, %L42 ]
87+
%value_phi10333 = phi i32 [ 0, %L47 ], [ %7, %L41 ], [ 0, %L42 ]
88+
store i32 %value_phi10333, ptr %p2, align 4
89+
store i32 %value_phi11334, ptr %p1, align 4
90+
store i32 %value_phi12335, ptr %p2, align 4
91+
store i32 %value_phi13336, ptr %p1, align 4
92+
ret void
93+
}

0 commit comments

Comments
 (0)