Skip to content

Commit 0307797

Browse files
author
git apple-llvm automerger
committed
Merge commit '529aa6eadb27' from llvm.org/release/17.x into stable/20230725
2 parents 4ad5186 + 529aa6e commit 0307797

File tree

3 files changed

+66
-21
lines changed

3 files changed

+66
-21
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10118,7 +10118,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
1011810118
}
1011910119

1012010120
if (E->State == TreeEntry::NeedToGather) {
10121-
if (E->getMainOp() && E->Idx == 0)
10121+
// Set insert point for non-reduction initial nodes.
10122+
if (E->getMainOp() && E->Idx == 0 && !UserIgnoreList)
1012210123
setInsertPointAfterBundle(E);
1012310124
Value *Vec = createBuildVector(E);
1012410125
E->VectorizedValue = Vec;
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-sie-ps5 < %s | FileCheck %s
3+
4+
define void @tes() {
5+
; CHECK-LABEL: define void @tes() {
6+
; CHECK-NEXT: entry:
7+
; CHECK-NEXT: [[TMP0:%.*]] = fcmp ole <2 x double> zeroinitializer, zeroinitializer
8+
; CHECK-NEXT: br label [[TMP1:%.*]]
9+
; CHECK: 1:
10+
; CHECK-NEXT: [[TMP2:%.*]] = select i1 false, i1 false, i1 false
11+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i1> zeroinitializer, <2 x i1> [[TMP0]], <4 x i32> <i32 0, i32 0, i32 0, i32 2>
12+
; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP3]]
13+
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP4]])
14+
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP5]], i1 false, i1 false
15+
; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP2]], i1 [[OP_RDX]], i1 false
16+
; CHECK-NEXT: br i1 [[OP_RDX1]], label [[TMP6:%.*]], label [[TMP7:%.*]]
17+
; CHECK: 6:
18+
; CHECK-NEXT: ret void
19+
; CHECK: 7:
20+
; CHECK-NEXT: ret void
21+
;
22+
entry:
23+
%0 = extractelement <2 x i1> zeroinitializer, i64 0
24+
%1 = extractelement <2 x i1> zeroinitializer, i64 0
25+
%2 = fcmp ole <2 x double> zeroinitializer, zeroinitializer
26+
%3 = extractelement <2 x i1> %2, i64 0
27+
%4 = extractelement <2 x i1> zeroinitializer, i64 0
28+
br label %5
29+
30+
5:
31+
%6 = select i1 false, i1 false, i1 false
32+
%7 = select i1 %6, i1 %0, i1 false
33+
%8 = select i1 %7, i1 %1, i1 false
34+
%9 = select i1 %8, i1 false, i1 false
35+
%10 = select i1 %9, i1 %3, i1 false
36+
%11 = select i1 %10, i1 %4, i1 false
37+
br i1 %11, label %12, label %13
38+
39+
12:
40+
ret void
41+
42+
13:
43+
ret void
44+
}

llvm/test/Transforms/SLPVectorizer/X86/reduction-transpose.ll

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@
1818
define i32 @reduce_and4(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4) {
1919
; SSE2-LABEL: @reduce_and4(
2020
; SSE2-NEXT: entry:
21-
; SSE2-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
22-
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
23-
; SSE2-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
24-
; SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]])
25-
; SSE2-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP3]]
21+
; SSE2-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
22+
; SSE2-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]])
23+
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
24+
; SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]])
25+
; SSE2-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP1]], [[TMP3]]
2626
; SSE2-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]]
2727
; SSE2-NEXT: ret i32 [[OP_RDX1]]
2828
;
@@ -40,11 +40,11 @@ define i32 @reduce_and4(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <
4040
;
4141
; AVX-LABEL: @reduce_and4(
4242
; AVX-NEXT: entry:
43-
; AVX-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
44-
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
45-
; AVX-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
46-
; AVX-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]])
47-
; AVX-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP3]]
43+
; AVX-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
44+
; AVX-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]])
45+
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
46+
; AVX-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]])
47+
; AVX-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP1]], [[TMP3]]
4848
; AVX-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]]
4949
; AVX-NEXT: ret i32 [[OP_RDX1]]
5050
;
@@ -94,11 +94,11 @@ entry:
9494

9595
define i32 @reduce_and4_transpose(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4) {
9696
; SSE2-LABEL: @reduce_and4_transpose(
97-
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
98-
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
99-
; SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]])
100-
; SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
101-
; SSE2-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP3]], [[TMP4]]
97+
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
98+
; SSE2-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
99+
; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
100+
; SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP3]])
101+
; SSE2-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP4]]
102102
; SSE2-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]]
103103
; SSE2-NEXT: ret i32 [[OP_RDX1]]
104104
;
@@ -114,11 +114,11 @@ define i32 @reduce_and4_transpose(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i
114114
; SSE42-NEXT: ret i32 [[OP_RDX3]]
115115
;
116116
; AVX-LABEL: @reduce_and4_transpose(
117-
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
118-
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
119-
; AVX-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]])
120-
; AVX-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
121-
; AVX-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP3]], [[TMP4]]
117+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
118+
; AVX-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
119+
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
120+
; AVX-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP3]])
121+
; AVX-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP4]]
122122
; AVX-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]]
123123
; AVX-NEXT: ret i32 [[OP_RDX1]]
124124
;

0 commit comments

Comments
 (0)