1
1
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2
- ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK
3
- ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse4.2 | FileCheck %s --check-prefixes=CHECK
4
- ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK
5
- ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx512vl | FileCheck %s --check-prefixes=CHECK
2
+ ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
3
+ ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4
4
+ ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
5
+ ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx512vl | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
6
6
7
7
; Declaration only: @use takes a <4 x i1> vector and returns void.
; NOTE(review): no call to @use is visible in this excerpt - presumably it serves the extra-use tests further down; confirm.
declare void @use (<4 x i1 >)
8
8
9
9
; icmp-eq v4i32 is cheap
10
10
11
11
define <4 x i32 > @shuf_icmp_eq_v4i32 (<4 x i32 > %x , <4 x i32 > %y , <4 x i32 > %z , <4 x i32 > %w ) {
12
- ; CHECK-LABEL: define <4 x i32> @shuf_icmp_eq_v4i32(
13
- ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0:[0-9]+]] {
14
- ; CHECK-NEXT: [[C0:%.*]] = icmp eq <4 x i32> [[X]], [[Y]]
15
- ; CHECK-NEXT: [[C1:%.*]] = icmp eq <4 x i32> [[Z]], [[W]]
16
- ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
17
- ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
18
- ; CHECK-NEXT: ret <4 x i32> [[R]]
12
+ ; SSE-LABEL: define <4 x i32> @shuf_icmp_eq_v4i32(
13
+ ; SSE-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0:[0-9]+]] {
14
+ ; SSE-NEXT: [[C0:%.*]] = icmp eq <4 x i32> [[X]], [[Y]]
15
+ ; SSE-NEXT: [[C1:%.*]] = icmp eq <4 x i32> [[Z]], [[W]]
16
+ ; SSE-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
17
+ ; SSE-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
18
+ ; SSE-NEXT: ret <4 x i32> [[R]]
19
+ ;
20
+ ; AVX2-LABEL: define <4 x i32> @shuf_icmp_eq_v4i32(
21
+ ; AVX2-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0:[0-9]+]] {
22
+ ; AVX2-NEXT: [[C0:%.*]] = icmp eq <4 x i32> [[X]], [[Y]]
23
+ ; AVX2-NEXT: [[C1:%.*]] = icmp eq <4 x i32> [[Z]], [[W]]
24
+ ; AVX2-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
25
+ ; AVX2-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
26
+ ; AVX2-NEXT: ret <4 x i32> [[R]]
27
+ ;
28
+ ; AVX512-LABEL: define <4 x i32> @shuf_icmp_eq_v4i32(
29
+ ; AVX512-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0:[0-9]+]] {
30
+ ; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Z]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
31
+ ; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[W]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
32
+ ; AVX512-NEXT: [[S:%.*]] = icmp eq <4 x i32> [[TMP1]], [[TMP2]]
33
+ ; AVX512-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
34
+ ; AVX512-NEXT: ret <4 x i32> [[R]]
19
35
;
20
36
%c0 = icmp eq <4 x i32 > %x , %y
21
37
%c1 = icmp eq <4 x i32 > %z , %w
@@ -27,13 +43,37 @@ define <4 x i32> @shuf_icmp_eq_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <
27
43
; icmp-eq v2i64 is only cheap on SSE4+ targets with PCMPEQQ
28
44
29
45
define <2 x i64 > @shuf_icmp_eq_v2i64 (<2 x i64 > %x , <2 x i64 > %y , <2 x i64 > %z , <2 x i64 > %w ) {
30
- ; CHECK-LABEL: define <2 x i64> @shuf_icmp_eq_v2i64(
31
- ; CHECK-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]], <2 x i64> [[W:%.*]]) #[[ATTR0]] {
32
- ; CHECK-NEXT: [[C0:%.*]] = icmp eq <2 x i64> [[X]], [[Y]]
33
- ; CHECK-NEXT: [[C1:%.*]] = icmp eq <2 x i64> [[Z]], [[W]]
34
- ; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i1> [[C0]], <2 x i1> [[C1]], <2 x i32> <i32 1, i32 3>
35
- ; CHECK-NEXT: [[R:%.*]] = sext <2 x i1> [[S]] to <2 x i64>
36
- ; CHECK-NEXT: ret <2 x i64> [[R]]
46
+ ; SSE2-LABEL: define <2 x i64> @shuf_icmp_eq_v2i64(
47
+ ; SSE2-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]], <2 x i64> [[W:%.*]]) #[[ATTR0]] {
48
+ ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Z]], <2 x i32> <i32 1, i32 3>
49
+ ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[Y]], <2 x i64> [[W]], <2 x i32> <i32 1, i32 3>
50
+ ; SSE2-NEXT: [[S:%.*]] = icmp eq <2 x i64> [[TMP1]], [[TMP2]]
51
+ ; SSE2-NEXT: [[R:%.*]] = sext <2 x i1> [[S]] to <2 x i64>
52
+ ; SSE2-NEXT: ret <2 x i64> [[R]]
53
+ ;
54
+ ; SSE4-LABEL: define <2 x i64> @shuf_icmp_eq_v2i64(
55
+ ; SSE4-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]], <2 x i64> [[W:%.*]]) #[[ATTR0]] {
56
+ ; SSE4-NEXT: [[C0:%.*]] = icmp eq <2 x i64> [[X]], [[Y]]
57
+ ; SSE4-NEXT: [[C1:%.*]] = icmp eq <2 x i64> [[Z]], [[W]]
58
+ ; SSE4-NEXT: [[S:%.*]] = shufflevector <2 x i1> [[C0]], <2 x i1> [[C1]], <2 x i32> <i32 1, i32 3>
59
+ ; SSE4-NEXT: [[R:%.*]] = sext <2 x i1> [[S]] to <2 x i64>
60
+ ; SSE4-NEXT: ret <2 x i64> [[R]]
61
+ ;
62
+ ; AVX2-LABEL: define <2 x i64> @shuf_icmp_eq_v2i64(
63
+ ; AVX2-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]], <2 x i64> [[W:%.*]]) #[[ATTR0]] {
64
+ ; AVX2-NEXT: [[C0:%.*]] = icmp eq <2 x i64> [[X]], [[Y]]
65
+ ; AVX2-NEXT: [[C1:%.*]] = icmp eq <2 x i64> [[Z]], [[W]]
66
+ ; AVX2-NEXT: [[S:%.*]] = shufflevector <2 x i1> [[C0]], <2 x i1> [[C1]], <2 x i32> <i32 1, i32 3>
67
+ ; AVX2-NEXT: [[R:%.*]] = sext <2 x i1> [[S]] to <2 x i64>
68
+ ; AVX2-NEXT: ret <2 x i64> [[R]]
69
+ ;
70
+ ; AVX512-LABEL: define <2 x i64> @shuf_icmp_eq_v2i64(
71
+ ; AVX512-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]], <2 x i64> [[W:%.*]]) #[[ATTR0]] {
72
+ ; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Z]], <2 x i32> <i32 1, i32 3>
73
+ ; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[Y]], <2 x i64> [[W]], <2 x i32> <i32 1, i32 3>
74
+ ; AVX512-NEXT: [[S:%.*]] = icmp eq <2 x i64> [[TMP1]], [[TMP2]]
75
+ ; AVX512-NEXT: [[R:%.*]] = sext <2 x i1> [[S]] to <2 x i64>
76
+ ; AVX512-NEXT: ret <2 x i64> [[R]]
37
77
;
38
78
%c0 = icmp eq <2 x i64 > %x , %y
39
79
%c1 = icmp eq <2 x i64 > %z , %w
@@ -46,10 +86,10 @@ define <2 x i64> @shuf_icmp_eq_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z, <
46
86
47
87
define <4 x i32 > @shuf_icmp_ugt_v4i32 (<4 x i32 > %x , <4 x i32 > %y , <4 x i32 > %z , <4 x i32 > %w ) {
48
88
; CHECK-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32(
49
- ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
50
- ; CHECK-NEXT: [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
51
- ; CHECK-NEXT: [[C1:%.*]] = icmp ugt <4 x i32> [[Z]], [[W]]
52
- ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
89
+ ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0:[0-9]+]] {
90
+ ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Z]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
91
+ ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[W]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
92
+ ; CHECK-NEXT: [[S:%.*]] = icmp ugt <4 x i32> [[TMP1]], [[TMP2]]
53
93
; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
54
94
; CHECK-NEXT: ret <4 x i32> [[R]]
55
95
;
@@ -60,16 +100,32 @@ define <4 x i32> @shuf_icmp_ugt_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z,
60
100
ret <4 x i32 > %r
61
101
}
62
102
63
- ; Common operand is op0 of the fcmps.
103
+ ; Common operand is op0 of the fcmps (CMPPS cheaper on SSE4+).
64
104
65
105
define <4 x i32 > @shuf_fcmp_oeq_v4i32 (<4 x float > %x , <4 x float > %y , <4 x float > %z ) {
66
- ; CHECK-LABEL: define <4 x i32> @shuf_fcmp_oeq_v4i32(
67
- ; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
68
- ; CHECK-NEXT: [[B0:%.*]] = fcmp oeq <4 x float> [[X]], [[Y]]
69
- ; CHECK-NEXT: [[B1:%.*]] = fcmp oeq <4 x float> [[X]], [[Z]]
70
- ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
71
- ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
72
- ; CHECK-NEXT: ret <4 x i32> [[R]]
106
+ ; SSE2-LABEL: define <4 x i32> @shuf_fcmp_oeq_v4i32(
107
+ ; SSE2-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
108
+ ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 0>
109
+ ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[Z]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
110
+ ; SSE2-NEXT: [[S:%.*]] = fcmp oeq <4 x float> [[TMP1]], [[TMP2]]
111
+ ; SSE2-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
112
+ ; SSE2-NEXT: ret <4 x i32> [[R]]
113
+ ;
114
+ ; SSE4-LABEL: define <4 x i32> @shuf_fcmp_oeq_v4i32(
115
+ ; SSE4-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
116
+ ; SSE4-NEXT: [[B0:%.*]] = fcmp oeq <4 x float> [[X]], [[Y]]
117
+ ; SSE4-NEXT: [[B1:%.*]] = fcmp oeq <4 x float> [[X]], [[Z]]
118
+ ; SSE4-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
119
+ ; SSE4-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
120
+ ; SSE4-NEXT: ret <4 x i32> [[R]]
121
+ ;
122
+ ; AVX-LABEL: define <4 x i32> @shuf_fcmp_oeq_v4i32(
123
+ ; AVX-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
124
+ ; AVX-NEXT: [[B0:%.*]] = fcmp oeq <4 x float> [[X]], [[Y]]
125
+ ; AVX-NEXT: [[B1:%.*]] = fcmp oeq <4 x float> [[X]], [[Z]]
126
+ ; AVX-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
127
+ ; AVX-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
128
+ ; AVX-NEXT: ret <4 x i32> [[R]]
73
129
;
74
130
%b0 = fcmp oeq <4 x float > %x , %y
75
131
%b1 = fcmp oeq <4 x float > %x , %z
@@ -81,13 +137,29 @@ define <4 x i32> @shuf_fcmp_oeq_v4i32(<4 x float> %x, <4 x float> %y, <4 x float
81
137
; For commutative instructions, common operand may be swapped
82
138
83
139
define <4 x i32 > @shuf_fcmp_one_v4f32_swap (<4 x float > %x , <4 x float > %y , <4 x float > %z ) {
84
- ; CHECK-LABEL: define <4 x i32> @shuf_fcmp_one_v4f32_swap(
85
- ; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
86
- ; CHECK-NEXT: [[B0:%.*]] = fcmp one <4 x float> [[X]], [[Y]]
87
- ; CHECK-NEXT: [[B1:%.*]] = fcmp one <4 x float> [[Z]], [[X]]
88
- ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 0, i32 3, i32 4, i32 7>
89
- ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
90
- ; CHECK-NEXT: ret <4 x i32> [[R]]
140
+ ; SSE-LABEL: define <4 x i32> @shuf_fcmp_one_v4f32_swap(
141
+ ; SSE-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
142
+ ; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[Z]], <4 x i32> <i32 0, i32 3, i32 4, i32 7>
143
+ ; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 0, i32 3>
144
+ ; SSE-NEXT: [[S:%.*]] = fcmp one <4 x float> [[TMP1]], [[TMP2]]
145
+ ; SSE-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
146
+ ; SSE-NEXT: ret <4 x i32> [[R]]
147
+ ;
148
+ ; AVX2-LABEL: define <4 x i32> @shuf_fcmp_one_v4f32_swap(
149
+ ; AVX2-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
150
+ ; AVX2-NEXT: [[B0:%.*]] = fcmp one <4 x float> [[X]], [[Y]]
151
+ ; AVX2-NEXT: [[B1:%.*]] = fcmp one <4 x float> [[Z]], [[X]]
152
+ ; AVX2-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 0, i32 3, i32 4, i32 7>
153
+ ; AVX2-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
154
+ ; AVX2-NEXT: ret <4 x i32> [[R]]
155
+ ;
156
+ ; AVX512-LABEL: define <4 x i32> @shuf_fcmp_one_v4f32_swap(
157
+ ; AVX512-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
158
+ ; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[Z]], <4 x i32> <i32 0, i32 3, i32 4, i32 7>
159
+ ; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 0, i32 3>
160
+ ; AVX512-NEXT: [[S:%.*]] = fcmp one <4 x float> [[TMP1]], [[TMP2]]
161
+ ; AVX512-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
162
+ ; AVX512-NEXT: ret <4 x i32> [[R]]
91
163
;
92
164
%b0 = fcmp one <4 x float > %x , %y
93
165
%b1 = fcmp one <4 x float > %z , %x
@@ -99,13 +171,29 @@ define <4 x i32> @shuf_fcmp_one_v4f32_swap(<4 x float> %x, <4 x float> %y, <4 x
99
171
; non-commutative pred, but common op0
100
172
101
173
define <4 x i32 > @shuf_icmp_sgt_v4i32_swap (<4 x i32 > %x , <4 x i32 > %y , <4 x i32 > %z ) {
102
- ; CHECK-LABEL: define <4 x i32> @shuf_icmp_sgt_v4i32_swap(
103
- ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
104
- ; CHECK-NEXT: [[B0:%.*]] = icmp sgt <4 x i32> [[X]], [[Y]]
105
- ; CHECK-NEXT: [[B1:%.*]] = icmp sgt <4 x i32> [[X]], [[Z]]
106
- ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 3, i32 1, i32 1, i32 6>
107
- ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
108
- ; CHECK-NEXT: ret <4 x i32> [[R]]
174
+ ; SSE-LABEL: define <4 x i32> @shuf_icmp_sgt_v4i32_swap(
175
+ ; SSE-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
176
+ ; SSE-NEXT: [[B0:%.*]] = icmp sgt <4 x i32> [[X]], [[Y]]
177
+ ; SSE-NEXT: [[B1:%.*]] = icmp sgt <4 x i32> [[X]], [[Z]]
178
+ ; SSE-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 3, i32 1, i32 1, i32 6>
179
+ ; SSE-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
180
+ ; SSE-NEXT: ret <4 x i32> [[R]]
181
+ ;
182
+ ; AVX2-LABEL: define <4 x i32> @shuf_icmp_sgt_v4i32_swap(
183
+ ; AVX2-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
184
+ ; AVX2-NEXT: [[B0:%.*]] = icmp sgt <4 x i32> [[X]], [[Y]]
185
+ ; AVX2-NEXT: [[B1:%.*]] = icmp sgt <4 x i32> [[X]], [[Z]]
186
+ ; AVX2-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 3, i32 1, i32 1, i32 6>
187
+ ; AVX2-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
188
+ ; AVX2-NEXT: ret <4 x i32> [[R]]
189
+ ;
190
+ ; AVX512-LABEL: define <4 x i32> @shuf_icmp_sgt_v4i32_swap(
191
+ ; AVX512-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
192
+ ; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 2>
193
+ ; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[Z]], <4 x i32> <i32 3, i32 1, i32 1, i32 6>
194
+ ; AVX512-NEXT: [[S:%.*]] = icmp sgt <4 x i32> [[TMP1]], [[TMP2]]
195
+ ; AVX512-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
196
+ ; AVX512-NEXT: ret <4 x i32> [[R]]
109
197
;
110
198
%b0 = icmp sgt <4 x i32 > %x , %y
111
199
%b1 = icmp sgt <4 x i32 > %x , %z
0 commit comments