Skip to content

Commit 8dd27d4

Browse files
committed
[VectorCombine] Add test coverage for shuffle(cmp,cmp) fold patterns
1 parent 24238aa commit 8dd27d4

File tree

1 file changed

+189
-0
lines changed

1 file changed

+189
-0
lines changed
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK
3+
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse4.2 | FileCheck %s --check-prefixes=CHECK
4+
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK
5+
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx512vl | FileCheck %s --check-prefixes=CHECK
6+
7+
; Body-less external function. The "uses" negative test passes one compare
; result to it, giving that compare a user other than the shuffle.
declare void @use(<4 x i1>)
8+
9+
; icmp - eq v4i32 is cheap
; Pattern under test - two `icmp eq` results (x vs y and z vs w) are merged by a
; shuffle that takes lanes 1 and 3 of each compare (mask 1,3,5,7), and the
; selected i1 lanes are sign-extended to i32.
; The assertions reproduce the input instructions one for one, so the recorded
; output of the pass is this same sequence.
10+
11+
define <4 x i32> @shuf_icmp_eq_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) {
12+
; CHECK-LABEL: define <4 x i32> @shuf_icmp_eq_v4i32(
13+
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0:[0-9]+]] {
14+
; CHECK-NEXT: [[C0:%.*]] = icmp eq <4 x i32> [[X]], [[Y]]
15+
; CHECK-NEXT: [[C1:%.*]] = icmp eq <4 x i32> [[Z]], [[W]]
16+
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
17+
; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
18+
; CHECK-NEXT: ret <4 x i32> [[R]]
19+
;
20+
%c0 = icmp eq <4 x i32> %x, %y
21+
%c1 = icmp eq <4 x i32> %z, %w
22+
%s = shufflevector <4 x i1> %c0, <4 x i1> %c1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
23+
%r = sext <4 x i1> %s to <4 x i32>
24+
ret <4 x i32> %r
25+
}
26+
27+
; icmp - eq v2i64 is only cheap on SSE4+ targets with PCMPEQQ
; Pattern under test - two `icmp eq` results on <2 x i64> are merged by a shuffle
; that takes lane 1 of each compare (mask 1,3), then sign-extended to i64.
; Every RUN line in this file passes the same FileCheck prefix, so the
; assertions below apply unchanged to the sse2 run as well as the sse4.2, avx2
; and avx512vl runs; the target distinction noted above is not yet visible in
; the recorded output, which reproduces the input instructions one for one.
28+
29+
define <2 x i64> @shuf_icmp_eq_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z, <2 x i64> %w) {
30+
; CHECK-LABEL: define <2 x i64> @shuf_icmp_eq_v2i64(
31+
; CHECK-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]], <2 x i64> [[W:%.*]]) #[[ATTR0]] {
32+
; CHECK-NEXT: [[C0:%.*]] = icmp eq <2 x i64> [[X]], [[Y]]
33+
; CHECK-NEXT: [[C1:%.*]] = icmp eq <2 x i64> [[Z]], [[W]]
34+
; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i1> [[C0]], <2 x i1> [[C1]], <2 x i32> <i32 1, i32 3>
35+
; CHECK-NEXT: [[R:%.*]] = sext <2 x i1> [[S]] to <2 x i64>
36+
; CHECK-NEXT: ret <2 x i64> [[R]]
37+
;
38+
%c0 = icmp eq <2 x i64> %x, %y
39+
%c1 = icmp eq <2 x i64> %z, %w
40+
%s = shufflevector <2 x i1> %c0, <2 x i1> %c1, <2 x i32> <i32 1, i32 3>
41+
%r = sext <2 x i1> %s to <2 x i64>
42+
ret <2 x i64> %r
43+
}
44+
45+
; icmp - ugt v4i32 is expensive
; Pattern under test - two unsigned greater-than compares (x vs y and z vs w) are
; merged by a shuffle that takes lanes 1 and 3 of each compare (mask 1,3,5,7),
; then sign-extended to i32. No operand is shared between the two compares.
; The assertions reproduce the input instructions one for one.
46+
47+
define <4 x i32> @shuf_icmp_ugt_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) {
48+
; CHECK-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32(
49+
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
50+
; CHECK-NEXT: [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
51+
; CHECK-NEXT: [[C1:%.*]] = icmp ugt <4 x i32> [[Z]], [[W]]
52+
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
53+
; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
54+
; CHECK-NEXT: ret <4 x i32> [[R]]
55+
;
56+
%c0 = icmp ugt <4 x i32> %x, %y
57+
%c1 = icmp ugt <4 x i32> %z, %w
58+
%s = shufflevector <4 x i1> %c0, <4 x i1> %c1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
59+
%r = sext <4 x i1> %s to <4 x i32>
60+
ret <4 x i32> %r
61+
}
62+
63+
; Common operand is op0 of the fcmps.
; Both `fcmp oeq` instructions take %x as their first operand (x vs y, x vs z).
; The shuffle mask leaves result lanes 0 and 1 as poison, takes lane 2 from the
; second compare (index 6) and lane 0 from the first compare (index 0); the
; result is sign-extended to i32.
; The assertions reproduce the input instructions one for one.
; NOTE(review) - the function name says v4i32 although the compared operands
; are <4 x float>; the following fcmp test uses a v4f32 suffix. Confirm whether
; the name is intentional before renaming.
64+
65+
define <4 x i32> @shuf_fcmp_oeq_v4i32(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
66+
; CHECK-LABEL: define <4 x i32> @shuf_fcmp_oeq_v4i32(
67+
; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
68+
; CHECK-NEXT: [[B0:%.*]] = fcmp oeq <4 x float> [[X]], [[Y]]
69+
; CHECK-NEXT: [[B1:%.*]] = fcmp oeq <4 x float> [[X]], [[Z]]
70+
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
71+
; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
72+
; CHECK-NEXT: ret <4 x i32> [[R]]
73+
;
74+
%b0 = fcmp oeq <4 x float> %x, %y
75+
%b1 = fcmp oeq <4 x float> %x, %z
76+
%s = shufflevector <4 x i1> %b0, <4 x i1> %b1, <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
77+
%r = sext <4 x i1> %s to <4 x i32>
78+
ret <4 x i32> %r
79+
}
80+
81+
; For commutative predicates, the common operand may be swapped.
; %x is the first operand of one `fcmp one` (x vs y) and the second operand of
; the other (z vs x). `one` (ordered and not equal) gives the same result with
; its operands exchanged, which is what makes the swapped position equivalent.
; The shuffle takes lanes 0 and 3 of each compare (mask 0,3,4,7); the result is
; sign-extended to i32.
; The assertions reproduce the input instructions one for one.
82+
83+
define <4 x i32> @shuf_fcmp_one_v4f32_swap(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
84+
; CHECK-LABEL: define <4 x i32> @shuf_fcmp_one_v4f32_swap(
85+
; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
86+
; CHECK-NEXT: [[B0:%.*]] = fcmp one <4 x float> [[X]], [[Y]]
87+
; CHECK-NEXT: [[B1:%.*]] = fcmp one <4 x float> [[Z]], [[X]]
88+
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 0, i32 3, i32 4, i32 7>
89+
; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
90+
; CHECK-NEXT: ret <4 x i32> [[R]]
91+
;
92+
%b0 = fcmp one <4 x float> %x, %y
93+
%b1 = fcmp one <4 x float> %z, %x
94+
%s = shufflevector <4 x i1> %b0, <4 x i1> %b1, <4 x i32> <i32 0, i32 3, i32 4, i32 7>
95+
%r = sext <4 x i1> %s to <4 x i32>
96+
ret <4 x i32> %r
97+
}
98+
99+
; non-commutative pred, but common op0
; Both `icmp sgt` instructions take %x as their first operand (x vs y, x vs z),
; so no operand exchange is needed to line up the shared value.
; The shuffle mask 3,1,1,6 takes lanes 3, 1 and 1 of the first compare and
; lane 2 of the second; the result is sign-extended to i32.
; The assertions reproduce the input instructions one for one.
; NOTE(review) - the name carries a _swap suffix although the operands here are
; not swapped; confirm whether the suffix is intentional.
100+
101+
define <4 x i32> @shuf_icmp_sgt_v4i32_swap(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
102+
; CHECK-LABEL: define <4 x i32> @shuf_icmp_sgt_v4i32_swap(
103+
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
104+
; CHECK-NEXT: [[B0:%.*]] = icmp sgt <4 x i32> [[X]], [[Y]]
105+
; CHECK-NEXT: [[B1:%.*]] = icmp sgt <4 x i32> [[X]], [[Z]]
106+
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 3, i32 1, i32 1, i32 6>
107+
; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
108+
; CHECK-NEXT: ret <4 x i32> [[R]]
109+
;
110+
%b0 = icmp sgt <4 x i32> %x, %y
111+
%b1 = icmp sgt <4 x i32> %x, %z
112+
%s = shufflevector <4 x i1> %b0, <4 x i1> %b1, <4 x i32> <i32 3, i32 1, i32 1, i32 6>
113+
%r = sext <4 x i1> %s to <4 x i32>
114+
ret <4 x i32> %r
115+
}
116+
117+
; negative test - mismatched opcodes
; The shuffle inputs come from different instructions - an `icmp eq` on
; <2 x i64> and an `fcmp oeq` on <2 x double>. Mask 3,0 takes lane 1 of the fcmp
; result followed by lane 0 of the icmp result; the result is sign-extended to
; i64.
; The assertions reproduce the input instructions one for one.
118+
119+
define <2 x i64> @shuf_icmp_fcmp_v2i64_mismatch_opcode(<2 x i64> %x, <2 x i64> %y, <2 x double> %z, <2 x double> %w) {
120+
; CHECK-LABEL: define <2 x i64> @shuf_icmp_fcmp_v2i64_mismatch_opcode(
121+
; CHECK-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x double> [[Z:%.*]], <2 x double> [[W:%.*]]) #[[ATTR0]] {
122+
; CHECK-NEXT: [[B0:%.*]] = icmp eq <2 x i64> [[X]], [[Y]]
123+
; CHECK-NEXT: [[B1:%.*]] = fcmp oeq <2 x double> [[Z]], [[W]]
124+
; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i1> [[B0]], <2 x i1> [[B1]], <2 x i32> <i32 3, i32 0>
125+
; CHECK-NEXT: [[R:%.*]] = sext <2 x i1> [[S]] to <2 x i64>
126+
; CHECK-NEXT: ret <2 x i64> [[R]]
127+
;
128+
%b0 = icmp eq <2 x i64> %x, %y
129+
%b1 = fcmp oeq <2 x double> %z, %w
130+
%s = shufflevector <2 x i1> %b0, <2 x i1> %b1, <2 x i32> <i32 3, i32 0>
131+
%r = sext <2 x i1> %s to <2 x i64>
132+
ret <2 x i64> %r
133+
}
134+
135+
; negative test - mismatched predicates
; Both shuffle inputs are icmp instructions on <2 x i64> that share %y as their
; second operand, but one uses `eq` and the other `sgt`. Mask 3,0 takes lane 1
; of the sgt result followed by lane 0 of the eq result; the result is
; sign-extended to i64.
; The assertions reproduce the input instructions one for one.
136+
137+
define <2 x i64> @shuf_icmp_eq_sgt_v2i64_mismatch_predicate(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
138+
; CHECK-LABEL: define <2 x i64> @shuf_icmp_eq_sgt_v2i64_mismatch_predicate(
139+
; CHECK-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
140+
; CHECK-NEXT: [[B0:%.*]] = icmp eq <2 x i64> [[X]], [[Y]]
141+
; CHECK-NEXT: [[B1:%.*]] = icmp sgt <2 x i64> [[Z]], [[Y]]
142+
; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i1> [[B0]], <2 x i1> [[B1]], <2 x i32> <i32 3, i32 0>
143+
; CHECK-NEXT: [[R:%.*]] = sext <2 x i1> [[S]] to <2 x i64>
144+
; CHECK-NEXT: ret <2 x i64> [[R]]
145+
;
146+
%b0 = icmp eq <2 x i64> %x, %y
147+
%b1 = icmp sgt <2 x i64> %z, %y
148+
%s = shufflevector <2 x i1> %b0, <2 x i1> %b1, <2 x i32> <i32 3, i32 0>
149+
%r = sext <2 x i1> %s to <2 x i64>
150+
ret <2 x i64> %r
151+
}
152+
153+
; negative test - mismatched types
; Both shuffle inputs are `icmp eq` and both yield <4 x i1>, but the compared
; operand types differ - <4 x i64> for the first and <4 x i32> for the second.
; Mask 3,1,1,6 takes lanes 3, 1 and 1 of the first compare and lane 2 of the
; second; the result is sign-extended to i32.
; The assertions reproduce the input instructions one for one.
154+
155+
define <4 x i32> @shuf_icmp_eq_v4i64_v4i32_mismatch_type(<4 x i64> %x, <4 x i64> %y, <4 x i32> %z, <4 x i32> %w) {
156+
; CHECK-LABEL: define <4 x i32> @shuf_icmp_eq_v4i64_v4i32_mismatch_type(
157+
; CHECK-SAME: <4 x i64> [[X:%.*]], <4 x i64> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
158+
; CHECK-NEXT: [[B0:%.*]] = icmp eq <4 x i64> [[X]], [[Y]]
159+
; CHECK-NEXT: [[B1:%.*]] = icmp eq <4 x i32> [[Z]], [[W]]
160+
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 3, i32 1, i32 1, i32 6>
161+
; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
162+
; CHECK-NEXT: ret <4 x i32> [[R]]
163+
;
164+
%b0 = icmp eq <4 x i64> %x, %y
165+
%b1 = icmp eq <4 x i32> %z, %w
166+
%s = shufflevector <4 x i1> %b0, <4 x i1> %b1, <4 x i32> <i32 3, i32 1, i32 1, i32 6>
167+
%r = sext <4 x i1> %s to <4 x i32>
168+
ret <4 x i32> %r
169+
}
170+
171+
; negative test - extra use of a compare
; Same instruction sequence as the plain ugt v4i32 test (two `icmp ugt`, shuffle
; mask 1,3,5,7, sext to i32), except that the first compare result is also
; passed to @use, so it has a user other than the shuffle.
; The assertions reproduce the input instructions, including the call, one for
; one.
172+
173+
define <4 x i32> @shuf_icmp_ugt_v4i32_use(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) {
174+
; CHECK-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32_use(
175+
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
176+
; CHECK-NEXT: [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
177+
; CHECK-NEXT: [[C1:%.*]] = icmp ugt <4 x i32> [[Z]], [[W]]
178+
; CHECK-NEXT: call void @use(<4 x i1> [[C0]])
179+
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
180+
; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
181+
; CHECK-NEXT: ret <4 x i32> [[R]]
182+
;
183+
%c0 = icmp ugt <4 x i32> %x, %y
184+
%c1 = icmp ugt <4 x i32> %z, %w
185+
call void @use(<4 x i1> %c0)
186+
%s = shufflevector <4 x i1> %c0, <4 x i1> %c1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
187+
%r = sext <4 x i1> %s to <4 x i32>
188+
ret <4 x i32> %r
189+
}

0 commit comments

Comments
 (0)