Skip to content

Commit 254df2e

Browse files
committed
[VectorCombine][X86] shuffle-of-binops.ll - split off foldShuffleOfBinops tests from shuffle.ll
1 parent e84b2fb commit 254df2e

File tree

2 files changed

+206
-198
lines changed

2 files changed

+206
-198
lines changed
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX

; External function with no body in this file. The "uses" negative tests call
; it so that one of the binops has a user other than the shuffle.
declare void @use(<4 x i32>)

; Shuffle is much cheaper than fdiv. FMF are intersected.
; Both fdivs share the divisor %y (op1). The expected output shuffles the
; dividends (%x, %z) together and %y on its own, then performs a single fdiv.
; The inputs carry 'fast' and 'arcp'; the expected fdiv keeps only 'arcp'.

define <4 x float> @shuf_fdiv_v4f32_yy(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; CHECK-LABEL: define <4 x float> @shuf_fdiv_v4f32_yy(
; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Z]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> poison, <4 x i32> <i32 1, i32 3, i32 1, i32 3>
; CHECK-NEXT: [[R:%.*]] = fdiv arcp <4 x float> [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret <4 x float> [[R]]
;
  %b0 = fdiv fast <4 x float> %x, %y
  %b1 = fdiv arcp <4 x float> %z, %y
  %r = shufflevector <4 x float> %b0, <4 x float> %b1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ret <4 x float> %r
}

; Common operand is op0 of the binops.
; Both adds take %x as op0. The shuffle mask has poison in lanes 0 and 1, and
; the expected operand shuffles keep those lanes poison.

define <4 x i32> @shuf_add_v4i32_xx(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: define <4 x i32> @shuf_add_v4i32_xx(
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 0>
; CHECK-NEXT: [[R1:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[Z]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
; CHECK-NEXT: [[R2:%.*]] = add <4 x i32> [[TMP1]], [[R1]]
; CHECK-NEXT: ret <4 x i32> [[R2]]
;
  %b0 = add <4 x i32> %x, %y
  %b1 = add <4 x i32> %x, %z
  %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
  ret <4 x i32> %r
}

; For commutative instructions, common operand may be swapped.
; Here %x is op0 of the first fmul and op1 of the second. The expected output
; shuffles the non-common operands (%y, %z) together and %x on its own.

define <4 x float> @shuf_fmul_v4f32_xx_swap(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; CHECK-LABEL: define <4 x float> @shuf_fmul_v4f32_xx_swap(
; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[Z]], <4 x i32> <i32 0, i32 3, i32 4, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 0, i32 3>
; CHECK-NEXT: [[R:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <4 x float> [[R]]
;
  %b0 = fmul <4 x float> %x, %y
  %b1 = fmul <4 x float> %z, %x
  %r = shufflevector <4 x float> %b0, <4 x float> %b1, <4 x i32> <i32 0, i32 3, i32 4, i32 7>
  ret <4 x float> %r
}

; For commutative instructions, common operand may be swapped.
; Here %y is op1 of the first 'and' and op0 of the second. The expected output
; shuffles %y on its own and the non-common operands (%x, %z) together.

define <2 x i64> @shuf_and_v2i64_yy_swap(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
; CHECK-LABEL: define <2 x i64> @shuf_and_v2i64_yy_swap(
; CHECK-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[Y]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Z]], <2 x i32> <i32 3, i32 0>
; CHECK-NEXT: [[R:%.*]] = and <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <2 x i64> [[R]]
;
  %b0 = and <2 x i64> %x, %y
  %b1 = and <2 x i64> %y, %z
  %r = shufflevector <2 x i64> %b0, <2 x i64> %b1, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %r
}

; non-commutative binop, but common op0
; Both shifts take %x as the value being shifted, so the fold is still
; expected: %x is shuffled on its own and the shift amounts (%y, %z) are
; shuffled together before a single shl.

define <4 x i32> @shuf_shl_v4i32_xx(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: define <4 x i32> @shuf_shl_v4i32_xx(
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[Z]], <4 x i32> <i32 3, i32 1, i32 1, i32 6>
; CHECK-NEXT: [[R:%.*]] = shl <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %b0 = shl <4 x i32> %x, %y
  %b1 = shl <4 x i32> %x, %z
  %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 3, i32 1, i32 1, i32 6>
  ret <4 x i32> %r
}

; negative test - common operand, but not commutable
; %x is the shifted value in the first shl but the shift amount in the second.
; The CHECK lines expect the IR to be left unchanged.

define <4 x i32> @shuf_shl_v4i32_xx_swap(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: define <4 x i32> @shuf_shl_v4i32_xx_swap(
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[B0:%.*]] = shl <4 x i32> [[X]], [[Y]]
; CHECK-NEXT: [[B1:%.*]] = shl <4 x i32> [[Z]], [[X]]
; CHECK-NEXT: [[R1:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 3, i32 2, i32 2, i32 5>
; CHECK-NEXT: ret <4 x i32> [[R1]]
;
  %b0 = shl <4 x i32> %x, %y
  %b1 = shl <4 x i32> %z, %x
  %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 3, i32 2, i32 2, i32 5>
  ret <4 x i32> %r
}

; negative test - mismatched opcodes
; The two binops share op1 (%y), but one is a sub and the other an add.
; The CHECK lines expect the IR to be left unchanged.

define <2 x i64> @shuf_sub_add_v2i64_yy(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
; CHECK-LABEL: define <2 x i64> @shuf_sub_add_v2i64_yy(
; CHECK-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[B0:%.*]] = sub <2 x i64> [[X]], [[Y]]
; CHECK-NEXT: [[B1:%.*]] = add <2 x i64> [[Z]], [[Y]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i64> [[B0]], <2 x i64> [[B1]], <2 x i32> <i32 3, i32 0>
; CHECK-NEXT: ret <2 x i64> [[R]]
;
  %b0 = sub <2 x i64> %x, %y
  %b1 = add <2 x i64> %z, %y
  %r = shufflevector <2 x i64> %b0, <2 x i64> %b1, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %r
}

; negative test - type change via shuffle
; The fmuls operate on <4 x float>, but the 8-element mask makes the shuffle
; produce <8 x float>. The CHECK lines expect the IR to be left unchanged.

define <8 x float> @shuf_fmul_v4f32_xx_type(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; CHECK-LABEL: define <8 x float> @shuf_fmul_v4f32_xx_type(
; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[B0:%.*]] = fmul <4 x float> [[X]], [[Y]]
; CHECK-NEXT: [[B1:%.*]] = fmul <4 x float> [[Z]], [[X]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[B0]], <4 x float> [[B1]], <8 x i32> <i32 0, i32 3, i32 4, i32 7, i32 0, i32 1, i32 1, i32 6>
; CHECK-NEXT: ret <8 x float> [[R]]
;
  %b0 = fmul <4 x float> %x, %y
  %b1 = fmul <4 x float> %z, %x
  %r = shufflevector <4 x float> %b0, <4 x float> %b1, <8 x i32> <i32 0, i32 3, i32 4, i32 7, i32 0, i32 1, i32 1, i32 6>
  ret <8 x float> %r
}

; negative test - uses
; The first binop (%b0) is also passed to @use, so it has a user besides the
; shuffle. The CHECK lines expect the IR to be left unchanged.

define <4 x i32> @shuf_lshr_v4i32_yy_use1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: define <4 x i32> @shuf_lshr_v4i32_yy_use1(
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[B0:%.*]] = lshr <4 x i32> [[X]], [[Y]]
; CHECK-NEXT: call void @use(<4 x i32> [[B0]])
; CHECK-NEXT: [[B1:%.*]] = lshr <4 x i32> [[Z]], [[Y]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %b0 = lshr <4 x i32> %x, %y
  call void @use(<4 x i32> %b0)
  %b1 = lshr <4 x i32> %z, %y
  %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  ret <4 x i32> %r
}

; negative test - uses
; The second binop (%b1) is also passed to @use, so it has a user besides the
; shuffle. The CHECK lines expect the IR to be left unchanged.

define <4 x i32> @shuf_mul_v4i32_yy_use2(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: define <4 x i32> @shuf_mul_v4i32_yy_use2(
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[B0:%.*]] = mul <4 x i32> [[X]], [[Y]]
; CHECK-NEXT: [[B1:%.*]] = mul <4 x i32> [[Z]], [[Y]]
; CHECK-NEXT: call void @use(<4 x i32> [[B1]])
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %b0 = mul <4 x i32> %x, %y
  %b1 = mul <4 x i32> %z, %y
  call void @use(<4 x i32> %b1)
  %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ret <4 x i32> %r
}

; negative test - must have matching operand
; The two fadds use four distinct values (%x, %y, %z, %w), so there is no
; shared operand. The CHECK lines expect the IR to be left unchanged.

define <4 x float> @shuf_fadd_v4f32_no_common_op(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %w) {
; CHECK-LABEL: define <4 x float> @shuf_fadd_v4f32_no_common_op(
; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]], <4 x float> [[W:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[B0:%.*]] = fadd <4 x float> [[X]], [[Y]]
; CHECK-NEXT: [[B1:%.*]] = fadd <4 x float> [[Z]], [[W]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[B0]], <4 x float> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT: ret <4 x float> [[R]]
;
  %b0 = fadd <4 x float> %x, %y
  %b1 = fadd <4 x float> %z, %w
  %r = shufflevector <4 x float> %b0, <4 x float> %b1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ret <4 x float> %r
}

; negative test - binops may be relatively cheap
; The two 'and's on <16 x i16> share %y, yet the CHECK lines expect the IR to
; be left unchanged.
; NOTE(review): presumably the 16-lane operand shuffles the fold would need are
; costed higher than the 'and' it would save - confirm against the cost model.

define <16 x i16> @shuf_and_v16i16_yy_expensive_shuf(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) {
; CHECK-LABEL: define <16 x i16> @shuf_and_v16i16_yy_expensive_shuf(
; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]], <16 x i16> [[Z:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[B0:%.*]] = and <16 x i16> [[X]], [[Y]]
; CHECK-NEXT: [[B1:%.*]] = and <16 x i16> [[Y]], [[Z]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <16 x i16> [[B0]], <16 x i16> [[B1]], <16 x i32> <i32 15, i32 22, i32 25, i32 13, i32 28, i32 0, i32 poison, i32 3, i32 0, i32 30, i32 3, i32 7, i32 9, i32 19, i32 2, i32 22>
; CHECK-NEXT: ret <16 x i16> [[R]]
;
  %b0 = and <16 x i16> %x, %y
  %b1 = and <16 x i16> %y, %z
  %r = shufflevector <16 x i16> %b0, <16 x i16> %b1, <16 x i32> <i32 15, i32 22, i32 25, i32 13, i32 28, i32 0, i32 poison, i32 3, i32 0, i32 30, i32 3, i32 7, i32 9, i32 19, i32 2, i32 22>
  ret <16 x i16> %r
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; AVX: {{.*}}
; SSE: {{.*}}

0 commit comments

Comments
 (0)