Skip to content

Commit c5a21c1

Browse files
committed
[PhaseOrdering][X86] Add test coverage based off #111431
Add tests for the concatenation of boolean vectors bitcast to integers - similar to the MOVMSK pattern.
1 parent 5041d06 commit c5a21c1

File tree

1 file changed

+252
-0
lines changed

1 file changed

+252
-0
lines changed
Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s
3+
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s
4+
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s
5+
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s
6+
7+
; Concatenates the sign-bit masks of two <16 x i8> vectors into one i32.
; Each vector is compared "slt zero", the resulting <16 x i1> is bitcast to
; i16 and zero-extended to i32. The mask of %v0 is shifted left by 16 (bits
; 16-31) and or'ed with the mask of %v1 (bits 0-15).
; The assertions expect the same instruction sequence to come out of the
; pipeline, i.e. the pieces are not merged into a single wider mask.
define i32 @movmsk_i32_v32i8_v16i8(<16 x i8> %v0, <16 x i8> %v1) {
8+
; CHECK-LABEL: @movmsk_i32_v32i8_v16i8(
9+
; CHECK-NEXT: [[C0:%.*]] = icmp slt <16 x i8> [[V0:%.*]], zeroinitializer
10+
; CHECK-NEXT: [[C1:%.*]] = icmp slt <16 x i8> [[V1:%.*]], zeroinitializer
11+
; CHECK-NEXT: [[B0:%.*]] = bitcast <16 x i1> [[C0]] to i16
12+
; CHECK-NEXT: [[B1:%.*]] = bitcast <16 x i1> [[C1]] to i16
13+
; CHECK-NEXT: [[Z0:%.*]] = zext i16 [[B0]] to i32
14+
; CHECK-NEXT: [[Z1:%.*]] = zext i16 [[B1]] to i32
15+
; CHECK-NEXT: [[S0:%.*]] = shl nuw i32 [[Z0]], 16
16+
; CHECK-NEXT: [[OR:%.*]] = or disjoint i32 [[S0]], [[Z1]]
17+
; CHECK-NEXT: ret i32 [[OR]]
18+
;
19+
%c0 = icmp slt <16 x i8> %v0, zeroinitializer
20+
%c1 = icmp slt <16 x i8> %v1, zeroinitializer
21+
%b0 = bitcast <16 x i1> %c0 to i16
22+
%b1 = bitcast <16 x i1> %c1 to i16
23+
%z0 = zext i16 %b0 to i32
24+
%z1 = zext i16 %b1 to i32
25+
%s0 = shl nuw i32 %z0, 16
26+
%or = or disjoint i32 %s0, %z1
27+
ret i32 %or
28+
}
29+
30+
; Concatenates the sign-bit masks of two <4 x i32> vectors into one i32.
; Each "slt zero" compare result (<4 x i1>) is bitcast to i4 and zero-extended
; to i32. The mask of %v0 is shifted left by 4 (bits 4-7) and or'ed with the
; mask of %v1 (bits 0-3); all higher result bits are zero.
; The assertions expect the same instruction sequence, with the shift
; additionally carrying the nsw flag.
define i32 @movmsk_i32_v8i32_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
31+
; CHECK-LABEL: @movmsk_i32_v8i32_v4i32(
32+
; CHECK-NEXT: [[C0:%.*]] = icmp slt <4 x i32> [[V0:%.*]], zeroinitializer
33+
; CHECK-NEXT: [[C1:%.*]] = icmp slt <4 x i32> [[V1:%.*]], zeroinitializer
34+
; CHECK-NEXT: [[B0:%.*]] = bitcast <4 x i1> [[C0]] to i4
35+
; CHECK-NEXT: [[B1:%.*]] = bitcast <4 x i1> [[C1]] to i4
36+
; CHECK-NEXT: [[Z0:%.*]] = zext i4 [[B0]] to i32
37+
; CHECK-NEXT: [[Z1:%.*]] = zext i4 [[B1]] to i32
38+
; CHECK-NEXT: [[S0:%.*]] = shl nuw nsw i32 [[Z0]], 4
39+
; CHECK-NEXT: [[OR:%.*]] = or disjoint i32 [[S0]], [[Z1]]
40+
; CHECK-NEXT: ret i32 [[OR]]
41+
;
42+
%c0 = icmp slt <4 x i32> %v0, zeroinitializer
43+
%c1 = icmp slt <4 x i32> %v1, zeroinitializer
44+
%b0 = bitcast <4 x i1> %c0 to i4
45+
%b1 = bitcast <4 x i1> %c1 to i4
46+
%z0 = zext i4 %b0 to i32
47+
%z1 = zext i4 %b1 to i32
48+
%s0 = shl nuw i32 %z0, 4
49+
%or = or disjoint i32 %s0, %z1
50+
ret i32 %or
51+
}
52+
53+
; Same pattern as the i32 variant for two <16 x i8> inputs, but the result is
; widened to i64: each i16 sign mask is zero-extended to i64, the mask of %v0
; is shifted left by 16 (bits 16-31) and or'ed with the mask of %v1
; (bits 0-15). Bits 32-63 of the result are zero.
; The assertions expect the same instruction sequence, with the shift
; additionally carrying the nsw flag.
define i64 @movmsk_i64_v32i8_v16i8(<16 x i8> %v0, <16 x i8> %v1) {
54+
; CHECK-LABEL: @movmsk_i64_v32i8_v16i8(
55+
; CHECK-NEXT: [[C0:%.*]] = icmp slt <16 x i8> [[V0:%.*]], zeroinitializer
56+
; CHECK-NEXT: [[C1:%.*]] = icmp slt <16 x i8> [[V1:%.*]], zeroinitializer
57+
; CHECK-NEXT: [[B0:%.*]] = bitcast <16 x i1> [[C0]] to i16
58+
; CHECK-NEXT: [[B1:%.*]] = bitcast <16 x i1> [[C1]] to i16
59+
; CHECK-NEXT: [[Z0:%.*]] = zext i16 [[B0]] to i64
60+
; CHECK-NEXT: [[Z1:%.*]] = zext i16 [[B1]] to i64
61+
; CHECK-NEXT: [[S0:%.*]] = shl nuw nsw i64 [[Z0]], 16
62+
; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[S0]], [[Z1]]
63+
; CHECK-NEXT: ret i64 [[OR]]
64+
;
65+
%c0 = icmp slt <16 x i8> %v0, zeroinitializer
66+
%c1 = icmp slt <16 x i8> %v1, zeroinitializer
67+
%b0 = bitcast <16 x i1> %c0 to i16
68+
%b1 = bitcast <16 x i1> %c1 to i16
69+
%z0 = zext i16 %b0 to i64
70+
%z1 = zext i16 %b1 to i64
71+
%s0 = shl nuw i64 %z0, 16
72+
%or = or disjoint i64 %s0, %z1
73+
ret i64 %or
74+
}
75+
76+
; Concatenates the sign-bit masks of two <4 x i32> vectors into an i64.
; Each "slt zero" compare result (<4 x i1>) is bitcast to i4 and zero-extended
; to i64. The mask of %v0 is shifted left by 4 (bits 4-7) and or'ed with the
; mask of %v1 (bits 0-3); all higher result bits are zero.
; The assertions expect the same instruction sequence, with the shift
; additionally carrying the nsw flag.
define i64 @movmsk_i64_v8i32_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
77+
; CHECK-LABEL: @movmsk_i64_v8i32_v4i32(
78+
; CHECK-NEXT: [[C0:%.*]] = icmp slt <4 x i32> [[V0:%.*]], zeroinitializer
79+
; CHECK-NEXT: [[C1:%.*]] = icmp slt <4 x i32> [[V1:%.*]], zeroinitializer
80+
; CHECK-NEXT: [[B0:%.*]] = bitcast <4 x i1> [[C0]] to i4
81+
; CHECK-NEXT: [[B1:%.*]] = bitcast <4 x i1> [[C1]] to i4
82+
; CHECK-NEXT: [[Z0:%.*]] = zext i4 [[B0]] to i64
83+
; CHECK-NEXT: [[Z1:%.*]] = zext i4 [[B1]] to i64
84+
; CHECK-NEXT: [[S0:%.*]] = shl nuw nsw i64 [[Z0]], 4
85+
; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[S0]], [[Z1]]
86+
; CHECK-NEXT: ret i64 [[OR]]
87+
;
88+
%c0 = icmp slt <4 x i32> %v0, zeroinitializer
89+
%c1 = icmp slt <4 x i32> %v1, zeroinitializer
90+
%b0 = bitcast <4 x i1> %c0 to i4
91+
%b1 = bitcast <4 x i1> %c1 to i4
92+
%z0 = zext i4 %b0 to i64
93+
%z1 = zext i4 %b1 to i64
94+
%s0 = shl nuw i64 %z0, 4
95+
%or = or disjoint i64 %s0, %z1
96+
ret i64 %or
97+
}
98+
99+
; Concatenates the sign-bit masks of four <16 x i8> vectors into one i64.
; Each "slt zero" compare result (<16 x i1>) is bitcast to i16 and
; zero-extended to i64, then placed as follows:
;   %v0 -> bits 48-63 (shl 48)
;   %v1 -> bits 32-47 (shl 32)
;   %v2 -> bits 16-31 (shl 16)
;   %v3 -> bits  0-15 (no shift)
; The four pieces are combined with a tree of "or disjoint" instructions.
; The assertions expect the same set of instructions; relative to the input,
; the shifts by 32 and 16 gain nsw and two of the or's have their operands
; commuted.
define i64 @movmsk_i64_v64i8_v16i8(<16 x i8> %v0, <16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
100+
; CHECK-LABEL: @movmsk_i64_v64i8_v16i8(
101+
; CHECK-NEXT: [[C0:%.*]] = icmp slt <16 x i8> [[V0:%.*]], zeroinitializer
102+
; CHECK-NEXT: [[C1:%.*]] = icmp slt <16 x i8> [[V1:%.*]], zeroinitializer
103+
; CHECK-NEXT: [[C2:%.*]] = icmp slt <16 x i8> [[V2:%.*]], zeroinitializer
104+
; CHECK-NEXT: [[C3:%.*]] = icmp slt <16 x i8> [[V3:%.*]], zeroinitializer
105+
; CHECK-NEXT: [[B0:%.*]] = bitcast <16 x i1> [[C0]] to i16
106+
; CHECK-NEXT: [[B1:%.*]] = bitcast <16 x i1> [[C1]] to i16
107+
; CHECK-NEXT: [[B2:%.*]] = bitcast <16 x i1> [[C2]] to i16
108+
; CHECK-NEXT: [[B3:%.*]] = bitcast <16 x i1> [[C3]] to i16
109+
; CHECK-NEXT: [[Z0:%.*]] = zext i16 [[B0]] to i64
110+
; CHECK-NEXT: [[Z1:%.*]] = zext i16 [[B1]] to i64
111+
; CHECK-NEXT: [[Z2:%.*]] = zext i16 [[B2]] to i64
112+
; CHECK-NEXT: [[Z3:%.*]] = zext i16 [[B3]] to i64
113+
; CHECK-NEXT: [[S0:%.*]] = shl nuw i64 [[Z0]], 48
114+
; CHECK-NEXT: [[S1:%.*]] = shl nuw nsw i64 [[Z1]], 32
115+
; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i64 [[Z2]], 16
116+
; CHECK-NEXT: [[OR0:%.*]] = or disjoint i64 [[S1]], [[S0]]
117+
; CHECK-NEXT: [[OR1:%.*]] = or disjoint i64 [[S2]], [[Z3]]
118+
; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[OR1]], [[OR0]]
119+
; CHECK-NEXT: ret i64 [[OR]]
120+
;
121+
%c0 = icmp slt <16 x i8> %v0, zeroinitializer
122+
%c1 = icmp slt <16 x i8> %v1, zeroinitializer
123+
%c2 = icmp slt <16 x i8> %v2, zeroinitializer
124+
%c3 = icmp slt <16 x i8> %v3, zeroinitializer
125+
%b0 = bitcast <16 x i1> %c0 to i16
126+
%b1 = bitcast <16 x i1> %c1 to i16
127+
%b2 = bitcast <16 x i1> %c2 to i16
128+
%b3 = bitcast <16 x i1> %c3 to i16
129+
%z0 = zext i16 %b0 to i64
130+
%z1 = zext i16 %b1 to i64
131+
%z2 = zext i16 %b2 to i64
132+
%z3 = zext i16 %b3 to i64
133+
%s0 = shl nuw i64 %z0, 48
134+
%s1 = shl nuw i64 %z1, 32
135+
%s2 = shl nuw i64 %z2, 16
136+
%or0 = or disjoint i64 %s0, %s1
137+
%or1 = or disjoint i64 %s2, %z3
138+
%or = or disjoint i64 %or0, %or1
139+
ret i64 %or
140+
}
141+
142+
; Concatenates the sign-bit masks of four <4 x i32> vectors into an i64.
; Each "slt zero" compare result (<4 x i1>) is bitcast to i4 and zero-extended
; to i64, then placed as follows:
;   %v0 -> bits 12-15 (shl 12)
;   %v1 -> bits  8-11 (shl 8)
;   %v2 -> bits  4-7  (shl 4)
;   %v3 -> bits  0-3  (no shift)
; Only the low 16 bits of the result can be set.
; NOTE(review): the function name says "v32i32", but four <4 x i32> inputs
; amount to 16 lanes; the name looks like a misnomer - confirm before relying
; on it.
; The assertions expect the same set of instructions; relative to the input,
; all three shifts gain nsw and two of the or's have their operands commuted.
define i64 @movmsk_i64_v32i32_v4i32(<4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
143+
; CHECK-LABEL: @movmsk_i64_v32i32_v4i32(
144+
; CHECK-NEXT: [[C0:%.*]] = icmp slt <4 x i32> [[V0:%.*]], zeroinitializer
145+
; CHECK-NEXT: [[C1:%.*]] = icmp slt <4 x i32> [[V1:%.*]], zeroinitializer
146+
; CHECK-NEXT: [[C2:%.*]] = icmp slt <4 x i32> [[V2:%.*]], zeroinitializer
147+
; CHECK-NEXT: [[C3:%.*]] = icmp slt <4 x i32> [[V3:%.*]], zeroinitializer
148+
; CHECK-NEXT: [[B0:%.*]] = bitcast <4 x i1> [[C0]] to i4
149+
; CHECK-NEXT: [[B1:%.*]] = bitcast <4 x i1> [[C1]] to i4
150+
; CHECK-NEXT: [[B2:%.*]] = bitcast <4 x i1> [[C2]] to i4
151+
; CHECK-NEXT: [[B3:%.*]] = bitcast <4 x i1> [[C3]] to i4
152+
; CHECK-NEXT: [[Z0:%.*]] = zext i4 [[B0]] to i64
153+
; CHECK-NEXT: [[Z1:%.*]] = zext i4 [[B1]] to i64
154+
; CHECK-NEXT: [[Z2:%.*]] = zext i4 [[B2]] to i64
155+
; CHECK-NEXT: [[Z3:%.*]] = zext i4 [[B3]] to i64
156+
; CHECK-NEXT: [[S0:%.*]] = shl nuw nsw i64 [[Z0]], 12
157+
; CHECK-NEXT: [[S1:%.*]] = shl nuw nsw i64 [[Z1]], 8
158+
; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i64 [[Z2]], 4
159+
; CHECK-NEXT: [[OR0:%.*]] = or disjoint i64 [[S1]], [[S0]]
160+
; CHECK-NEXT: [[OR1:%.*]] = or disjoint i64 [[S2]], [[Z3]]
161+
; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[OR1]], [[OR0]]
162+
; CHECK-NEXT: ret i64 [[OR]]
163+
;
164+
%c0 = icmp slt <4 x i32> %v0, zeroinitializer
165+
%c1 = icmp slt <4 x i32> %v1, zeroinitializer
166+
%c2 = icmp slt <4 x i32> %v2, zeroinitializer
167+
%c3 = icmp slt <4 x i32> %v3, zeroinitializer
168+
%b0 = bitcast <4 x i1> %c0 to i4
169+
%b1 = bitcast <4 x i1> %c1 to i4
170+
%b2 = bitcast <4 x i1> %c2 to i4
171+
%b3 = bitcast <4 x i1> %c3 to i4
172+
%z0 = zext i4 %b0 to i64
173+
%z1 = zext i4 %b1 to i64
174+
%z2 = zext i4 %b2 to i64
175+
%z3 = zext i4 %b3 to i64
176+
%s0 = shl nuw i64 %z0, 12
177+
%s1 = shl nuw i64 %z1, 8
178+
%s2 = shl nuw i64 %z2, 4
179+
%or0 = or disjoint i64 %s0, %s1
180+
%or1 = or disjoint i64 %s2, %z3
181+
%or = or disjoint i64 %or0, %or1
182+
ret i64 %or
183+
}
184+
185+
; Concatenates the sign-bit masks of two <32 x i8> vectors into one i64.
; Each "slt zero" compare result (<32 x i1>) is bitcast to i32 and
; zero-extended to i64. The mask of %v0 is shifted left by 32 (bits 32-63)
; and or'ed with the mask of %v1 (bits 0-31).
; The assertions expect the same instruction sequence to come out of the
; pipeline, i.e. the pieces are not merged into a single wider mask.
define i64 @movmsk_i64_v64i8_v32i8(<32 x i8> %v0, <32 x i8> %v1) {
186+
; CHECK-LABEL: @movmsk_i64_v64i8_v32i8(
187+
; CHECK-NEXT: [[C0:%.*]] = icmp slt <32 x i8> [[V0:%.*]], zeroinitializer
188+
; CHECK-NEXT: [[C1:%.*]] = icmp slt <32 x i8> [[V1:%.*]], zeroinitializer
189+
; CHECK-NEXT: [[B0:%.*]] = bitcast <32 x i1> [[C0]] to i32
190+
; CHECK-NEXT: [[B1:%.*]] = bitcast <32 x i1> [[C1]] to i32
191+
; CHECK-NEXT: [[Z0:%.*]] = zext i32 [[B0]] to i64
192+
; CHECK-NEXT: [[Z1:%.*]] = zext i32 [[B1]] to i64
193+
; CHECK-NEXT: [[S0:%.*]] = shl nuw i64 [[Z0]], 32
194+
; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[S0]], [[Z1]]
195+
; CHECK-NEXT: ret i64 [[OR]]
196+
;
197+
%c0 = icmp slt <32 x i8> %v0, zeroinitializer
198+
%c1 = icmp slt <32 x i8> %v1, zeroinitializer
199+
%b0 = bitcast <32 x i1> %c0 to i32
200+
%b1 = bitcast <32 x i1> %c1 to i32
201+
%z0 = zext i32 %b0 to i64
202+
%z1 = zext i32 %b1 to i64
203+
%s0 = shl nuw i64 %z0, 32
204+
%or = or disjoint i64 %s0, %z1
205+
ret i64 %or
206+
}
207+
208+
; Concatenates the sign-bit masks of two <8 x i32> vectors into one i32.
; Each "slt zero" compare result (<8 x i1>) is bitcast to i8 and zero-extended
; to i32. The mask of %v0 is shifted left by 8 (bits 8-15) and or'ed with the
; mask of %v1 (bits 0-7); bits 16-31 of the result are zero.
; The assertions expect the same instruction sequence, with the shift
; additionally carrying the nsw flag.
define i32 @movmsk_i32_v16i32_v8i32(<8 x i32> %v0, <8 x i32> %v1) {
209+
; CHECK-LABEL: @movmsk_i32_v16i32_v8i32(
210+
; CHECK-NEXT: [[C0:%.*]] = icmp slt <8 x i32> [[V0:%.*]], zeroinitializer
211+
; CHECK-NEXT: [[C1:%.*]] = icmp slt <8 x i32> [[V1:%.*]], zeroinitializer
212+
; CHECK-NEXT: [[B0:%.*]] = bitcast <8 x i1> [[C0]] to i8
213+
; CHECK-NEXT: [[B1:%.*]] = bitcast <8 x i1> [[C1]] to i8
214+
; CHECK-NEXT: [[Z0:%.*]] = zext i8 [[B0]] to i32
215+
; CHECK-NEXT: [[Z1:%.*]] = zext i8 [[B1]] to i32
216+
; CHECK-NEXT: [[S0:%.*]] = shl nuw nsw i32 [[Z0]], 8
217+
; CHECK-NEXT: [[OR:%.*]] = or disjoint i32 [[S0]], [[Z1]]
218+
; CHECK-NEXT: ret i32 [[OR]]
219+
;
220+
%c0 = icmp slt <8 x i32> %v0, zeroinitializer
221+
%c1 = icmp slt <8 x i32> %v1, zeroinitializer
222+
%b0 = bitcast <8 x i1> %c0 to i8
223+
%b1 = bitcast <8 x i1> %c1 to i8
224+
%z0 = zext i8 %b0 to i32
225+
%z1 = zext i8 %b1 to i32
226+
%s0 = shl nuw i32 %z0, 8
227+
%or = or disjoint i32 %s0, %z1
228+
ret i32 %or
229+
}
230+
231+
; Builds an i64 from two byte-wise equality masks that share the operand %a0.
; %a0 is compared for equality against %a1 and against %a2; each <32 x i1>
; result is bitcast to i32 and zero-extended to i64. The (%a0 == %a1) mask is
; shifted left by 32 (bits 32-63) and or'ed with the (%a0 == %a2) mask
; (bits 0-31).
; The assertions expect the same instruction sequence to come out of the
; pipeline, i.e. the two masks are not merged into a single wider compare.
define i64 @PR111431(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) {
232+
; CHECK-LABEL: @PR111431(
233+
; CHECK-NEXT: [[C01:%.*]] = icmp eq <32 x i8> [[A0:%.*]], [[A1:%.*]]
234+
; CHECK-NEXT: [[C02:%.*]] = icmp eq <32 x i8> [[A0]], [[A2:%.*]]
235+
; CHECK-NEXT: [[B01:%.*]] = bitcast <32 x i1> [[C01]] to i32
236+
; CHECK-NEXT: [[B02:%.*]] = bitcast <32 x i1> [[C02]] to i32
237+
; CHECK-NEXT: [[Z01:%.*]] = zext i32 [[B01]] to i64
238+
; CHECK-NEXT: [[Z02:%.*]] = zext i32 [[B02]] to i64
239+
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i64 [[Z01]], 32
240+
; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[SHL]], [[Z02]]
241+
; CHECK-NEXT: ret i64 [[OR]]
242+
;
243+
%c01 = icmp eq <32 x i8> %a0, %a1
244+
%c02 = icmp eq <32 x i8> %a0, %a2
245+
%b01 = bitcast <32 x i1> %c01 to i32
246+
%b02 = bitcast <32 x i1> %c02 to i32
247+
%z01 = zext i32 %b01 to i64
248+
%z02 = zext i32 %b02 to i64
249+
%shl = shl nuw i64 %z01, 32
250+
%or = or disjoint i64 %shl, %z02
251+
ret i64 %or
252+
}

0 commit comments

Comments
 (0)