Skip to content

Commit 0e3cbe2

Browse files
committed
Add tests
1 parent 4a7a27c commit 0e3cbe2

File tree

2 files changed

+273
-0
lines changed

2 files changed

+273
-0
lines changed
Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mattr=+sve < %s | FileCheck %s
3+
4+
target triple = "aarch64-unknown-linux-gnu"
5+
6+
7+
; Extracts lane 1 (a constant index) of an unsigned less-than compare between
; %a and a constant splat of 5 on the right-hand side. The expected output
; performs the whole vector compare (cmhi), narrows it, and moves lane 1 out.
define i1 @extract_icmp_v4i32_const_splat_rhs(<4 x i32> %a) {
8+
; CHECK-LABEL: extract_icmp_v4i32_const_splat_rhs:
9+
; CHECK: // %bb.0:
10+
; CHECK-NEXT: movi v1.4s, #5
11+
; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s
12+
; CHECK-NEXT: xtn v0.4h, v0.4s
13+
; CHECK-NEXT: umov w8, v0.h[1]
14+
; CHECK-NEXT: and w0, w8, #0x1
15+
; CHECK-NEXT: ret
16+
%icmp = icmp ult <4 x i32> %a, splat (i32 5)
17+
%ext = extractelement <4 x i1> %icmp, i32 1
18+
ret i1 %ext
19+
}
20+
21+
; Same shape as the const_splat_rhs test, but the constant splat (7) is the
; left-hand operand of the unsigned less-than compare, so the expected cmhi
; has %a as its first source operand.
define i1 @extract_icmp_v4i32_const_splat_lhs(<4 x i32> %a) {
22+
; CHECK-LABEL: extract_icmp_v4i32_const_splat_lhs:
23+
; CHECK: // %bb.0:
24+
; CHECK-NEXT: movi v1.4s, #7
25+
; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s
26+
; CHECK-NEXT: xtn v0.4h, v0.4s
27+
; CHECK-NEXT: umov w8, v0.h[1]
28+
; CHECK-NEXT: and w0, w8, #0x1
29+
; CHECK-NEXT: ret
30+
%icmp = icmp ult <4 x i32> splat(i32 7), %a
31+
%ext = extractelement <4 x i1> %icmp, i32 1
32+
ret i1 %ext
33+
}
34+
35+
; Extracts lane 1 of an unsigned less-than compare against a constant vector
; whose lanes differ (5, 234, -1, 7), i.e. a constant that is not a splat.
; The expected output loads the constant from the constant pool (.LCPI2_0).
define i1 @extract_icmp_v4i32_const_vec_rhs(<4 x i32> %a) {
36+
; CHECK-LABEL: extract_icmp_v4i32_const_vec_rhs:
37+
; CHECK: // %bb.0:
38+
; CHECK-NEXT: adrp x8, .LCPI2_0
39+
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
40+
; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s
41+
; CHECK-NEXT: xtn v0.4h, v0.4s
42+
; CHECK-NEXT: umov w8, v0.h[1]
43+
; CHECK-NEXT: and w0, w8, #0x1
44+
; CHECK-NEXT: ret
45+
%icmp = icmp ult <4 x i32> %a, <i32 5, i32 234, i32 -1, i32 7>
46+
%ext = extractelement <4 x i1> %icmp, i32 1
47+
ret i1 %ext
48+
}
49+
50+
; Floating-point variant: extracts lane 1 of an 'ult' (unordered or less-than)
; compare between %a and a constant splat of 4.0. The expected output computes
; the opposite predicate with fcmge and inverts the mask with mvn before the
; narrow and lane move.
define i1 @extract_fcmp_v4f32_const_splat_rhs(<4 x float> %a) {
51+
; CHECK-LABEL: extract_fcmp_v4f32_const_splat_rhs:
52+
; CHECK: // %bb.0:
53+
; CHECK-NEXT: fmov v1.4s, #4.00000000
54+
; CHECK-NEXT: fcmge v0.4s, v0.4s, v1.4s
55+
; CHECK-NEXT: mvn v0.16b, v0.16b
56+
; CHECK-NEXT: xtn v0.4h, v0.4s
57+
; CHECK-NEXT: umov w8, v0.h[1]
58+
; CHECK-NEXT: and w0, w8, #0x1
59+
; CHECK-NEXT: ret
60+
%fcmp = fcmp ult <4 x float> %a, splat(float 4.0e+0)
61+
%ext = extractelement <4 x i1> %fcmp, i32 1
62+
ret i1 %ext
63+
}
64+
65+
; Loop with per-lane predicated stores. %index runs from 0 in steps of 4 until
; it equals 16, and %vec.ind starts at <0, 1, 2, 3> and advances by 4 per
; iteration. Each iteration compares %vec.ind ult 15 once (%0), then extracts
; lanes 0..3 of that single compare one at a time; each extracted i1 guards a
; scalar store of i32 1 to %dest at %index plus the lane number.
; In the expected output the 64-bit lane compare is redone before each of the
; four conditional branches rather than being computed once per iteration.
define void @vector_loop_with_icmp(ptr nocapture noundef writeonly %dest) {
66+
; CHECK-LABEL: vector_loop_with_icmp:
67+
; CHECK: // %bb.0: // %entry
68+
; CHECK-NEXT: index z0.d, #0, #1
69+
; CHECK-NEXT: mov w8, #15 // =0xf
70+
; CHECK-NEXT: mov w9, #4 // =0x4
71+
; CHECK-NEXT: dup v2.2d, x8
72+
; CHECK-NEXT: dup v3.2d, x9
73+
; CHECK-NEXT: add x9, x0, #8
74+
; CHECK-NEXT: mov w10, #16 // =0x10
75+
; CHECK-NEXT: mov w11, #1 // =0x1
76+
; CHECK-NEXT: mov z1.d, z0.d
77+
; CHECK-NEXT: add z1.d, z1.d, #2 // =0x2
78+
; CHECK-NEXT: b .LBB4_2
79+
; CHECK-NEXT: .LBB4_1: // %pred.store.continue18
80+
; CHECK-NEXT: // in Loop: Header=BB4_2 Depth=1
81+
; CHECK-NEXT: add v1.2d, v1.2d, v3.2d
82+
; CHECK-NEXT: add v0.2d, v0.2d, v3.2d
83+
; CHECK-NEXT: subs x10, x10, #4
84+
; CHECK-NEXT: add x9, x9, #16
85+
; CHECK-NEXT: b.eq .LBB4_10
86+
; CHECK-NEXT: .LBB4_2: // %vector.body
87+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
88+
; CHECK-NEXT: cmhi v4.2d, v2.2d, v0.2d
89+
; CHECK-NEXT: xtn v4.2s, v4.2d
90+
; CHECK-NEXT: uzp1 v4.4h, v4.4h, v0.4h
91+
; CHECK-NEXT: umov w12, v4.h[0]
92+
; CHECK-NEXT: tbz w12, #0, .LBB4_4
93+
; CHECK-NEXT: // %bb.3: // %pred.store.if
94+
; CHECK-NEXT: // in Loop: Header=BB4_2 Depth=1
95+
; CHECK-NEXT: stur w11, [x9, #-8]
96+
; CHECK-NEXT: .LBB4_4: // %pred.store.continue
97+
; CHECK-NEXT: // in Loop: Header=BB4_2 Depth=1
98+
; CHECK-NEXT: dup v4.2d, x8
99+
; CHECK-NEXT: cmhi v4.2d, v4.2d, v0.2d
100+
; CHECK-NEXT: xtn v4.2s, v4.2d
101+
; CHECK-NEXT: uzp1 v4.4h, v4.4h, v0.4h
102+
; CHECK-NEXT: umov w12, v4.h[1]
103+
; CHECK-NEXT: tbz w12, #0, .LBB4_6
104+
; CHECK-NEXT: // %bb.5: // %pred.store.if5
105+
; CHECK-NEXT: // in Loop: Header=BB4_2 Depth=1
106+
; CHECK-NEXT: stur w11, [x9, #-4]
107+
; CHECK-NEXT: .LBB4_6: // %pred.store.continue6
108+
; CHECK-NEXT: // in Loop: Header=BB4_2 Depth=1
109+
; CHECK-NEXT: dup v4.2d, x8
110+
; CHECK-NEXT: cmhi v4.2d, v4.2d, v1.2d
111+
; CHECK-NEXT: xtn v4.2s, v4.2d
112+
; CHECK-NEXT: uzp1 v4.4h, v0.4h, v4.4h
113+
; CHECK-NEXT: umov w12, v4.h[2]
114+
; CHECK-NEXT: tbz w12, #0, .LBB4_8
115+
; CHECK-NEXT: // %bb.7: // %pred.store.if7
116+
; CHECK-NEXT: // in Loop: Header=BB4_2 Depth=1
117+
; CHECK-NEXT: str w11, [x9]
118+
; CHECK-NEXT: .LBB4_8: // %pred.store.continue8
119+
; CHECK-NEXT: // in Loop: Header=BB4_2 Depth=1
120+
; CHECK-NEXT: dup v4.2d, x8
121+
; CHECK-NEXT: cmhi v4.2d, v4.2d, v1.2d
122+
; CHECK-NEXT: xtn v4.2s, v4.2d
123+
; CHECK-NEXT: uzp1 v4.4h, v0.4h, v4.4h
124+
; CHECK-NEXT: umov w12, v4.h[3]
125+
; CHECK-NEXT: tbz w12, #0, .LBB4_1
126+
; CHECK-NEXT: // %bb.9: // %pred.store.if9
127+
; CHECK-NEXT: // in Loop: Header=BB4_2 Depth=1
128+
; CHECK-NEXT: str w11, [x9, #4]
129+
; CHECK-NEXT: b .LBB4_1
130+
; CHECK-NEXT: .LBB4_10: // %for.cond.cleanup
131+
; CHECK-NEXT: ret
132+
entry:
133+
br label %vector.body
134+
135+
vector.body:
136+
%index = phi i64 [ 0, %entry ], [ %index.next, %pred.store.continue18 ]
137+
%vec.ind = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %entry ], [ %vec.ind.next, %pred.store.continue18 ]
138+
%0 = icmp ult <4 x i64> %vec.ind, <i64 15, i64 15, i64 15, i64 15>
139+
%1 = extractelement <4 x i1> %0, i64 0
140+
br i1 %1, label %pred.store.if, label %pred.store.continue
141+
142+
pred.store.if:
143+
%2 = getelementptr inbounds i32, ptr %dest, i64 %index
144+
store i32 1, ptr %2, align 4
145+
br label %pred.store.continue
146+
147+
pred.store.continue:
148+
%3 = extractelement <4 x i1> %0, i64 1
149+
br i1 %3, label %pred.store.if5, label %pred.store.continue6
150+
151+
pred.store.if5:
152+
%4 = or disjoint i64 %index, 1
153+
%5 = getelementptr inbounds i32, ptr %dest, i64 %4
154+
store i32 1, ptr %5, align 4
155+
br label %pred.store.continue6
156+
157+
pred.store.continue6:
158+
%6 = extractelement <4 x i1> %0, i64 2
159+
br i1 %6, label %pred.store.if7, label %pred.store.continue8
160+
161+
pred.store.if7:
162+
%7 = or disjoint i64 %index, 2
163+
%8 = getelementptr inbounds i32, ptr %dest, i64 %7
164+
store i32 1, ptr %8, align 4
165+
br label %pred.store.continue8
166+
167+
pred.store.continue8:
168+
%9 = extractelement <4 x i1> %0, i64 3
169+
br i1 %9, label %pred.store.if9, label %pred.store.continue18
170+
171+
pred.store.if9:
172+
%10 = or disjoint i64 %index, 3
173+
%11 = getelementptr inbounds i32, ptr %dest, i64 %10
174+
store i32 1, ptr %11, align 4
175+
br label %pred.store.continue18
176+
177+
pred.store.continue18:
178+
%index.next = add i64 %index, 4
179+
%vec.ind.next = add <4 x i64> %vec.ind, <i64 4, i64 4, i64 4, i64 4>
180+
%24 = icmp eq i64 %index.next, 16
181+
br i1 %24, label %for.cond.cleanup, label %vector.body
182+
183+
for.cond.cleanup:
184+
ret void
185+
}
186+
187+
188+
; Negative tests
189+
190+
; Negative case: the right-hand operand is a splat of the run-time value %b
; (built with insertelement + shufflevector), not a constant. Lane 1 of the
; unsigned less-than compare is extracted; the expected output materialises
; the splat with dup and keeps the vector compare.
define i1 @extract_icmp_v4i32_splat_rhs(<4 x i32> %a, i32 %b) {
191+
; CHECK-LABEL: extract_icmp_v4i32_splat_rhs:
192+
; CHECK: // %bb.0:
193+
; CHECK-NEXT: dup v1.4s, w0
194+
; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s
195+
; CHECK-NEXT: xtn v0.4h, v0.4s
196+
; CHECK-NEXT: umov w8, v0.h[1]
197+
; CHECK-NEXT: and w0, w8, #0x1
198+
; CHECK-NEXT: ret
199+
%ins = insertelement <4 x i32> poison, i32 %b, i32 0
200+
%splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
201+
%icmp = icmp ult <4 x i32> %a, %splat
202+
%ext = extractelement <4 x i1> %icmp, i32 1
203+
ret i1 %ext
204+
}
205+
206+
; Negative case: the compare against a constant splat of 235 has more than one
; use. Besides the lane-1 extract, the whole <4 x i1> result is stored to %p,
; so the expected output still contains the full vector compare plus the code
; that packs the mask into a byte for the store.
define i1 @extract_icmp_v4i32_splat_rhs_mul_use(<4 x i32> %a, ptr %p) {
207+
; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_mul_use:
208+
; CHECK: // %bb.0:
209+
; CHECK-NEXT: movi v1.4s, #235
210+
; CHECK-NEXT: adrp x9, .LCPI6_0
211+
; CHECK-NEXT: mov x8, x0
212+
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI6_0]
213+
; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s
214+
; CHECK-NEXT: xtn v1.4h, v0.4s
215+
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
216+
; CHECK-NEXT: addv s0, v0.4s
217+
; CHECK-NEXT: umov w9, v1.h[1]
218+
; CHECK-NEXT: fmov w10, s0
219+
; CHECK-NEXT: and w0, w9, #0x1
220+
; CHECK-NEXT: strb w10, [x8]
221+
; CHECK-NEXT: ret
222+
%icmp = icmp ult <4 x i32> %a, splat(i32 235)
223+
%ext = extractelement <4 x i1> %icmp, i32 1
224+
store <4 x i1> %icmp, ptr %p, align 4
225+
ret i1 %ext
226+
}
227+
228+
; Negative case: the extract index is the run-time value %c rather than a
; constant. The compare is against a constant splat of 127; the expected
; output spills the narrowed compare result to the stack and loads the
; selected lane back with an index computed from %c.
define i1 @extract_icmp_v4i32_splat_rhs_unknown_idx(<4 x i32> %a, i32 %c) {
229+
; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_unknown_idx:
230+
; CHECK: // %bb.0:
231+
; CHECK-NEXT: sub sp, sp, #16
232+
; CHECK-NEXT: .cfi_def_cfa_offset 16
233+
; CHECK-NEXT: movi v1.4s, #127
234+
; CHECK-NEXT: add x8, sp, #8
235+
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
236+
; CHECK-NEXT: bfi x8, x0, #1, #2
237+
; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s
238+
; CHECK-NEXT: xtn v0.4h, v0.4s
239+
; CHECK-NEXT: str d0, [sp, #8]
240+
; CHECK-NEXT: ldrh w8, [x8]
241+
; CHECK-NEXT: and w0, w8, #0x1
242+
; CHECK-NEXT: add sp, sp, #16
243+
; CHECK-NEXT: ret
244+
%icmp = icmp ult <4 x i32> %a, splat(i32 127)
245+
%ext = extractelement <4 x i1> %icmp, i32 %c
246+
ret i1 %ext
247+
}

llvm/test/CodeGen/X86/vselect.ll

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -796,3 +796,29 @@ define i64 @vselect_any_extend_vector_inreg_crash(ptr %x) {
796796
ret i64 %4
797797
}
798798

799+
; Tests the scalarizeBinOp code in DAGCombiner
800+
; A <1 x i1> select between zeroinitializer and an all-true splat is computed
; in %bb1 and its only element is extracted in %bb2, which branches back to
; %bb1. The function has no return and the extracted value is never used, so
; for both the SSE and AVX prefixes the expected output is just a block that
; jumps to itself.
define void @scalarize_binop(<1 x i1> %a) {
801+
; SSE-LABEL: scalarize_binop:
802+
; SSE: # %bb.0: # %bb0
803+
; SSE-NEXT: .p2align 4
804+
; SSE-NEXT: .LBB35_1: # %bb1
805+
; SSE-NEXT: # =>This Inner Loop Header: Depth=1
806+
; SSE-NEXT: jmp .LBB35_1
807+
;
808+
; AVX-LABEL: scalarize_binop:
809+
; AVX: # %bb.0: # %bb0
810+
; AVX-NEXT: .p2align 4
811+
; AVX-NEXT: .LBB35_1: # %bb1
812+
; AVX-NEXT: # =>This Inner Loop Header: Depth=1
813+
; AVX-NEXT: jmp .LBB35_1
814+
bb0:
815+
br label %bb1
816+
817+
bb1:
818+
%b = select <1 x i1> %a, <1 x i1> zeroinitializer, <1 x i1> splat (i1 true)
819+
br label %bb2
820+
821+
bb2:
822+
%c = extractelement <1 x i1> %b, i32 0
823+
br label %bb1
824+
}

0 commit comments

Comments
 (0)