Skip to content

Commit 608fee8

Browse files
committed
Add tests
1 parent c72a751 commit 608fee8

File tree

2 files changed

+298
-0
lines changed

2 files changed

+298
-0
lines changed
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s | FileCheck %s
3+
4+
target triple = "aarch64-unknown-linux-gnu"
5+
6+
; no_sink_simple: a single vector compare whose result is used in two
; different successor blocks.
;
; The entry block computes %d = (%a s< %b) lane-wise and then branches on the
; scalar %c. Block %s selects %a-or-zero with %d, stores the result through %p
; and returns it; block %t selects %b-or-zero with %d and returns it.
;
; The autogenerated assertions below expect a cmgt followed by an and in each
; of the two successor blocks rather than one compare in the entry block.
; NOTE(review): the function name suggests this pins behaviour for a
; compare-sinking transform - confirm the intent against the change this test
; accompanies.
define <4 x i32> @no_sink_simple(<4 x i32> %a, <4 x i32> %b, i1 %c, ptr %p) {
7+
; CHECK-LABEL: no_sink_simple:
8+
; CHECK: // %bb.0:
9+
; CHECK-NEXT: tbz w0, #0, .LBB0_2
10+
; CHECK-NEXT: // %bb.1: // %s
11+
; CHECK-NEXT: cmgt v1.4s, v1.4s, v0.4s
12+
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
13+
; CHECK-NEXT: str q0, [x1]
14+
; CHECK-NEXT: ret
15+
; CHECK-NEXT: .LBB0_2: // %t
16+
; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
17+
; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
18+
; CHECK-NEXT: ret
19+
; The compare is defined once here, ahead of the branch; both %s and %t use it.
%d = icmp slt <4 x i32> %a, %b
20+
br i1 %c, label %s, label %t
21+
22+
s:
23+
; %c true: keep the lanes of %a where %a s< %b, zero elsewhere, and also store them.
%s1 = select <4 x i1> %d, <4 x i32> %a, <4 x i32> zeroinitializer
24+
store <4 x i32> %s1, ptr %p
25+
ret <4 x i32> %s1
26+
27+
t:
28+
; %c false: keep the lanes of %b where %a s< %b, zero elsewhere; no store.
%s2 = select <4 x i1> %d, <4 x i32> %b, <4 x i32> zeroinitializer
29+
ret <4 x i32> %s2
30+
}
31+
32+
; vector_loop_with_icmp: a loop that stores i32 1 through %dest under a
; per-lane mask computed by one vector compare.
;
; %index runs 0, 4, 8, 12 (the loop exits when %index.next equals 16) and
; %vec.ind carries four i64 lane indices starting at <0, 1, 2, 3>, also
; stepping by 4. Each iteration computes the mask %0 = (%vec.ind u< 15) once in
; %vector.body; its four lanes are then extracted in four different blocks,
; each guarding a scalar store to the element of %dest at %index (lane 0) or
; at %index or-ed with 1, 2 or 3 (lanes 1-3).
;
; NOTE(review): the block names and shape look like loop-vectorizer output
; with predicated stores - confirm against the originating source.
;
; The autogenerated assertions below expect a separate cmhi ahead of every
; lane's umov/tbz, i.e. four compares per loop iteration.
define void @vector_loop_with_icmp(ptr nocapture noundef writeonly %dest) {
33+
; CHECK-LABEL: vector_loop_with_icmp:
34+
; CHECK: // %bb.0: // %entry
35+
; CHECK-NEXT: mov w8, #15 // =0xf
36+
; CHECK-NEXT: mov w10, #4 // =0x4
37+
; CHECK-NEXT: adrp x9, .LCPI1_0
38+
; CHECK-NEXT: adrp x11, .LCPI1_1
39+
; CHECK-NEXT: dup v0.2d, x8
40+
; CHECK-NEXT: dup v1.2d, x10
41+
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI1_0]
42+
; CHECK-NEXT: ldr q3, [x11, :lo12:.LCPI1_1]
43+
; CHECK-NEXT: add x9, x0, #8
44+
; CHECK-NEXT: mov w10, #16 // =0x10
45+
; CHECK-NEXT: mov w11, #1 // =0x1
46+
; CHECK-NEXT: b .LBB1_2
47+
; CHECK-NEXT: .LBB1_1: // %pred.store.continue18
48+
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
49+
; CHECK-NEXT: add v2.2d, v2.2d, v1.2d
50+
; CHECK-NEXT: add v3.2d, v3.2d, v1.2d
51+
; CHECK-NEXT: subs x10, x10, #4
52+
; CHECK-NEXT: add x9, x9, #16
53+
; CHECK-NEXT: b.eq .LBB1_10
54+
; CHECK-NEXT: .LBB1_2: // %vector.body
55+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
56+
; CHECK-NEXT: cmhi v4.2d, v0.2d, v3.2d
57+
; CHECK-NEXT: xtn v4.2s, v4.2d
58+
; CHECK-NEXT: uzp1 v4.4h, v4.4h, v0.4h
59+
; CHECK-NEXT: umov w12, v4.h[0]
60+
; CHECK-NEXT: tbz w12, #0, .LBB1_4
61+
; CHECK-NEXT: // %bb.3: // %pred.store.if
62+
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
63+
; CHECK-NEXT: stur w11, [x9, #-8]
64+
; CHECK-NEXT: .LBB1_4: // %pred.store.continue
65+
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
66+
; CHECK-NEXT: dup v4.2d, x8
67+
; CHECK-NEXT: cmhi v4.2d, v4.2d, v3.2d
68+
; CHECK-NEXT: xtn v4.2s, v4.2d
69+
; CHECK-NEXT: uzp1 v4.4h, v4.4h, v0.4h
70+
; CHECK-NEXT: umov w12, v4.h[1]
71+
; CHECK-NEXT: tbz w12, #0, .LBB1_6
72+
; CHECK-NEXT: // %bb.5: // %pred.store.if5
73+
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
74+
; CHECK-NEXT: stur w11, [x9, #-4]
75+
; CHECK-NEXT: .LBB1_6: // %pred.store.continue6
76+
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
77+
; CHECK-NEXT: dup v4.2d, x8
78+
; CHECK-NEXT: cmhi v4.2d, v4.2d, v2.2d
79+
; CHECK-NEXT: xtn v4.2s, v4.2d
80+
; CHECK-NEXT: uzp1 v4.4h, v0.4h, v4.4h
81+
; CHECK-NEXT: umov w12, v4.h[2]
82+
; CHECK-NEXT: tbz w12, #0, .LBB1_8
83+
; CHECK-NEXT: // %bb.7: // %pred.store.if7
84+
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
85+
; CHECK-NEXT: str w11, [x9]
86+
; CHECK-NEXT: .LBB1_8: // %pred.store.continue8
87+
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
88+
; CHECK-NEXT: dup v4.2d, x8
89+
; CHECK-NEXT: cmhi v4.2d, v4.2d, v2.2d
90+
; CHECK-NEXT: xtn v4.2s, v4.2d
91+
; CHECK-NEXT: uzp1 v4.4h, v0.4h, v4.4h
92+
; CHECK-NEXT: umov w12, v4.h[3]
93+
; CHECK-NEXT: tbz w12, #0, .LBB1_1
94+
; CHECK-NEXT: // %bb.9: // %pred.store.if9
95+
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
96+
; CHECK-NEXT: str w11, [x9, #4]
97+
; CHECK-NEXT: b .LBB1_1
98+
; CHECK-NEXT: .LBB1_10: // %for.cond.cleanup
99+
; CHECK-NEXT: ret
100+
entry:
101+
br label %vector.body
102+
103+
vector.body:
104+
; Scalar element index and the matching per-lane index vector, both advanced by 4 per iteration.
%index = phi i64 [ 0, %entry ], [ %index.next, %pred.store.continue18 ]
105+
%vec.ind = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %entry ], [ %vec.ind.next, %pred.store.continue18 ]
106+
; Lane mask: true for lanes whose index is below 15. Computed once, used in four blocks.
%0 = icmp ult <4 x i64> %vec.ind, <i64 15, i64 15, i64 15, i64 15>
107+
%1 = extractelement <4 x i1> %0, i64 0
108+
br i1 %1, label %pred.store.if, label %pred.store.continue
109+
110+
pred.store.if:
111+
; Lane 0 active: store to element %index.
%2 = getelementptr inbounds i32, ptr %dest, i64 %index
112+
store i32 1, ptr %2, align 4
113+
br label %pred.store.continue
114+
115+
pred.store.continue:
116+
%3 = extractelement <4 x i1> %0, i64 1
117+
br i1 %3, label %pred.store.if5, label %pred.store.continue6
118+
119+
pred.store.if5:
120+
; Lane 1 active: %index only takes multiples of 4 (starts at 0, steps by 4),
; so the disjoint or with 1 addresses the next element.
%4 = or disjoint i64 %index, 1
121+
%5 = getelementptr inbounds i32, ptr %dest, i64 %4
122+
store i32 1, ptr %5, align 4
123+
br label %pred.store.continue6
124+
125+
pred.store.continue6:
126+
%6 = extractelement <4 x i1> %0, i64 2
127+
br i1 %6, label %pred.store.if7, label %pred.store.continue8
128+
129+
pred.store.if7:
130+
; Lane 2 active.
%7 = or disjoint i64 %index, 2
131+
%8 = getelementptr inbounds i32, ptr %dest, i64 %7
132+
store i32 1, ptr %8, align 4
133+
br label %pred.store.continue8
134+
135+
pred.store.continue8:
136+
%9 = extractelement <4 x i1> %0, i64 3
137+
br i1 %9, label %pred.store.if9, label %pred.store.continue18
138+
139+
pred.store.if9:
140+
; Lane 3 active.
%10 = or disjoint i64 %index, 3
141+
%11 = getelementptr inbounds i32, ptr %dest, i64 %10
142+
store i32 1, ptr %11, align 4
143+
br label %pred.store.continue18
144+
145+
pred.store.continue18:
146+
; Loop latch: advance both induction values and exit once %index.next reaches 16.
%index.next = add i64 %index, 4
147+
%vec.ind.next = add <4 x i64> %vec.ind, <i64 4, i64 4, i64 4, i64 4>
148+
; NOTE(review): value number %24 is not consecutive after %11 - presumably left
; over from a larger loop; confirm the IR parser in use accepts the gap.
%24 = icmp eq i64 %index.next, 16
149+
br i1 %24, label %for.cond.cleanup, label %vector.body
150+
151+
for.cond.cleanup:
152+
ret void
153+
}
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve < %s | FileCheck %s
3+
4+
; no_sink_simple: a single vector compare whose result is used in two
; different successor blocks (arm_aapcs_vfpcc calling convention).
;
; The entry block computes %d = (%a s< %b) lane-wise and then branches on the
; scalar %c. Block %s selects %a-or-zero with %d, stores the result through %p
; and returns it; block %t selects %b-or-zero with %d and returns it.
;
; The autogenerated assertions below expect a vcmp.s32 followed by a vpsel in
; each of the two successor blocks rather than one compare in the entry block.
; NOTE(review): the function name suggests this pins behaviour for a
; compare-sinking transform - confirm the intent against the change this test
; accompanies.
define arm_aapcs_vfpcc <4 x i32> @no_sink_simple(<4 x i32> %a, <4 x i32> %b, i1 %c, ptr %p) {
5+
; CHECK-LABEL: no_sink_simple:
6+
; CHECK: @ %bb.0:
7+
; CHECK-NEXT: lsls r0, r0, #31
8+
; CHECK-NEXT: beq .LBB0_2
9+
; CHECK-NEXT: @ %bb.1: @ %s
10+
; CHECK-NEXT: vcmp.s32 gt, q1, q0
11+
; CHECK-NEXT: vmov.i32 q1, #0x0
12+
; CHECK-NEXT: vpsel q0, q0, q1
13+
; CHECK-NEXT: vstrw.32 q0, [r1]
14+
; CHECK-NEXT: bx lr
15+
; CHECK-NEXT: .LBB0_2: @ %t
16+
; CHECK-NEXT: vcmp.s32 gt, q1, q0
17+
; CHECK-NEXT: vmov.i32 q0, #0x0
18+
; CHECK-NEXT: vpsel q0, q1, q0
19+
; CHECK-NEXT: bx lr
20+
; The compare is defined once here, ahead of the branch; both %s and %t use it.
%d = icmp slt <4 x i32> %a, %b
21+
br i1 %c, label %s, label %t
22+
23+
s:
24+
; %c true: keep the lanes of %a where %a s< %b, zero elsewhere, and also store them.
%s1 = select <4 x i1> %d, <4 x i32> %a, <4 x i32> zeroinitializer
25+
store <4 x i32> %s1, ptr %p
26+
ret <4 x i32> %s1
27+
28+
t:
29+
; %c false: keep the lanes of %b where %a s< %b, zero elsewhere; no store.
%s2 = select <4 x i1> %d, <4 x i32> %b, <4 x i32> zeroinitializer
30+
ret <4 x i32> %s2
31+
}
32+
33+
; vector_loop_with_icmp: a loop that stores i32 1 through %dest under a
; per-lane mask computed by one vector compare (arm_aapcs_vfpcc calling
; convention).
;
; %index is an i64 running 0, 4, 8, 12 (the loop exits when %index.next equals
; 16) and %vec.ind carries four i32 lane indices starting at <0, 1, 2, 3>, also
; stepping by 4. Each iteration computes the mask %0 = (%vec.ind u< 15) once in
; %vector.body; its four lanes are then extracted in four different blocks,
; each guarding a scalar store to the element of %dest at %index (lane 0) or
; at %index or-ed with 1, 2 or 3 (lanes 1-3).
;
; NOTE(review): the block names and shape look like loop-vectorizer output
; with predicated stores - confirm against the originating source.
;
; The autogenerated assertions below expect four vcmp.u32 instructions per
; iteration, each against its own register holding the splat of 0xf (q1-q4).
define arm_aapcs_vfpcc void @vector_loop_with_icmp(ptr nocapture noundef writeonly %dest) {
34+
; CHECK-LABEL: vector_loop_with_icmp:
35+
; CHECK: @ %bb.0: @ %entry
36+
; CHECK-NEXT: push {r7, lr}
37+
; CHECK-NEXT: vpush {d8, d9}
38+
; CHECK-NEXT: adr r1, .LCPI1_0
39+
; CHECK-NEXT: vmov.i32 q1, #0xf
40+
; CHECK-NEXT: vldrw.u32 q0, [r1]
41+
; CHECK-NEXT: movs r1, #0
42+
; CHECK-NEXT: mov.w lr, #1
43+
; CHECK-NEXT: vmov.i32 q2, #0xf
44+
; CHECK-NEXT: vmov.i32 q3, #0xf
45+
; CHECK-NEXT: vmov.i32 q4, #0xf
46+
; CHECK-NEXT: mov.w r12, #4
47+
; CHECK-NEXT: movs r3, #0
48+
; CHECK-NEXT: .LBB1_1: @ %vector.body
49+
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
50+
; CHECK-NEXT: vcmp.u32 hi, q1, q0
51+
; CHECK-NEXT: vmrs r2, p0
52+
; CHECK-NEXT: vcmp.u32 hi, q2, q0
53+
; CHECK-NEXT: lsls r2, r2, #31
54+
; CHECK-NEXT: it ne
55+
; CHECK-NEXT: strne.w lr, [r0, r1, lsl #2]
56+
; CHECK-NEXT: vmrs r2, p0
57+
; CHECK-NEXT: vcmp.u32 hi, q3, q0
58+
; CHECK-NEXT: lsrs r2, r2, #4
59+
; CHECK-NEXT: lsls r2, r2, #31
60+
; CHECK-NEXT: itt ne
61+
; CHECK-NEXT: orrne r2, r1, #1
62+
; CHECK-NEXT: strne.w lr, [r0, r2, lsl #2]
63+
; CHECK-NEXT: vmrs r2, p0
64+
; CHECK-NEXT: vcmp.u32 hi, q4, q0
65+
; CHECK-NEXT: vadd.i32 q0, q0, r12
66+
; CHECK-NEXT: lsrs r2, r2, #8
67+
; CHECK-NEXT: lsls r2, r2, #31
68+
; CHECK-NEXT: itt ne
69+
; CHECK-NEXT: orrne r2, r1, #2
70+
; CHECK-NEXT: strne.w lr, [r0, r2, lsl #2]
71+
; CHECK-NEXT: vmrs r2, p0
72+
; CHECK-NEXT: lsrs r2, r2, #12
73+
; CHECK-NEXT: lsls r2, r2, #31
74+
; CHECK-NEXT: itt ne
75+
; CHECK-NEXT: orrne r2, r1, #3
76+
; CHECK-NEXT: strne.w lr, [r0, r2, lsl #2]
77+
; CHECK-NEXT: adds r1, #4
78+
; CHECK-NEXT: adc r3, r3, #0
79+
; CHECK-NEXT: eor r2, r1, #16
80+
; CHECK-NEXT: orrs r2, r3
81+
; CHECK-NEXT: bne .LBB1_1
82+
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
83+
; CHECK-NEXT: vpop {d8, d9}
84+
; CHECK-NEXT: pop {r7, pc}
85+
; CHECK-NEXT: .p2align 4
86+
; CHECK-NEXT: @ %bb.3:
87+
; CHECK-NEXT: .LCPI1_0:
88+
; CHECK-NEXT: .long 0 @ 0x0
89+
; CHECK-NEXT: .long 1 @ 0x1
90+
; CHECK-NEXT: .long 2 @ 0x2
91+
; CHECK-NEXT: .long 3 @ 0x3
92+
entry:
93+
br label %vector.body
94+
95+
vector.body:
96+
; Scalar element index (i64) and the matching per-lane index vector (i32 lanes),
; both advanced by 4 per iteration.
%index = phi i64 [ 0, %entry ], [ %index.next, %pred.store.continue18 ]
97+
%vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %entry ], [ %vec.ind.next, %pred.store.continue18 ]
98+
; Lane mask: true for lanes whose index is below 15. Computed once, used in four blocks.
%0 = icmp ult <4 x i32> %vec.ind, <i32 15, i32 15, i32 15, i32 15>
99+
%1 = extractelement <4 x i1> %0, i64 0
100+
br i1 %1, label %pred.store.if, label %pred.store.continue
101+
102+
pred.store.if:
103+
; Lane 0 active: store to element %index.
%2 = getelementptr inbounds i32, ptr %dest, i64 %index
104+
store i32 1, ptr %2, align 4
105+
br label %pred.store.continue
106+
107+
pred.store.continue:
108+
%3 = extractelement <4 x i1> %0, i64 1
109+
br i1 %3, label %pred.store.if5, label %pred.store.continue6
110+
111+
pred.store.if5:
112+
; Lane 1 active: %index only takes multiples of 4 (starts at 0, steps by 4),
; so the disjoint or with 1 addresses the next element.
%4 = or disjoint i64 %index, 1
113+
%5 = getelementptr inbounds i32, ptr %dest, i64 %4
114+
store i32 1, ptr %5, align 4
115+
br label %pred.store.continue6
116+
117+
pred.store.continue6:
118+
%6 = extractelement <4 x i1> %0, i64 2
119+
br i1 %6, label %pred.store.if7, label %pred.store.continue8
120+
121+
pred.store.if7:
122+
; Lane 2 active.
%7 = or disjoint i64 %index, 2
123+
%8 = getelementptr inbounds i32, ptr %dest, i64 %7
124+
store i32 1, ptr %8, align 4
125+
br label %pred.store.continue8
126+
127+
pred.store.continue8:
128+
%9 = extractelement <4 x i1> %0, i64 3
129+
br i1 %9, label %pred.store.if9, label %pred.store.continue18
130+
131+
pred.store.if9:
132+
; Lane 3 active.
%10 = or disjoint i64 %index, 3
133+
%11 = getelementptr inbounds i32, ptr %dest, i64 %10
134+
store i32 1, ptr %11, align 4
135+
br label %pred.store.continue18
136+
137+
pred.store.continue18:
138+
; Loop latch: advance both induction values and exit once %index.next reaches 16.
%index.next = add i64 %index, 4
139+
%vec.ind.next = add <4 x i32> %vec.ind, <i32 4, i32 4, i32 4, i32 4>
140+
; NOTE(review): value number %24 is not consecutive after %11 - presumably left
; over from a larger loop; confirm the IR parser in use accepts the gap.
%24 = icmp eq i64 %index.next, 16
141+
br i1 %24, label %for.cond.cleanup, label %vector.body
142+
143+
for.cond.cleanup:
144+
ret void
145+
}

0 commit comments

Comments
 (0)