1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
- ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
2
+ ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
3
3
4
4
define arm_aapcs_vfpcc <4 x i32 > @sext_v4i1_v4i32 (<4 x i32 > %src ) {
5
5
; CHECK-LABEL: sext_v4i1_v4i32:
6
6
; CHECK: @ %bb.0: @ %entry
7
+ ; CHECK-NEXT: vmov.i32 q1, #0x0
8
+ ; CHECK-NEXT: vmov.i8 q2, #0xff
7
9
; CHECK-NEXT: vcmp.s32 gt, q0, zr
8
- ; CHECK-NEXT: vmov.i32 q0, #0x0
9
- ; CHECK-NEXT: vmov.i8 q1, #0xff
10
- ; CHECK-NEXT: vpsel q0, q1, q0
10
+ ; CHECK-NEXT: vpsel q0, q2, q1
11
11
; CHECK-NEXT: bx lr
12
12
entry:
13
13
%c = icmp sgt <4 x i32 > %src , zeroinitializer
@@ -18,10 +18,10 @@ entry:
18
18
define arm_aapcs_vfpcc <8 x i16 > @sext_v8i1_v8i16 (<8 x i16 > %src ) {
19
19
; CHECK-LABEL: sext_v8i1_v8i16:
20
20
; CHECK: @ %bb.0: @ %entry
21
+ ; CHECK-NEXT: vmov.i16 q1, #0x0
22
+ ; CHECK-NEXT: vmov.i8 q2, #0xff
21
23
; CHECK-NEXT: vcmp.s16 gt, q0, zr
22
- ; CHECK-NEXT: vmov.i16 q0, #0x0
23
- ; CHECK-NEXT: vmov.i8 q1, #0xff
24
- ; CHECK-NEXT: vpsel q0, q1, q0
24
+ ; CHECK-NEXT: vpsel q0, q2, q1
25
25
; CHECK-NEXT: bx lr
26
26
entry:
27
27
%c = icmp sgt <8 x i16 > %src , zeroinitializer
@@ -32,10 +32,10 @@ entry:
32
32
define arm_aapcs_vfpcc <16 x i8 > @sext_v16i1_v16i8 (<16 x i8 > %src ) {
33
33
; CHECK-LABEL: sext_v16i1_v16i8:
34
34
; CHECK: @ %bb.0: @ %entry
35
+ ; CHECK-NEXT: vmov.i8 q1, #0x0
36
+ ; CHECK-NEXT: vmov.i8 q2, #0xff
35
37
; CHECK-NEXT: vcmp.s8 gt, q0, zr
36
- ; CHECK-NEXT: vmov.i8 q0, #0x0
37
- ; CHECK-NEXT: vmov.i8 q1, #0xff
38
- ; CHECK-NEXT: vpsel q0, q1, q0
38
+ ; CHECK-NEXT: vpsel q0, q2, q1
39
39
; CHECK-NEXT: bx lr
40
40
entry:
41
41
%c = icmp sgt <16 x i8 > %src , zeroinitializer
@@ -46,31 +46,30 @@ entry:
46
46
define arm_aapcs_vfpcc <2 x i64 > @sext_v2i1_v2i64 (<2 x i64 > %src ) {
47
47
; CHECK-LABEL: sext_v2i1_v2i64:
48
48
; CHECK: @ %bb.0: @ %entry
49
- ; CHECK-NEXT: vmov r1, s0
49
+ ; CHECK-NEXT: vmov r1, s2
50
50
; CHECK-NEXT: movs r2, #0
51
- ; CHECK-NEXT: vmov r0, s1
51
+ ; CHECK-NEXT: vmov r0, s3
52
+ ; CHECK-NEXT: vmov r3, s0
52
53
; CHECK-NEXT: rsbs r1, r1, #0
54
+ ; CHECK-NEXT: vmov r1, s1
53
55
; CHECK-NEXT: sbcs.w r0, r2, r0
54
- ; CHECK-NEXT: vmov r1, s2
55
56
; CHECK-NEXT: mov.w r0, #0
56
57
; CHECK-NEXT: it lt
57
58
; CHECK-NEXT: movlt r0, #1
58
59
; CHECK-NEXT: cmp r0, #0
59
60
; CHECK-NEXT: it ne
60
61
; CHECK-NEXT: movne.w r0, #-1
61
- ; CHECK-NEXT: vmov.32 q1[0], r0
62
- ; CHECK-NEXT: vmov.32 q1[1], r0
63
- ; CHECK-NEXT: vmov r0, s3
64
- ; CHECK-NEXT: rsbs r1, r1, #0
65
- ; CHECK-NEXT: sbcs.w r0, r2, r0
62
+ ; CHECK-NEXT: rsbs r3, r3, #0
63
+ ; CHECK-NEXT: sbcs.w r1, r2, r1
66
64
; CHECK-NEXT: it lt
67
65
; CHECK-NEXT: movlt r2, #1
68
66
; CHECK-NEXT: cmp r2, #0
69
67
; CHECK-NEXT: it ne
70
68
; CHECK-NEXT: movne.w r2, #-1
71
- ; CHECK-NEXT: vmov.32 q1[2], r2
72
- ; CHECK-NEXT: vmov.32 q1[3], r2
73
- ; CHECK-NEXT: vmov q0, q1
69
+ ; CHECK-NEXT: vmov.32 q0[0], r2
70
+ ; CHECK-NEXT: vmov.32 q0[1], r2
71
+ ; CHECK-NEXT: vmov.32 q0[2], r0
72
+ ; CHECK-NEXT: vmov.32 q0[3], r0
74
73
; CHECK-NEXT: bx lr
75
74
entry:
76
75
%c = icmp sgt <2 x i64 > %src , zeroinitializer
@@ -82,10 +81,10 @@ entry:
82
81
define arm_aapcs_vfpcc <4 x i32 > @zext_v4i1_v4i32 (<4 x i32 > %src ) {
83
82
; CHECK-LABEL: zext_v4i1_v4i32:
84
83
; CHECK: @ %bb.0: @ %entry
84
+ ; CHECK-NEXT: vmov.i32 q1, #0x0
85
+ ; CHECK-NEXT: vmov.i32 q2, #0x1
85
86
; CHECK-NEXT: vcmp.s32 gt, q0, zr
86
- ; CHECK-NEXT: vmov.i32 q0, #0x0
87
- ; CHECK-NEXT: vmov.i32 q1, #0x1
88
- ; CHECK-NEXT: vpsel q0, q1, q0
87
+ ; CHECK-NEXT: vpsel q0, q2, q1
89
88
; CHECK-NEXT: bx lr
90
89
entry:
91
90
%c = icmp sgt <4 x i32 > %src , zeroinitializer
@@ -96,10 +95,10 @@ entry:
96
95
define arm_aapcs_vfpcc <8 x i16 > @zext_v8i1_v8i16 (<8 x i16 > %src ) {
97
96
; CHECK-LABEL: zext_v8i1_v8i16:
98
97
; CHECK: @ %bb.0: @ %entry
98
+ ; CHECK-NEXT: vmov.i16 q1, #0x0
99
+ ; CHECK-NEXT: vmov.i16 q2, #0x1
99
100
; CHECK-NEXT: vcmp.s16 gt, q0, zr
100
- ; CHECK-NEXT: vmov.i16 q0, #0x0
101
- ; CHECK-NEXT: vmov.i16 q1, #0x1
102
- ; CHECK-NEXT: vpsel q0, q1, q0
101
+ ; CHECK-NEXT: vpsel q0, q2, q1
103
102
; CHECK-NEXT: bx lr
104
103
entry:
105
104
%c = icmp sgt <8 x i16 > %src , zeroinitializer
@@ -110,10 +109,10 @@ entry:
110
109
define arm_aapcs_vfpcc <16 x i8 > @zext_v16i1_v16i8 (<16 x i8 > %src ) {
111
110
; CHECK-LABEL: zext_v16i1_v16i8:
112
111
; CHECK: @ %bb.0: @ %entry
112
+ ; CHECK-NEXT: vmov.i8 q1, #0x0
113
+ ; CHECK-NEXT: vmov.i8 q2, #0x1
113
114
; CHECK-NEXT: vcmp.s8 gt, q0, zr
114
- ; CHECK-NEXT: vmov.i8 q0, #0x0
115
- ; CHECK-NEXT: vmov.i8 q1, #0x1
116
- ; CHECK-NEXT: vpsel q0, q1, q0
115
+ ; CHECK-NEXT: vpsel q0, q2, q1
117
116
; CHECK-NEXT: bx lr
118
117
entry:
119
118
%c = icmp sgt <16 x i8 > %src , zeroinitializer
@@ -124,31 +123,31 @@ entry:
124
123
define arm_aapcs_vfpcc <2 x i64 > @zext_v2i1_v2i64 (<2 x i64 > %src ) {
125
124
; CHECK-LABEL: zext_v2i1_v2i64:
126
125
; CHECK: @ %bb.0: @ %entry
127
- ; CHECK-NEXT: vmov r1, s0
128
- ; CHECK-NEXT: movs r2, #0
129
- ; CHECK-NEXT: vmov r0, s1
130
- ; CHECK-NEXT: rsbs r1, r1, #0
131
- ; CHECK-NEXT: sbcs.w r0, r2, r0
132
- ; CHECK-NEXT: vmov r1, s2
133
- ; CHECK-NEXT: mov.w r0, #0
126
+ ; CHECK-NEXT: vmov r2, s2
127
+ ; CHECK-NEXT: adr r1, .LCPI7_0
128
+ ; CHECK-NEXT: vldrw.u32 q1, [r1]
129
+ ; CHECK-NEXT: vmov r1, s3
130
+ ; CHECK-NEXT: vmov r3, s0
131
+ ; CHECK-NEXT: movs r0, #0
132
+ ; CHECK-NEXT: rsbs r2, r2, #0
133
+ ; CHECK-NEXT: vmov r2, s1
134
+ ; CHECK-NEXT: sbcs.w r1, r0, r1
135
+ ; CHECK-NEXT: mov.w r1, #0
136
+ ; CHECK-NEXT: it lt
137
+ ; CHECK-NEXT: movlt r1, #1
138
+ ; CHECK-NEXT: cmp r1, #0
139
+ ; CHECK-NEXT: it ne
140
+ ; CHECK-NEXT: movne.w r1, #-1
141
+ ; CHECK-NEXT: rsbs r3, r3, #0
142
+ ; CHECK-NEXT: sbcs.w r2, r0, r2
134
143
; CHECK-NEXT: it lt
135
144
; CHECK-NEXT: movlt r0, #1
136
145
; CHECK-NEXT: cmp r0, #0
137
146
; CHECK-NEXT: it ne
138
147
; CHECK-NEXT: movne.w r0, #-1
139
- ; CHECK-NEXT: vmov.32 q1[0], r0
140
- ; CHECK-NEXT: vmov r0, s3
141
- ; CHECK-NEXT: rsbs r1, r1, #0
142
- ; CHECK-NEXT: sbcs.w r0, r2, r0
143
- ; CHECK-NEXT: it lt
144
- ; CHECK-NEXT: movlt r2, #1
145
- ; CHECK-NEXT: adr r0, .LCPI7_0
146
- ; CHECK-NEXT: cmp r2, #0
147
- ; CHECK-NEXT: vldrw.u32 q0, [r0]
148
- ; CHECK-NEXT: it ne
149
- ; CHECK-NEXT: movne.w r2, #-1
150
- ; CHECK-NEXT: vmov.32 q1[2], r2
151
- ; CHECK-NEXT: vand q0, q1, q0
148
+ ; CHECK-NEXT: vmov.32 q0[0], r0
149
+ ; CHECK-NEXT: vmov.32 q0[2], r1
150
+ ; CHECK-NEXT: vand q0, q0, q1
152
151
; CHECK-NEXT: bx lr
153
152
; CHECK-NEXT: .p2align 4
154
153
; CHECK-NEXT: @ %bb.1:
@@ -162,3 +161,115 @@ entry:
162
161
%0 = zext <2 x i1 > %c to <2 x i64 >
163
162
ret <2 x i64 > %0
164
163
}
164
+
165
+
166
+ define arm_aapcs_vfpcc <4 x float > @uitofp_v4i1_v4f32 (<4 x i32 > %src ) {
167
+ ; CHECK-LABEL: uitofp_v4i1_v4f32:
168
+ ; CHECK: @ %bb.0: @ %entry
169
+ ; CHECK-NEXT: vcmp.s32 gt, q0, zr
170
+ ; CHECK-NEXT: vmrs r0, p0
171
+ ; CHECK-NEXT: ubfx r1, r0, #8, #1
172
+ ; CHECK-NEXT: ubfx r2, r0, #12, #1
173
+ ; CHECK-NEXT: vmov s0, r2
174
+ ; CHECK-NEXT: vmov s4, r1
175
+ ; CHECK-NEXT: vcvt.f32.u32 s3, s0
176
+ ; CHECK-NEXT: ubfx r2, r0, #4, #1
177
+ ; CHECK-NEXT: vcvt.f32.u32 s2, s4
178
+ ; CHECK-NEXT: and r0, r0, #1
179
+ ; CHECK-NEXT: vmov s4, r2
180
+ ; CHECK-NEXT: vcvt.f32.u32 s1, s4
181
+ ; CHECK-NEXT: vmov s4, r0
182
+ ; CHECK-NEXT: vcvt.f32.u32 s0, s4
183
+ ; CHECK-NEXT: bx lr
184
+ entry:
185
+ %c = icmp sgt <4 x i32 > %src , zeroinitializer
186
+ %0 = uitofp <4 x i1 > %c to <4 x float >
187
+ ret <4 x float > %0
188
+ }
189
+
190
+ define arm_aapcs_vfpcc <4 x float > @sitofp_v4i1_v4f32 (<4 x i32 > %src ) {
191
+ ; CHECK-LABEL: sitofp_v4i1_v4f32:
192
+ ; CHECK: @ %bb.0: @ %entry
193
+ ; CHECK-NEXT: vcmp.s32 gt, q0, zr
194
+ ; CHECK-NEXT: vmrs r0, p0
195
+ ; CHECK-NEXT: and r1, r0, #1
196
+ ; CHECK-NEXT: ubfx r2, r0, #8, #1
197
+ ; CHECK-NEXT: ubfx r3, r0, #4, #1
198
+ ; CHECK-NEXT: ubfx r0, r0, #12, #1
199
+ ; CHECK-NEXT: rsbs r2, r2, #0
200
+ ; CHECK-NEXT: rsbs r0, r0, #0
201
+ ; CHECK-NEXT: vmov s4, r2
202
+ ; CHECK-NEXT: vmov s0, r0
203
+ ; CHECK-NEXT: rsbs r0, r3, #0
204
+ ; CHECK-NEXT: vcvt.f32.s32 s3, s0
205
+ ; CHECK-NEXT: vcvt.f32.s32 s2, s4
206
+ ; CHECK-NEXT: vmov s4, r0
207
+ ; CHECK-NEXT: rsbs r0, r1, #0
208
+ ; CHECK-NEXT: vcvt.f32.s32 s1, s4
209
+ ; CHECK-NEXT: vmov s4, r0
210
+ ; CHECK-NEXT: vcvt.f32.s32 s0, s4
211
+ ; CHECK-NEXT: bx lr
212
+ entry:
213
+ %c = icmp sgt <4 x i32 > %src , zeroinitializer
214
+ %0 = sitofp <4 x i1 > %c to <4 x float >
215
+ ret <4 x float > %0
216
+ }
217
+
218
+ define arm_aapcs_vfpcc <4 x float > @fptoui_v4i1_v4f32 (<4 x float > %src ) {
219
+ ; CHECK-LABEL: fptoui_v4i1_v4f32:
220
+ ; CHECK: @ %bb.0: @ %entry
221
+ ; CHECK-NEXT: vcvt.s32.f32 s4, s0
222
+ ; CHECK-NEXT: movs r0, #0
223
+ ; CHECK-NEXT: vmov.f32 q2, #1.000000e+00
224
+ ; CHECK-NEXT: vmov r1, s4
225
+ ; CHECK-NEXT: vcvt.s32.f32 s4, s1
226
+ ; CHECK-NEXT: rsbs r1, r1, #0
227
+ ; CHECK-NEXT: bfi r0, r1, #0, #4
228
+ ; CHECK-NEXT: vmov r1, s4
229
+ ; CHECK-NEXT: vcvt.s32.f32 s4, s2
230
+ ; CHECK-NEXT: vcvt.s32.f32 s0, s3
231
+ ; CHECK-NEXT: rsbs r1, r1, #0
232
+ ; CHECK-NEXT: bfi r0, r1, #4, #4
233
+ ; CHECK-NEXT: vmov r1, s4
234
+ ; CHECK-NEXT: vmov.i32 q1, #0x0
235
+ ; CHECK-NEXT: rsbs r1, r1, #0
236
+ ; CHECK-NEXT: bfi r0, r1, #8, #4
237
+ ; CHECK-NEXT: vmov r1, s0
238
+ ; CHECK-NEXT: rsbs r1, r1, #0
239
+ ; CHECK-NEXT: bfi r0, r1, #12, #4
240
+ ; CHECK-NEXT: vmsr p0, r0
241
+ ; CHECK-NEXT: vpsel q0, q2, q1
242
+ ; CHECK-NEXT: bx lr
243
+ entry:
244
+ %0 = fptoui <4 x float > %src to <4 x i1 >
245
+ %s = select <4 x i1 > %0 , <4 x float > <float 1 .0 , float 1 .0 , float 1 .0 , float 1 .0 >, <4 x float > zeroinitializer
246
+ ret <4 x float > %s
247
+ }
248
+
249
+ define arm_aapcs_vfpcc <4 x float > @fptosi_v4i1_v4f32 (<4 x float > %src ) {
250
+ ; CHECK-LABEL: fptosi_v4i1_v4f32:
251
+ ; CHECK: @ %bb.0: @ %entry
252
+ ; CHECK-NEXT: vcvt.s32.f32 s4, s0
253
+ ; CHECK-NEXT: movs r0, #0
254
+ ; CHECK-NEXT: vmov.f32 q2, #1.000000e+00
255
+ ; CHECK-NEXT: vmov r1, s4
256
+ ; CHECK-NEXT: vcvt.s32.f32 s4, s1
257
+ ; CHECK-NEXT: bfi r0, r1, #0, #4
258
+ ; CHECK-NEXT: vmov r1, s4
259
+ ; CHECK-NEXT: vcvt.s32.f32 s4, s2
260
+ ; CHECK-NEXT: bfi r0, r1, #4, #4
261
+ ; CHECK-NEXT: vcvt.s32.f32 s0, s3
262
+ ; CHECK-NEXT: vmov r1, s4
263
+ ; CHECK-NEXT: vmov.i32 q1, #0x0
264
+ ; CHECK-NEXT: bfi r0, r1, #8, #4
265
+ ; CHECK-NEXT: vmov r1, s0
266
+ ; CHECK-NEXT: bfi r0, r1, #12, #4
267
+ ; CHECK-NEXT: vmsr p0, r0
268
+ ; CHECK-NEXT: vpsel q0, q2, q1
269
+ ; CHECK-NEXT: bx lr
270
+ entry:
271
+ %0 = fptosi <4 x float > %src to <4 x i1 >
272
+ %s = select <4 x i1 > %0 , <4 x float > <float 1 .0 , float 1 .0 , float 1 .0 , float 1 .0 >, <4 x float > zeroinitializer
273
+ ret <4 x float > %s
274
+ }
275
+
0 commit comments