@@ -1,17 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
- ; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
-
- ; rdar://12471808
+ ; RUN: llc -mtriple=armv7-eabihf -mattr=+neon %s -o - | FileCheck %s

define <8 x i8> @v_bsli8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bsli8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d18, [r0]
; CHECK-NEXT: vldr d16, [r2]
+ ; CHECK-NEXT: vorr d0, d18, d18
; CHECK-NEXT: vldr d17, [r1]
- ; CHECK-NEXT: vbit d16, d17, d18
- ; CHECK-NEXT: vmov r0, r1, d16
- ; CHECK-NEXT: mov pc, lr
+ ; CHECK-NEXT: vbsl d0, d17, d16
+ ; CHECK-NEXT: bx lr
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = load <8 x i8>, ptr %B
%tmp3 = load <8 x i8>, ptr %C
@@ -27,10 +25,10 @@ define <4 x i16> @v_bsli16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d18, [r0]
; CHECK-NEXT: vldr d16, [r2]
+ ; CHECK-NEXT: vorr d0, d18, d18
; CHECK-NEXT: vldr d17, [r1]
- ; CHECK-NEXT: vbit d16, d17, d18
- ; CHECK-NEXT: vmov r0, r1, d16
- ; CHECK-NEXT: mov pc, lr
+ ; CHECK-NEXT: vbsl d0, d17, d16
+ ; CHECK-NEXT: bx lr
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = load <4 x i16>, ptr %B
%tmp3 = load <4 x i16>, ptr %C
@@ -46,10 +44,10 @@ define <2 x i32> @v_bsli32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d18, [r0]
; CHECK-NEXT: vldr d16, [r2]
+ ; CHECK-NEXT: vorr d0, d18, d18
; CHECK-NEXT: vldr d17, [r1]
- ; CHECK-NEXT: vbit d16, d17, d18
- ; CHECK-NEXT: vmov r0, r1, d16
- ; CHECK-NEXT: mov pc, lr
+ ; CHECK-NEXT: vbsl d0, d17, d16
+ ; CHECK-NEXT: bx lr
%tmp1 = load <2 x i32>, ptr %A
%tmp2 = load <2 x i32>, ptr %B
%tmp3 = load <2 x i32>, ptr %C
@@ -65,10 +63,10 @@ define <1 x i64> @v_bsli64(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d18, [r0]
; CHECK-NEXT: vldr d16, [r2]
+ ; CHECK-NEXT: vorr d0, d18, d18
; CHECK-NEXT: vldr d17, [r1]
- ; CHECK-NEXT: vbit d16, d17, d18
- ; CHECK-NEXT: vmov r0, r1, d16
- ; CHECK-NEXT: mov pc, lr
+ ; CHECK-NEXT: vbsl d0, d17, d16
+ ; CHECK-NEXT: bx lr
%tmp1 = load <1 x i64>, ptr %A
%tmp2 = load <1 x i64>, ptr %B
%tmp3 = load <1 x i64>, ptr %C
@@ -83,12 +81,11 @@ define <16 x i8> @v_bslQi8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bslQi8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
+ ; CHECK-NEXT: vorr q0, q10, q10
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
- ; CHECK-NEXT: vbit q8, q9, q10
- ; CHECK-NEXT: vmov r0, r1, d16
- ; CHECK-NEXT: vmov r2, r3, d17
- ; CHECK-NEXT: mov pc, lr
+ ; CHECK-NEXT: vbsl q0, q9, q8
+ ; CHECK-NEXT: bx lr
%tmp1 = load <16 x i8>, ptr %A
%tmp2 = load <16 x i8>, ptr %B
%tmp3 = load <16 x i8>, ptr %C
@@ -103,12 +100,11 @@ define <8 x i16> @v_bslQi16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bslQi16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
+ ; CHECK-NEXT: vorr q0, q10, q10
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
- ; CHECK-NEXT: vbit q8, q9, q10
- ; CHECK-NEXT: vmov r0, r1, d16
- ; CHECK-NEXT: vmov r2, r3, d17
- ; CHECK-NEXT: mov pc, lr
+ ; CHECK-NEXT: vbsl q0, q9, q8
+ ; CHECK-NEXT: bx lr
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%tmp3 = load <8 x i16>, ptr %C
@@ -123,12 +119,11 @@ define <4 x i32> @v_bslQi32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bslQi32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
+ ; CHECK-NEXT: vorr q0, q10, q10
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
- ; CHECK-NEXT: vbit q8, q9, q10
- ; CHECK-NEXT: vmov r0, r1, d16
- ; CHECK-NEXT: vmov r2, r3, d17
- ; CHECK-NEXT: mov pc, lr
+ ; CHECK-NEXT: vbsl q0, q9, q8
+ ; CHECK-NEXT: bx lr
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%tmp3 = load <4 x i32>, ptr %C
@@ -143,12 +138,11 @@ define <2 x i64> @v_bslQi64(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bslQi64:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
+ ; CHECK-NEXT: vorr q0, q10, q10
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
- ; CHECK-NEXT: vbit q8, q9, q10
- ; CHECK-NEXT: vmov r0, r1, d16
- ; CHECK-NEXT: vmov r2, r3, d17
- ; CHECK-NEXT: mov pc, lr
+ ; CHECK-NEXT: vbsl q0, q9, q8
+ ; CHECK-NEXT: bx lr
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = load <2 x i64>, ptr %B
%tmp3 = load <2 x i64>, ptr %C
@@ -162,179 +156,107 @@ define <2 x i64> @v_bslQi64(ptr %A, ptr %B, ptr %C) nounwind {
define <8 x i8> @f1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: f1:
; CHECK: @ %bb.0:
- ; CHECK-NEXT: vldr d16, [sp]
- ; CHECK-NEXT: vmov d17, r2, r3
- ; CHECK-NEXT: vmov d18, r0, r1
- ; CHECK-NEXT: vbit d16, d17, d18
- ; CHECK-NEXT: vmov r0, r1, d16
- ; CHECK-NEXT: mov pc, lr
+ ; CHECK-NEXT: vbsl d0, d1, d2
+ ; CHECK-NEXT: bx lr
%vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) nounwind
ret <8 x i8> %vbsl.i
}

define <4 x i16> @f2(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: f2:
; CHECK: @ %bb.0:
- ; CHECK-NEXT: vldr d16, [sp]
- ; CHECK-NEXT: vmov d17, r2, r3
- ; CHECK-NEXT: vmov d18, r0, r1
- ; CHECK-NEXT: vbit d16, d17, d18
- ; CHECK-NEXT: vmov r0, r1, d16
- ; CHECK-NEXT: mov pc, lr
+ ; CHECK-NEXT: vbsl d0, d1, d2
+ ; CHECK-NEXT: bx lr
%vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) nounwind
ret <4 x i16> %vbsl3.i
}

define <2 x i32> @f3(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: f3:
; CHECK: @ %bb.0:
- ; CHECK-NEXT: vldr d16, [sp]
- ; CHECK-NEXT: vmov d17, r2, r3
- ; CHECK-NEXT: vmov d18, r0, r1
- ; CHECK-NEXT: vbit d16, d17, d18
- ; CHECK-NEXT: vmov r0, r1, d16
- ; CHECK-NEXT: mov pc, lr
+ ; CHECK-NEXT: vbsl d0, d1, d2
+ ; CHECK-NEXT: bx lr
%vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) nounwind
ret <2 x i32> %vbsl3.i
}

define <2 x float> @f4(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: f4:
; CHECK: @ %bb.0:
- ; CHECK-NEXT: vldr d16, [sp]
- ; CHECK-NEXT: vmov d17, r2, r3
- ; CHECK-NEXT: vmov d18, r0, r1
- ; CHECK-NEXT: vbit d16, d17, d18
- ; CHECK-NEXT: vmov r0, r1, d16
- ; CHECK-NEXT: mov pc, lr
+ ; CHECK-NEXT: vbsl d0, d1, d2
+ ; CHECK-NEXT: bx lr
%vbsl4.i = tail call <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind
ret <2 x float> %vbsl4.i
}

define <16 x i8> @g1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: g1:
; CHECK: @ %bb.0:
- ; CHECK-NEXT: vmov d19, r2, r3
- ; CHECK-NEXT: add r12, sp, #16
- ; CHECK-NEXT: vmov d18, r0, r1
- ; CHECK-NEXT: mov r0, sp
- ; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
- ; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
- ; CHECK-NEXT: vbit q8, q10, q9
- ; CHECK-NEXT: vmov r0, r1, d16
- ; CHECK-NEXT: vmov r2, r3, d17
- ; CHECK-NEXT: mov pc, lr
+ ; CHECK-NEXT: vbsl q0, q1, q2
+ ; CHECK-NEXT: bx lr
%vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) nounwind
ret <16 x i8> %vbsl.i
}

define <8 x i16> @g2(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: g2:
; CHECK: @ %bb.0:
- ; CHECK-NEXT: vmov d19, r2, r3
- ; CHECK-NEXT: add r12, sp, #16
- ; CHECK-NEXT: vmov d18, r0, r1
- ; CHECK-NEXT: mov r0, sp
- ; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
- ; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
- ; CHECK-NEXT: vbit q8, q10, q9
- ; CHECK-NEXT: vmov r0, r1, d16
- ; CHECK-NEXT: vmov r2, r3, d17
- ; CHECK-NEXT: mov pc, lr
+ ; CHECK-NEXT: vbsl q0, q1, q2
+ ; CHECK-NEXT: bx lr
%vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind
ret <8 x i16> %vbsl3.i
}

define <4 x i32> @g3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: g3:
; CHECK: @ %bb.0:
- ; CHECK-NEXT: vmov d19, r2, r3
- ; CHECK-NEXT: add r12, sp, #16
- ; CHECK-NEXT: vmov d18, r0, r1
- ; CHECK-NEXT: mov r0, sp
- ; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
- ; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
- ; CHECK-NEXT: vbit q8, q10, q9
- ; CHECK-NEXT: vmov r0, r1, d16
- ; CHECK-NEXT: vmov r2, r3, d17
- ; CHECK-NEXT: mov pc, lr
+ ; CHECK-NEXT: vbsl q0, q1, q2
+ ; CHECK-NEXT: bx lr
%vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind
ret <4 x i32> %vbsl3.i
}

define <4 x float> @g4(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: g4:
; CHECK: @ %bb.0:
- ; CHECK-NEXT: vmov d19, r2, r3
- ; CHECK-NEXT: add r12, sp, #16
- ; CHECK-NEXT: vmov d18, r0, r1
- ; CHECK-NEXT: mov r0, sp
- ; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
- ; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
- ; CHECK-NEXT: vbit q8, q10, q9
- ; CHECK-NEXT: vmov r0, r1, d16
- ; CHECK-NEXT: vmov r2, r3, d17
- ; CHECK-NEXT: mov pc, lr
+ ; CHECK-NEXT: vbsl q0, q1, q2
+ ; CHECK-NEXT: bx lr
%vbsl4.i = tail call <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind
ret <4 x float> %vbsl4.i
}

define <1 x i64> @test_vbsl_s64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: test_vbsl_s64:
; CHECK: @ %bb.0:
- ; CHECK-NEXT: vldr d16, [sp]
- ; CHECK-NEXT: vmov d17, r2, r3
- ; CHECK-NEXT: vmov d18, r0, r1
- ; CHECK-NEXT: vbit d16, d17, d18
- ; CHECK-NEXT: vmov r0, r1, d16
- ; CHECK-NEXT: mov pc, lr
+ ; CHECK-NEXT: vbsl d0, d1, d2
+ ; CHECK-NEXT: bx lr
%vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind
ret <1 x i64> %vbsl3.i
}

define <1 x i64> @test_vbsl_u64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: test_vbsl_u64:
; CHECK: @ %bb.0:
- ; CHECK-NEXT: vldr d16, [sp]
- ; CHECK-NEXT: vmov d17, r2, r3
- ; CHECK-NEXT: vmov d18, r0, r1
- ; CHECK-NEXT: vbit d16, d17, d18
- ; CHECK-NEXT: vmov r0, r1, d16
- ; CHECK-NEXT: mov pc, lr
+ ; CHECK-NEXT: vbsl d0, d1, d2
+ ; CHECK-NEXT: bx lr
%vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind
ret <1 x i64> %vbsl3.i
}

define <2 x i64> @test_vbslq_s64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: test_vbslq_s64:
; CHECK: @ %bb.0:
- ; CHECK-NEXT: vmov d19, r2, r3
- ; CHECK-NEXT: add r12, sp, #16
- ; CHECK-NEXT: vmov d18, r0, r1
- ; CHECK-NEXT: mov r0, sp
- ; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
- ; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
- ; CHECK-NEXT: vbit q8, q10, q9
- ; CHECK-NEXT: vmov r0, r1, d16
- ; CHECK-NEXT: vmov r2, r3, d17
- ; CHECK-NEXT: mov pc, lr
+ ; CHECK-NEXT: vbsl q0, q1, q2
+ ; CHECK-NEXT: bx lr
%vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind
ret <2 x i64> %vbsl3.i
}

define <2 x i64> @test_vbslq_u64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: test_vbslq_u64:
; CHECK: @ %bb.0:
- ; CHECK-NEXT: vmov d19, r2, r3
- ; CHECK-NEXT: add r12, sp, #16
- ; CHECK-NEXT: vmov d18, r0, r1
- ; CHECK-NEXT: mov r0, sp
- ; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
- ; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
- ; CHECK-NEXT: vbit q8, q10, q9
- ; CHECK-NEXT: vmov r0, r1, d16
- ; CHECK-NEXT: vmov r2, r3, d17
- ; CHECK-NEXT: mov pc, lr
+ ; CHECK-NEXT: vbsl q0, q1, q2
+ ; CHECK-NEXT: bx lr
%vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind
ret <2 x i64> %vbsl3.i
}