- ; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+ ; RUN: llc -mtriple=arm-eabi -mattr=+neon,+fullfp16 %s -o - | FileCheck %s

define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
- ;CHECK-LABEL: test_vrev64D8:
- ;CHECK: vrev64.8
+ ; CHECK-LABEL: test_vrev64D8:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vldr d16, [r0]
+ ; CHECK-NEXT: vrev64.8 d16, d16
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x i8> %tmp2
}

define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
- ;CHECK-LABEL: test_vrev64D16:
- ;CHECK: vrev64.16
+ ; CHECK-LABEL: test_vrev64D16:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vldr d16, [r0]
+ ; CHECK-NEXT: vrev64.16 d16, d16
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x i16> %tmp2
}

+ define <4 x half> @test_vrev64Df16(<4 x half>* %A) nounwind {
+ ; CHECK-LABEL: test_vrev64Df16:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vldr d16, [r0]
+ ; CHECK-NEXT: vrev64.16 d16, d16
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: mov pc, lr
+ %tmp1 = load <4 x half>, <4 x half>* %A
+ %tmp2 = shufflevector <4 x half> %tmp1, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x half> %tmp2
+ }
+
define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
- ;CHECK-LABEL: test_vrev64D32:
- ;CHECK: vrev64.32
+ ; CHECK-LABEL: test_vrev64D32:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vldr d16, [r0]
+ ; CHECK-NEXT: vrev64.32 d16, d16
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
ret <2 x i32> %tmp2
}

define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
- ;CHECK-LABEL: test_vrev64Df:
- ;CHECK: vrev64.32
+ ; CHECK-LABEL: test_vrev64Df:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vldr d16, [r0]
+ ; CHECK-NEXT: vrev64.32 d16, d16
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
ret <2 x float> %tmp2
}

define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
- ;CHECK-LABEL: test_vrev64Q8:
- ;CHECK: vrev64.8
+ ; CHECK-LABEL: test_vrev64Q8:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+ ; CHECK-NEXT: vrev64.8 q8, q8
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: vmov r2, r3, d17
+ ; CHECK-NEXT: mov pc, lr
%tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
ret <16 x i8> %tmp2
}

define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
- ;CHECK-LABEL: test_vrev64Q16:
- ;CHECK: vrev64.16
+ ; CHECK-LABEL: test_vrev64Q16:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+ ; CHECK-NEXT: vrev64.16 q8, q8
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: vmov r2, r3, d17
+ ; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x i16> %tmp2
}

+ define <8 x half> @test_vrev64Qf16(<8 x half>* %A) nounwind {
+ ; CHECK-LABEL: test_vrev64Qf16:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+ ; CHECK-NEXT: vrev64.16 q8, q8
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: vmov r2, r3, d17
+ ; CHECK-NEXT: mov pc, lr
+ %tmp1 = load <8 x half>, <8 x half>* %A
+ %tmp2 = shufflevector <8 x half> %tmp1, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+ ret <8 x half> %tmp2
+ }
+
define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
- ;CHECK-LABEL: test_vrev64Q32:
- ;CHECK: vrev64.32
+ ; CHECK-LABEL: test_vrev64Q32:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+ ; CHECK-NEXT: vrev64.32 q8, q8
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: vmov r2, r3, d17
+ ; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
ret <4 x i32> %tmp2
}

define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
- ;CHECK-LABEL: test_vrev64Qf:
- ;CHECK: vrev64.32
+ ; CHECK-LABEL: test_vrev64Qf:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+ ; CHECK-NEXT: vrev64.32 q8, q8
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: vmov r2, r3, d17
+ ; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
ret <4 x float> %tmp2
}

define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
- ;CHECK-LABEL: test_vrev32D8:
- ;CHECK: vrev32.8
+ ; CHECK-LABEL: test_vrev32D8:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vldr d16, [r0]
+ ; CHECK-NEXT: vrev32.8 d16, d16
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x i8> %tmp2
}

define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
- ;CHECK-LABEL: test_vrev32D16:
- ;CHECK: vrev32.16
+ ; CHECK-LABEL: test_vrev32D16:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vldr d16, [r0]
+ ; CHECK-NEXT: vrev32.16 d16, d16
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
ret <4 x i16> %tmp2
}

+ define <4 x half> @test_vrev32Df16(<4 x half>* %A) nounwind {
+ ; CHECK-LABEL: test_vrev32Df16:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vldr d16, [r0]
+ ; CHECK-NEXT: vrev32.16 d16, d16
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: mov pc, lr
+ %tmp1 = load <4 x half>, <4 x half>* %A
+ %tmp2 = shufflevector <4 x half> %tmp1, <4 x half> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ ret <4 x half> %tmp2
+ }
+
define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
- ;CHECK-LABEL: test_vrev32Q8:
- ;CHECK: vrev32.8
+ ; CHECK-LABEL: test_vrev32Q8:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+ ; CHECK-NEXT: vrev32.8 q8, q8
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: vmov r2, r3, d17
+ ; CHECK-NEXT: mov pc, lr
%tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
ret <16 x i8> %tmp2
}

define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
- ;CHECK-LABEL: test_vrev32Q16:
- ;CHECK: vrev32.16
+ ; CHECK-LABEL: test_vrev32Q16:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+ ; CHECK-NEXT: vrev32.16 q8, q8
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: vmov r2, r3, d17
+ ; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
ret <8 x i16> %tmp2
}

+ define <8 x half> @test_vrev32Qf16(<8 x half>* %A) nounwind {
+ ; CHECK-LABEL: test_vrev32Qf16:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+ ; CHECK-NEXT: vrev32.16 q8, q8
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: vmov r2, r3, d17
+ ; CHECK-NEXT: mov pc, lr
+ %tmp1 = load <8 x half>, <8 x half>* %A
+ %tmp2 = shufflevector <8 x half> %tmp1, <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+ ret <8 x half> %tmp2
+ }
+
define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
- ;CHECK-LABEL: test_vrev16D8:
- ;CHECK: vrev16.8
+ ; CHECK-LABEL: test_vrev16D8:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vldr d16, [r0]
+ ; CHECK-NEXT: vrev16.8 d16, d16
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
ret <8 x i8> %tmp2
}

define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
- ;CHECK-LABEL: test_vrev16Q8:
- ;CHECK: vrev16.8
+ ; CHECK-LABEL: test_vrev16Q8:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+ ; CHECK-NEXT: vrev16.8 q8, q8
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: vmov r2, r3, d17
+ ; CHECK-NEXT: mov pc, lr
%tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
ret <16 x i8> %tmp2
@@ -115,27 +229,54 @@ define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
; Undef shuffle indices should not prevent matching to VREV:

define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
- ;CHECK-LABEL: test_vrev64D8_undef:
- ;CHECK: vrev64.8
+ ; CHECK-LABEL: test_vrev64D8_undef:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vldr d16, [r0]
+ ; CHECK-NEXT: vrev64.8 d16, d16
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x i8> %tmp2
}

define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
- ;CHECK-LABEL: test_vrev32Q16_undef:
- ;CHECK: vrev32.16
+ ; CHECK-LABEL: test_vrev32Q16_undef:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+ ; CHECK-NEXT: vrev32.16 q8, q8
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: vmov r2, r3, d17
+ ; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
ret <8 x i16> %tmp2
}

+ define <8 x half> @test_vrev32Qf16_undef(<8 x half>* %A) nounwind {
+ ; CHECK-LABEL: test_vrev32Qf16_undef:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+ ; CHECK-NEXT: vrev32.16 q8, q8
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: vmov r2, r3, d17
+ ; CHECK-NEXT: mov pc, lr
+ %tmp1 = load <8 x half>, <8 x half>* %A
+ %tmp2 = shufflevector <8 x half> %tmp1, <8 x half> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
+ ret <8 x half> %tmp2
+ }
+
; A vcombine feeding a VREV should not obscure things. Radar 8597007.

define void @test_with_vcombine(<4 x float>* %v) nounwind {
- ;CHECK-LABEL: test_with_vcombine:
- ;CHECK-NOT: vext
- ;CHECK: vrev64.32
+ ; CHECK-LABEL: test_with_vcombine:
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vld1.64 {d16, d17}, [r0:128]
+ ; CHECK-NEXT: vadd.f32 d18, d17, d17
+ ; CHECK-NEXT: vrev64.32 d16, d16
+ ; CHECK-NEXT: vrev64.32 d17, d18
+ ; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
+ ; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x float>, <4 x float>* %v, align 16
%tmp2 = bitcast <4 x float> %tmp1 to <2 x double>
%tmp3 = extractelement <2 x double> %tmp2, i32 0
@@ -152,7 +293,15 @@ define void @test_with_vcombine(<4 x float>* %v) nounwind {
; to <2 x i16> when stored to memory.
define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
; CHECK-LABEL: test_vrev64:
- ; CHECK: vst1.32
+ ; CHECK: @ %bb.0: @ %entry
+ ; CHECK-NEXT: vld1.32 {d16, d17}, [r0]
+ ; CHECK-NEXT: vmov.u16 r0, d17[2]
+ ; CHECK-NEXT: vmov.u16 r2, d17[1]
+ ; CHECK-NEXT: vmov.32 d16[0], r0
+ ; CHECK-NEXT: vmov.32 d16[1], r2
+ ; CHECK-NEXT: vuzp.16 d16, d17
+ ; CHECK-NEXT: vst1.32 {d16[0]}, [r1:32]
+ ; CHECK-NEXT: mov pc, lr
entry:
%0 = bitcast <4 x i16>* %source to <8 x i16>*
%tmp2 = load <8 x i16>, <8 x i16>* %0, align 4
@@ -166,9 +315,15 @@ entry:

; Test vrev of float4
define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest) nounwind noinline ssp {
- ; CHECK: float_vrev64
- ; CHECK: vext.32
- ; CHECK: vrev64.32
+ ; CHECK-LABEL: float_vrev64:
+ ; CHECK: @ %bb.0: @ %entry
+ ; CHECK-NEXT: vmov.i32 q8, #0x0
+ ; CHECK-NEXT: vld1.32 {d18, d19}, [r0]
+ ; CHECK-NEXT: add r0, r1, #176
+ ; CHECK-NEXT: vext.32 q8, q9, q8, #3
+ ; CHECK-NEXT: vrev64.32 q8, q8
+ ; CHECK-NEXT: vst1.32 {d16, d17}, [r0]
+ ; CHECK-NEXT: mov pc, lr
entry:
%0 = bitcast float* %source to <4 x float>*
%tmp2 = load <4 x float>, <4 x float>* %0, align 4
@@ -180,7 +335,13 @@ entry:

define <4 x i32> @test_vrev32_bswap(<4 x i32> %source) nounwind {
; CHECK-LABEL: test_vrev32_bswap:
- ; CHECK: vrev32.8
+ ; CHECK: @ %bb.0:
+ ; CHECK-NEXT: vmov d17, r2, r3
+ ; CHECK-NEXT: vmov d16, r0, r1
+ ; CHECK-NEXT: vrev32.8 q8, q8
+ ; CHECK-NEXT: vmov r0, r1, d16
+ ; CHECK-NEXT: vmov r2, r3, d17
+ ; CHECK-NEXT: mov pc, lr
%bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source)
ret <4 x i32> %bswap
}