@@ -15,20 +15,8 @@ define <2 x i64> @extract_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind {
15
15
define <2 x i64 > @extract_v2i64_nxv2i64_idx2 (<vscale x 2 x i64 > %vec ) nounwind {
16
16
; CHECK-LABEL: extract_v2i64_nxv2i64_idx2:
17
17
; CHECK: // %bb.0:
18
- ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
19
- ; CHECK-NEXT: addvl sp, sp, #-1
20
- ; CHECK-NEXT: cntd x8
21
- ; CHECK-NEXT: mov w9, #2 // =0x2
22
- ; CHECK-NEXT: ptrue p0.d
23
- ; CHECK-NEXT: sub x8, x8, #2
24
- ; CHECK-NEXT: cmp x8, #2
25
- ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
26
- ; CHECK-NEXT: csel x8, x8, x9, lo
27
- ; CHECK-NEXT: mov x9, sp
28
- ; CHECK-NEXT: lsl x8, x8, #3
29
- ; CHECK-NEXT: ldr q0, [x9, x8]
30
- ; CHECK-NEXT: addvl sp, sp, #1
31
- ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
18
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16
19
+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
32
20
; CHECK-NEXT: ret
33
21
%retval = call <2 x i64 > @llvm.vector.extract.v2i64.nxv2i64 (<vscale x 2 x i64 > %vec , i64 2 )
34
22
ret <2 x i64 > %retval
@@ -48,20 +36,8 @@ define <4 x i32> @extract_v4i32_nxv4i32(<vscale x 4 x i32> %vec) nounwind {
48
36
define <4 x i32 > @extract_v4i32_nxv4i32_idx4 (<vscale x 4 x i32 > %vec ) nounwind {
49
37
; CHECK-LABEL: extract_v4i32_nxv4i32_idx4:
50
38
; CHECK: // %bb.0:
51
- ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
52
- ; CHECK-NEXT: addvl sp, sp, #-1
53
- ; CHECK-NEXT: cntw x8
54
- ; CHECK-NEXT: mov w9, #4 // =0x4
55
- ; CHECK-NEXT: ptrue p0.s
56
- ; CHECK-NEXT: sub x8, x8, #4
57
- ; CHECK-NEXT: cmp x8, #4
58
- ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
59
- ; CHECK-NEXT: csel x8, x8, x9, lo
60
- ; CHECK-NEXT: mov x9, sp
61
- ; CHECK-NEXT: lsl x8, x8, #2
62
- ; CHECK-NEXT: ldr q0, [x9, x8]
63
- ; CHECK-NEXT: addvl sp, sp, #1
64
- ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
39
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16
40
+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
65
41
; CHECK-NEXT: ret
66
42
%retval = call <4 x i32 > @llvm.vector.extract.v4i32.nxv4i32 (<vscale x 4 x i32 > %vec , i64 4 )
67
43
ret <4 x i32 > %retval
@@ -82,18 +58,9 @@ define <4 x i32> @extract_v4i32_nxv2i32(<vscale x 2 x i32> %vec) nounwind #1 {
82
58
define <4 x i32 > @extract_v4i32_nxv2i32_idx4 (<vscale x 2 x i32 > %vec ) nounwind #1 {
83
59
; CHECK-LABEL: extract_v4i32_nxv2i32_idx4:
84
60
; CHECK: // %bb.0:
85
- ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
86
- ; CHECK-NEXT: addvl sp, sp, #-1
87
- ; CHECK-NEXT: ptrue p0.d
88
- ; CHECK-NEXT: mov x8, #4 // =0x4
89
- ; CHECK-NEXT: mov x9, sp
90
- ; CHECK-NEXT: ptrue p1.d, vl4
91
- ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
92
- ; CHECK-NEXT: ld1d { z0.d }, p1/z, [x9, x8, lsl #3]
61
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #32
93
62
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
94
63
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
95
- ; CHECK-NEXT: addvl sp, sp, #1
96
- ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
97
64
; CHECK-NEXT: ret
98
65
%retval = call <4 x i32 > @llvm.vector.extract.v4i32.nxv2i32 (<vscale x 2 x i32 > %vec , i64 4 )
99
66
ret <4 x i32 > %retval
@@ -113,20 +80,8 @@ define <8 x i16> @extract_v8i16_nxv8i16(<vscale x 8 x i16> %vec) nounwind {
113
80
define <8 x i16 > @extract_v8i16_nxv8i16_idx8 (<vscale x 8 x i16 > %vec ) nounwind {
114
81
; CHECK-LABEL: extract_v8i16_nxv8i16_idx8:
115
82
; CHECK: // %bb.0:
116
- ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
117
- ; CHECK-NEXT: addvl sp, sp, #-1
118
- ; CHECK-NEXT: cnth x8
119
- ; CHECK-NEXT: mov w9, #8 // =0x8
120
- ; CHECK-NEXT: ptrue p0.h
121
- ; CHECK-NEXT: sub x8, x8, #8
122
- ; CHECK-NEXT: cmp x8, #8
123
- ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
124
- ; CHECK-NEXT: csel x8, x8, x9, lo
125
- ; CHECK-NEXT: mov x9, sp
126
- ; CHECK-NEXT: lsl x8, x8, #1
127
- ; CHECK-NEXT: ldr q0, [x9, x8]
128
- ; CHECK-NEXT: addvl sp, sp, #1
129
- ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
83
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16
84
+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
130
85
; CHECK-NEXT: ret
131
86
%retval = call <8 x i16 > @llvm.vector.extract.v8i16.nxv8i16 (<vscale x 8 x i16 > %vec , i64 8 )
132
87
ret <8 x i16 > %retval
@@ -147,18 +102,9 @@ define <8 x i16> @extract_v8i16_nxv4i16(<vscale x 4 x i16> %vec) nounwind #1 {
147
102
define <8 x i16 > @extract_v8i16_nxv4i16_idx8 (<vscale x 4 x i16 > %vec ) nounwind #1 {
148
103
; CHECK-LABEL: extract_v8i16_nxv4i16_idx8:
149
104
; CHECK: // %bb.0:
150
- ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
151
- ; CHECK-NEXT: addvl sp, sp, #-1
152
- ; CHECK-NEXT: ptrue p0.s
153
- ; CHECK-NEXT: mov x8, #8 // =0x8
154
- ; CHECK-NEXT: mov x9, sp
155
- ; CHECK-NEXT: ptrue p1.s, vl8
156
- ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
157
- ; CHECK-NEXT: ld1w { z0.s }, p1/z, [x9, x8, lsl #2]
105
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #32
158
106
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
159
107
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
160
- ; CHECK-NEXT: addvl sp, sp, #1
161
- ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
162
108
; CHECK-NEXT: ret
163
109
%retval = call <8 x i16 > @llvm.vector.extract.v8i16.nxv4i16 (<vscale x 4 x i16 > %vec , i64 8 )
164
110
ret <8 x i16 > %retval
@@ -180,19 +126,10 @@ define <8 x i16> @extract_v8i16_nxv2i16(<vscale x 2 x i16> %vec) nounwind #1 {
180
126
define <8 x i16 > @extract_v8i16_nxv2i16_idx8 (<vscale x 2 x i16 > %vec ) nounwind #1 {
181
127
; CHECK-LABEL: extract_v8i16_nxv2i16_idx8:
182
128
; CHECK: // %bb.0:
183
- ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
184
- ; CHECK-NEXT: addvl sp, sp, #-1
185
- ; CHECK-NEXT: ptrue p0.d
186
- ; CHECK-NEXT: mov x8, #8 // =0x8
187
- ; CHECK-NEXT: mov x9, sp
188
- ; CHECK-NEXT: ptrue p1.d, vl8
189
- ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
190
- ; CHECK-NEXT: ld1d { z0.d }, p1/z, [x9, x8, lsl #3]
129
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #64
191
130
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
192
131
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
193
132
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
194
- ; CHECK-NEXT: addvl sp, sp, #1
195
- ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
196
133
; CHECK-NEXT: ret
197
134
%retval = call <8 x i16 > @llvm.vector.extract.v8i16.nxv2i16 (<vscale x 2 x i16 > %vec , i64 8 )
198
135
ret <8 x i16 > %retval
@@ -212,19 +149,8 @@ define <16 x i8> @extract_v16i8_nxv16i8(<vscale x 16 x i8> %vec) nounwind {
212
149
define <16 x i8 > @extract_v16i8_nxv16i8_idx16 (<vscale x 16 x i8 > %vec ) nounwind {
213
150
; CHECK-LABEL: extract_v16i8_nxv16i8_idx16:
214
151
; CHECK: // %bb.0:
215
- ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
216
- ; CHECK-NEXT: addvl sp, sp, #-1
217
- ; CHECK-NEXT: rdvl x8, #1
218
- ; CHECK-NEXT: ptrue p0.b
219
- ; CHECK-NEXT: mov w9, #16 // =0x10
220
- ; CHECK-NEXT: sub x8, x8, #16
221
- ; CHECK-NEXT: cmp x8, #16
222
- ; CHECK-NEXT: st1b { z0.b }, p0, [sp]
223
- ; CHECK-NEXT: csel x8, x8, x9, lo
224
- ; CHECK-NEXT: mov x9, sp
225
- ; CHECK-NEXT: ldr q0, [x9, x8]
226
- ; CHECK-NEXT: addvl sp, sp, #1
227
- ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
152
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16
153
+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
228
154
; CHECK-NEXT: ret
229
155
%retval = call <16 x i8 > @llvm.vector.extract.v16i8.nxv16i8 (<vscale x 16 x i8 > %vec , i64 16 )
230
156
ret <16 x i8 > %retval
@@ -245,18 +171,9 @@ define <16 x i8> @extract_v16i8_nxv8i8(<vscale x 8 x i8> %vec) nounwind #1 {
245
171
define <16 x i8 > @extract_v16i8_nxv8i8_idx16 (<vscale x 8 x i8 > %vec ) nounwind #1 {
246
172
; CHECK-LABEL: extract_v16i8_nxv8i8_idx16:
247
173
; CHECK: // %bb.0:
248
- ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
249
- ; CHECK-NEXT: addvl sp, sp, #-1
250
- ; CHECK-NEXT: ptrue p0.h
251
- ; CHECK-NEXT: mov x8, #16 // =0x10
252
- ; CHECK-NEXT: mov x9, sp
253
- ; CHECK-NEXT: ptrue p1.h, vl16
254
- ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
255
- ; CHECK-NEXT: ld1h { z0.h }, p1/z, [x9, x8, lsl #1]
174
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #32
256
175
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
257
176
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
258
- ; CHECK-NEXT: addvl sp, sp, #1
259
- ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
260
177
; CHECK-NEXT: ret
261
178
%retval = call <16 x i8 > @llvm.vector.extract.v16i8.nxv8i8 (<vscale x 8 x i8 > %vec , i64 16 )
262
179
ret <16 x i8 > %retval
@@ -278,19 +195,10 @@ define <16 x i8> @extract_v16i8_nxv4i8(<vscale x 4 x i8> %vec) nounwind #1 {
278
195
define <16 x i8 > @extract_v16i8_nxv4i8_idx16 (<vscale x 4 x i8 > %vec ) nounwind #1 {
279
196
; CHECK-LABEL: extract_v16i8_nxv4i8_idx16:
280
197
; CHECK: // %bb.0:
281
- ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
282
- ; CHECK-NEXT: addvl sp, sp, #-1
283
- ; CHECK-NEXT: ptrue p0.s
284
- ; CHECK-NEXT: mov x8, #16 // =0x10
285
- ; CHECK-NEXT: mov x9, sp
286
- ; CHECK-NEXT: ptrue p1.s, vl16
287
- ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
288
- ; CHECK-NEXT: ld1w { z0.s }, p1/z, [x9, x8, lsl #2]
198
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #64
289
199
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
290
200
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
291
201
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
292
- ; CHECK-NEXT: addvl sp, sp, #1
293
- ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
294
202
; CHECK-NEXT: ret
295
203
%retval = call <16 x i8 > @llvm.vector.extract.v16i8.nxv4i8 (<vscale x 4 x i8 > %vec , i64 16 )
296
204
ret <16 x i8 > %retval
@@ -313,17 +221,11 @@ define <16 x i8> @extract_v16i8_nxv2i8(<vscale x 2 x i8> %vec) nounwind #1 {
313
221
define <16 x i8 > @extract_v16i8_nxv2i8_idx16 (<vscale x 2 x i8 > %vec ) nounwind #1 {
314
222
; CHECK-LABEL: extract_v16i8_nxv2i8_idx16:
315
223
; CHECK: // %bb.0:
316
- ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
317
- ; CHECK-NEXT: addvl sp, sp, #-1
318
- ; CHECK-NEXT: ptrue p0.d
319
- ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
320
- ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
224
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
321
225
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
322
226
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
323
227
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
324
228
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
325
- ; CHECK-NEXT: addvl sp, sp, #1
326
- ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
327
229
; CHECK-NEXT: ret
328
230
%retval = call <16 x i8 > @llvm.vector.extract.v16i8.nxv2i8 (<vscale x 2 x i8 > %vec , i64 16 )
329
231
ret <16 x i8 > %retval
@@ -434,13 +336,8 @@ define <16 x i1> @extract_v16i1_nxv16i1(<vscale x 16 x i1> %inmask) {
434
336
define <2 x i64 > @extract_fixed_v2i64_nxv2i64 (<vscale x 2 x i64 > %vec ) nounwind #0 {
435
337
; CHECK-LABEL: extract_fixed_v2i64_nxv2i64:
436
338
; CHECK: // %bb.0:
437
- ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
438
- ; CHECK-NEXT: addvl sp, sp, #-1
439
- ; CHECK-NEXT: ptrue p0.d
440
- ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
441
- ; CHECK-NEXT: ldr q0, [sp, #16]
442
- ; CHECK-NEXT: addvl sp, sp, #1
443
- ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
339
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16
340
+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
444
341
; CHECK-NEXT: ret
445
342
%retval = call <2 x i64 > @llvm.vector.extract.v2i64.nxv2i64 (<vscale x 2 x i64 > %vec , i64 2 )
446
343
ret <2 x i64 > %retval
@@ -449,14 +346,9 @@ define <2 x i64> @extract_fixed_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind
449
346
define void @extract_fixed_v4i64_nxv2i64 (<vscale x 2 x i64 > %vec , ptr %p ) nounwind #0 {
450
347
; CHECK-LABEL: extract_fixed_v4i64_nxv2i64:
451
348
; CHECK: // %bb.0:
452
- ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
453
- ; CHECK-NEXT: addvl sp, sp, #-1
349
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #32
454
350
; CHECK-NEXT: ptrue p0.d
455
- ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
456
- ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
457
351
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
458
- ; CHECK-NEXT: addvl sp, sp, #1
459
- ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
460
352
; CHECK-NEXT: ret
461
353
%retval = call <4 x i64 > @llvm.vector.extract.v4i64.nxv2i64 (<vscale x 2 x i64 > %vec , i64 4 )
462
354
store <4 x i64 > %retval , ptr %p
0 commit comments