@@ -181,4 +181,149 @@ entry:
181
181
ret <vscale x 2 x i64 > %2
182
182
}
183
183
184
+
185
+ define <vscale x 16 x i8 > @i8_m2v_4s (ptr %b ) {
186
+ ; CHECK-LABEL: i8_m2v_4s:
187
+ ; CHECK: // %bb.0: // %entry
188
+ ; CHECK-NEXT: cnth x8, all, mul #4
189
+ ; CHECK-NEXT: ptrue p0.b
190
+ ; CHECK-NEXT: mov w9, #4 // =0x4
191
+ ; CHECK-NEXT: sub x8, x0, x8
192
+ ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8, x9]
193
+ ; CHECK-NEXT: ret
194
+ entry:
195
+ %0 = tail call i64 @llvm.vscale.i64 ()
196
+ %1 = mul i64 %0 , -32
197
+ %add.ptr = getelementptr inbounds i8 , ptr %b , i64 %1
198
+ %add.ptr1 = getelementptr inbounds i8 , ptr %add.ptr , i64 4
199
+ %2 = load <vscale x 16 x i8 >, ptr %add.ptr1 , align 16
200
+ ret <vscale x 16 x i8 > %2
201
+ }
202
+

; Same address as @i8_m2v_4s but with the GEP order swapped: the fixed +4
; byte offset is applied first, then -32*vscale. Codegen is expected to be
; identical (the CHECK lines match @i8_m2v_4s exactly), showing the offsets
; commute through isel.
define <vscale x 16 x i8> @i8_4s_m2v(ptr %b) {
; CHECK-LABEL: i8_4s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov w9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, x9]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 4
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32                                  ; -32*vscale bytes
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
  ret <vscale x 16 x i8> %2
}


; i16 variant: -32*vscale byte offset first, then a fixed +8 bytes
; (= 4 x i16 elements). CHECK expects `mov x9, #4` used as an element-scaled
; register offset in ld1h (`lsl #1` scales by the 2-byte element size).
define <vscale x 8 x i16> @i16_m2v_8s(ptr %b) {
; CHECK-LABEL: i16_m2v_8s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32                                  ; -32*vscale bytes
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 8
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}


; Same address as @i16_m2v_8s with the GEP order swapped (+8 bytes first,
; then -32*vscale). CHECK lines are identical to @i16_m2v_8s.
define <vscale x 8 x i16> @i16_8s_m2v(ptr %b) {
; CHECK-LABEL: i16_8s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 8
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32                                  ; -32*vscale bytes
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}


; i32 variant: -32*vscale byte offset first, then a fixed +16 bytes
; (= 4 x i32 elements). CHECK expects the element count 4 in x9, scaled by
; `lsl #2` (4-byte elements) in the ld1w addressing mode.
define <vscale x 4 x i32> @i32_m2v_16s(ptr %b) {
; CHECK-LABEL: i32_m2v_16s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32                                  ; -32*vscale bytes
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 16
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}


; Same address as @i32_m2v_16s with the GEP order swapped (+16 bytes first,
; then -32*vscale). CHECK lines are identical to @i32_m2v_16s.
define <vscale x 4 x i32> @i32_16s_m2v(ptr %b) {
; CHECK-LABEL: i32_16s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 16
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32                                  ; -32*vscale bytes
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}


; i64 variant: -32*vscale byte offset first, then a fixed +32 bytes
; (= 4 x i64 elements). CHECK expects the element count 4 in x9, scaled by
; `lsl #3` (8-byte elements) in the ld1d addressing mode.
define <vscale x 2 x i64> @i64_m2v_32s(ptr %b) {
; CHECK-LABEL: i64_m2v_32s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32                                  ; -32*vscale bytes
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 32
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}


; Same address as @i64_m2v_32s with the GEP order swapped (+32 bytes first,
; then -32*vscale). CHECK lines are identical to @i64_m2v_32s.
define <vscale x 2 x i64> @i64_32s_m2v(ptr %b) {
; CHECK-LABEL: i64_32s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 32
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32                                  ; -32*vscale bytes
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}


declare i64 @llvm.vscale.i64()