@@ -257,6 +257,49 @@ define {<2 x double>, <2 x double>} @vector_deinterleave_load_v2f64_v4f64(ptr %p
257
257
ret {<2 x double >, <2 x double >} %res1
258
258
}
259
259
260
+ ; Factor-3 deinterleave of a fixed <24 x i8> load: the vector is spilled to the
+ ; stack (vs2r.v) and re-read with a segmented load (vlseg3e8.v).
+ define { <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor3(ptr %p) {
+ ; CHECK-LABEL: vector_deinterleave_load_factor3:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: addi sp, sp, -16
+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
+ ; CHECK-NEXT: csrr a1, vlenb
+ ; CHECK-NEXT: slli a1, a1, 1
+ ; CHECK-NEXT: sub sp, sp, a1
+ ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+ ; CHECK-NEXT: vsetivli zero, 24, e8, m2, ta, ma
+ ; CHECK-NEXT: vle8.v v8, (a0)
+ ; CHECK-NEXT: csrr a0, vlenb
+ ; CHECK-NEXT: srli a0, a0, 1
+ ; CHECK-NEXT: add a1, a0, a0
+ ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+ ; CHECK-NEXT: vslidedown.vi v12, v8, 8
+ ; CHECK-NEXT: vsetivli zero, 8, e8, m2, ta, ma
+ ; CHECK-NEXT: vslidedown.vi v10, v8, 16
+ ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+ ; CHECK-NEXT: vslideup.vx v8, v12, a0
+ ; CHECK-NEXT: addi a0, sp, 16
+ ; CHECK-NEXT: vmv1r.v v9, v10
+ ; CHECK-NEXT: vs2r.v v8, (a0)
+ ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+ ; CHECK-NEXT: vlseg3e8.v v6, (a0)
+ ; CHECK-NEXT: csrr a0, vlenb
+ ; CHECK-NEXT: slli a0, a0, 1
+ ; CHECK-NEXT: add sp, sp, a0
+ ; CHECK-NEXT: .cfi_def_cfa sp, 16
+ ; CHECK-NEXT: addi sp, sp, 16
+ ; CHECK-NEXT: .cfi_def_cfa_offset 0
+ ; CHECK-NEXT: ret
+   %vec = load <24 x i8>, ptr %p
+   %d0 = call {<8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave3(<24 x i8> %vec)
+   %t0 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 0
+   %t1 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 1
+   %t2 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 2
+   ; Rebuild the result aggregate field by field. NOTE(review): the original
+   ; inserted %t1 and %t2 at index 0 repeatedly, leaving fields 1 and 2 poison
+   ; in the returned value; chain through indices 0, 1, 2 instead (matching
+   ; the factor4/factor5 tests in this file).
+   %res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0
+   %res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1
+   %res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2
+   ret { <8 x i8>, <8 x i8>, <8 x i8> } %res2
+ }
302
+
260
303
define { <8 x i8 >, <8 x i8 >, <8 x i8 >, <8 x i8 > } @vector_deinterleave_load_factor4 (ptr %p ) {
261
304
; CHECK-LABEL: vector_deinterleave_load_factor4:
262
305
; CHECK: # %bb.0:
@@ -281,6 +324,127 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_fact
281
324
ret { <8 x i8 >, <8 x i8 >, <8 x i8 >, <8 x i8 > } %res3
282
325
}
283
326
327
+ ; Factor-5 deinterleave of a fixed <40 x i8> load. The five 8-element chunks
+ ; are packed into consecutive register-group slots, spilled (vs4r.v), and
+ ; re-read with a segmented load (vlseg5e8.v). The insertvalue chain is the
+ ; intended index-0..4 pattern; only extraction residue was cleaned up here.
+ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor5(ptr %p) {
+ ; CHECK-LABEL: vector_deinterleave_load_factor5:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: addi sp, sp, -16
+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
+ ; CHECK-NEXT: csrr a1, vlenb
+ ; CHECK-NEXT: slli a1, a1, 2
+ ; CHECK-NEXT: sub sp, sp, a1
+ ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+ ; CHECK-NEXT: li a1, 40
+ ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+ ; CHECK-NEXT: vle8.v v8, (a0)
+ ; CHECK-NEXT: csrr a0, vlenb
+ ; CHECK-NEXT: srli a0, a0, 1
+ ; CHECK-NEXT: add a1, a0, a0
+ ; CHECK-NEXT: vsetivli zero, 8, e8, m2, ta, ma
+ ; CHECK-NEXT: vslidedown.vi v12, v8, 24
+ ; CHECK-NEXT: vslidedown.vi v14, v8, 16
+ ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+ ; CHECK-NEXT: vslidedown.vi v13, v8, 8
+ ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+ ; CHECK-NEXT: vslideup.vx v14, v12, a0
+ ; CHECK-NEXT: vmv1r.v v12, v8
+ ; CHECK-NEXT: vslideup.vx v12, v13, a0
+ ; CHECK-NEXT: li a0, 32
+ ; CHECK-NEXT: vsetivli zero, 8, e8, m4, ta, ma
+ ; CHECK-NEXT: vslidedown.vx v8, v8, a0
+ ; CHECK-NEXT: vmv1r.v v13, v14
+ ; CHECK-NEXT: addi a0, sp, 16
+ ; CHECK-NEXT: vmv2r.v v14, v8
+ ; CHECK-NEXT: vs4r.v v12, (a0)
+ ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+ ; CHECK-NEXT: vlseg5e8.v v8, (a0)
+ ; CHECK-NEXT: csrr a0, vlenb
+ ; CHECK-NEXT: slli a0, a0, 2
+ ; CHECK-NEXT: add sp, sp, a0
+ ; CHECK-NEXT: .cfi_def_cfa sp, 16
+ ; CHECK-NEXT: addi sp, sp, 16
+ ; CHECK-NEXT: .cfi_def_cfa_offset 0
+ ; CHECK-NEXT: ret
+   %vec = load <40 x i8>, ptr %p
+   %d0 = call {<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave5(<40 x i8> %vec)
+   %t0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 0
+   %t1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 1
+   %t2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 2
+   %t3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 3
+   %t4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 4
+   %res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0
+   %res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1
+   %res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2
+   %res3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2, <8 x i8> %t3, 3
+   %res4 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t4, 4
+   ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res4
+ }
381
+
382
+ ; Factor-7 deinterleave of a fixed <56 x i8> load: seven 8-element chunks are
+ ; packed into a 4-register group, spilled (vs4r.v), and re-read with a
+ ; segmented load (vlseg7e8.v).
+ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor7(ptr %p) {
+ ; CHECK-LABEL: vector_deinterleave_load_factor7:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: addi sp, sp, -16
+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
+ ; CHECK-NEXT: csrr a1, vlenb
+ ; CHECK-NEXT: slli a1, a1, 2
+ ; CHECK-NEXT: sub sp, sp, a1
+ ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+ ; CHECK-NEXT: li a1, 56
+ ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+ ; CHECK-NEXT: vle8.v v8, (a0)
+ ; CHECK-NEXT: csrr a0, vlenb
+ ; CHECK-NEXT: li a1, 40
+ ; CHECK-NEXT: li a2, 32
+ ; CHECK-NEXT: vsetivli zero, 8, e8, m4, ta, ma
+ ; CHECK-NEXT: vslidedown.vx v16, v8, a1
+ ; CHECK-NEXT: li a1, 48
+ ; CHECK-NEXT: srli a0, a0, 1
+ ; CHECK-NEXT: vslidedown.vx v12, v8, a2
+ ; CHECK-NEXT: add a2, a0, a0
+ ; CHECK-NEXT: vsetivli zero, 8, e8, m2, ta, ma
+ ; CHECK-NEXT: vslidedown.vi v14, v8, 24
+ ; CHECK-NEXT: vslidedown.vi v18, v8, 16
+ ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+ ; CHECK-NEXT: vslidedown.vi v13, v8, 8
+ ; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+ ; CHECK-NEXT: vslideup.vx v18, v14, a0
+ ; CHECK-NEXT: vsetivli zero, 8, e8, m4, ta, ma
+ ; CHECK-NEXT: vslidedown.vx v20, v8, a1
+ ; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
+ ; CHECK-NEXT: vslideup.vx v8, v13, a0
+ ; CHECK-NEXT: vslideup.vx v12, v16, a0
+ ; CHECK-NEXT: vmv1r.v v9, v18
+ ; CHECK-NEXT: addi a0, sp, 16
+ ; CHECK-NEXT: vmv1r.v v13, v20
+ ; CHECK-NEXT: vmv2r.v v10, v12
+ ; CHECK-NEXT: vs4r.v v8, (a0)
+ ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+ ; CHECK-NEXT: vlseg7e8.v v8, (a0)
+ ; CHECK-NEXT: csrr a0, vlenb
+ ; CHECK-NEXT: slli a0, a0, 2
+ ; CHECK-NEXT: add sp, sp, a0
+ ; CHECK-NEXT: .cfi_def_cfa sp, 16
+ ; CHECK-NEXT: addi sp, sp, 16
+ ; CHECK-NEXT: .cfi_def_cfa_offset 0
+ ; CHECK-NEXT: ret
+   %vec = load <56 x i8>, ptr %p
+   %d0 = call {<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave7(<56 x i8> %vec)
+   %t0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 0
+   %t1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 1
+   %t2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 2
+   %t3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 3
+   %t4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 4
+   %t5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 5
+   %t6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 6
+   ; Rebuild the result aggregate. NOTE(review): the original based %res5 and
+   ; %res6 on %res3, which silently dropped %t4 and %t5 from the returned
+   ; value; the chain must thread %res4 -> %res5 -> %res6.
+   %res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0
+   %res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1
+   %res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2
+   %res3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2, <8 x i8> %t3, 3
+   %res4 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t4, 4
+   %res5 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res4, <8 x i8> %t5, 5
+   %res6 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res5, <8 x i8> %t6, 6
+   ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res6
+ }
447
+
284
448
define {<2 x i32 >, <2 x i32 >, <2 x i32 >, <2 x i32 >, <2 x i32 >, <2 x i32 >, <2 x i32 >, <2 x i32 >} @vector_deinterleave_load_factor8 (ptr %ptr ) {
285
449
; CHECK-LABEL: vector_deinterleave_load_factor8:
286
450
; CHECK: # %bb.0:
0 commit comments