@@ -307,243 +307,14 @@ define <32 x i32> @v32i32_v4i32(<4 x i32>) {
307
307
ret <32 x i32 > %2
308
308
}
309
309
310
- ; TODO: This case should be a simple vnsrl, but gets scalarized instead
311
310
define <32 x i8 > @vnsrl_v32i8_v64i8 (<64 x i8 > %in ) {
312
- ; RV32-LABEL: vnsrl_v32i8_v64i8:
313
- ; RV32: # %bb.0:
314
- ; RV32-NEXT: addi sp, sp, -128
315
- ; RV32-NEXT: .cfi_def_cfa_offset 128
316
- ; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
317
- ; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
318
- ; RV32-NEXT: .cfi_offset ra, -4
319
- ; RV32-NEXT: .cfi_offset s0, -8
320
- ; RV32-NEXT: addi s0, sp, 128
321
- ; RV32-NEXT: .cfi_def_cfa s0, 0
322
- ; RV32-NEXT: andi sp, sp, -64
323
- ; RV32-NEXT: li a0, 64
324
- ; RV32-NEXT: mv a1, sp
325
- ; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
326
- ; RV32-NEXT: vse8.v v8, (a1)
327
- ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
328
- ; RV32-NEXT: vslidedown.vi v10, v8, 1
329
- ; RV32-NEXT: vmv.x.s a0, v10
330
- ; RV32-NEXT: li a1, 32
331
- ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
332
- ; RV32-NEXT: vmv.v.x v10, a0
333
- ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
334
- ; RV32-NEXT: vslidedown.vi v12, v8, 3
335
- ; RV32-NEXT: vmv.x.s a0, v12
336
- ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
337
- ; RV32-NEXT: vslide1down.vx v10, v10, a0
338
- ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
339
- ; RV32-NEXT: vslidedown.vi v12, v8, 5
340
- ; RV32-NEXT: vmv.x.s a0, v12
341
- ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
342
- ; RV32-NEXT: vslide1down.vx v10, v10, a0
343
- ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
344
- ; RV32-NEXT: vslidedown.vi v12, v8, 7
345
- ; RV32-NEXT: vmv.x.s a0, v12
346
- ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
347
- ; RV32-NEXT: vslide1down.vx v10, v10, a0
348
- ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
349
- ; RV32-NEXT: vslidedown.vi v12, v8, 9
350
- ; RV32-NEXT: vmv.x.s a0, v12
351
- ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
352
- ; RV32-NEXT: vslide1down.vx v10, v10, a0
353
- ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
354
- ; RV32-NEXT: vslidedown.vi v12, v8, 11
355
- ; RV32-NEXT: vmv.x.s a0, v12
356
- ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
357
- ; RV32-NEXT: vslide1down.vx v10, v10, a0
358
- ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
359
- ; RV32-NEXT: vslidedown.vi v12, v8, 13
360
- ; RV32-NEXT: vmv.x.s a0, v12
361
- ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
362
- ; RV32-NEXT: vslide1down.vx v10, v10, a0
363
- ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
364
- ; RV32-NEXT: vslidedown.vi v12, v8, 15
365
- ; RV32-NEXT: vmv.x.s a0, v12
366
- ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
367
- ; RV32-NEXT: vslide1down.vx v10, v10, a0
368
- ; RV32-NEXT: vslidedown.vi v12, v8, 17
369
- ; RV32-NEXT: vmv.x.s a0, v12
370
- ; RV32-NEXT: vslide1down.vx v10, v10, a0
371
- ; RV32-NEXT: vslidedown.vi v12, v8, 19
372
- ; RV32-NEXT: vmv.x.s a0, v12
373
- ; RV32-NEXT: vslide1down.vx v10, v10, a0
374
- ; RV32-NEXT: vslidedown.vi v12, v8, 21
375
- ; RV32-NEXT: vmv.x.s a0, v12
376
- ; RV32-NEXT: vslide1down.vx v10, v10, a0
377
- ; RV32-NEXT: vslidedown.vi v12, v8, 23
378
- ; RV32-NEXT: vmv.x.s a0, v12
379
- ; RV32-NEXT: vslide1down.vx v10, v10, a0
380
- ; RV32-NEXT: vslidedown.vi v12, v8, 25
381
- ; RV32-NEXT: vmv.x.s a0, v12
382
- ; RV32-NEXT: vslide1down.vx v10, v10, a0
383
- ; RV32-NEXT: vslidedown.vi v12, v8, 27
384
- ; RV32-NEXT: vmv.x.s a0, v12
385
- ; RV32-NEXT: vslide1down.vx v10, v10, a0
386
- ; RV32-NEXT: vslidedown.vi v12, v8, 29
387
- ; RV32-NEXT: vmv.x.s a0, v12
388
- ; RV32-NEXT: vslide1down.vx v10, v10, a0
389
- ; RV32-NEXT: vslidedown.vi v8, v8, 31
390
- ; RV32-NEXT: vmv.x.s a0, v8
391
- ; RV32-NEXT: vslide1down.vx v8, v10, a0
392
- ; RV32-NEXT: lbu a0, 33(sp)
393
- ; RV32-NEXT: lbu a1, 35(sp)
394
- ; RV32-NEXT: lbu a2, 37(sp)
395
- ; RV32-NEXT: lbu a3, 39(sp)
396
- ; RV32-NEXT: vslide1down.vx v8, v8, a0
397
- ; RV32-NEXT: vslide1down.vx v8, v8, a1
398
- ; RV32-NEXT: vslide1down.vx v8, v8, a2
399
- ; RV32-NEXT: vslide1down.vx v8, v8, a3
400
- ; RV32-NEXT: lbu a0, 41(sp)
401
- ; RV32-NEXT: lbu a1, 43(sp)
402
- ; RV32-NEXT: lbu a2, 45(sp)
403
- ; RV32-NEXT: lbu a3, 47(sp)
404
- ; RV32-NEXT: vslide1down.vx v8, v8, a0
405
- ; RV32-NEXT: vslide1down.vx v8, v8, a1
406
- ; RV32-NEXT: vslide1down.vx v8, v8, a2
407
- ; RV32-NEXT: vslide1down.vx v8, v8, a3
408
- ; RV32-NEXT: lbu a0, 49(sp)
409
- ; RV32-NEXT: lbu a1, 51(sp)
410
- ; RV32-NEXT: lbu a2, 53(sp)
411
- ; RV32-NEXT: lbu a3, 55(sp)
412
- ; RV32-NEXT: vslide1down.vx v8, v8, a0
413
- ; RV32-NEXT: vslide1down.vx v8, v8, a1
414
- ; RV32-NEXT: vslide1down.vx v8, v8, a2
415
- ; RV32-NEXT: vslide1down.vx v8, v8, a3
416
- ; RV32-NEXT: lbu a0, 57(sp)
417
- ; RV32-NEXT: lbu a1, 59(sp)
418
- ; RV32-NEXT: lbu a2, 61(sp)
419
- ; RV32-NEXT: lbu a3, 63(sp)
420
- ; RV32-NEXT: vslide1down.vx v8, v8, a0
421
- ; RV32-NEXT: vslide1down.vx v8, v8, a1
422
- ; RV32-NEXT: vslide1down.vx v8, v8, a2
423
- ; RV32-NEXT: vslide1down.vx v8, v8, a3
424
- ; RV32-NEXT: addi sp, s0, -128
425
- ; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
426
- ; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
427
- ; RV32-NEXT: addi sp, sp, 128
428
- ; RV32-NEXT: ret
429
- ;
430
- ; RV64-LABEL: vnsrl_v32i8_v64i8:
431
- ; RV64: # %bb.0:
432
- ; RV64-NEXT: addi sp, sp, -128
433
- ; RV64-NEXT: .cfi_def_cfa_offset 128
434
- ; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
435
- ; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
436
- ; RV64-NEXT: .cfi_offset ra, -8
437
- ; RV64-NEXT: .cfi_offset s0, -16
438
- ; RV64-NEXT: addi s0, sp, 128
439
- ; RV64-NEXT: .cfi_def_cfa s0, 0
440
- ; RV64-NEXT: andi sp, sp, -64
441
- ; RV64-NEXT: li a0, 64
442
- ; RV64-NEXT: mv a1, sp
443
- ; RV64-NEXT: vsetvli zero, a0, e8, m4, ta, ma
444
- ; RV64-NEXT: vse8.v v8, (a1)
445
- ; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
446
- ; RV64-NEXT: vslidedown.vi v10, v8, 1
447
- ; RV64-NEXT: vmv.x.s a0, v10
448
- ; RV64-NEXT: li a1, 32
449
- ; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
450
- ; RV64-NEXT: vmv.v.x v10, a0
451
- ; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
452
- ; RV64-NEXT: vslidedown.vi v12, v8, 3
453
- ; RV64-NEXT: vmv.x.s a0, v12
454
- ; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
455
- ; RV64-NEXT: vslide1down.vx v10, v10, a0
456
- ; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
457
- ; RV64-NEXT: vslidedown.vi v12, v8, 5
458
- ; RV64-NEXT: vmv.x.s a0, v12
459
- ; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
460
- ; RV64-NEXT: vslide1down.vx v10, v10, a0
461
- ; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
462
- ; RV64-NEXT: vslidedown.vi v12, v8, 7
463
- ; RV64-NEXT: vmv.x.s a0, v12
464
- ; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
465
- ; RV64-NEXT: vslide1down.vx v10, v10, a0
466
- ; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
467
- ; RV64-NEXT: vslidedown.vi v12, v8, 9
468
- ; RV64-NEXT: vmv.x.s a0, v12
469
- ; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
470
- ; RV64-NEXT: vslide1down.vx v10, v10, a0
471
- ; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
472
- ; RV64-NEXT: vslidedown.vi v12, v8, 11
473
- ; RV64-NEXT: vmv.x.s a0, v12
474
- ; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
475
- ; RV64-NEXT: vslide1down.vx v10, v10, a0
476
- ; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
477
- ; RV64-NEXT: vslidedown.vi v12, v8, 13
478
- ; RV64-NEXT: vmv.x.s a0, v12
479
- ; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
480
- ; RV64-NEXT: vslide1down.vx v10, v10, a0
481
- ; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
482
- ; RV64-NEXT: vslidedown.vi v12, v8, 15
483
- ; RV64-NEXT: vmv.x.s a0, v12
484
- ; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
485
- ; RV64-NEXT: vslide1down.vx v10, v10, a0
486
- ; RV64-NEXT: vslidedown.vi v12, v8, 17
487
- ; RV64-NEXT: vmv.x.s a0, v12
488
- ; RV64-NEXT: vslide1down.vx v10, v10, a0
489
- ; RV64-NEXT: vslidedown.vi v12, v8, 19
490
- ; RV64-NEXT: vmv.x.s a0, v12
491
- ; RV64-NEXT: vslide1down.vx v10, v10, a0
492
- ; RV64-NEXT: vslidedown.vi v12, v8, 21
493
- ; RV64-NEXT: vmv.x.s a0, v12
494
- ; RV64-NEXT: vslide1down.vx v10, v10, a0
495
- ; RV64-NEXT: vslidedown.vi v12, v8, 23
496
- ; RV64-NEXT: vmv.x.s a0, v12
497
- ; RV64-NEXT: vslide1down.vx v10, v10, a0
498
- ; RV64-NEXT: vslidedown.vi v12, v8, 25
499
- ; RV64-NEXT: vmv.x.s a0, v12
500
- ; RV64-NEXT: vslide1down.vx v10, v10, a0
501
- ; RV64-NEXT: vslidedown.vi v12, v8, 27
502
- ; RV64-NEXT: vmv.x.s a0, v12
503
- ; RV64-NEXT: vslide1down.vx v10, v10, a0
504
- ; RV64-NEXT: vslidedown.vi v12, v8, 29
505
- ; RV64-NEXT: vmv.x.s a0, v12
506
- ; RV64-NEXT: vslide1down.vx v10, v10, a0
507
- ; RV64-NEXT: vslidedown.vi v8, v8, 31
508
- ; RV64-NEXT: vmv.x.s a0, v8
509
- ; RV64-NEXT: vslide1down.vx v8, v10, a0
510
- ; RV64-NEXT: lbu a0, 33(sp)
511
- ; RV64-NEXT: lbu a1, 35(sp)
512
- ; RV64-NEXT: lbu a2, 37(sp)
513
- ; RV64-NEXT: lbu a3, 39(sp)
514
- ; RV64-NEXT: vslide1down.vx v8, v8, a0
515
- ; RV64-NEXT: vslide1down.vx v8, v8, a1
516
- ; RV64-NEXT: vslide1down.vx v8, v8, a2
517
- ; RV64-NEXT: vslide1down.vx v8, v8, a3
518
- ; RV64-NEXT: lbu a0, 41(sp)
519
- ; RV64-NEXT: lbu a1, 43(sp)
520
- ; RV64-NEXT: lbu a2, 45(sp)
521
- ; RV64-NEXT: lbu a3, 47(sp)
522
- ; RV64-NEXT: vslide1down.vx v8, v8, a0
523
- ; RV64-NEXT: vslide1down.vx v8, v8, a1
524
- ; RV64-NEXT: vslide1down.vx v8, v8, a2
525
- ; RV64-NEXT: vslide1down.vx v8, v8, a3
526
- ; RV64-NEXT: lbu a0, 49(sp)
527
- ; RV64-NEXT: lbu a1, 51(sp)
528
- ; RV64-NEXT: lbu a2, 53(sp)
529
- ; RV64-NEXT: lbu a3, 55(sp)
530
- ; RV64-NEXT: vslide1down.vx v8, v8, a0
531
- ; RV64-NEXT: vslide1down.vx v8, v8, a1
532
- ; RV64-NEXT: vslide1down.vx v8, v8, a2
533
- ; RV64-NEXT: vslide1down.vx v8, v8, a3
534
- ; RV64-NEXT: lbu a0, 57(sp)
535
- ; RV64-NEXT: lbu a1, 59(sp)
536
- ; RV64-NEXT: lbu a2, 61(sp)
537
- ; RV64-NEXT: lbu a3, 63(sp)
538
- ; RV64-NEXT: vslide1down.vx v8, v8, a0
539
- ; RV64-NEXT: vslide1down.vx v8, v8, a1
540
- ; RV64-NEXT: vslide1down.vx v8, v8, a2
541
- ; RV64-NEXT: vslide1down.vx v8, v8, a3
542
- ; RV64-NEXT: addi sp, s0, -128
543
- ; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
544
- ; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
545
- ; RV64-NEXT: addi sp, sp, 128
546
- ; RV64-NEXT: ret
311
+ ; CHECK-LABEL: vnsrl_v32i8_v64i8:
312
+ ; CHECK: # %bb.0:
313
+ ; CHECK-NEXT: li a0, 32
314
+ ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
315
+ ; CHECK-NEXT: vnsrl.wi v12, v8, 8
316
+ ; CHECK-NEXT: vmv.v.v v8, v12
317
+ ; CHECK-NEXT: ret
547
318
%res = shufflevector <64 x i8 > %in , <64 x i8 > poison, <32 x i32 > <i32 1 , i32 3 , i32 5 , i32 7 , i32 9 , i32 11 , i32 13 , i32 15 , i32 17 , i32 19 , i32 21 , i32 23 , i32 25 , i32 27 , i32 29 , i32 31 , i32 33 , i32 35 , i32 37 , i32 39 , i32 41 , i32 43 , i32 45 , i32 47 , i32 49 , i32 51 , i32 53 , i32 55 , i32 57 , i32 59 , i32 61 , i32 63 >
548
319
ret <32 x i8 > %res
549
320
}
0 commit comments