|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
2 |
| -; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=256 | FileCheck %s |
| 2 | +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=256 | FileCheck %s --check-prefixes=CHECK,V |
| 3 | +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+f,+zve32f -riscv-v-vector-bits-min=256 | FileCheck %s --check-prefixes=CHECK,ZVE32F |
3 | 4 |
|
4 | 5 | %struct.foo = type { i32, i32, i32, i32 }
|
5 | 6 |
|
@@ -54,30 +55,55 @@ for.cond.cleanup: ; preds = %vector.body
|
54 | 55 |
|
55 | 56 | define void @gather_masked(i8* noalias nocapture %A, i8* noalias nocapture readonly %B, <32 x i8> %maskedoff) {
|
56 | 57 | ;
|
57 |
| -; CHECK-LABEL: gather_masked: |
58 |
| -; CHECK: # %bb.0: # %entry |
59 |
| -; CHECK-NEXT: li a2, 0 |
60 |
| -; CHECK-NEXT: lui a3, 983765 |
61 |
| -; CHECK-NEXT: addiw a3, a3, 873 |
62 |
| -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu |
63 |
| -; CHECK-NEXT: vmv.s.x v0, a3 |
64 |
| -; CHECK-NEXT: li a3, 32 |
65 |
| -; CHECK-NEXT: li a4, 5 |
66 |
| -; CHECK-NEXT: li a5, 1024 |
67 |
| -; CHECK-NEXT: .LBB1_1: # %vector.body |
68 |
| -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
69 |
| -; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu |
70 |
| -; CHECK-NEXT: vmv1r.v v9, v8 |
71 |
| -; CHECK-NEXT: vlse8.v v9, (a1), a4, v0.t |
72 |
| -; CHECK-NEXT: add a6, a0, a2 |
73 |
| -; CHECK-NEXT: vle8.v v10, (a6) |
74 |
| -; CHECK-NEXT: vadd.vv v9, v10, v9 |
75 |
| -; CHECK-NEXT: vse8.v v9, (a6) |
76 |
| -; CHECK-NEXT: addi a2, a2, 32 |
77 |
| -; CHECK-NEXT: addi a1, a1, 160 |
78 |
| -; CHECK-NEXT: bne a2, a5, .LBB1_1 |
79 |
| -; CHECK-NEXT: # %bb.2: # %for.cond.cleanup |
80 |
| -; CHECK-NEXT: ret |
| 58 | +; V-LABEL: gather_masked: |
| 59 | +; V: # %bb.0: # %entry |
| 60 | +; V-NEXT: li a2, 0 |
| 61 | +; V-NEXT: lui a3, 983765 |
| 62 | +; V-NEXT: addiw a3, a3, 873 |
| 63 | +; V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu |
| 64 | +; V-NEXT: vmv.s.x v0, a3 |
| 65 | +; V-NEXT: li a3, 32 |
| 66 | +; V-NEXT: li a4, 5 |
| 67 | +; V-NEXT: li a5, 1024 |
| 68 | +; V-NEXT: .LBB1_1: # %vector.body |
| 69 | +; V-NEXT: # =>This Inner Loop Header: Depth=1 |
| 70 | +; V-NEXT: vsetvli zero, a3, e8, m1, ta, mu |
| 71 | +; V-NEXT: vmv1r.v v9, v8 |
| 72 | +; V-NEXT: vlse8.v v9, (a1), a4, v0.t |
| 73 | +; V-NEXT: add a6, a0, a2 |
| 74 | +; V-NEXT: vle8.v v10, (a6) |
| 75 | +; V-NEXT: vadd.vv v9, v10, v9 |
| 76 | +; V-NEXT: vse8.v v9, (a6) |
| 77 | +; V-NEXT: addi a2, a2, 32 |
| 78 | +; V-NEXT: addi a1, a1, 160 |
| 79 | +; V-NEXT: bne a2, a5, .LBB1_1 |
| 80 | +; V-NEXT: # %bb.2: # %for.cond.cleanup |
| 81 | +; V-NEXT: ret |
| 82 | +; |
| 83 | +; ZVE32F-LABEL: gather_masked: |
| 84 | +; ZVE32F: # %bb.0: # %entry |
| 85 | +; ZVE32F-NEXT: li a2, 0 |
| 86 | +; ZVE32F-NEXT: lui a3, 983765 |
| 87 | +; ZVE32F-NEXT: addiw a3, a3, 873 |
| 88 | +; ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| 89 | +; ZVE32F-NEXT: vmv.s.x v0, a3 |
| 90 | +; ZVE32F-NEXT: li a3, 32 |
| 91 | +; ZVE32F-NEXT: li a4, 5 |
| 92 | +; ZVE32F-NEXT: li a5, 1024 |
| 93 | +; ZVE32F-NEXT: .LBB1_1: # %vector.body |
| 94 | +; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1 |
| 95 | +; ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, mu |
| 96 | +; ZVE32F-NEXT: vmv1r.v v9, v8 |
| 97 | +; ZVE32F-NEXT: vlse8.v v9, (a1), a4, v0.t |
| 98 | +; ZVE32F-NEXT: add a6, a0, a2 |
| 99 | +; ZVE32F-NEXT: vle8.v v10, (a6) |
| 100 | +; ZVE32F-NEXT: vadd.vv v9, v10, v9 |
| 101 | +; ZVE32F-NEXT: vse8.v v9, (a6) |
| 102 | +; ZVE32F-NEXT: addi a2, a2, 32 |
| 103 | +; ZVE32F-NEXT: addi a1, a1, 160 |
| 104 | +; ZVE32F-NEXT: bne a2, a5, .LBB1_1 |
| 105 | +; ZVE32F-NEXT: # %bb.2: # %for.cond.cleanup |
| 106 | +; ZVE32F-NEXT: ret |
81 | 107 | entry:
|
82 | 108 | br label %vector.body
|
83 | 109 |
|
@@ -242,30 +268,55 @@ for.cond.cleanup: ; preds = %vector.body
|
242 | 268 |
|
243 | 269 | define void @scatter_masked(i8* noalias nocapture %A, i8* noalias nocapture readonly %B, <32 x i8> %maskedoff) {
|
244 | 270 | ;
|
245 |
| -; CHECK-LABEL: scatter_masked: |
246 |
| -; CHECK: # %bb.0: # %entry |
247 |
| -; CHECK-NEXT: li a2, 0 |
248 |
| -; CHECK-NEXT: li a3, 32 |
249 |
| -; CHECK-NEXT: lui a4, 983765 |
250 |
| -; CHECK-NEXT: addiw a4, a4, 873 |
251 |
| -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu |
252 |
| -; CHECK-NEXT: vmv.s.x v0, a4 |
253 |
| -; CHECK-NEXT: li a4, 5 |
254 |
| -; CHECK-NEXT: li a5, 1024 |
255 |
| -; CHECK-NEXT: .LBB5_1: # %vector.body |
256 |
| -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
257 |
| -; CHECK-NEXT: add a6, a1, a2 |
258 |
| -; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu |
259 |
| -; CHECK-NEXT: vle8.v v9, (a6) |
260 |
| -; CHECK-NEXT: vmv1r.v v10, v8 |
261 |
| -; CHECK-NEXT: vlse8.v v10, (a0), a4, v0.t |
262 |
| -; CHECK-NEXT: vadd.vv v9, v10, v9 |
263 |
| -; CHECK-NEXT: vsse8.v v9, (a0), a4, v0.t |
264 |
| -; CHECK-NEXT: addi a2, a2, 32 |
265 |
| -; CHECK-NEXT: addi a0, a0, 160 |
266 |
| -; CHECK-NEXT: bne a2, a5, .LBB5_1 |
267 |
| -; CHECK-NEXT: # %bb.2: # %for.cond.cleanup |
268 |
| -; CHECK-NEXT: ret |
| 271 | +; V-LABEL: scatter_masked: |
| 272 | +; V: # %bb.0: # %entry |
| 273 | +; V-NEXT: li a2, 0 |
| 274 | +; V-NEXT: li a3, 32 |
| 275 | +; V-NEXT: lui a4, 983765 |
| 276 | +; V-NEXT: addiw a4, a4, 873 |
| 277 | +; V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu |
| 278 | +; V-NEXT: vmv.s.x v0, a4 |
| 279 | +; V-NEXT: li a4, 5 |
| 280 | +; V-NEXT: li a5, 1024 |
| 281 | +; V-NEXT: .LBB5_1: # %vector.body |
| 282 | +; V-NEXT: # =>This Inner Loop Header: Depth=1 |
| 283 | +; V-NEXT: add a6, a1, a2 |
| 284 | +; V-NEXT: vsetvli zero, a3, e8, m1, ta, mu |
| 285 | +; V-NEXT: vle8.v v9, (a6) |
| 286 | +; V-NEXT: vmv1r.v v10, v8 |
| 287 | +; V-NEXT: vlse8.v v10, (a0), a4, v0.t |
| 288 | +; V-NEXT: vadd.vv v9, v10, v9 |
| 289 | +; V-NEXT: vsse8.v v9, (a0), a4, v0.t |
| 290 | +; V-NEXT: addi a2, a2, 32 |
| 291 | +; V-NEXT: addi a0, a0, 160 |
| 292 | +; V-NEXT: bne a2, a5, .LBB5_1 |
| 293 | +; V-NEXT: # %bb.2: # %for.cond.cleanup |
| 294 | +; V-NEXT: ret |
| 295 | +; |
| 296 | +; ZVE32F-LABEL: scatter_masked: |
| 297 | +; ZVE32F: # %bb.0: # %entry |
| 298 | +; ZVE32F-NEXT: li a2, 0 |
| 299 | +; ZVE32F-NEXT: li a3, 32 |
| 300 | +; ZVE32F-NEXT: lui a4, 983765 |
| 301 | +; ZVE32F-NEXT: addiw a4, a4, 873 |
| 302 | +; ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| 303 | +; ZVE32F-NEXT: vmv.s.x v0, a4 |
| 304 | +; ZVE32F-NEXT: li a4, 5 |
| 305 | +; ZVE32F-NEXT: li a5, 1024 |
| 306 | +; ZVE32F-NEXT: .LBB5_1: # %vector.body |
| 307 | +; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1 |
| 308 | +; ZVE32F-NEXT: add a6, a1, a2 |
| 309 | +; ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, mu |
| 310 | +; ZVE32F-NEXT: vle8.v v9, (a6) |
| 311 | +; ZVE32F-NEXT: vmv1r.v v10, v8 |
| 312 | +; ZVE32F-NEXT: vlse8.v v10, (a0), a4, v0.t |
| 313 | +; ZVE32F-NEXT: vadd.vv v9, v10, v9 |
| 314 | +; ZVE32F-NEXT: vsse8.v v9, (a0), a4, v0.t |
| 315 | +; ZVE32F-NEXT: addi a2, a2, 32 |
| 316 | +; ZVE32F-NEXT: addi a0, a0, 160 |
| 317 | +; ZVE32F-NEXT: bne a2, a5, .LBB5_1 |
| 318 | +; ZVE32F-NEXT: # %bb.2: # %for.cond.cleanup |
| 319 | +; ZVE32F-NEXT: ret |
269 | 320 | entry:
|
270 | 321 | br label %vector.body
|
271 | 322 |
|
@@ -554,24 +605,51 @@ declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, i32 immar
|
554 | 605 | ; Make sure we don't crash in getTgtMemIntrinsic for a vector of pointers.
|
555 | 606 | define void @gather_of_pointers(i32** noalias nocapture %0, i32** noalias nocapture readonly %1) {
|
556 | 607 | ;
|
557 |
| -; CHECK-LABEL: gather_of_pointers: |
558 |
| -; CHECK: # %bb.0: |
559 |
| -; CHECK-NEXT: li a2, 1024 |
560 |
| -; CHECK-NEXT: li a3, 40 |
561 |
| -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu |
562 |
| -; CHECK-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 |
563 |
| -; CHECK-NEXT: vlse64.v v8, (a1), a3 |
564 |
| -; CHECK-NEXT: addi a4, a1, 80 |
565 |
| -; CHECK-NEXT: vlse64.v v9, (a4), a3 |
566 |
| -; CHECK-NEXT: vse64.v v8, (a0) |
567 |
| -; CHECK-NEXT: addi a4, a0, 16 |
568 |
| -; CHECK-NEXT: vse64.v v9, (a4) |
569 |
| -; CHECK-NEXT: addi a2, a2, -4 |
570 |
| -; CHECK-NEXT: addi a0, a0, 32 |
571 |
| -; CHECK-NEXT: addi a1, a1, 160 |
572 |
| -; CHECK-NEXT: bnez a2, .LBB10_1 |
573 |
| -; CHECK-NEXT: # %bb.2: |
574 |
| -; CHECK-NEXT: ret |
| 608 | +; V-LABEL: gather_of_pointers: |
| 609 | +; V: # %bb.0: |
| 610 | +; V-NEXT: li a2, 1024 |
| 611 | +; V-NEXT: li a3, 40 |
| 612 | +; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu |
| 613 | +; V-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 |
| 614 | +; V-NEXT: vlse64.v v8, (a1), a3 |
| 615 | +; V-NEXT: addi a4, a1, 80 |
| 616 | +; V-NEXT: vlse64.v v9, (a4), a3 |
| 617 | +; V-NEXT: vse64.v v8, (a0) |
| 618 | +; V-NEXT: addi a4, a0, 16 |
| 619 | +; V-NEXT: vse64.v v9, (a4) |
| 620 | +; V-NEXT: addi a2, a2, -4 |
| 621 | +; V-NEXT: addi a0, a0, 32 |
| 622 | +; V-NEXT: addi a1, a1, 160 |
| 623 | +; V-NEXT: bnez a2, .LBB10_1 |
| 624 | +; V-NEXT: # %bb.2: |
| 625 | +; V-NEXT: ret |
| 626 | +; |
| 627 | +; ZVE32F-LABEL: gather_of_pointers: |
| 628 | +; ZVE32F: # %bb.0: |
| 629 | +; ZVE32F-NEXT: li a2, 0 |
| 630 | +; ZVE32F-NEXT: li a3, 1 |
| 631 | +; ZVE32F-NEXT: li a4, 1024 |
| 632 | +; ZVE32F-NEXT: li a5, 40 |
| 633 | +; ZVE32F-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 |
| 634 | +; ZVE32F-NEXT: mul a6, a3, a5 |
| 635 | +; ZVE32F-NEXT: add a6, a1, a6 |
| 636 | +; ZVE32F-NEXT: mul a7, a2, a5 |
| 637 | +; ZVE32F-NEXT: add a7, a1, a7 |
| 638 | +; ZVE32F-NEXT: ld t0, 0(a6) |
| 639 | +; ZVE32F-NEXT: ld t1, 0(a7) |
| 640 | +; ZVE32F-NEXT: ld a6, 80(a6) |
| 641 | +; ZVE32F-NEXT: ld a7, 80(a7) |
| 642 | +; ZVE32F-NEXT: sd t0, 8(a0) |
| 643 | +; ZVE32F-NEXT: sd t1, 0(a0) |
| 644 | +; ZVE32F-NEXT: sd a6, 24(a0) |
| 645 | +; ZVE32F-NEXT: sd a7, 16(a0) |
| 646 | +; ZVE32F-NEXT: addi a2, a2, 4 |
| 647 | +; ZVE32F-NEXT: addi a3, a3, 4 |
| 648 | +; ZVE32F-NEXT: addi a4, a4, -4 |
| 649 | +; ZVE32F-NEXT: addi a0, a0, 32 |
| 650 | +; ZVE32F-NEXT: bnez a4, .LBB10_1 |
| 651 | +; ZVE32F-NEXT: # %bb.2: |
| 652 | +; ZVE32F-NEXT: ret |
575 | 653 | br label %3
|
576 | 654 |
|
577 | 655 | 3: ; preds = %3, %2
|
@@ -604,24 +682,51 @@ declare <2 x i32*> @llvm.masked.gather.v2p0i32.v2p0p0i32(<2 x i32**>, i32 immarg
|
604 | 682 | ; Make sure we don't crash in getTgtMemIntrinsic for a vector of pointers.
|
605 | 683 | define void @scatter_of_pointers(i32** noalias nocapture %0, i32** noalias nocapture readonly %1) {
|
606 | 684 | ;
|
607 |
| -; CHECK-LABEL: scatter_of_pointers: |
608 |
| -; CHECK: # %bb.0: |
609 |
| -; CHECK-NEXT: li a2, 1024 |
610 |
| -; CHECK-NEXT: li a3, 40 |
611 |
| -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu |
612 |
| -; CHECK-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 |
613 |
| -; CHECK-NEXT: vle64.v v8, (a1) |
614 |
| -; CHECK-NEXT: addi a4, a1, 16 |
615 |
| -; CHECK-NEXT: vle64.v v9, (a4) |
616 |
| -; CHECK-NEXT: addi a4, a0, 80 |
617 |
| -; CHECK-NEXT: vsse64.v v8, (a0), a3 |
618 |
| -; CHECK-NEXT: vsse64.v v9, (a4), a3 |
619 |
| -; CHECK-NEXT: addi a2, a2, -4 |
620 |
| -; CHECK-NEXT: addi a1, a1, 32 |
621 |
| -; CHECK-NEXT: addi a0, a0, 160 |
622 |
| -; CHECK-NEXT: bnez a2, .LBB11_1 |
623 |
| -; CHECK-NEXT: # %bb.2: |
624 |
| -; CHECK-NEXT: ret |
| 685 | +; V-LABEL: scatter_of_pointers: |
| 686 | +; V: # %bb.0: |
| 687 | +; V-NEXT: li a2, 1024 |
| 688 | +; V-NEXT: li a3, 40 |
| 689 | +; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu |
| 690 | +; V-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 |
| 691 | +; V-NEXT: vle64.v v8, (a1) |
| 692 | +; V-NEXT: addi a4, a1, 16 |
| 693 | +; V-NEXT: vle64.v v9, (a4) |
| 694 | +; V-NEXT: addi a4, a0, 80 |
| 695 | +; V-NEXT: vsse64.v v8, (a0), a3 |
| 696 | +; V-NEXT: vsse64.v v9, (a4), a3 |
| 697 | +; V-NEXT: addi a2, a2, -4 |
| 698 | +; V-NEXT: addi a1, a1, 32 |
| 699 | +; V-NEXT: addi a0, a0, 160 |
| 700 | +; V-NEXT: bnez a2, .LBB11_1 |
| 701 | +; V-NEXT: # %bb.2: |
| 702 | +; V-NEXT: ret |
| 703 | +; |
| 704 | +; ZVE32F-LABEL: scatter_of_pointers: |
| 705 | +; ZVE32F: # %bb.0: |
| 706 | +; ZVE32F-NEXT: li a2, 0 |
| 707 | +; ZVE32F-NEXT: li a3, 1 |
| 708 | +; ZVE32F-NEXT: li a4, 1024 |
| 709 | +; ZVE32F-NEXT: li a5, 40 |
| 710 | +; ZVE32F-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 |
| 711 | +; ZVE32F-NEXT: ld a6, 8(a1) |
| 712 | +; ZVE32F-NEXT: ld a7, 0(a1) |
| 713 | +; ZVE32F-NEXT: ld t0, 24(a1) |
| 714 | +; ZVE32F-NEXT: ld t1, 16(a1) |
| 715 | +; ZVE32F-NEXT: mul t2, a3, a5 |
| 716 | +; ZVE32F-NEXT: add t2, a0, t2 |
| 717 | +; ZVE32F-NEXT: mul t3, a2, a5 |
| 718 | +; ZVE32F-NEXT: add t3, a0, t3 |
| 719 | +; ZVE32F-NEXT: sd a7, 0(t3) |
| 720 | +; ZVE32F-NEXT: sd a6, 0(t2) |
| 721 | +; ZVE32F-NEXT: sd t1, 80(t3) |
| 722 | +; ZVE32F-NEXT: sd t0, 80(t2) |
| 723 | +; ZVE32F-NEXT: addi a2, a2, 4 |
| 724 | +; ZVE32F-NEXT: addi a3, a3, 4 |
| 725 | +; ZVE32F-NEXT: addi a4, a4, -4 |
| 726 | +; ZVE32F-NEXT: addi a1, a1, 32 |
| 727 | +; ZVE32F-NEXT: bnez a4, .LBB11_1 |
| 728 | +; ZVE32F-NEXT: # %bb.2: |
| 729 | +; ZVE32F-NEXT: ret |
625 | 730 | br label %3
|
626 | 731 |
|
627 | 732 | 3: ; preds = %3, %2
|
|
0 commit comments