|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
2 | 2 | ; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zvl256b | FileCheck %s --check-prefixes=CHECK,V
|
3 | 3 | ; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+f,+zve32f,+zvl256b | FileCheck %s --check-prefixes=CHECK,ZVE32F
|
4 |
| -; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+optimized-zero-stride-load,+zvl256b | FileCheck %s --check-prefixes=CHECK,OPTIMIZED |
5 |
| -; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+f,+zve32f,+optimized-zero-stride-load,+zvl256b | FileCheck %s --check-prefixes=CHECK,OPTIMIZED |
| 4 | +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+optimized-zero-stride-load,+zvl256b | FileCheck %s --check-prefixes=CHECK,OPTIMIZED,OPTV |
| 5 | +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+f,+zve32f,+optimized-zero-stride-load,+zvl256b | FileCheck %s --check-prefixes=CHECK,OPTIMIZED,OPTZVE32F |
6 | 6 |
|
7 | 7 | %struct.foo = type { i32, i32, i32, i32 }
|
8 | 8 |
|
@@ -677,6 +677,54 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur
|
677 | 677 | ; ZVE32F-NEXT: bne a0, a4, .LBB12_1
|
678 | 678 | ; ZVE32F-NEXT: # %bb.2: # %bb18
|
679 | 679 | ; ZVE32F-NEXT: ret
|
| 680 | +; |
| 681 | +; OPTV-LABEL: gather_of_pointers: |
| 682 | +; OPTV: # %bb.0: # %bb |
| 683 | +; OPTV-NEXT: lui a2, 2 |
| 684 | +; OPTV-NEXT: add a2, a0, a2 |
| 685 | +; OPTV-NEXT: li a3, 40 |
| 686 | +; OPTV-NEXT: vsetivli zero, 2, e64, m1, ta, ma |
| 687 | +; OPTV-NEXT: .LBB12_1: # %bb2 |
| 688 | +; OPTV-NEXT: # =>This Inner Loop Header: Depth=1 |
| 689 | +; OPTV-NEXT: vlse64.v v8, (a1), a3 |
| 690 | +; OPTV-NEXT: addi a4, a1, 80 |
| 691 | +; OPTV-NEXT: vlse64.v v9, (a4), a3 |
| 692 | +; OPTV-NEXT: addi a4, a0, 16 |
| 693 | +; OPTV-NEXT: vse64.v v8, (a0) |
| 694 | +; OPTV-NEXT: addi a0, a0, 32 |
| 695 | +; OPTV-NEXT: vse64.v v9, (a4) |
| 696 | +; OPTV-NEXT: addi a1, a1, 160 |
| 697 | +; OPTV-NEXT: bne a0, a2, .LBB12_1 |
| 698 | +; OPTV-NEXT: # %bb.2: # %bb18 |
| 699 | +; OPTV-NEXT: ret |
| 700 | +; |
| 701 | +; OPTZVE32F-LABEL: gather_of_pointers: |
| 702 | +; OPTZVE32F: # %bb.0: # %bb |
| 703 | +; OPTZVE32F-NEXT: li a2, 0 |
| 704 | +; OPTZVE32F-NEXT: lui a4, 2 |
| 705 | +; OPTZVE32F-NEXT: li a3, 1 |
| 706 | +; OPTZVE32F-NEXT: add a4, a0, a4 |
| 707 | +; OPTZVE32F-NEXT: li a5, 40 |
| 708 | +; OPTZVE32F-NEXT: .LBB12_1: # %bb2 |
| 709 | +; OPTZVE32F-NEXT: # =>This Inner Loop Header: Depth=1 |
| 710 | +; OPTZVE32F-NEXT: mul a6, a3, a5 |
| 711 | +; OPTZVE32F-NEXT: mul a7, a2, a5 |
| 712 | +; OPTZVE32F-NEXT: addi a2, a2, 4 |
| 713 | +; OPTZVE32F-NEXT: add a6, a1, a6 |
| 714 | +; OPTZVE32F-NEXT: add a7, a1, a7 |
| 715 | +; OPTZVE32F-NEXT: ld t0, 0(a7) |
| 716 | +; OPTZVE32F-NEXT: ld t1, 0(a6) |
| 717 | +; OPTZVE32F-NEXT: ld a7, 80(a7) |
| 718 | +; OPTZVE32F-NEXT: ld a6, 80(a6) |
| 719 | +; OPTZVE32F-NEXT: sd t0, 0(a0) |
| 720 | +; OPTZVE32F-NEXT: sd t1, 8(a0) |
| 721 | +; OPTZVE32F-NEXT: sd a7, 16(a0) |
| 722 | +; OPTZVE32F-NEXT: sd a6, 24(a0) |
| 723 | +; OPTZVE32F-NEXT: addi a0, a0, 32 |
| 724 | +; OPTZVE32F-NEXT: addi a3, a3, 4 |
| 725 | +; OPTZVE32F-NEXT: bne a0, a4, .LBB12_1 |
| 726 | +; OPTZVE32F-NEXT: # %bb.2: # %bb18 |
| 727 | +; OPTZVE32F-NEXT: ret |
680 | 728 | bb:
|
681 | 729 | br label %bb2
|
682 | 730 |
|
@@ -754,6 +802,54 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu
|
754 | 802 | ; ZVE32F-NEXT: bne a1, a4, .LBB13_1
|
755 | 803 | ; ZVE32F-NEXT: # %bb.2: # %bb18
|
756 | 804 | ; ZVE32F-NEXT: ret
|
| 805 | +; |
| 806 | +; OPTV-LABEL: scatter_of_pointers: |
| 807 | +; OPTV: # %bb.0: # %bb |
| 808 | +; OPTV-NEXT: lui a2, 2 |
| 809 | +; OPTV-NEXT: add a2, a1, a2 |
| 810 | +; OPTV-NEXT: li a3, 40 |
| 811 | +; OPTV-NEXT: vsetivli zero, 2, e64, m1, ta, ma |
| 812 | +; OPTV-NEXT: .LBB13_1: # %bb2 |
| 813 | +; OPTV-NEXT: # =>This Inner Loop Header: Depth=1 |
| 814 | +; OPTV-NEXT: addi a4, a1, 16 |
| 815 | +; OPTV-NEXT: vle64.v v8, (a1) |
| 816 | +; OPTV-NEXT: vle64.v v9, (a4) |
| 817 | +; OPTV-NEXT: addi a4, a0, 80 |
| 818 | +; OPTV-NEXT: addi a1, a1, 32 |
| 819 | +; OPTV-NEXT: vsse64.v v8, (a0), a3 |
| 820 | +; OPTV-NEXT: vsse64.v v9, (a4), a3 |
| 821 | +; OPTV-NEXT: addi a0, a0, 160 |
| 822 | +; OPTV-NEXT: bne a1, a2, .LBB13_1 |
| 823 | +; OPTV-NEXT: # %bb.2: # %bb18 |
| 824 | +; OPTV-NEXT: ret |
| 825 | +; |
| 826 | +; OPTZVE32F-LABEL: scatter_of_pointers: |
| 827 | +; OPTZVE32F: # %bb.0: # %bb |
| 828 | +; OPTZVE32F-NEXT: li a2, 0 |
| 829 | +; OPTZVE32F-NEXT: lui a4, 2 |
| 830 | +; OPTZVE32F-NEXT: li a3, 1 |
| 831 | +; OPTZVE32F-NEXT: add a4, a1, a4 |
| 832 | +; OPTZVE32F-NEXT: li a5, 40 |
| 833 | +; OPTZVE32F-NEXT: .LBB13_1: # %bb2 |
| 834 | +; OPTZVE32F-NEXT: # =>This Inner Loop Header: Depth=1 |
| 835 | +; OPTZVE32F-NEXT: ld a6, 0(a1) |
| 836 | +; OPTZVE32F-NEXT: ld a7, 8(a1) |
| 837 | +; OPTZVE32F-NEXT: ld t0, 16(a1) |
| 838 | +; OPTZVE32F-NEXT: ld t1, 24(a1) |
| 839 | +; OPTZVE32F-NEXT: mul t2, a3, a5 |
| 840 | +; OPTZVE32F-NEXT: mul t3, a2, a5 |
| 841 | +; OPTZVE32F-NEXT: addi a2, a2, 4 |
| 842 | +; OPTZVE32F-NEXT: addi a1, a1, 32 |
| 843 | +; OPTZVE32F-NEXT: add t2, a0, t2 |
| 844 | +; OPTZVE32F-NEXT: add t3, a0, t3 |
| 845 | +; OPTZVE32F-NEXT: sd a6, 0(t3) |
| 846 | +; OPTZVE32F-NEXT: sd a7, 0(t2) |
| 847 | +; OPTZVE32F-NEXT: sd t0, 80(t3) |
| 848 | +; OPTZVE32F-NEXT: sd t1, 80(t2) |
| 849 | +; OPTZVE32F-NEXT: addi a3, a3, 4 |
| 850 | +; OPTZVE32F-NEXT: bne a1, a4, .LBB13_1 |
| 851 | +; OPTZVE32F-NEXT: # %bb.2: # %bb18 |
| 852 | +; OPTZVE32F-NEXT: ret |
757 | 853 | bb:
|
758 | 854 | br label %bb2
|
759 | 855 |
|
|
0 commit comments