 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <2 x i16> @vwmul_v2i16(<2 x i8>* %x, <2 x i8>* %y) {
 ; CHECK-LABEL: vwmul_v2i16:
@@ -649,3 +649,239 @@ define <16 x i64> @vwmul_vx_v16i64(<16 x i32>* %x, i32 %y) {
   ret <16 x i64> %f
 }
 
+define <8 x i16> @vwmul_vx_v8i16_i8(<8 x i8>* %x, i8* %y) {
+; CHECK-LABEL: vwmul_vx_v8i16_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: lb a0, 0(a1)
+; CHECK-NEXT: vwmul.vx v8, v25, a0
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, <8 x i8>* %x
+ %b = load i8, i8* %y
+ %c = sext i8 %b to i16
+ %d = insertelement <8 x i16> undef, i16 %c, i32 0
+ %e = shufflevector <8 x i16> %d, <8 x i16> undef, <8 x i32> zeroinitializer
+ %f = sext <8 x i8> %a to <8 x i16>
+ %g = mul <8 x i16> %e, %f
+ ret <8 x i16> %g
+}
+
+define <8 x i16> @vwmul_vx_v8i16_i16(<8 x i8>* %x, i16* %y) {
+; CHECK-LABEL: vwmul_vx_v8i16_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: lh a0, 0(a1)
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT: vsext.vf2 v26, v25
+; CHECK-NEXT: vmul.vx v8, v26, a0
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, <8 x i8>* %x
+ %b = load i16, i16* %y
+ %d = insertelement <8 x i16> undef, i16 %b, i32 0
+ %e = shufflevector <8 x i16> %d, <8 x i16> undef, <8 x i32> zeroinitializer
+ %f = sext <8 x i8> %a to <8 x i16>
+ %g = mul <8 x i16> %e, %f
+ ret <8 x i16> %g
+}
+
+define <4 x i32> @vwmul_vx_v4i32_i8(<4 x i16>* %x, i8* %y) {
+; CHECK-LABEL: vwmul_vx_v4i32_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: lb a0, 0(a1)
+; CHECK-NEXT: vwmul.vx v8, v25, a0
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = load i8, i8* %y
+ %c = sext i8 %b to i32
+ %d = insertelement <4 x i32> undef, i32 %c, i32 0
+ %e = shufflevector <4 x i32> %d, <4 x i32> undef, <4 x i32> zeroinitializer
+ %f = sext <4 x i16> %a to <4 x i32>
+ %g = mul <4 x i32> %e, %f
+ ret <4 x i32> %g
+}
+
+define <4 x i32> @vwmul_vx_v4i32_i16(<4 x i16>* %x, i16* %y) {
+; CHECK-LABEL: vwmul_vx_v4i32_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: lh a0, 0(a1)
+; CHECK-NEXT: vwmul.vx v8, v25, a0
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = load i16, i16* %y
+ %c = sext i16 %b to i32
+ %d = insertelement <4 x i32> undef, i32 %c, i32 0
+ %e = shufflevector <4 x i32> %d, <4 x i32> undef, <4 x i32> zeroinitializer
+ %f = sext <4 x i16> %a to <4 x i32>
+ %g = mul <4 x i32> %e, %f
+ ret <4 x i32> %g
+}
+
+define <4 x i32> @vwmul_vx_v4i32_i32(<4 x i16>* %x, i32* %y) {
+; CHECK-LABEL: vwmul_vx_v4i32_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: lw a0, 0(a1)
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vsext.vf2 v26, v25
+; CHECK-NEXT: vmul.vx v8, v26, a0
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = load i32, i32* %y
+ %d = insertelement <4 x i32> undef, i32 %b, i32 0
+ %e = shufflevector <4 x i32> %d, <4 x i32> undef, <4 x i32> zeroinitializer
+ %f = sext <4 x i16> %a to <4 x i32>
+ %g = mul <4 x i32> %e, %f
+ ret <4 x i32> %g
+}
+
+define <2 x i64> @vwmul_vx_v2i64_i8(<2 x i32>* %x, i8* %y) {
+; RV32-LABEL: vwmul_vx_v2i64_i8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: lb a1, 0(a1)
+; RV32-NEXT: vle32.v v25, (a0)
+; RV32-NEXT: srai a0, a1, 31
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT: vsext.vf2 v27, v25
+; RV32-NEXT: vmul.vv v8, v26, v27
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwmul_vx_v2i64_i8:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT: vle32.v v25, (a0)
+; RV64-NEXT: lb a0, 0(a1)
+; RV64-NEXT: vwmul.vx v8, v25, a0
+; RV64-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load i8, i8* %y
+ %c = sext i8 %b to i64
+ %d = insertelement <2 x i64> undef, i64 %c, i64 0
+ %e = shufflevector <2 x i64> %d, <2 x i64> undef, <2 x i32> zeroinitializer
+ %f = sext <2 x i32> %a to <2 x i64>
+ %g = mul <2 x i64> %e, %f
+ ret <2 x i64> %g
+}
+
+define <2 x i64> @vwmul_vx_v2i64_i16(<2 x i32>* %x, i16* %y) {
+; RV32-LABEL: vwmul_vx_v2i64_i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: lh a1, 0(a1)
+; RV32-NEXT: vle32.v v25, (a0)
+; RV32-NEXT: srai a0, a1, 31
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT: vsext.vf2 v27, v25
+; RV32-NEXT: vmul.vv v8, v26, v27
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwmul_vx_v2i64_i16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT: vle32.v v25, (a0)
+; RV64-NEXT: lh a0, 0(a1)
+; RV64-NEXT: vwmul.vx v8, v25, a0
+; RV64-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load i16, i16* %y
+ %c = sext i16 %b to i64
+ %d = insertelement <2 x i64> undef, i64 %c, i64 0
+ %e = shufflevector <2 x i64> %d, <2 x i64> undef, <2 x i32> zeroinitializer
+ %f = sext <2 x i32> %a to <2 x i64>
+ %g = mul <2 x i64> %e, %f
+ ret <2 x i64> %g
+}
+
+define <2 x i64> @vwmul_vx_v2i64_i32(<2 x i32>* %x, i32* %y) {
+; RV32-LABEL: vwmul_vx_v2i64_i32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: vle32.v v25, (a0)
+; RV32-NEXT: srai a0, a1, 31
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT: vsext.vf2 v27, v25
+; RV32-NEXT: vmul.vv v8, v26, v27
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwmul_vx_v2i64_i32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT: vle32.v v25, (a0)
+; RV64-NEXT: lw a0, 0(a1)
+; RV64-NEXT: vwmul.vx v8, v25, a0
+; RV64-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load i32, i32* %y
+ %c = sext i32 %b to i64
+ %d = insertelement <2 x i64> undef, i64 %c, i64 0
+ %e = shufflevector <2 x i64> %d, <2 x i64> undef, <2 x i32> zeroinitializer
+ %f = sext <2 x i32> %a to <2 x i64>
+ %g = mul <2 x i64> %e, %f
+ ret <2 x i64> %g
+}
+
+define <2 x i64> @vwmul_vx_v2i64_i64(<2 x i32>* %x, i64* %y) {
+; RV32-LABEL: vwmul_vx_v2i64_i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: lw a2, 4(a1)
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: vle32.v v25, (a0)
+; RV32-NEXT: sw a2, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, mu
+; RV32-NEXT: vsext.vf2 v27, v25
+; RV32-NEXT: vmul.vv v8, v26, v27
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwmul_vx_v2i64_i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT: vle32.v v25, (a0)
+; RV64-NEXT: ld a0, 0(a1)
+; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT: vsext.vf2 v26, v25
+; RV64-NEXT: vmul.vx v8, v26, a0
+; RV64-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load i64, i64* %y
+ %d = insertelement <2 x i64> undef, i64 %b, i64 0
+ %e = shufflevector <2 x i64> %d, <2 x i64> undef, <2 x i32> zeroinitializer
+ %f = sext <2 x i32> %a to <2 x i64>
+ %g = mul <2 x i64> %e, %f
+ ret <2 x i64> %g
+}