[RISCV] Lower fixed-length vp_{gather,scatter} for zvfhmin/zvfbfmin #115253

Merged

Conversation

lukel97 (Contributor) commented Nov 7, 2024

This uses the same lowering as masked gathers and scatters.

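For context, the kind of IR this change affects looks like the example below, lifted from the updated gather tests. With only +zvfhmin/+zvfbfmin enabled, a fixed-length vp.gather on bf16 (or f16) elements is now custom-lowered the same way as a masked gather, i.e. to an indexed load (vluxei32/vluxei64) under an EVL-derived vsetvli; see the RV32/RV64 CHECK lines in the diff below. The vp.scatter path is updated analogously.

declare <2 x bfloat> @llvm.vp.gather.v2bf16.v2p0(<2 x ptr>, <2 x i1>, i32)

define <2 x bfloat> @vpgather_v2bf16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
  ; Per the updated tests, this selects to vluxei32.v (RV32) / vluxei64.v (RV64)
  ; with the mask applied via v0.t and the VL taken from %evl.
  %v = call <2 x bfloat> @llvm.vp.gather.v2bf16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret <2 x bfloat> %v
}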
llvmbot (Member) commented Nov 7, 2024

@llvm/pr-subscribers-backend-risc-v

Author: Luke Lau (lukel97)

Changes

This uses the same lowering as masked gathers and scatters.


Patch is 28.13 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/115253.diff

3 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+2-5)
  • (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll (+219-46)
  • (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll (+92-18)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a625e9d5efeb55..4abad26316bcd4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1339,12 +1339,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                             ISD::VECTOR_SHUFFLE, ISD::VECTOR_COMPRESS},
                            VT, Custom);
 
-        // FIXME: vp_gather/scatter can be hoisted to here.
         setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
                             ISD::MGATHER, ISD::MSCATTER},
                            VT, Custom);
-        setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
-                            ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
+        setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
+                            ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                             ISD::EXPERIMENTAL_VP_STRIDED_STORE},
                            VT, Custom);
 
@@ -1409,8 +1408,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         setOperationAction({ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR}, VT,
                            Custom);
 
-        setOperationAction({ISD::VP_GATHER, ISD::VP_SCATTER}, VT, Custom);
-
         setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
                             ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
                             ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
index f66974e5114067..88ae643ca742e7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
@@ -1,7 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
 
 declare <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr>, <2 x i1>, i32)
@@ -1152,6 +1156,175 @@ define <8 x i64> @vpgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m
   ret <8 x i64> %v
 }
 
+declare <2 x bfloat> @llvm.vp.gather.v2bf16.v2p0(<2 x ptr>, <2 x i1>, i32)
+
+define <2 x bfloat> @vpgather_v2bf16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_v2bf16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
+; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_v2bf16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
+; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    ret
+  %v = call <2 x bfloat> @llvm.vp.gather.v2bf16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
+  ret <2 x bfloat> %v
+}
+
+declare <4 x bfloat> @llvm.vp.gather.v4bf16.v4p0(<4 x ptr>, <4 x i1>, i32)
+
+define <4 x bfloat> @vpgather_v4bf16(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_v4bf16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
+; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_v4bf16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
+; RV64-NEXT:    vmv1r.v v8, v10
+; RV64-NEXT:    ret
+  %v = call <4 x bfloat> @llvm.vp.gather.v4bf16.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
+  ret <4 x bfloat> %v
+}
+
+define <4 x bfloat> @vpgather_truemask_v4bf16(<4 x ptr> %ptrs, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_truemask_v4bf16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; RV32-NEXT:    vluxei32.v v9, (zero), v8
+; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_truemask_v4bf16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; RV64-NEXT:    vluxei64.v v10, (zero), v8
+; RV64-NEXT:    vmv1r.v v8, v10
+; RV64-NEXT:    ret
+  %v = call <4 x bfloat> @llvm.vp.gather.v4bf16.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
+  ret <4 x bfloat> %v
+}
+
+declare <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr>, <8 x i1>, i32)
+
+define <8 x bfloat> @vpgather_v8bf16(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_v8bf16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
+; RV32-NEXT:    vmv.v.v v8, v10
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_v8bf16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
+; RV64-NEXT:    vmv.v.v v8, v12
+; RV64-NEXT:    ret
+  %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
+  ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vpgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_v8i8_v8bf16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT:    vsext.vf4 v10, v8
+; RV32-NEXT:    vadd.vv v10, v10, v10
+; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_v8i8_v8bf16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vsext.vf8 v12, v8
+; RV64-NEXT:    vadd.vv v12, v12, v12
+; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i8> %idxs
+  %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
+  ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vpgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8bf16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT:    vsext.vf4 v10, v8
+; RV32-NEXT:    vadd.vv v10, v10, v10
+; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8bf16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vsext.vf8 v12, v8
+; RV64-NEXT:    vadd.vv v12, v12, v12
+; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
+; RV64-NEXT:    ret
+  %eidxs = sext <8 x i8> %idxs to <8 x i16>
+  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
+  %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
+  ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vpgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8bf16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vwaddu.vv v9, v8, v8
+; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; RV32-NEXT:    vluxei16.v v8, (a0), v9, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8bf16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vwaddu.vv v9, v8, v8
+; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; RV64-NEXT:    vluxei16.v v8, (a0), v9, v0.t
+; RV64-NEXT:    ret
+  %eidxs = zext <8 x i8> %idxs to <8 x i16>
+  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
+  %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
+  ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vpgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_v8bf16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT:    vwadd.vv v10, v8, v8
+; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_v8bf16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vsext.vf4 v12, v8
+; RV64-NEXT:    vadd.vv v12, v12, v12
+; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %idxs
+  %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
+  ret <8 x bfloat> %v
+}
+
 declare <2 x half> @llvm.vp.gather.v2f16.v2p0(<2 x ptr>, <2 x i1>, i32)
 
 define <2 x half> @vpgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
@@ -1872,10 +2045,10 @@ define <32 x double> @vpgather_v32f64(<32 x ptr> %ptrs, <32 x i1> %m, i32 zeroex
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a2, 16
 ; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    bltu a0, a2, .LBB86_2
+; RV32-NEXT:    bltu a0, a2, .LBB94_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    li a1, 16
-; RV32-NEXT:  .LBB86_2:
+; RV32-NEXT:  .LBB94_2:
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v24, (zero), v8, v0.t
 ; RV32-NEXT:    addi a1, a0, -16
@@ -1895,10 +2068,10 @@ define <32 x double> @vpgather_v32f64(<32 x ptr> %ptrs, <32 x i1> %m, i32 zeroex
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    li a2, 16
 ; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    bltu a0, a2, .LBB86_2
+; RV64-NEXT:    bltu a0, a2, .LBB94_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    li a1, 16
-; RV64-NEXT:  .LBB86_2:
+; RV64-NEXT:  .LBB94_2:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
 ; RV64-NEXT:    addi a1, a0, -16
@@ -1923,10 +2096,10 @@ define <32 x double> @vpgather_baseidx_v32i8_v32f64(ptr %base, <32 x i8> %idxs,
 ; RV32-NEXT:    li a3, 16
 ; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    mv a2, a1
-; RV32-NEXT:    bltu a1, a3, .LBB87_2
+; RV32-NEXT:    bltu a1, a3, .LBB95_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    li a2, 16
-; RV32-NEXT:  .LBB87_2:
+; RV32-NEXT:  .LBB95_2:
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    addi a2, a1, -16
@@ -1952,10 +2125,10 @@ define <32 x double> @vpgather_baseidx_v32i8_v32f64(ptr %base, <32 x i8> %idxs,
 ; RV64-NEXT:    li a3, 16
 ; RV64-NEXT:    vsll.vi v8, v24, 3
 ; RV64-NEXT:    mv a2, a1
-; RV64-NEXT:    bltu a1, a3, .LBB87_2
+; RV64-NEXT:    bltu a1, a3, .LBB95_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    li a2, 16
-; RV64-NEXT:  .LBB87_2:
+; RV64-NEXT:  .LBB95_2:
 ; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    addi a2, a1, -16
@@ -1981,10 +2154,10 @@ define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %i
 ; RV32-NEXT:    li a3, 16
 ; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    mv a2, a1
-; RV32-NEXT:    bltu a1, a3, .LBB88_2
+; RV32-NEXT:    bltu a1, a3, .LBB96_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    li a2, 16
-; RV32-NEXT:  .LBB88_2:
+; RV32-NEXT:  .LBB96_2:
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    addi a2, a1, -16
@@ -2011,10 +2184,10 @@ define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %i
 ; RV64-NEXT:    li a3, 16
 ; RV64-NEXT:    vsll.vi v8, v24, 3
 ; RV64-NEXT:    mv a2, a1
-; RV64-NEXT:    bltu a1, a3, .LBB88_2
+; RV64-NEXT:    bltu a1, a3, .LBB96_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    li a2, 16
-; RV64-NEXT:  .LBB88_2:
+; RV64-NEXT:  .LBB96_2:
 ; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    addi a2, a1, -16
@@ -2041,10 +2214,10 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %i
 ; RV32-NEXT:    li a3, 16
 ; RV32-NEXT:    vsll.vi v16, v12, 3
 ; RV32-NEXT:    mv a2, a1
-; RV32-NEXT:    bltu a1, a3, .LBB89_2
+; RV32-NEXT:    bltu a1, a3, .LBB97_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    li a2, 16
-; RV32-NEXT:  .LBB89_2:
+; RV32-NEXT:  .LBB97_2:
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei16.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    addi a2, a1, -16
@@ -2067,10 +2240,10 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %i
 ; RV64-NEXT:    li a3, 16
 ; RV64-NEXT:    vsll.vi v16, v12, 3
 ; RV64-NEXT:    mv a2, a1
-; RV64-NEXT:    bltu a1, a3, .LBB89_2
+; RV64-NEXT:    bltu a1, a3, .LBB97_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    li a2, 16
-; RV64-NEXT:  .LBB89_2:
+; RV64-NEXT:  .LBB97_2:
 ; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei16.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    addi a2, a1, -16
@@ -2099,10 +2272,10 @@ define <32 x double> @vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs
 ; RV32-NEXT:    li a3, 16
 ; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    mv a2, a1
-; RV32-NEXT:    bltu a1, a3, .LBB90_2
+; RV32-NEXT:    bltu a1, a3, .LBB98_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    li a2, 16
-; RV32-NEXT:  .LBB90_2:
+; RV32-NEXT:  .LBB98_2:
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    addi a2, a1, -16
@@ -2128,10 +2301,10 @@ define <32 x double> @vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs
 ; RV64-NEXT:    li a3, 16
 ; RV64-NEXT:    vsll.vi v8, v24, 3
 ; RV64-NEXT:    mv a2, a1
-; RV64-NEXT:    bltu a1, a3, .LBB90_2
+; RV64-NEXT:    bltu a1, a3, .LBB98_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    li a2, 16
-; RV64-NEXT:  .LBB90_2:
+; RV64-NEXT:  .LBB98_2:
 ; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    addi a2, a1, -16
@@ -2157,10 +2330,10 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16>
 ; RV32-NEXT:    li a3, 16
 ; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    mv a2, a1
-; RV32-NEXT:    bltu a1, a3, .LBB91_2
+; RV32-NEXT:    bltu a1, a3, .LBB99_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    li a2, 16
-; RV32-NEXT:  .LBB91_2:
+; RV32-NEXT:  .LBB99_2:
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    addi a2, a1, -16
@@ -2187,10 +2360,10 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16>
 ; RV64-NEXT:    li a3, 16
 ; RV64-NEXT:    vsll.vi v8, v24, 3
 ; RV64-NEXT:    mv a2, a1
-; RV64-NEXT:    bltu a1, a3, .LBB91_2
+; RV64-NEXT:    bltu a1, a3, .LBB99_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    li a2, 16
-; RV64-NEXT:  .LBB91_2:
+; RV64-NEXT:  .LBB99_2:
 ; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    addi a2, a1, -16
@@ -2217,10 +2390,10 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16>
 ; RV32-NEXT:    li a3, 16
 ; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    mv a2, a1
-; RV32-NEXT:    bltu a1, a3, .LBB92_2
+; RV32-NEXT:    bltu a1, a3, .LBB100_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    li a2, 16
-; RV32-NEXT:  .LBB92_2:
+; RV32-NEXT:  .LBB100_2:
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    addi a2, a1, -16
@@ -2243,10 +2416,10 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16>
 ; RV64-NEXT:    li a3, 16
 ; RV64-NEXT:    vsll.vi v16, v16, 3
 ; RV64-NEXT:    mv a2, a1
-; RV64-NEXT:    bltu a1, a3, .LBB92_2
+; RV64-NEXT:    bltu a1, a3, .LBB100_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    li a2, 16
-; RV64-NEXT:  .LBB92_2:
+; RV64-NEXT:  .LBB100_2:
 ; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV64-NEXT:    addi a2, a1, -16
@@ -2274,10 +2447,10 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v8, 3
 ; RV32-NEXT:    mv a2, a1
-; RV32-NEXT:    bltu a1, a3, .LBB93_2
+; RV32-NEXT:    bltu a1, a3, .LBB101_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    li a2, 16
-; RV32-NEXT:  .LBB93_2:
+; RV32-NEXT:  .LBB101_2:
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    addi a2, a1, -16
@@ -2303,10 +2476,10 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs
 ; RV64-NEXT:    li a3, 16
 ; RV64-NEXT:    vsll.vi v8, v24, 3
 ; RV64-NEXT:    mv a2, a1
-; RV64-NEXT:    bltu a1, a3, .LBB93_2
+; RV64-NEXT:    bltu a1, a3, .LBB101_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    li a2, 16
-; RV64-NEXT:  .LBB93_2:
+; RV64-NEXT:  .LBB101_2:
 ; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    addi a2, a1, -16
@@ -2331,10 +2504,10 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32>
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v8, 3
 ; RV32-NEXT:    mv a2, a1
-; RV32-NEXT:    bltu a1, a3, .LBB94_2
+; RV32-NEXT:    bltu a1, a3, .LBB102_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    li a2, 16
-; RV32-NEXT:  .LBB94_2:
+; RV32-NEXT:  .LBB102_2:
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    addi a2, a1, -16
@@ -2361,10 +2534,10 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32>
 ; RV64-NEXT:    li a3, 16
 ; RV64-NEXT:    vsll.vi v8, v24, 3
 ; RV64-NEXT:    mv a2, a1
-; RV64-NEXT:    bltu a1, a3, .LBB94_2
+; RV64-NEXT:    bltu a1, a3, .LBB102_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    li a2, 16
-; RV64-NEXT:  .LBB94_2:
+; RV64-NEXT:  .LBB102_2:
 ; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    addi a2, a1, -16
@@ -2390,10 +2563,10 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32>
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; RV32-NEXT:    vsll.vi v16, v8, 3
 ; RV32-NEXT:    mv a2, a1
-; RV32-NEXT:    bltu a1, a3, .LBB95_2
+; RV32-NEXT:    bltu a1, a3, .LBB103_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    li a2, 16
-; RV32-NEXT:  .LBB95_2:
+; RV32-NEXT:  .LBB103_2:
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    addi a2, a1, -16
@@ -2420,10 +2593,10 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32>
 ; RV64-NEXT:    li a3, 16
 ; RV64-NEXT:    vsll.vi v8, v24, 3
 ; RV64-NEXT:    mv a2, a1
-; RV64-NEXT:    bltu a1, a3, .LBB95_2
+; RV64-NEXT:    bltu a1, a3, .LBB103_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    li a2, 16
-; RV64-NEXT:  .LBB95_2:
+; RV64-NEXT:  .LBB103_2:
 ; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
 ; RV64-NEXT:    addi a2, a1, -16
@@ -2463,10 +2636,10 @@ define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
 ; RV32-NEXT:    li a2, 16
-; RV32-NEXT:    bltu a1, a2, .LBB96_2
+; RV32-NEXT:    bltu a1, a2, .LBB104_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    li a1, 16
-; RV32-NEXT:  .LBB96_2:
+; RV32-NEXT:  .LBB104_2:
 ; RV32-NEXT:    vmv1r.v v0, v7
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT:    vluxei32.v v8, (a0), v24, v0.t
@@ -2479,10 +2652,10 @@ define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x
 ; RV64-NEXT:    li a3, 16
 ; RV64-NEXT:    vsll.vi v8, v8, 3
 ; RV64-NEXT:    mv a2, a1
-; RV64-NEXT:    bltu a1, a3, .LBB96_2
+; RV64-NEXT:    bltu a1, a3,...
[truncated]

preames (Collaborator) left a comment


LGTM

@lukel97 lukel97 merged commit 481ff22 into llvm:main Nov 7, 2024
8 of 9 checks passed