[RISCV] Rework subvec handling in insert_vector_elt lowering #75513

Closed

28 changes: 12 additions & 16 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7765,10 +7765,10 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,

// If we know the index we're going to insert at, we can shrink Vec so that
// we're performing the scalar inserts and slideup on a smaller LMUL.
MVT OrigContainerVT = ContainerVT;
SDValue OrigVec = Vec;
SDValue AlignedIdx;
if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
MVT OrigContainerVT = ContainerVT;
SDValue OrigVec = Vec;
SDValue AlignedIdx;
const unsigned OrigIdx = IdxC->getZExtValue();
// Do we know an upper bound on LMUL?
if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
@@ -7795,9 +7795,17 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
ContainerVT = M1VT;
}

if (AlignedIdx)
if (AlignedIdx) {
Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
AlignedIdx);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT, Vec,
Val, Idx);
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
Vec, AlignedIdx);
if (!VecVT.isFixedLengthVector())
return Vec;
return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
}
}

MVT XLenVT = Subtarget.getXLenVT();
@@ -7826,10 +7834,6 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
if (!VecVT.isFloatingPoint())
Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);

if (AlignedIdx)
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
Vec, AlignedIdx);
if (!VecVT.isFixedLengthVector())
return Vec;
return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
@@ -7861,11 +7865,6 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
// Bitcast back to the right container type.
ValInVec = DAG.getBitcast(ContainerVT, ValInVec);

if (AlignedIdx)
ValInVec =
DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
ValInVec, AlignedIdx);
if (!VecVT.isFixedLengthVector())
return ValInVec;
return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
@@ -7897,9 +7896,6 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
Idx, Mask, InsertVL, Policy);

if (AlignedIdx)
Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
Slideup, AlignedIdx);
if (!VecVT.isFixedLengthVector())
return Slideup;
return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
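For orientation, here is a minimal standalone IR sketch (hypothetical, modeled on the existing insertelt tests below, not part of this PR) of the case the reworked path targets: an insertelement at a known constant index into a wide fixed-length vector. With a constant index, the lowering above can now extract an aligned subvector, perform the element insert at the reduced LMUL, re-insert the subvector, and return immediately, rather than patching each of the later return paths with an INSERT_SUBVECTOR as the removed blocks did.

; Hypothetical example in the style of the tests below: insert at a
; known constant index 0 of a wide fixed-length vector, so the whole
; operation can be done on an aligned m1 subvector.
define <32 x i32> @insertelt_v32i32_0_sketch(<32 x i32> %a, i32 %y) {
  %b = insertelement <32 x i32> %a, i32 %y, i32 0
  ret <32 x i32> %b
}

In the updated CHECK lines that follow, such cases now select vsetvli a1, zero, e32, m1, tu, ma (AVL = VLMAX at LMUL 1) instead of materializing an explicit element count for the vsetvli/vsetivli.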
16 changes: 7 additions & 9 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -39,8 +39,7 @@ define <4 x i32> @insertelt_v4i32_idx(<4 x i32> %a, i32 %y, i32 zeroext %idx) {
define <32 x i32> @insertelt_v32i32_0(<32 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v32i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
; CHECK-NEXT: vsetvli a1, zero, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
%b = insertelement <32 x i32> %a, i32 %y, i32 0
@@ -89,8 +88,7 @@ define <32 x i32> @insertelt_v32i32_idx(<32 x i32> %a, i32 %y, i32 zeroext %idx)
define <64 x i32> @insertelt_v64i32_0(<64 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v64i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
; CHECK-NEXT: vsetvli a1, zero, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
%b = insertelement <64 x i32> %a, i32 %y, i32 0
@@ -388,7 +386,7 @@ define <8 x i64> @insertelt_v8i64_0(<8 x i64> %a, ptr %x) {
; CHECK-LABEL: insertelt_v8i64_0:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: vsetivli zero, 8, e64, m1, tu, ma
; CHECK-NEXT: vsetvli a1, zero, e64, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
%b = insertelement <8 x i64> %a, i64 -1, i32 0
@@ -466,7 +464,7 @@ define <8 x i64> @insertelt_c6_v8i64_0(<8 x i64> %a, ptr %x) {
; CHECK-LABEL: insertelt_c6_v8i64_0:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 6
; CHECK-NEXT: vsetivli zero, 8, e64, m1, tu, ma
; CHECK-NEXT: vsetvli a1, zero, e64, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
%b = insertelement <8 x i64> %a, i64 6, i32 0
@@ -569,7 +567,7 @@ define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) {
define <16 x i32> @insertelt_c0_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c0_v16xi32_exact:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m1, tu, ma
; CHECK-NEXT: vsetvli a1, zero, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
%v = insertelement <16 x i32> %vin, i32 %a, i32 0
@@ -612,7 +610,7 @@ define <16 x i32> @insertelt_c3_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_ra
define <16 x i32> @insertelt_c12_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
; CHECK-LABEL: insertelt_c12_v16xi32_exact:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m1, tu, ma
; CHECK-NEXT: vsetvli a1, zero, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v11, a0
; CHECK-NEXT: ret
%v = insertelement <16 x i32> %vin, i32 %a, i32 12
@@ -662,7 +660,7 @@ define <8 x i64> @insertelt_c4_v8xi64_exact(<8 x i64> %vin, i64 %a) vscale_range
;
; RV64-LABEL: insertelt_c4_v8xi64_exact:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m1, tu, ma
; RV64-NEXT: vsetvli a1, zero, e64, m1, tu, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: ret
%v = insertelement <8 x i64> %vin, i64 %a, i32 4
68 changes: 33 additions & 35 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -2445,7 +2445,7 @@ define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthr
; RV64ZVE32F-NEXT: .LBB34_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vsetvli a3, zero, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB34_2
@@ -2539,7 +2539,7 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vsetvli a3, zero, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB35_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -2624,8 +2624,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB35_8
; RV64ZVE32F-NEXT: j .LBB35_9
@@ -2689,7 +2689,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vsetvli a3, zero, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB36_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -2774,8 +2774,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB36_8
; RV64ZVE32F-NEXT: j .LBB36_9
@@ -2842,7 +2842,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vsetvli a3, zero, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB37_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -3001,7 +3001,7 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vsetvli a3, zero, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB38_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -3086,8 +3086,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB38_8
; RV64ZVE32F-NEXT: j .LBB38_9
@@ -3152,7 +3152,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vsetvli a3, zero, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB39_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -3237,8 +3237,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB39_8
; RV64ZVE32F-NEXT: j .LBB39_9
@@ -3306,7 +3306,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vsetvli a4, zero, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a3
; RV64ZVE32F-NEXT: .LBB40_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
Expand Down Expand Up @@ -3459,7 +3459,7 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB41_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vsetvli a2, zero, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -8267,7 +8267,7 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas
; RV64ZVE32F-NEXT: .LBB73_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vsetvli a2, zero, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB73_2
@@ -8361,7 +8361,7 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vsetvli a2, zero, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB74_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -8446,8 +8446,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB74_8
; RV64ZVE32F-NEXT: j .LBB74_9
@@ -8511,7 +8511,7 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vsetvli a2, zero, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB75_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -8596,8 +8596,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB75_8
; RV64ZVE32F-NEXT: j .LBB75_9
@@ -8664,7 +8664,7 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vsetvli a2, zero, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB76_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -8823,7 +8823,7 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vsetvli a2, zero, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB77_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -8908,8 +8908,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB77_8
; RV64ZVE32F-NEXT: j .LBB77_9
@@ -8974,7 +8974,7 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vsetvli a2, zero, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB78_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
@@ -9059,8 +9059,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB78_8
; RV64ZVE32F-NEXT: j .LBB78_9
@@ -9128,7 +9128,7 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vsetvli a3, zero, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB79_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
@@ -9281,7 +9281,7 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB80_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vsetvli a2, zero, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -12416,12 +12416,10 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB98_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB98_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2