[RISCV] Use a tail agnostic vslideup if possible for scalable insert_subvector #83146

Merged
9 changes: 8 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9729,8 +9729,15 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,

auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);

ElementCount EndIndex =
ElementCount::getScalable(RemIdx) + SubVecVT.getVectorElementCount();
VL = computeVLMax(SubVecVT, DL, DAG);

// Use tail agnostic policy if we're inserting over Vec's tail.
unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
if (EndIndex == VecVT.getVectorElementCount())
Policy = RISCVII::TAIL_AGNOSTIC;

// If we're inserting into the lowest elements, use a tail undisturbed
// vmv.v.v.
if (RemIdx == 0) {
@@ -9744,7 +9751,7 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);

SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
SlideupAmt, Mask, VL);
SlideupAmt, Mask, VL, Policy);
}

// If required, insert this subvector back into the correct vector register.
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
@@ -76,7 +76,7 @@ define <vscale x 4 x i8> @insert_nxv1i8_nxv4i8_3(<vscale x 4 x i8> %vec, <vscale
; CHECK-NEXT: slli a1, a0, 1
; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: add a0, a1, a0
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: ret
%v = call <vscale x 4 x i8> @llvm.vector.insert.nxv1i8.nxv4i8(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec, i64 3)
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
@@ -2235,9 +2235,9 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFH-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT: vmfeq.vv v16, v8, v24, v0.t
; ZVFH-NEXT: add a0, a1, a1
; ZVFH-NEXT: vsetvli zero, a0, e8, m1, tu, ma
; ZVFH-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; ZVFH-NEXT: vslideup.vx v16, v1, a1
; ZVFH-NEXT: vmv1r.v v0, v16
; ZVFH-NEXT: vmv.v.v v0, v16
; ZVFH-NEXT: csrr a0, vlenb
; ZVFH-NEXT: slli a0, a0, 4
; ZVFH-NEXT: add sp, sp, a0
@@ -2337,7 +2337,7 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: # %bb.3:
; ZVFHMIN-NEXT: mv a2, a5
; ZVFHMIN-NEXT: .LBB85_4:
; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslideup.vx v2, v26, a3
; ZVFHMIN-NEXT: sub a5, a2, a4
; ZVFHMIN-NEXT: sltu a6, a2, a5
@@ -2395,12 +2395,12 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmv1r.v v0, v1
; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v24, v0.t
; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslideup.vx v8, v3, a3
; ZVFHMIN-NEXT: add a0, a1, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e8, m1, tu, ma
; ZVFHMIN-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; ZVFHMIN-NEXT: vslideup.vx v8, v2, a1
; ZVFHMIN-NEXT: vmv1r.v v0, v8
; ZVFHMIN-NEXT: vmv.v.v v0, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: li a1, 34
; ZVFHMIN-NEXT: mul a0, a0, a1
@@ -3637,7 +3637,7 @@ define <vscale x 32 x i1> @fcmp_oeq_vv_nxv32f64(<vscale x 32 x double> %va, <vsc
; CHECK-NEXT: slli a0, a1, 1
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v17, v16, a0
; CHECK-NEXT: vmv1r.v v0, v17
; CHECK-NEXT: csrr a0, vlenb
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
@@ -3387,7 +3387,7 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: srli a0, a0, 3
; RV32-NEXT: add a1, a0, a0
; RV32-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; RV32-NEXT: vslideup.vx v0, v24, a0
; RV32-NEXT: ret
;
@@ -3400,7 +3400,7 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: srli a0, a0, 3
; RV64-NEXT: add a1, a0, a0
; RV64-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; RV64-NEXT: vslideup.vx v0, v24, a0
; RV64-NEXT: ret
;
@@ -3413,7 +3413,7 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
; ZVFHMIN32-NEXT: csrr a0, vlenb
; ZVFHMIN32-NEXT: srli a0, a0, 3
; ZVFHMIN32-NEXT: add a1, a0, a0
; ZVFHMIN32-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
; ZVFHMIN32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; ZVFHMIN32-NEXT: vslideup.vx v0, v24, a0
; ZVFHMIN32-NEXT: ret
;
@@ -3426,7 +3426,7 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) {
; ZVFHMIN64-NEXT: csrr a0, vlenb
; ZVFHMIN64-NEXT: srli a0, a0, 3
; ZVFHMIN64-NEXT: add a1, a0, a0
; ZVFHMIN64-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
; ZVFHMIN64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; ZVFHMIN64-NEXT: vslideup.vx v0, v24, a0
; ZVFHMIN64-NEXT: ret
%vc = fcmp oeq <vscale x 16 x double> %va, zeroinitializer
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
@@ -2424,7 +2424,7 @@ define <vscale x 32 x i1> @icmp_eq_vv_nxv32i32(<vscale x 32 x i32> %va, <vscale
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t
; CHECK-NEXT: add a0, a1, a1
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v16, v1, a1
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: csrr a0, vlenb
@@ -2459,7 +2459,7 @@ define <vscale x 32 x i1> @icmp_eq_vx_nxv32i32(<vscale x 32 x i32> %va, i32 %b,
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
; CHECK-NEXT: add a0, a2, a2
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v16, v25, a2
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
@@ -2492,7 +2492,7 @@ define <vscale x 32 x i1> @icmp_eq_vx_swap_nxv32i32(<vscale x 32 x i32> %va, i32
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t
; CHECK-NEXT: add a0, a2, a2
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v16, v25, a2
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: ret
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll
@@ -3235,7 +3235,7 @@ define <vscale x 16 x i1> @icmp_eq_vi_nx16i64(<vscale x 16 x i64> %va) {
; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; CHECK-NEXT: vmseq.vi v24, v16, 0
; CHECK-NEXT: vmseq.vi v0, v8, 0
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vx v0, v24, a0
; CHECK-NEXT: ret
%vc = icmp eq <vscale x 16 x i64> %va, zeroinitializer
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
@@ -22,7 +22,7 @@ define void @vector_interleave_store_nxv32i1_nxv16i1(<vscale x 16 x i1> %a, <vsc
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a1, a1, 2
; CHECK-NEXT: add a2, a1, a1
; CHECK-NEXT: vsetvli zero, a2, e8, mf2, tu, ma
; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v9, v8, a1
; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT: vsm.v v9, (a0)
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -24,7 +24,7 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1>
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vx v0, v8, a0
; CHECK-NEXT: ret
;
@@ -44,7 +44,7 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1>
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: srli a0, a0, 2
; ZVBB-NEXT: add a1, a0, a0
; ZVBB-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
; ZVBB-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; ZVBB-NEXT: vslideup.vx v0, v8, a0
; ZVBB-NEXT: ret
%res = call <vscale x 32 x i1> @llvm.experimental.vector.interleave2.nxv32i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
@@ -376,9 +376,9 @@ define <vscale x 4 x half> @vector_interleave_nxv4f16_nxv2f16(<vscale x 2 x half
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v10, a0
; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v10, v8, a0
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: vmv.v.v v8, v10
Comment on lines +379 to +381

@lukel97 (Contributor, Author) commented on Feb 27, 2024:

I'm not sure why using the ta policy causes us to lose the vmv1r.v here; I don't think it's directly related to this patch.

Also, a1 is VLMAX here, so perhaps RISCVInsertVSETVLI could detect this and emit the vsetvli a0, zero form.

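(For readers unfamiliar with the two vsetvli forms being contrasted, here is a rough sketch, assuming a1 really has been proven equal to VLMAX for e16/m1; the destination register choice is illustrative, not what the pass would necessarily pick.)

    ; current output: the AVL is materialized into a1 and passed as a register AVL
    add a1, a0, a0
    vsetvli zero, a1, e16, m1, ta, ma

    ; hypothetical output if RISCVInsertVSETVLI proved a1 == VLMAX: rs1=x0 with a
    ; non-x0 rd requests VL=VLMAX, so the add is no longer needed
    vsetvli a1, zero, e16, m1, ta, ma
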
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_nxv4f16_nxv2f16:
@@ -391,9 +391,9 @@ define <vscale x 4 x half> @vector_interleave_nxv4f16_nxv2f16(<vscale x 2 x half
; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZVBB-NEXT: vslidedown.vx v8, v10, a0
; ZVBB-NEXT: add a1, a0, a0
; ZVBB-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; ZVBB-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; ZVBB-NEXT: vslideup.vx v10, v8, a0
; ZVBB-NEXT: vmv1r.v v8, v10
; ZVBB-NEXT: vmv.v.v v8, v10
; ZVBB-NEXT: ret
%res = call <vscale x 4 x half> @llvm.experimental.vector.interleave2.nxv4f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
ret <vscale x 4 x half> %res
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
@@ -937,7 +937,7 @@ define <vscale x 32 x i1> @vfptosi_nxv32f16_nxv32i1(<vscale x 32 x half> %va) {
; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v24
; ZVFHMIN-NEXT: vand.vi v8, v8, 1
; ZVFHMIN-NEXT: vmsne.vi v0, v8, 0
; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslideup.vx v0, v16, a0
; ZVFHMIN-NEXT: ret
%evec = fptosi <vscale x 32 x half> %va to <vscale x 32 x i1>
@@ -967,7 +967,7 @@ define <vscale x 32 x i1> @vfptoui_nxv32f16_nxv32i1(<vscale x 32 x half> %va) {
; ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v8, v24
; ZVFHMIN-NEXT: vand.vi v8, v8, 1
; ZVFHMIN-NEXT: vmsne.vi v0, v8, 0
; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslideup.vx v0, v16, a0
; ZVFHMIN-NEXT: ret
%evec = fptoui <vscale x 32 x half> %va to <vscale x 32 x i1>
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -894,7 +894,7 @@ define half @vreduce_ord_fadd_nxv3f16(<vscale x 3 x half> %v, half %s) {
; CHECK-NEXT: lui a2, 1048568
; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.x v9, a2
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
@@ -982,7 +982,7 @@ define half @vreduce_fadd_nxv3f16(<vscale x 3 x half> %v, half %s) {
; CHECK-NEXT: lui a2, 1048568
; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.x v9, a2
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0