[RISCV] Remove hasSideEffects=1 for saturating/fault-only-first instructions #90049

Merged
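
This patch removes IntrHasSideEffects from the RISC-V saturating-arithmetic intrinsic definitions and hasSideEffects = 1 from the fault-only-first (vleff/vlsegff) and VXSAT-writing pseudoinstructions. Those effects remain modeled explicitly through Defs = [VL] and Defs = [VXSAT], so dropping the blanket side-effect flag lets the backend commute the saturating instructions and fold vmerge into fault-only-first loads, as the updated tests below show.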
13 changes: 6 additions & 7 deletions llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -661,7 +661,7 @@ let TargetPrefix = "riscv" in {
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
llvm_anyint_ty],
[IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic {
[IntrNoMem]>, RISCVVIntrinsic {
let ScalarOperand = 2;
let VLOperand = 3;
}
@@ -684,7 +684,7 @@ let TargetPrefix = "riscv" in {
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
LLVMMatchType<2>],
[ImmArg<ArgIndex<5>>, IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic {
[ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic {
let ScalarOperand = 2;
let VLOperand = 4;
}
@@ -708,7 +708,7 @@ let TargetPrefix = "riscv" in {
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
llvm_anyint_ty, LLVMMatchType<2>],
[ImmArg<ArgIndex<3>>, IntrNoMem, IntrHasSideEffects]>,
[ImmArg<ArgIndex<3>>, IntrNoMem]>,
RISCVVIntrinsic {
let VLOperand = 4;
}
@@ -721,7 +721,7 @@ let TargetPrefix = "riscv" in {
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
LLVMMatchType<2>, LLVMMatchType<2>],
[ImmArg<ArgIndex<4>>,ImmArg<ArgIndex<6>>, IntrNoMem, IntrHasSideEffects]>,
[ImmArg<ArgIndex<4>>,ImmArg<ArgIndex<6>>, IntrNoMem]>,
RISCVVIntrinsic {
let VLOperand = 5;
}
@@ -733,7 +733,7 @@ let TargetPrefix = "riscv" in {
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
llvm_anyint_ty, LLVMMatchType<3>],
[ImmArg<ArgIndex<3>>, IntrNoMem, IntrHasSideEffects]>,
[ImmArg<ArgIndex<3>>, IntrNoMem]>,
RISCVVIntrinsic {
let VLOperand = 4;
}
@@ -746,8 +746,7 @@ let TargetPrefix = "riscv" in {
[LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
LLVMMatchType<3>, LLVMMatchType<3>],
[ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<6>>, IntrNoMem,
IntrHasSideEffects]>, RISCVVIntrinsic {
[ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<6>>, IntrNoMem]>, RISCVVIntrinsic {
let VLOperand = 5;
}
// Input: (vector_in, vector_in, scalar_in, vl, policy)
1 change: 0 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3668,7 +3668,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
}

// Skip if True has side effect.
// TODO: Support vleff and vlsegff.
if (TII->get(TrueOpc).hasUnmodeledSideEffects())
return false;

10 changes: 5 additions & 5 deletions llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -6232,7 +6232,7 @@ defm PseudoVSUX : VPseudoIStore<Ordered=false>;
//===----------------------------------------------------------------------===//

// vleff may update VL register
let hasSideEffects = 1, Defs = [VL] in
let Defs = [VL] in
defm PseudoVL : VPseudoFFLoad;

//===----------------------------------------------------------------------===//
@@ -6248,7 +6248,7 @@ defm PseudoVSOXSEG : VPseudoISegStore<Ordered=true>;
defm PseudoVSUXSEG : VPseudoISegStore<Ordered=false>;

// vlseg<nf>e<eew>ff.v may update VL register
let hasSideEffects = 1, Defs = [VL] in {
let Defs = [VL] in {
defm PseudoVLSEG : VPseudoUSSegLoadFF;
}

@@ -6450,7 +6450,7 @@ defm PseudoVMV_V : VPseudoUnaryVMV_V_X_I;
//===----------------------------------------------------------------------===//
// 12.1. Vector Single-Width Saturating Add and Subtract
//===----------------------------------------------------------------------===//
let Defs = [VXSAT], hasSideEffects = 1 in {
let Defs = [VXSAT] in {
defm PseudoVSADDU : VPseudoVSALU_VV_VX_VI<Commutable=1>;
defm PseudoVSADD : VPseudoVSALU_VV_VX_VI<Commutable=1>;
defm PseudoVSSUBU : VPseudoVSALU_VV_VX;
@@ -6468,7 +6468,7 @@ defm PseudoVASUB : VPseudoVAALU_VV_VX_RM;
//===----------------------------------------------------------------------===//
// 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
//===----------------------------------------------------------------------===//
let Defs = [VXSAT], hasSideEffects = 1 in {
let Defs = [VXSAT] in {
defm PseudoVSMUL : VPseudoVSMUL_VV_VX_RM;
}

@@ -6481,7 +6481,7 @@ defm PseudoVSSRA : VPseudoVSSHT_VV_VX_VI_RM<uimm5>;
//===----------------------------------------------------------------------===//
// 12.5. Vector Narrowing Fixed-Point Clip Instructions
//===----------------------------------------------------------------------===//
let Defs = [VXSAT], hasSideEffects = 1 in {
let Defs = [VXSAT] in {
defm PseudoVNCLIP : VPseudoVNCLP_WV_WX_WI_RM;
defm PseudoVNCLIPU : VPseudoVNCLP_WV_WX_WI_RM;
}
69 changes: 27 additions & 42 deletions llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
@@ -26,7 +26,10 @@ define void @last_chance_recoloring_failure() {
; CHECK-NEXT: li a0, 55
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vloxseg2ei32.v v16, (a0), v8
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
@@ -37,37 +40,24 @@ define void @last_chance_recoloring_failure() {
; CHECK-NEXT: li s0, 36
; CHECK-NEXT: vsetvli zero, s0, e16, m4, ta, ma
; CHECK-NEXT: vfwadd.vv v16, v8, v12, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: call func
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vrgather.vv v16, v8, v12, v0.t
; CHECK-NEXT: vsetvli zero, s0, e16, m4, ta, ma
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 2
; CHECK-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vfwsub.wv v8, v0, v20
; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu
; CHECK-NEXT: vssubu.vv v16, v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, s0, e32, m8, tu, mu
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfdiv.vv v8, v16, v8, v0.t
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: vl4r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vl4r.v v20, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfwsub.wv v8, v24, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, mu
; CHECK-NEXT: vfdiv.vv v8, v24, v8, v0.t
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
@@ -109,25 +99,20 @@ define void @last_chance_recoloring_failure() {
; SUBREGLIVENESS-NEXT: addi a0, sp, 16
; SUBREGLIVENESS-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; SUBREGLIVENESS-NEXT: call func
; SUBREGLIVENESS-NEXT: li a0, 32
; SUBREGLIVENESS-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; SUBREGLIVENESS-NEXT: vrgather.vv v16, v8, v12, v0.t
; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e16, m4, ta, ma
; SUBREGLIVENESS-NEXT: csrr a0, vlenb
; SUBREGLIVENESS-NEXT: slli a0, a0, 3
; SUBREGLIVENESS-NEXT: add a0, sp, a0
; SUBREGLIVENESS-NEXT: addi a0, a0, 16
; SUBREGLIVENESS-NEXT: csrr a1, vlenb
; SUBREGLIVENESS-NEXT: slli a1, a1, 3
; SUBREGLIVENESS-NEXT: add a1, sp, a1
; SUBREGLIVENESS-NEXT: addi a1, a1, 16
; SUBREGLIVENESS-NEXT: csrr a2, vlenb
; SUBREGLIVENESS-NEXT: slli a2, a2, 2
; SUBREGLIVENESS-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload
; SUBREGLIVENESS-NEXT: add a1, a1, a2
; SUBREGLIVENESS-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload
; SUBREGLIVENESS-NEXT: addi a1, sp, 16
; SUBREGLIVENESS-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; SUBREGLIVENESS-NEXT: vfwsub.wv v8, v24, v20
; SUBREGLIVENESS-NEXT: vsetvli zero, a0, e16, m4, tu, mu
; SUBREGLIVENESS-NEXT: vssubu.vv v16, v16, v8, v0.t
; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e32, m8, tu, mu
; SUBREGLIVENESS-NEXT: slli a1, a1, 2
; SUBREGLIVENESS-NEXT: vl4r.v v16, (a0) # Unknown-size Folded Reload
; SUBREGLIVENESS-NEXT: add a0, a0, a1
; SUBREGLIVENESS-NEXT: vl4r.v v20, (a0) # Unknown-size Folded Reload
; SUBREGLIVENESS-NEXT: addi a0, sp, 16
; SUBREGLIVENESS-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; SUBREGLIVENESS-NEXT: vfwsub.wv v8, v24, v16
; SUBREGLIVENESS-NEXT: vsetvli zero, zero, e32, m8, tu, mu
; SUBREGLIVENESS-NEXT: vfdiv.vv v8, v24, v8, v0.t
; SUBREGLIVENESS-NEXT: vse32.v v8, (a0)
; SUBREGLIVENESS-NEXT: csrr a0, vlenb
21 changes: 9 additions & 12 deletions llvm/test/CodeGen/RISCV/rvv/commutable.ll
@@ -655,10 +655,9 @@ define <vscale x 1 x i64> @commutable_vsadd_vv(<vscale x 1 x i64> %0, <vscale x
; CHECK-LABEL: commutable_vsadd_vv:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vsadd.vv v10, v8, v9
; CHECK-NEXT: vsadd.vv v8, v9, v8
; CHECK-NEXT: vsadd.vv v8, v8, v9
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v10, v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vsadd.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, iXLen %2)
@@ -673,7 +672,7 @@ define <vscale x 1 x i64> @commutable_vsadd_vv_masked(<vscale x 1 x i64> %0, <vs
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vsadd.vv v10, v8, v9, v0.t
; CHECK-NEXT: vsadd.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v10, v8
; CHECK-NEXT: ret
@@ -689,10 +688,9 @@ define <vscale x 1 x i64> @commutable_vsaddu_vv(<vscale x 1 x i64> %0, <vscale x
; CHECK-LABEL: commutable_vsaddu_vv:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vsaddu.vv v10, v8, v9
; CHECK-NEXT: vsaddu.vv v8, v9, v8
; CHECK-NEXT: vsaddu.vv v8, v8, v9
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v10, v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vsaddu.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, iXLen %2)
@@ -707,7 +705,7 @@ define <vscale x 1 x i64> @commutable_vsaddu_vv_masked(<vscale x 1 x i64> %0, <v
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vsaddu.vv v10, v8, v9, v0.t
; CHECK-NEXT: vsaddu.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v10, v8
; CHECK-NEXT: ret
@@ -794,10 +792,9 @@ define <vscale x 1 x i64> @commutable_vsmul_vv(<vscale x 1 x i64> %0, <vscale x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsmul.vv v10, v8, v9
; CHECK-NEXT: vsmul.vv v8, v9, v8
; CHECK-NEXT: vsmul.vv v8, v8, v9
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v10, v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vsmul.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, iXLen 0, iXLen %2)
Expand All @@ -813,7 +810,7 @@ define <vscale x 1 x i64> @commutable_vsmul_vv_masked(<vscale x 1 x i64> %0, <vs
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsmul.vv v10, v8, v9, v0.t
; CHECK-NEXT: vsmul.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsmul.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v10, v8
; CHECK-NEXT: ret
17 changes: 8 additions & 9 deletions llvm/test/CodeGen/RISCV/rvv/copyprop.mir
@@ -1,29 +1,28 @@
# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v -start-after=finalize-isel | FileCheck %s
# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v,+xsfvcp -start-after=finalize-isel | FileCheck %s

--- |
define void @foo() {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: vsll.vi v9, v8, 5
; CHECK-NEXT: vmerge.vim v9, v9, -1, v0
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vssra.vi v8, v8, 2
; CHECK-NEXT: vsll.vi v8, v8, 5
; CHECK-NEXT: vmerge.vim v8, v8, -1, v0
; CHECK-NEXT: sf.vc.v.x 3, 31, v9, a1
; CHECK-NEXT: bgeu a0, zero, .LBB0_3
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: bltu a0, a2, .LBB0_4
; CHECK-NEXT: .LBB0_2: # %entry
; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: vse64.v v9, (a1)
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_3:
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: bgeu a0, a2, .LBB0_2
; CHECK-NEXT: .LBB0_4: # %entry
; CHECK-NEXT: vse64.v v9, (a1)
; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: ret
entry:
ret void
@@ -51,7 +50,7 @@ body: |
%26:vrnov0 = IMPLICIT_DEF
%25:vrnov0 = PseudoVMERGE_VIM_M1 %26, %17, -1, $v0, 1, 6 /* e64 */
%pt8:vr = IMPLICIT_DEF
%29:vr = PseudoVSSRA_VI_M1 %pt8, %3, 2, 0, 1, 6 /* e64 */, 0
%29:vr = PseudoVC_V_X_SE_M1 3, 31, %2, 1, 6 /* e64 */, implicit-def dead $vcix_state, implicit $vcix_state
%pt9:vr = IMPLICIT_DEF
%30:vr = PseudoVMV_V_I_M1 %pt9, 0, 1, 6 /* e64 */, 0
BGEU %1, $x0, %bb.2
32 changes: 12 additions & 20 deletions llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
Original file line number Diff line number Diff line change
@@ -194,15 +194,12 @@ define void @vpmerge_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale
ret void
}

; FIXME: Merge vmerge.vvm and vleffN.v
declare { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32>, ptr, i64)
define <vscale x 2 x i32> @vpmerge_vleff(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_vleff:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vle32ff.v v9, (a0)
; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu
; CHECK-NEXT: vle32ff.v v8, (a0), v0.t
Collaborator:
I'm trying to decide if this is correct. It's certainly a weird test case. You would normally want the vp.merge to use the vl produced by the vleff.

If the VL gets trimmed by the vleff, the elements between the input VL and the trimmed VL are undefined if the mask bits for those elements are non-zero. The spec allows hardware to write the active elements past the trimmed VL to any value.

I think we're ok here. If vleff trims any elements they would be undefined in %b. The vp.merge would propagate them if the mask bit is non-zero. If the mask is 0 the vp.merge would replace them with passthru. That seems to be what the combined vp.merge would do.

Contributor:
Looks correct to me.

; CHECK-NEXT: ret
%1 = zext i32 %vl to i64
%a = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32> undef, ptr %p, i64 %1)
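
To make the reviewers' reasoning above concrete, here is a minimal sketch of the pattern under discussion, assembled from the declaration and IR fragments visible in this test; the function body below is an illustrative reconstruction, not the verbatim test case:

declare { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32>, ptr, i64)
declare <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)

define <vscale x 2 x i32> @vpmerge_vleff_sketch(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
  %1 = zext i32 %vl to i64
  ; Fault-only-first load: may trim VL, leaving elements past the trimmed VL undefined.
  %a = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32> undef, ptr %p, i64 %1)
  %b = extractvalue { <vscale x 2 x i32>, i64 } %a, 0
  ; Merge under %m: inactive lanes take %passthru. Folding this into a masked
  ; vle32ff.v is sound: lanes the vleff left undefined stay undefined where the
  ; mask bit is 1, and come from %passthru where it is 0, in both forms.
  %c = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %b, <vscale x 2 x i32> %passthru, i32 %vl)
  ret <vscale x 2 x i32> %c
}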
@@ -634,14 +631,11 @@ define void @vpselect_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale
ret void
}

; FIXME: select vselect.vvm and vleffN.v
define <vscale x 2 x i32> @vpselect_vleff(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vleff:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vle32ff.v v9, (a0)
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vle32ff.v v8, (a0), v0.t
; CHECK-NEXT: ret
%1 = zext i32 %vl to i64
%a = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32> undef, ptr %p, i64 %1)
@@ -898,22 +892,20 @@ define <vscale x 2 x i32> @vpselect_trunc(<vscale x 2 x i32> %passthru, <vscale
define void @test_dag_loop() {
; CHECK-LABEL: test_dag_loop:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (zero)
; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
; CHECK-NEXT: vmclr.m v0
; CHECK-NEXT: vmv.v.i v16, 0
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vsetivli zero, 0, e8, m4, tu, mu
; CHECK-NEXT: vmv4r.v v20, v16
; CHECK-NEXT: vssubu.vx v20, v16, zero, v0.t
; CHECK-NEXT: vmv4r.v v12, v8
; CHECK-NEXT: vssubu.vx v12, v8, zero, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
; CHECK-NEXT: vmseq.vv v0, v20, v16
; CHECK-NEXT: vmseq.vv v0, v12, v8
; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
; CHECK-NEXT: vmv.v.i v16, 0
; CHECK-NEXT: vsetivli zero, 1, e16, m8, tu, ma
; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vsetivli zero, 1, e16, m8, tu, mu
; CHECK-NEXT: vle16.v v8, (zero), v0.t
; CHECK-NEXT: vsetivli zero, 0, e16, m8, ta, ma
; CHECK-NEXT: vse16.v v16, (zero)
; CHECK-NEXT: vse16.v v8, (zero)
; CHECK-NEXT: ret
entry:
%0 = call <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16.i64(<vscale x 32 x i16> undef, ptr null, i64 1)