-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV][VLOPT] Add Vector FP instructions to getOperandInfo #121609
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Although we cannot reduce the VL of these instructions (i.e. add to isSupported) we can add them to getOperandInfo to enable optimization where the FP vector instruction are users. Most of the instructions are covered by existing tests, and I added tests for the narrowing conversions because I was a little unsure whether the dest or the source was 2*SEW and 2*LMUL.
@llvm/pr-subscribers-backend-risc-v Author: Michael Maitland (michaelmaitland) ChangesAlthough we cannot reduce the VL of these instructions (i.e. add to isSupported) we can add them to getOperandInfo to enable optimization where the FP vector instruction are users. Most of the instructions are covered by existing tests, and I added tests for the narrowing conversions because I was a little unsure whether the dest or the source was 2SEW and 2LMUL. Patch is 38.41 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121609.diff 8 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 85ea5a23e8f293..01b15371ca748e 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -456,6 +456,49 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VCOMPRESS_VM:
// Vector Element Index Instruction
case RISCV::VID_V:
+ // Vector Single-Width Floating-Point Add/Subtract Instructions
+ case RISCV::VFADD_VF:
+ case RISCV::VFADD_VV:
+ case RISCV::VFSUB_VF:
+ case RISCV::VFSUB_VV:
+ case RISCV::VFRSUB_VF:
+ // Vector Single-Width Floating-Point Multiply/Divide Instructions
+ case RISCV::VFMUL_VF:
+ case RISCV::VFMUL_VV:
+ case RISCV::VFDIV_VF:
+ case RISCV::VFDIV_VV:
+ case RISCV::VFRDIV_VF:
+ // Vector Floating-Point Square-Root Instruction
+ case RISCV::VFSQRT_V:
+ // Vector Floating-Point Reciprocal Square-Root Estimate Instruction
+ case RISCV::VFRSQRT7_V:
+ // Vector Floating-Point Reciprocal Estimate Instruction
+ case RISCV::VFREC7_V:
+ // Vector Floating-Point MIN/MAX Instructions
+ case RISCV::VFMIN_VF:
+ case RISCV::VFMIN_VV:
+ case RISCV::VFMAX_VF:
+ // Vector Floating-Point Sign-Injection Instructions
+ case RISCV::VFMAX_VV:
+ case RISCV::VFSGNJ_VF:
+ case RISCV::VFSGNJ_VV:
+ case RISCV::VFSGNJN_VV:
+ case RISCV::VFSGNJN_VF:
+ case RISCV::VFSGNJX_VF:
+ case RISCV::VFSGNJX_VV:
+ // Vector Floating-Point Classify Instruction
+ case RISCV::VFCLASS_V:
+ // Vector Floating-Point Move Instruction
+ case RISCV::VFMV_V_F:
+ // Single-Width Floating-Point/Integer Type-Convert Instructions
+ case RISCV::VFCVT_XU_F_V:
+ case RISCV::VFCVT_X_F_V:
+ case RISCV::VFCVT_RTZ_XU_F_V:
+ case RISCV::VFCVT_RTZ_X_F_V:
+ case RISCV::VFCVT_F_XU_V:
+ case RISCV::VFCVT_F_X_V:
+ // Vector Floating-Point Merge Instruction
+ case RISCV::VFMERGE_VFM:
return OperandInfo(MIVLMul, MILog2SEW);
// Vector Widening Integer Add/Subtract
@@ -488,7 +531,24 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VWMACC_VX:
case RISCV::VWMACCSU_VV:
case RISCV::VWMACCSU_VX:
- case RISCV::VWMACCUS_VX: {
+ case RISCV::VWMACCUS_VX:
+ // Vector Widening Floating-Point Add/Subtract Instructions
+ // Dest EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL.
+ case RISCV::VFWADD_VV:
+ case RISCV::VFWADD_VF:
+ case RISCV::VFWSUB_VV:
+ case RISCV::VFWSUB_VF:
+ // Vector Widening Floating-Point Multiply
+ case RISCV::VFWMUL_VF:
+ case RISCV::VFWMUL_VV:
+ // Widening Floating-Point/Integer Type-Convert Instructions
+ case RISCV::VFWCVT_XU_F_V:
+ case RISCV::VFWCVT_X_F_V:
+ case RISCV::VFWCVT_RTZ_XU_F_V:
+ case RISCV::VFWCVT_RTZ_X_F_V:
+ case RISCV::VFWCVT_F_XU_V:
+ case RISCV::VFWCVT_F_X_V:
+ case RISCV::VFWCVT_F_F_V: {
unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
RISCVII::VLMUL EMUL =
IsMODef ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
@@ -503,7 +563,21 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VWADD_WV:
case RISCV::VWADD_WX:
case RISCV::VWSUB_WV:
- case RISCV::VWSUB_WX: {
+ case RISCV::VWSUB_WX:
+ // Vector Widening Floating-Point Add/Subtract Instructions
+ case RISCV::VFWADD_WF:
+ case RISCV::VFWADD_WV:
+ case RISCV::VFWSUB_WF:
+ case RISCV::VFWSUB_WV:
+ // Vector Widening Floating-Point Fused Multiply-Add Instructions
+ case RISCV::VFWMACC_VF:
+ case RISCV::VFWMACC_VV:
+ case RISCV::VFWNMACC_VF:
+ case RISCV::VFWNMACC_VV:
+ case RISCV::VFWMSAC_VF:
+ case RISCV::VFWMSAC_VV:
+ case RISCV::VFWNMSAC_VF:
+ case RISCV::VFWNMSAC_VV: {
bool IsOp1 = HasPassthru ? MO.getOperandNo() == 2 : MO.getOperandNo() == 1;
bool TwoTimes = IsMODef || IsOp1;
unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
@@ -539,7 +613,16 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VNCLIPU_WX:
case RISCV::VNCLIP_WI:
case RISCV::VNCLIP_WV:
- case RISCV::VNCLIP_WX: {
+ case RISCV::VNCLIP_WX:
+ // Narrowing Floating-Point/Integer Type-Convert Instructions
+ case RISCV::VFNCVT_XU_F_W:
+ case RISCV::VFNCVT_X_F_W:
+ case RISCV::VFNCVT_RTZ_XU_F_W:
+ case RISCV::VFNCVT_RTZ_X_F_W:
+ case RISCV::VFNCVT_F_XU_W:
+ case RISCV::VFNCVT_F_X_W:
+ case RISCV::VFNCVT_F_F_W:
+ case RISCV::VFNCVT_ROD_F_F_W: {
bool IsOp1 = HasPassthru ? MO.getOperandNo() == 2 : MO.getOperandNo() == 1;
bool TwoTimes = IsOp1;
unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
@@ -615,7 +698,19 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VMADC_VI:
case RISCV::VMADC_VX:
case RISCV::VMSBC_VV:
- case RISCV::VMSBC_VX: {
+ case RISCV::VMSBC_VX:
+ // 13.13. Vector Floating-Point Compare Instructions
+ // Dest EEW=1 and EMUL=(EEW/SEW)*LMUL. Source EEW=SEW EMUL=LMUL.
+ case RISCV::VMFEQ_VF:
+ case RISCV::VMFEQ_VV:
+ case RISCV::VMFNE_VF:
+ case RISCV::VMFNE_VV:
+ case RISCV::VMFLT_VF:
+ case RISCV::VMFLT_VV:
+ case RISCV::VMFLE_VF:
+ case RISCV::VMFLE_VV:
+ case RISCV::VMFGT_VF:
+ case RISCV::VMFGE_VF: {
if (IsMODef)
return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
return OperandInfo(MIVLMul, MILog2SEW);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll
index 6cd3884f029fdd..a1d548e1878b44 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll
@@ -143,9 +143,8 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32(<vscale x 1 x half> %va, half %b,
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
@@ -170,9 +169,8 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32_commute(<vscale x 1 x half> %va,
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v11, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v11, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
@@ -198,9 +196,8 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32_unmasked(<vscale x 1 x half> %va,
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
@@ -225,9 +222,8 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32_tu(<vscale x 1 x half> %va, half
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_tu:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
@@ -254,9 +250,8 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32_commute_tu(<vscale x 1 x half> %v
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute_tu:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
@@ -283,9 +278,8 @@ define <vscale x 1 x float> @vfmacc_vf_nxv1f32_unmasked_tu(<vscale x 1 x half> %
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked_tu:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, tu, ma
@@ -362,9 +356,8 @@ define <vscale x 2 x float> @vfmacc_vf_nxv2f32(<vscale x 2 x half> %va, half %b,
; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
@@ -389,9 +382,8 @@ define <vscale x 2 x float> @vfmacc_vf_nxv2f32_unmasked(<vscale x 2 x half> %va,
; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
@@ -468,9 +460,8 @@ define <vscale x 4 x float> @vfmacc_vf_nxv4f32(<vscale x 4 x half> %va, half %b,
; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -495,9 +486,8 @@ define <vscale x 4 x float> @vfmacc_vf_nxv4f32_unmasked(<vscale x 4 x half> %va,
; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -574,9 +564,8 @@ define <vscale x 8 x float> @vfmacc_vf_nxv8f32(<vscale x 8 x half> %va, half %b,
; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -601,9 +590,8 @@ define <vscale x 8 x float> @vfmacc_vf_nxv8f32_unmasked(<vscale x 8 x half> %va,
; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -696,9 +684,8 @@ define <vscale x 16 x float> @vfmacc_vf_nxv16f32(<vscale x 16 x half> %va, half
; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v4, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v4, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
@@ -723,9 +710,8 @@ define <vscale x 16 x float> @vfmacc_vf_nxv16f32_unmasked(<vscale x 16 x half> %
; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll
index c92a79e49c1642..94b80075ac14c5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll
@@ -120,9 +120,8 @@ define <vscale x 1 x float> @vmfsac_vf_nxv1f32(<vscale x 1 x half> %a, half %b,
; ZVFHMIN-LABEL: vmfsac_vf_nxv1f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
@@ -148,9 +147,8 @@ define <vscale x 1 x float> @vmfsac_vf_nxv1f32_commute(<vscale x 1 x half> %a, h
; ZVFHMIN-LABEL: vmfsac_vf_nxv1f32_commute:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v11, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v11, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
@@ -177,9 +175,8 @@ define <vscale x 1 x float> @vmfsac_vf_nxv1f32_unmasked(<vscale x 1 x half> %a,
; ZVFHMIN-LABEL: vmfsac_vf_nxv1f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
@@ -255,9 +252,8 @@ define <vscale x 2 x float> @vmfsac_vf_nxv2f32(<vscale x 2 x half> %a, half %b,
; ZVFHMIN-LABEL: vmfsac_vf_nxv2f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
@@ -283,9 +279,8 @@ define <vscale x 2 x float> @vmfsac_vf_nxv2f32_commute(<vscale x 2 x half> %a, h
; ZVFHMIN-LABEL: vmfsac_vf_nxv2f32_commute:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v11, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v11, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
@@ -312,9 +307,8 @@ define <vscale x 2 x float> @vmfsac_vf_nxv2f32_unmasked(<vscale x 2 x half> %a,
; ZVFHMIN-LABEL: vmfsac_vf_nxv2f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
@@ -392,9 +386,8 @@ define <vscale x 4 x float> @vmfsac_vf_nxv4f32(<vscale x 4 x half> %a, half %b,
; ZVFHMIN-LABEL: vmfsac_vf_nxv4f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -420,9 +413,8 @@ define <vscale x 4 x float> @vmfsac_vf_nxv4f32_commute(<vscale x 4 x half> %a, h
; ZVFHMIN-LABEL: vmfsac_vf_nxv4f32_commute:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -449,9 +441,8 @@ define <vscale x 4 x float> @vmfsac_vf_nxv4f32_unmasked(<vscale x 4 x half> %a,
; ZVFHMIN-LABEL: vmfsac_vf_nxv4f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v12, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -529,9 +520,8 @@ define <vscale x 8 x float> @vmfsac_vf_nxv8f32(<vscale x 8 x half> %a, half %b,
; ZVFHMIN-LABEL: vmfsac_vf_nxv8f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -557,9 +547,8 @@ define <vscale x 8 x float> @vmfsac_vf_nxv8f32_commute(<vscale x 8 x half> %a, h
; ZVFHMIN-LABEL: vmfsac_vf_nxv8f32_commute:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -586,9 +575,8 @@ define <vscale x 8 x float> @vmfsac_vf_nxv8f32_unmasked(<vscale x 8 x half> %a,
; ZVFHMIN-LABEL: vmfsac_vf_nxv8f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll
index 0a0bc6696a9f96..ea457069bcdf1d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll
@@ -71,9 +71,8 @@ define <vscale x 1 x float> @vfnmacc_vf_nxv1f32(<vscale x 1 x half> %a, half %b,
; ZVFHMIN-LABEL: vfnmacc_vf_nxv1f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
@@ -101,9 +100,8 @@ define <vscale x 1 x float> @vfnmacc_vf_nxv1f32_commute(<vscale x 1 x half> %a,
; ZVFHMIN-LABEL: vfnmacc_vf_nxv1f32_commute:
...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Although we cannot reduce the VL of these instructions (i.e. add to isSupported) we can add them to getOperandInfo to enable optimization where the FP vector instruction are users. Most of the instructions are covered by existing tests, and I added tests for the narrowing conversions because I was a little unsure whether the dest or the source was 2SEW and 2LMUL.