[RISCV][VLOPT] Add support for Single-Width Floating-Point Fused Multiply-Add Instructions #125652
Conversation
[RISCV][VLOPT] Add support for Single-Width Floating-Point Fused Multiply-Add Instructions

Strangely enough we already had support for the widening vfwmadd and friends, but not vfmadd yet.
@llvm/pr-subscribers-backend-risc-v

Author: Luke Lau (lukel97)

Changes

These instructions have EEW=SEW for all operands.

Nevermind, we only had op info support!

Full diff: https://github.com/llvm/llvm-project/pull/125652.diff

2 Files Affected:
- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
- llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
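For context, here is a minimal sketch of the pattern the new tests below exercise (the function name is hypothetical, and iXLen is the test harness's placeholder for i32/i64): the FMA runs at VLMAX, but its only user runs at %vl, and because every operand of these instructions has EEW=SEW, the VL optimizer can shrink the FMA's VL to match.

define <vscale x 4 x float> @fma_then_add(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
  ; The FMA is emitted with AVL = VLMAX (iXLen -1)...
  %1 = call <vscale x 4 x float> @llvm.riscv.vfmadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
  ; ...but its only user is this vfadd at %vl, so the optimizer can reduce
  ; the vfmadd's VL to %vl: elements past %vl are never read.
  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
  ret <vscale x 4 x float> %2
}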
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 0960245b8362d8..d4829bced24709 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -451,6 +451,23 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
case RISCV::VFDIV_VF:
case RISCV::VFDIV_VV:
case RISCV::VFRDIV_VF:
+ // Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+ case RISCV::VFMACC_VV:
+ case RISCV::VFMACC_VF:
+ case RISCV::VFNMACC_VV:
+ case RISCV::VFNMACC_VF:
+ case RISCV::VFMSAC_VV:
+ case RISCV::VFMSAC_VF:
+ case RISCV::VFNMSAC_VV:
+ case RISCV::VFNMSAC_VF:
+ case RISCV::VFMADD_VV:
+ case RISCV::VFMADD_VF:
+ case RISCV::VFNMADD_VV:
+ case RISCV::VFNMADD_VF:
+ case RISCV::VFMSUB_VV:
+ case RISCV::VFMSUB_VF:
+ case RISCV::VFNMSUB_VV:
+ case RISCV::VFNMSUB_VF:
// Vector Floating-Point Square-Root Instruction
case RISCV::VFSQRT_V:
// Vector Floating-Point Reciprocal Square-Root Estimate Instruction
@@ -1016,6 +1033,23 @@ static bool isSupportedInstr(const MachineInstr &MI) {
// Vector Widening Floating-Point Multiply
case RISCV::VFWMUL_VF:
case RISCV::VFWMUL_VV:
+ // Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+ case RISCV::VFMACC_VV:
+ case RISCV::VFMACC_VF:
+ case RISCV::VFNMACC_VV:
+ case RISCV::VFNMACC_VF:
+ case RISCV::VFMSAC_VV:
+ case RISCV::VFMSAC_VF:
+ case RISCV::VFNMSAC_VV:
+ case RISCV::VFNMSAC_VF:
+ case RISCV::VFMADD_VV:
+ case RISCV::VFMADD_VF:
+ case RISCV::VFNMADD_VV:
+ case RISCV::VFNMADD_VF:
+ case RISCV::VFMSUB_VV:
+ case RISCV::VFMSUB_VF:
+ case RISCV::VFNMSUB_VV:
+ case RISCV::VFNMSUB_VF:
// Vector Floating-Point MIN/MAX Instructions
case RISCV::VFMIN_VF:
case RISCV::VFMIN_VV:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 163166c1df7434..b2166da42a5c92 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -4031,3 +4031,403 @@ define <vscale x 4 x float> @vfsgnjx_vf(<vscale x 4 x float> %a, float %b, iXLen
%2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl)
ret <vscale x 4 x float> %2
}
+
+define <vscale x 4 x float> @vfmacc_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmacc_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmacc.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmacc_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmacc.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmacc(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmacc_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmacc_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmacc.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmacc_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmacc.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmacc(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmacc_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmacc_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmacc.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmacc_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmacc.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmacc(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmacc_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmacc_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmacc.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmacc_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmacc.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmacc(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmsac_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmsac_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmsac.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmsac_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmsac.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmsac(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmsac_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmsac_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmsac.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmsac_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmsac.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmsac(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmsac_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmsac_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmsac.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmsac_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmsac.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsac(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmsac_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmsac_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmsac.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmsac_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmsac.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsac(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmadd_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmadd_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmadd.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmadd_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmadd.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmadd_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmadd_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmadd.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmadd_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmadd.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmadd(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmadd_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmadd_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmadd.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmadd_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmadd.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmadd_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmadd_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmadd.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmadd_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmadd.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmadd(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmsub_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmsub_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmsub.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmsub_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmsub.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmsub(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmsub_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmsub_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmsub.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmsub_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmsub.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmsub(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmsub_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmsub_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmsub.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmsub_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmsub.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsub(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmsub_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmsub_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmsub.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmsub_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmsub.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsub(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
%1 = call <vscale x 4 x float> @llvm.riscv.vfmacc(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
Pass 7 as the frm to avoid the fsrmi/fsrm instructions.
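A sketch of the suggested change for one of the new tests, assuming frm operand value 7 selects the dynamic rounding mode, as the pre-existing vfadd tests in this file already pass (the function name is hypothetical):

define <vscale x 4 x float> @vfmacc_vv_dyn(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
  ; With frm = 7 (DYN) the backend does not need to switch rounding modes,
  ; so the fsrmi a1, 3 / fsrm a1 pair in the checks above should disappear.
  %1 = call <vscale x 4 x float> @llvm.riscv.vfmacc(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
  ret <vscale x 4 x float> %2
}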
LGTM
LGTM
LLVM Buildbot has detected a new failure on one of its builders. Full details are available at: https://lab.llvm.org/buildbot/#/builders/18/builds/10970
[RISCV][VLOPT] Add support for Single-Width Floating-Point Fused Multiply-Add Instructions (llvm#125652)

These instructions have EEW=SEW for all operands.