[RISCV][VLOPT] Add support for Single-Width Floating-Point Fused Multiply-Add Instructions #125652
Conversation
[RISCV][VLOPT] Add support for Single-Width Floating-Point Fused Multiply-Add Instructions

Strangely enough we already had support for the widening vfwmadd and friends, but not vfmadd yet.
@llvm/pr-subscribers-backend-risc-v

Author: Luke Lau (lukel97)

Changes

These instructions have EEW=SEW for all operands.

Nevermind, we only had op info support!

Full diff: https://github.com/llvm/llvm-project/pull/125652.diff

2 Files Affected:
- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
- llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
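For context, here is a minimal sketch of the pattern the new tests below exercise (the function name is hypothetical, and iXLen is the test harness's placeholder for i32/i64): the FMA runs at VLMAX, but its only user runs at %vl, and because every operand of these instructions has EEW=SEW, the VL optimizer can shrink the FMA's VL to match.

define <vscale x 4 x float> @fma_then_add(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
  ; The FMA is emitted with AVL = VLMAX (iXLen -1)...
  %1 = call <vscale x 4 x float> @llvm.riscv.vfmadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
  ; ...but its only user is this vfadd at %vl, so the optimizer can reduce
  ; the vfmadd's VL to %vl: elements past %vl are never read.
  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
  ret <vscale x 4 x float> %2
}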
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 0960245b8362d8..d4829bced24709 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -451,6 +451,23 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
case RISCV::VFDIV_VF:
case RISCV::VFDIV_VV:
case RISCV::VFRDIV_VF:
+ // Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+ case RISCV::VFMACC_VV:
+ case RISCV::VFMACC_VF:
+ case RISCV::VFNMACC_VV:
+ case RISCV::VFNMACC_VF:
+ case RISCV::VFMSAC_VV:
+ case RISCV::VFMSAC_VF:
+ case RISCV::VFNMSAC_VV:
+ case RISCV::VFNMSAC_VF:
+ case RISCV::VFMADD_VV:
+ case RISCV::VFMADD_VF:
+ case RISCV::VFNMADD_VV:
+ case RISCV::VFNMADD_VF:
+ case RISCV::VFMSUB_VV:
+ case RISCV::VFMSUB_VF:
+ case RISCV::VFNMSUB_VV:
+ case RISCV::VFNMSUB_VF:
// Vector Floating-Point Square-Root Instruction
case RISCV::VFSQRT_V:
// Vector Floating-Point Reciprocal Square-Root Estimate Instruction
@@ -1016,6 +1033,23 @@ static bool isSupportedInstr(const MachineInstr &MI) {
// Vector Widening Floating-Point Multiply
case RISCV::VFWMUL_VF:
case RISCV::VFWMUL_VV:
+ // Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+ case RISCV::VFMACC_VV:
+ case RISCV::VFMACC_VF:
+ case RISCV::VFNMACC_VV:
+ case RISCV::VFNMACC_VF:
+ case RISCV::VFMSAC_VV:
+ case RISCV::VFMSAC_VF:
+ case RISCV::VFNMSAC_VV:
+ case RISCV::VFNMSAC_VF:
+ case RISCV::VFMADD_VV:
+ case RISCV::VFMADD_VF:
+ case RISCV::VFNMADD_VV:
+ case RISCV::VFNMADD_VF:
+ case RISCV::VFMSUB_VV:
+ case RISCV::VFMSUB_VF:
+ case RISCV::VFNMSUB_VV:
+ case RISCV::VFNMSUB_VF:
// Vector Floating-Point MIN/MAX Instructions
case RISCV::VFMIN_VF:
case RISCV::VFMIN_VV:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 163166c1df7434..b2166da42a5c92 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -4031,3 +4031,403 @@ define <vscale x 4 x float> @vfsgnjx_vf(<vscale x 4 x float> %a, float %b, iXLen
%2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl)
ret <vscale x 4 x float> %2
}
+
+define <vscale x 4 x float> @vfmacc_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmacc_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmacc.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmacc_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmacc.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmacc(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmacc_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmacc_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmacc.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmacc_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmacc.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmacc(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmacc_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmacc_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmacc.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmacc_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmacc.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmacc(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmacc_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmacc_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmacc.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmacc_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmacc.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmacc(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmsac_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmsac_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmsac.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmsac_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmsac.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmsac(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmsac_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmsac_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmsac.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmsac_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmsac.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmsac(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmsac_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmsac_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmsac.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmsac_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmsac.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsac(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmsac_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmsac_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmsac.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmsac_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmsac.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsac(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmadd_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmadd_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmadd.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmadd_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmadd.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmadd_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmadd_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmadd.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmadd_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmadd.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmadd(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmadd_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmadd_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmadd.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmadd_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmadd.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmadd_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmadd_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmadd.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmadd_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmadd.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmadd(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmsub_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmsub_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmsub.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmsub_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmsub.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmsub(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmsub_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmsub_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmsub.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmsub_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmsub.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmsub(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmsub_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmsub_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmsub.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmsub_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmsub.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsub(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmsub_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmsub_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmsub.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmsub_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmsub.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsub(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
%1 = call <vscale x 4 x float> @llvm.riscv.vfmacc(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
Pass 7 as the frm to avoid the fsrmi/fsrm instructions.
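A sketch of the suggested change for one of the new tests, assuming frm operand value 7 selects the dynamic rounding mode, as the pre-existing vfadd tests in this file already pass (the function name is hypothetical):

define <vscale x 4 x float> @vfmacc_vv_dyn(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
  ; With frm = 7 (DYN) the backend does not need to switch rounding modes,
  ; so the fsrmi a1, 3 / fsrm a1 pair in the checks above should disappear.
  %1 = call <vscale x 4 x float> @llvm.riscv.vfmacc(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
  ret <vscale x 4 x float> %2
}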
LGTM
LGTM
LLVM Buildbot has detected a new failure on one of its builders. Full details are available at: https://lab.llvm.org/buildbot/#/builders/18/builds/10970
[RISCV][VLOPT] Add support for Single-Width Floating-Point Fused Multiply-Add Instructions (llvm#125652)

These instructions have EEW=SEW for all operands.