Skip to content

[RISCV][VLOPT] Add support for Single-Width Floating-Point Fused Multiply-Add Instructions #125652

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 5, 2025

Conversation

lukel97
Copy link
Contributor

@lukel97 lukel97 commented Feb 4, 2025

These instructions have EEW=SEW for all operands

Strangely enough we already had support for widening vfwmadd and friends, but not vfmadd yet.

Nevermind, we only had op info support!

…iply-Add Instructions

Strangely enough we already had support for widening vfwmadd and friends, but not vfmadd yet.
@llvmbot
Copy link
Member

llvmbot commented Feb 4, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Luke Lau (lukel97)

Changes

These instructions have EEW=SEW for all operands

Strangely enough we already had support for widening vfwmadd and friends, but not vfmadd yet.

Nevermind, we only had op info support!


Full diff: https://github.com/llvm/llvm-project/pull/125652.diff

2 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp (+34)
  • (modified) llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll (+400)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 0960245b8362d8..d4829bced24709 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -451,6 +451,23 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
   case RISCV::VFDIV_VF:
   case RISCV::VFDIV_VV:
   case RISCV::VFRDIV_VF:
+  // Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+  case RISCV::VFMACC_VV:
+  case RISCV::VFMACC_VF:
+  case RISCV::VFNMACC_VV:
+  case RISCV::VFNMACC_VF:
+  case RISCV::VFMSAC_VV:
+  case RISCV::VFMSAC_VF:
+  case RISCV::VFNMSAC_VV:
+  case RISCV::VFNMSAC_VF:
+  case RISCV::VFMADD_VV:
+  case RISCV::VFMADD_VF:
+  case RISCV::VFNMADD_VV:
+  case RISCV::VFNMADD_VF:
+  case RISCV::VFMSUB_VV:
+  case RISCV::VFMSUB_VF:
+  case RISCV::VFNMSUB_VV:
+  case RISCV::VFNMSUB_VF:
   // Vector Floating-Point Square-Root Instruction
   case RISCV::VFSQRT_V:
   // Vector Floating-Point Reciprocal Square-Root Estimate Instruction
@@ -1016,6 +1033,23 @@ static bool isSupportedInstr(const MachineInstr &MI) {
   // Vector Widening Floating-Point Multiply
   case RISCV::VFWMUL_VF:
   case RISCV::VFWMUL_VV:
+  // Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+  case RISCV::VFMACC_VV:
+  case RISCV::VFMACC_VF:
+  case RISCV::VFNMACC_VV:
+  case RISCV::VFNMACC_VF:
+  case RISCV::VFMSAC_VV:
+  case RISCV::VFMSAC_VF:
+  case RISCV::VFNMSAC_VV:
+  case RISCV::VFNMSAC_VF:
+  case RISCV::VFMADD_VV:
+  case RISCV::VFMADD_VF:
+  case RISCV::VFNMADD_VV:
+  case RISCV::VFNMADD_VF:
+  case RISCV::VFMSUB_VV:
+  case RISCV::VFMSUB_VF:
+  case RISCV::VFNMSUB_VV:
+  case RISCV::VFNMSUB_VF:
   // Vector Floating-Point MIN/MAX Instructions
   case RISCV::VFMIN_VF:
   case RISCV::VFMIN_VV:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 163166c1df7434..b2166da42a5c92 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -4031,3 +4031,403 @@ define <vscale x 4 x float> @vfsgnjx_vf(<vscale x 4 x float> %a, float %b, iXLen
   %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl)
   ret <vscale x 4 x float> %2
 }
+
+define <vscale x 4 x float> @vfmacc_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmacc_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    fsrmi a1, 3
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfmacc.vv v8, v10, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT:    fsrm a1
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfmacc_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    fsrmi a1, 3
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfmacc.vv v8, v10, v12
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v12
+; VLOPT-NEXT:    fsrm a1
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfmacc(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmacc_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmacc_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    fsrmi a1, 3
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfmacc.vf v8, fa0, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    fsrm a1
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfmacc_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    fsrmi a1, 3
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfmacc.vf v8, fa0, v10
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v10
+; VLOPT-NEXT:    fsrm a1
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfmacc(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmacc_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmacc_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    fsrmi a1, 3
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfnmacc.vv v8, v10, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT:    fsrm a1
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfnmacc_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    fsrmi a1, 3
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfnmacc.vv v8, v10, v12
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v12
+; VLOPT-NEXT:    fsrm a1
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfnmacc(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmacc_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmacc_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    fsrmi a1, 3
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfnmacc.vf v8, fa0, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    fsrm a1
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfnmacc_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    fsrmi a1, 3
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfnmacc.vf v8, fa0, v10
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v10
+; VLOPT-NEXT:    fsrm a1
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfnmacc(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmsac_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmsac_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    fsrmi a1, 3
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfmsac.vv v8, v10, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT:    fsrm a1
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfmsac_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    fsrmi a1, 3
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfmsac.vv v8, v10, v12
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v12
+; VLOPT-NEXT:    fsrm a1
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfmsac(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmsac_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmsac_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    fsrmi a1, 3
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfmsac.vf v8, fa0, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    fsrm a1
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfmsac_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    fsrmi a1, 3
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfmsac.vf v8, fa0, v10
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v10
+; VLOPT-NEXT:    fsrm a1
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfmsac(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmsac_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmsac_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    fsrmi a1, 3
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfnmsac.vv v8, v10, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT:    fsrm a1
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfnmsac_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    fsrmi a1, 3
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfnmsac.vv v8, v10, v12
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v12
+; VLOPT-NEXT:    fsrm a1
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsac(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmsac_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmsac_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    fsrmi a1, 3
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfnmsac.vf v8, fa0, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    fsrm a1
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfnmsac_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    fsrmi a1, 3
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfnmsac.vf v8, fa0, v10
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v10
+; VLOPT-NEXT:    fsrm a1
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsac(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmadd_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmadd_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    fsrmi a1, 3
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfmadd.vv v8, v10, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT:    fsrm a1
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfmadd_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    fsrmi a1, 3
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfmadd.vv v8, v10, v12
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v12
+; VLOPT-NEXT:    fsrm a1
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfmadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmadd_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmadd_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    fsrmi a1, 3
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfmadd.vf v8, fa0, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    fsrm a1
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfmadd_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    fsrmi a1, 3
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfmadd.vf v8, fa0, v10
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v10
+; VLOPT-NEXT:    fsrm a1
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfmadd(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmadd_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmadd_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    fsrmi a1, 3
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfnmadd.vv v8, v10, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT:    fsrm a1
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfnmadd_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    fsrmi a1, 3
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfnmadd.vv v8, v10, v12
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v12
+; VLOPT-NEXT:    fsrm a1
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfnmadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmadd_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmadd_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    fsrmi a1, 3
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfnmadd.vf v8, fa0, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    fsrm a1
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfnmadd_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    fsrmi a1, 3
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfnmadd.vf v8, fa0, v10
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v10
+; VLOPT-NEXT:    fsrm a1
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfnmadd(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmsub_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmsub_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    fsrmi a1, 3
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfmsub.vv v8, v10, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT:    fsrm a1
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfmsub_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    fsrmi a1, 3
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfmsub.vv v8, v10, v12
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v12
+; VLOPT-NEXT:    fsrm a1
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfmsub(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmsub_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmsub_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    fsrmi a1, 3
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfmsub.vf v8, fa0, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    fsrm a1
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfmsub_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    fsrmi a1, 3
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfmsub.vf v8, fa0, v10
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v10
+; VLOPT-NEXT:    fsrm a1
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfmsub(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmsub_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmsub_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    fsrmi a1, 3
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfnmsub.vv v8, v10, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT:    fsrm a1
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfnmsub_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    fsrmi a1, 3
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfnmsub.vv v8, v10, v12
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v12
+; VLOPT-NEXT:    fsrm a1
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsub(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmsub_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmsub_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    fsrmi a1, 3
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vfnmsub.vf v8, fa0, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    fsrm a1
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfnmsub_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    fsrmi a1, 3
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT:    vfnmsub.vf v8, fa0, v10
+; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v10
+; VLOPT-NEXT:    fsrm a1
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsub(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}

; VLOPT-NEXT: vfadd.vv v8, v8, v12
; VLOPT-NEXT: fsrm a1
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x float> @llvm.riscv.vfmacc(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pass 7 as the frm to avoid fsr instructions

Copy link
Collaborator

@preames preames left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

Copy link
Contributor

@michaelmaitland michaelmaitland left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@lukel97 lukel97 merged commit 19a4135 into llvm:main Feb 5, 2025
8 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Feb 5, 2025

LLVM Buildbot has detected a new failure on builder lldb-arm-ubuntu running on linaro-lldb-arm-ubuntu while building llvm at step 6 "test".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/18/builds/10970

Here is the relevant piece of the build log for the reference
Step 6 (test) failure: build (failure)
...
PASS: lldb-api :: terminal/TestSTTYBeforeAndAfter.py (1125 of 2891)
PASS: lldb-api :: test_utils/TestDecorators.py (1126 of 2891)
PASS: lldb-api :: test_utils/TestInlineTest.py (1127 of 2891)
PASS: lldb-api :: test_utils/TestPExpectTest.py (1128 of 2891)
PASS: lldb-api :: test_utils/base/TestBaseTest.py (1129 of 2891)
PASS: lldb-api :: python_api/watchpoint/watchlocation/TestTargetWatchAddress.py (1130 of 2891)
PASS: lldb-api :: terminal/TestEditline.py (1131 of 2891)
UNSUPPORTED: lldb-api :: tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py (1132 of 2891)
PASS: lldb-api :: tools/lldb-dap/breakpoint/TestDAP_breakpointLocations.py (1133 of 2891)
PASS: lldb-api :: tools/lldb-dap/attach/TestDAP_attachByPortNum.py (1134 of 2891)
FAIL: lldb-api :: tools/lldb-dap/attach/TestDAP_attach.py (1135 of 2891)
******************** TEST 'lldb-api :: tools/lldb-dap/attach/TestDAP_attach.py' FAILED ********************
Script:
--
/usr/bin/python3.10 /home/tcwg-buildbot/worker/lldb-arm-ubuntu/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./lib --env LLVM_INCLUDE_DIR=/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/include --env LLVM_TOOLS_DIR=/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./bin --arch armv8l --build-dir /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex --lldb-module-cache-dir /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./bin/lldb --compiler /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./bin/clang --dsymutil /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./bin/dsymutil --make /usr/bin/gmake --llvm-tools-dir /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./bin --lldb-obj-root /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/tools/lldb --lldb-libs-dir /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./lib /home/tcwg-buildbot/worker/lldb-arm-ubuntu/llvm-project/lldb/test/API/tools/lldb-dap/attach -p TestDAP_attach.py
--
Exit Code: 1

Command Output (stdout):
--
lldb version 21.0.0git (https://github.com/llvm/llvm-project.git revision 19a41358ff859f8d4d71659ea2715f84b682502c)
  clang revision 19a41358ff859f8d4d71659ea2715f84b682502c
  llvm revision 19a41358ff859f8d4d71659ea2715f84b682502c
Skipping the following test categories: ['libc++', 'dsym', 'gmodules', 'debugserver', 'objc']
========= DEBUG ADAPTER PROTOCOL LOGS =========
1738722760.066297293 --> 
Content-Length: 344

{
  "arguments": {
    "adapterID": "lldb-native",
    "clientID": "vscode",
    "columnsStartAt1": true,
    "linesStartAt1": true,
    "locale": "en-us",
    "pathFormat": "path",
    "sourceInitFile": false,
    "supportsRunInTerminalRequest": true,
    "supportsStartDebuggingRequest": true,
    "supportsVariablePaging": true,
    "supportsVariableType": true
  },
  "command": "initialize",
  "seq": 1,
  "type": "request"
}
1738722760.070298672 <-- 
Content-Length: 1631


Icohedron pushed a commit to Icohedron/llvm-project that referenced this pull request Feb 11, 2025
…iply-Add Instructions (llvm#125652)

These instructions have EEW=SEW for all operands.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants