-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV][VLOPT] Add support for Widening Floating-Point Fused Multiply-Add Instructions #126485
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Luke Lau (lukel97) Changes: We already had getOperandInfo support, so this marks the instructions as supported in isCandidate. It also adds support for vfwmaccbf16.v{v,f} from zvfbfwma. Full diff: https://github.com/llvm/llvm-project/pull/126485.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index d4829bced247091..6c19a8fd32d42e7 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -545,6 +545,8 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
case RISCV::VFWMSAC_VV:
case RISCV::VFWNMSAC_VF:
case RISCV::VFWNMSAC_VV:
+ case RISCV::VFWMACCBF16_VV:
+ case RISCV::VFWMACCBF16_VF:
// Vector Widening Floating-Point Add/Subtract Instructions
// Dest EEW=2*SEW. Source EEW=SEW.
case RISCV::VFWADD_VV:
@@ -1050,6 +1052,17 @@ static bool isSupportedInstr(const MachineInstr &MI) {
case RISCV::VFMSUB_VF:
case RISCV::VFNMSUB_VV:
case RISCV::VFNMSUB_VF:
+ // Vector Widening Floating-Point Fused Multiply-Add Instructions
+ case RISCV::VFWMACC_VV:
+ case RISCV::VFWMACC_VF:
+ case RISCV::VFWNMACC_VV:
+ case RISCV::VFWNMACC_VF:
+ case RISCV::VFWMSAC_VV:
+ case RISCV::VFWMSAC_VF:
+ case RISCV::VFWNMSAC_VV:
+ case RISCV::VFWNMSAC_VF:
+ case RISCV::VFWMACCBF16_VV:
+ case RISCV::VFWMACCBF16_VF:
// Vector Floating-Point MIN/MAX Instructions
case RISCV::VFMIN_VF:
case RISCV::VFMIN_VV:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 053f1209cf21464..f4591a191c8b76c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
; The purpose of this file is to check the behavior of specific instructions as it relates to the VL optimizer
@@ -4351,3 +4351,213 @@ define <vscale x 4 x float> @vfnmsub_vf(<vscale x 4 x float> %a, float %b, <vsca
%2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
ret <vscale x 4 x float> %2
}
+
+define <vscale x 4 x double> @vfwmacc_vv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vfwmacc_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfwmacc.vv v8, v12, v14
+; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfwmacc_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfwmacc.vv v8, v12, v14
+; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v16
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
+ ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwmacc_vf(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vfwmacc_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfwmacc.vf v8, fa0, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfwmacc_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfwmacc.vf v8, fa0, v12
+; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v16
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
+ ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwnmacc_vv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vfwnmacc_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfwnmacc.vv v8, v12, v14
+; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfwnmacc_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfwnmacc.vv v8, v12, v14
+; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v16
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x double> @llvm.riscv.vfwnmacc(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
+ ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwnmacc_vf(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vfwnmacc_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfwnmacc.vf v8, fa0, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfwnmacc_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfwnmacc.vf v8, fa0, v12
+; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v16
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x double> @llvm.riscv.vfwnmacc(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
+ ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwmsac_vv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vfwmsac_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfwmsac.vv v8, v12, v14
+; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfwmsac_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfwmsac.vv v8, v12, v14
+; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v16
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x double> @llvm.riscv.vfwmsac(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
+ ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwmsac_vf(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vfwmsac_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfwmsac.vf v8, fa0, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfwmsac_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfwmsac.vf v8, fa0, v12
+; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v16
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x double> @llvm.riscv.vfwmsac(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
+ ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwnmsac_vv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vfwnmsac_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfwnmsac.vv v8, v12, v14
+; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfwnmsac_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfwnmsac.vv v8, v12, v14
+; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v16
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x double> @llvm.riscv.vfwnmsac(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
+ ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x double> @vfwnmsac_vf(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vfwnmsac_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfwnmsac.vf v8, fa0, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfwnmsac_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfwnmsac.vf v8, fa0, v12
+; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v16
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x double> @llvm.riscv.vfwnmsac(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
+ ret <vscale x 4 x double> %2
+}
+
+define <vscale x 4 x float> @vfwmaccbf16_vv(<vscale x 4 x float> %a, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, <vscale x 4 x float> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vfwmaccbf16_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT: vfwmaccbf16.vv v8, v10, v11
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfwmaccbf16_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; VLOPT-NEXT: vfwmaccbf16.vv v8, v10, v11
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfwmaccbf16(<vscale x 4 x float> %a, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, iXLen 7, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %d, iXLen 7, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfwmaccbf16_vf(<vscale x 4 x float> %a, bfloat %b, <vscale x 4 x bfloat> %c, <vscale x 4 x float> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vfwmaccbf16_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT: vfwmaccbf16.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfwmaccbf16_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; VLOPT-NEXT: vfwmaccbf16.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfwmaccbf16(<vscale x 4 x float> %a, bfloat %b, <vscale x 4 x bfloat> %c, iXLen 7, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %d, iXLen 7, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/14380 Here is the relevant piece of the build log for reference.
|
[RISCV][VLOPT] Add support for Widening Floating-Point Fused Multiply-Add Instructions (llvm#126485) We already had getOperandInfo support, so this marks the instructions as supported in isCandidate. It also adds support for vfwmaccbf16.v{v,f} from zvfbfwma.
We already had getOperandInfo support, so this marks the instructions as supported in isCandidate. It also adds support for vfwmaccbf16.v{v,f} from zvfbfwma.