Skip to content

Commit 771f6b9

Browse files
authored
[RISCV][VLOPT] Add support for Widening Floating-Point Fused Multiply-Add Instructions (#126485)
We already had getOperandInfo support, so this marks the instructions as supported in isCandidate. It also adds support for vfwmaccbf16.v{v,f} from zvfbfwma.
1 parent 71ee257 commit 771f6b9

File tree

2 files changed

+227
-4
lines changed

2 files changed

+227
-4
lines changed

llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -545,6 +545,8 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
545545
case RISCV::VFWMSAC_VV:
546546
case RISCV::VFWNMSAC_VF:
547547
case RISCV::VFWNMSAC_VV:
548+
case RISCV::VFWMACCBF16_VV:
549+
case RISCV::VFWMACCBF16_VF:
548550
// Vector Widening Floating-Point Add/Subtract Instructions
549551
// Dest EEW=2*SEW. Source EEW=SEW.
550552
case RISCV::VFWADD_VV:
@@ -1050,6 +1052,17 @@ static bool isSupportedInstr(const MachineInstr &MI) {
10501052
case RISCV::VFMSUB_VF:
10511053
case RISCV::VFNMSUB_VV:
10521054
case RISCV::VFNMSUB_VF:
1055+
// Vector Widening Floating-Point Fused Multiply-Add Instructions
1056+
case RISCV::VFWMACC_VV:
1057+
case RISCV::VFWMACC_VF:
1058+
case RISCV::VFWNMACC_VV:
1059+
case RISCV::VFWNMACC_VF:
1060+
case RISCV::VFWMSAC_VV:
1061+
case RISCV::VFWMSAC_VF:
1062+
case RISCV::VFWNMSAC_VV:
1063+
case RISCV::VFWNMSAC_VF:
1064+
case RISCV::VFWMACCBF16_VV:
1065+
case RISCV::VFWMACCBF16_VF:
10531066
// Vector Floating-Point MIN/MAX Instructions
10541067
case RISCV::VFMIN_VF:
10551068
case RISCV::VFMIN_VV:

llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll

Lines changed: 214 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
3-
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
4-
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
5-
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
2+
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
3+
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
4+
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
5+
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
66

77
; The purpose of this file is to check the behavior of specific instructions as it relates to the VL optimizer
88

@@ -4351,3 +4351,213 @@ define <vscale x 4 x float> @vfnmsub_vf(<vscale x 4 x float> %a, float %b, <vsca
43514351
%2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
43524352
ret <vscale x 4 x float> %2
43534353
}
4354+
4355+
define <vscale x 4 x double> @vfwmacc_vv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
4356+
; NOVLOPT-LABEL: vfwmacc_vv:
4357+
; NOVLOPT: # %bb.0:
4358+
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
4359+
; NOVLOPT-NEXT: vfwmacc.vv v8, v12, v14
4360+
; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
4361+
; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
4362+
; NOVLOPT-NEXT: ret
4363+
;
4364+
; VLOPT-LABEL: vfwmacc_vv:
4365+
; VLOPT: # %bb.0:
4366+
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
4367+
; VLOPT-NEXT: vfwmacc.vv v8, v12, v14
4368+
; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4369+
; VLOPT-NEXT: vfadd.vv v8, v8, v16
4370+
; VLOPT-NEXT: ret
4371+
%1 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
4372+
%2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
4373+
ret <vscale x 4 x double> %2
4374+
}
4375+
4376+
define <vscale x 4 x double> @vfwmacc_vf(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
4377+
; NOVLOPT-LABEL: vfwmacc_vf:
4378+
; NOVLOPT: # %bb.0:
4379+
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
4380+
; NOVLOPT-NEXT: vfwmacc.vf v8, fa0, v12
4381+
; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
4382+
; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
4383+
; NOVLOPT-NEXT: ret
4384+
;
4385+
; VLOPT-LABEL: vfwmacc_vf:
4386+
; VLOPT: # %bb.0:
4387+
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
4388+
; VLOPT-NEXT: vfwmacc.vf v8, fa0, v12
4389+
; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4390+
; VLOPT-NEXT: vfadd.vv v8, v8, v16
4391+
; VLOPT-NEXT: ret
4392+
%1 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
4393+
%2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
4394+
ret <vscale x 4 x double> %2
4395+
}
4396+
4397+
define <vscale x 4 x double> @vfwnmacc_vv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
4398+
; NOVLOPT-LABEL: vfwnmacc_vv:
4399+
; NOVLOPT: # %bb.0:
4400+
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
4401+
; NOVLOPT-NEXT: vfwnmacc.vv v8, v12, v14
4402+
; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
4403+
; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
4404+
; NOVLOPT-NEXT: ret
4405+
;
4406+
; VLOPT-LABEL: vfwnmacc_vv:
4407+
; VLOPT: # %bb.0:
4408+
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
4409+
; VLOPT-NEXT: vfwnmacc.vv v8, v12, v14
4410+
; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4411+
; VLOPT-NEXT: vfadd.vv v8, v8, v16
4412+
; VLOPT-NEXT: ret
4413+
%1 = call <vscale x 4 x double> @llvm.riscv.vfwnmacc(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
4414+
%2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
4415+
ret <vscale x 4 x double> %2
4416+
}
4417+
4418+
define <vscale x 4 x double> @vfwnmacc_vf(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
4419+
; NOVLOPT-LABEL: vfwnmacc_vf:
4420+
; NOVLOPT: # %bb.0:
4421+
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
4422+
; NOVLOPT-NEXT: vfwnmacc.vf v8, fa0, v12
4423+
; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
4424+
; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
4425+
; NOVLOPT-NEXT: ret
4426+
;
4427+
; VLOPT-LABEL: vfwnmacc_vf:
4428+
; VLOPT: # %bb.0:
4429+
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
4430+
; VLOPT-NEXT: vfwnmacc.vf v8, fa0, v12
4431+
; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4432+
; VLOPT-NEXT: vfadd.vv v8, v8, v16
4433+
; VLOPT-NEXT: ret
4434+
%1 = call <vscale x 4 x double> @llvm.riscv.vfwnmacc(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
4435+
%2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
4436+
ret <vscale x 4 x double> %2
4437+
}
4438+
4439+
define <vscale x 4 x double> @vfwmsac_vv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
4440+
; NOVLOPT-LABEL: vfwmsac_vv:
4441+
; NOVLOPT: # %bb.0:
4442+
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
4443+
; NOVLOPT-NEXT: vfwmsac.vv v8, v12, v14
4444+
; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
4445+
; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
4446+
; NOVLOPT-NEXT: ret
4447+
;
4448+
; VLOPT-LABEL: vfwmsac_vv:
4449+
; VLOPT: # %bb.0:
4450+
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
4451+
; VLOPT-NEXT: vfwmsac.vv v8, v12, v14
4452+
; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4453+
; VLOPT-NEXT: vfadd.vv v8, v8, v16
4454+
; VLOPT-NEXT: ret
4455+
%1 = call <vscale x 4 x double> @llvm.riscv.vfwmsac(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
4456+
%2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
4457+
ret <vscale x 4 x double> %2
4458+
}
4459+
4460+
define <vscale x 4 x double> @vfwmsac_vf(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
4461+
; NOVLOPT-LABEL: vfwmsac_vf:
4462+
; NOVLOPT: # %bb.0:
4463+
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
4464+
; NOVLOPT-NEXT: vfwmsac.vf v8, fa0, v12
4465+
; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
4466+
; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
4467+
; NOVLOPT-NEXT: ret
4468+
;
4469+
; VLOPT-LABEL: vfwmsac_vf:
4470+
; VLOPT: # %bb.0:
4471+
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
4472+
; VLOPT-NEXT: vfwmsac.vf v8, fa0, v12
4473+
; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4474+
; VLOPT-NEXT: vfadd.vv v8, v8, v16
4475+
; VLOPT-NEXT: ret
4476+
%1 = call <vscale x 4 x double> @llvm.riscv.vfwmsac(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
4477+
%2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
4478+
ret <vscale x 4 x double> %2
4479+
}
4480+
4481+
define <vscale x 4 x double> @vfwnmsac_vv(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
4482+
; NOVLOPT-LABEL: vfwnmsac_vv:
4483+
; NOVLOPT: # %bb.0:
4484+
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
4485+
; NOVLOPT-NEXT: vfwnmsac.vv v8, v12, v14
4486+
; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
4487+
; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
4488+
; NOVLOPT-NEXT: ret
4489+
;
4490+
; VLOPT-LABEL: vfwnmsac_vv:
4491+
; VLOPT: # %bb.0:
4492+
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
4493+
; VLOPT-NEXT: vfwnmsac.vv v8, v12, v14
4494+
; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4495+
; VLOPT-NEXT: vfadd.vv v8, v8, v16
4496+
; VLOPT-NEXT: ret
4497+
%1 = call <vscale x 4 x double> @llvm.riscv.vfwnmsac(<vscale x 4 x double> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
4498+
%2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
4499+
ret <vscale x 4 x double> %2
4500+
}
4501+
4502+
define <vscale x 4 x double> @vfwnmsac_vf(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, <vscale x 4 x double> %d, iXLen %vl) {
4503+
; NOVLOPT-LABEL: vfwnmsac_vf:
4504+
; NOVLOPT: # %bb.0:
4505+
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
4506+
; NOVLOPT-NEXT: vfwnmsac.vf v8, fa0, v12
4507+
; NOVLOPT-NEXT: vsetvli zero, a0, e64, m4, ta, ma
4508+
; NOVLOPT-NEXT: vfadd.vv v8, v8, v16
4509+
; NOVLOPT-NEXT: ret
4510+
;
4511+
; VLOPT-LABEL: vfwnmsac_vf:
4512+
; VLOPT: # %bb.0:
4513+
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
4514+
; VLOPT-NEXT: vfwnmsac.vf v8, fa0, v12
4515+
; VLOPT-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4516+
; VLOPT-NEXT: vfadd.vv v8, v8, v16
4517+
; VLOPT-NEXT: ret
4518+
%1 = call <vscale x 4 x double> @llvm.riscv.vfwnmsac(<vscale x 4 x double> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 0)
4519+
%2 = call <vscale x 4 x double> @llvm.riscv.vfadd(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %d, iXLen 7, iXLen %vl)
4520+
ret <vscale x 4 x double> %2
4521+
}
4522+
4523+
define <vscale x 4 x float> @vfwmaccbf16_vv(<vscale x 4 x float> %a, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, <vscale x 4 x float> %d, iXLen %vl) {
4524+
; NOVLOPT-LABEL: vfwmaccbf16_vv:
4525+
; NOVLOPT: # %bb.0:
4526+
; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
4527+
; NOVLOPT-NEXT: vfwmaccbf16.vv v8, v10, v11
4528+
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
4529+
; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
4530+
; NOVLOPT-NEXT: ret
4531+
;
4532+
; VLOPT-LABEL: vfwmaccbf16_vv:
4533+
; VLOPT: # %bb.0:
4534+
; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
4535+
; VLOPT-NEXT: vfwmaccbf16.vv v8, v10, v11
4536+
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4537+
; VLOPT-NEXT: vfadd.vv v8, v8, v12
4538+
; VLOPT-NEXT: ret
4539+
%1 = call <vscale x 4 x float> @llvm.riscv.vfwmaccbf16(<vscale x 4 x float> %a, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, iXLen 7, iXLen -1, iXLen 0)
4540+
%2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %d, iXLen 7, iXLen %vl)
4541+
ret <vscale x 4 x float> %2
4542+
}
4543+
4544+
define <vscale x 4 x float> @vfwmaccbf16_vf(<vscale x 4 x float> %a, bfloat %b, <vscale x 4 x bfloat> %c, <vscale x 4 x float> %d, iXLen %vl) {
4545+
; NOVLOPT-LABEL: vfwmaccbf16_vf:
4546+
; NOVLOPT: # %bb.0:
4547+
; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
4548+
; NOVLOPT-NEXT: vfwmaccbf16.vf v8, fa0, v10
4549+
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
4550+
; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
4551+
; NOVLOPT-NEXT: ret
4552+
;
4553+
; VLOPT-LABEL: vfwmaccbf16_vf:
4554+
; VLOPT: # %bb.0:
4555+
; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
4556+
; VLOPT-NEXT: vfwmaccbf16.vf v8, fa0, v10
4557+
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4558+
; VLOPT-NEXT: vfadd.vv v8, v8, v12
4559+
; VLOPT-NEXT: ret
4560+
%1 = call <vscale x 4 x float> @llvm.riscv.vfwmaccbf16(<vscale x 4 x float> %a, bfloat %b, <vscale x 4 x bfloat> %c, iXLen 7, iXLen -1, iXLen 0)
4561+
%2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %d, iXLen 7, iXLen %vl)
4562+
ret <vscale x 4 x float> %2
4563+
}

0 commit comments

Comments
 (0)