
Commit 8e972ef

[RISCV] Add scalable vector patterns for vfwmaccbf16.v{v,f} (#106771)
We can reuse the patterns for vfwmacc.v{v,f} as long as we swap out fpext_oneuse for riscv_fpextend_bf16 in the scalar case.
1 parent 360e4ab commit 8e972ef
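
For context, a minimal sketch of the scalar-operand case the swapped-in node is aimed at (the function and value names here are illustrative, not taken from the patch): an fma whose splatted bfloat operand is widened to f32. With +zvfbfwma, IR of this shape should now select to vfwmaccbf16.vf, as the test added below checks.

; Illustrative sketch only: a splatted bfloat scalar and a bfloat vector,
; both widened to f32 and fed into llvm.fma.
define <vscale x 4 x float> @sketch_vf(<vscale x 4 x float> %acc, bfloat %s, <vscale x 4 x bfloat> %v) {
  %head = insertelement <vscale x 4 x bfloat> poison, bfloat %s, i32 0
  %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
  %s.ext = fpext <vscale x 4 x bfloat> %splat to <vscale x 4 x float>
  %v.ext = fpext <vscale x 4 x bfloat> %v to <vscale x 4 x float>
  %r = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> %s.ext, <vscale x 4 x float> %v.ext, <vscale x 4 x float> %acc)
  ret <vscale x 4 x float> %r
}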

3 files changed: +254 −5 lines changed

llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td

Lines changed: 15 additions & 5 deletions
@@ -676,13 +676,18 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF_WV_WF_RM<SDNode op,
     : VPatWidenBinaryFPSDNode_VV_VF_RM<op, instruction_name>,
       VPatWidenBinaryFPSDNode_WV_WF_RM<op, instruction_name>;
 
-multiclass VPatWidenFPMulAccSDNode_VV_VF_RM<string instruction_name> {
-  foreach vtiToWti = AllWidenableFloatVectors in {
+multiclass VPatWidenFPMulAccSDNode_VV_VF_RM<string instruction_name,
+                                            list<VTypeInfoToWide> vtiToWtis,
+                                            PatFrags extop> {
+  foreach vtiToWti = vtiToWtis in {
     defvar vti = vtiToWti.Vti;
     defvar wti = vtiToWti.Wti;
     defvar suffix = vti.LMul.MX # "_E" # vti.SEW;
     let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
-                                 GetVTypePredicates<wti>.Predicates) in {
+                                 GetVTypePredicates<wti>.Predicates,
+                                 !if(!eq(vti.Scalar, bf16),
+                                     [HasStdExtZvfbfwma],
+                                     [])) in {
       def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse
                          (vti.Vector vti.RegClass:$rs1),
                          (vti.Mask true_mask), (XLenVT srcvalue))),
@@ -697,7 +702,7 @@ multiclass VPatWidenFPMulAccSDNode_VV_VF_RM<string instruction_name> {
                     FRM_DYN,
                     vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
       def : Pat<(fma (wti.Vector (SplatFPOp
-                         (fpext_oneuse (vti.Scalar vti.ScalarRegClass:$rs1)))),
+                         (extop (vti.Scalar vti.ScalarRegClass:$rs1)))),
                      (wti.Vector (riscv_fpextend_vl_oneuse
                          (vti.Vector vti.RegClass:$rs2),
                          (vti.Mask true_mask), (XLenVT srcvalue))),
@@ -1284,7 +1289,12 @@ foreach fvti = AllFloatVectors in {
 }
 
 // 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
-defm : VPatWidenFPMulAccSDNode_VV_VF_RM<"PseudoVFWMACC">;
+defm : VPatWidenFPMulAccSDNode_VV_VF_RM<"PseudoVFWMACC",
+                                        AllWidenableFloatVectors,
+                                        fpext_oneuse>;
+defm : VPatWidenFPMulAccSDNode_VV_VF_RM<"PseudoVFWMACCBF16",
+                                        AllWidenableBFloatToFloatVectors,
+                                        riscv_fpextend_bf16_oneuse>;
 defm : VPatWidenFPNegMulAccSDNode_VV_VF_RM<"PseudoVFWNMACC">;
 defm : VPatWidenFPMulSacSDNode_VV_VF_RM<"PseudoVFWMSAC">;
 defm : VPatWidenFPNegMulSacSDNode_VV_VF_RM<"PseudoVFWNMSAC">;

llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td

Lines changed: 4 additions & 0 deletions
@@ -26,6 +26,10 @@ def riscv_fpround_bf16
     : SDNode<"RISCVISD::FP_ROUND_BF16", SDT_RISCVFP_ROUND_BF16>;
 def riscv_fpextend_bf16
     : SDNode<"RISCVISD::FP_EXTEND_BF16", SDT_RISCVFP_EXTEND_BF16>;
+def riscv_fpextend_bf16_oneuse : PatFrag<(ops node:$A),
+                                         (riscv_fpextend_bf16 node:$A), [{
+  return N->hasOneUse();
+}]>;
 
 //===----------------------------------------------------------------------===//
 // Instructions
Lines changed: 235 additions & 0 deletions
@@ -0,0 +1,235 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfwma -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFWMA
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfwma -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFWMA
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFMIN
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFMIN

define <vscale x 1 x float> @vfwmaccbf16_vv_nxv1f32(<vscale x 1 x float> %a, <vscale x 1 x bfloat> %b, <vscale x 1 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf16_vv_nxv1f32:
; ZVFBFWMA: # %bb.0:
; ZVFBFWMA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v9, v10
; ZVFBFWMA-NEXT: ret
;
; ZVFBFMIN-LABEL: vfwmaccbf16_vv_nxv1f32:
; ZVFBFMIN: # %bb.0:
; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v11, v9
; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v10
; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFBFMIN-NEXT: vfmacc.vv v8, v11, v9
; ZVFBFMIN-NEXT: ret
  %b.ext = fpext <vscale x 1 x bfloat> %b to <vscale x 1 x float>
  %c.ext = fpext <vscale x 1 x bfloat> %c to <vscale x 1 x float>
  %res = call <vscale x 1 x float> @llvm.fma.nxv1f32(<vscale x 1 x float> %b.ext, <vscale x 1 x float> %c.ext, <vscale x 1 x float> %a)
  ret <vscale x 1 x float> %res
}

define <vscale x 1 x float> @vfwmaccbf16_vf_nxv1f32(<vscale x 1 x float> %a, bfloat %b, <vscale x 1 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf16_vf_nxv1f32:
; ZVFBFWMA: # %bb.0:
; ZVFBFWMA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; ZVFBFWMA-NEXT: vfwmaccbf16.vf v8, fa0, v9
; ZVFBFWMA-NEXT: ret
;
; ZVFBFMIN-LABEL: vfwmaccbf16_vf_nxv1f32:
; ZVFBFMIN: # %bb.0:
; ZVFBFMIN-NEXT: fmv.x.w a0, fa0
; ZVFBFMIN-NEXT: slli a0, a0, 16
; ZVFBFMIN-NEXT: fmv.w.x fa5, a0
; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFBFMIN-NEXT: vfmacc.vf v8, fa5, v10
; ZVFBFMIN-NEXT: ret
  %b.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
  %b.splat = shufflevector <vscale x 1 x bfloat> %b.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
  %b.ext = fpext <vscale x 1 x bfloat> %b.splat to <vscale x 1 x float>
  %c.ext = fpext <vscale x 1 x bfloat> %c to <vscale x 1 x float>
  %res = call <vscale x 1 x float> @llvm.fma.nxv1f32(<vscale x 1 x float> %b.ext, <vscale x 1 x float> %c.ext, <vscale x 1 x float> %a)
  ret <vscale x 1 x float> %res
}

define <vscale x 2 x float> @vfwmaccbf16_vv_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x bfloat> %b, <vscale x 2 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf16_vv_nxv2f32:
; ZVFBFWMA: # %bb.0:
; ZVFBFWMA-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v9, v10
; ZVFBFWMA-NEXT: ret
;
; ZVFBFMIN-LABEL: vfwmaccbf16_vv_nxv2f32:
; ZVFBFMIN: # %bb.0:
; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v11, v9
; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v10
; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFBFMIN-NEXT: vfmacc.vv v8, v11, v9
; ZVFBFMIN-NEXT: ret
  %b.ext = fpext <vscale x 2 x bfloat> %b to <vscale x 2 x float>
  %c.ext = fpext <vscale x 2 x bfloat> %c to <vscale x 2 x float>
  %res = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %b.ext, <vscale x 2 x float> %c.ext, <vscale x 2 x float> %a)
  ret <vscale x 2 x float> %res
}

define <vscale x 2 x float> @vfwmaccbf16_vf_nxv2f32(<vscale x 2 x float> %a, bfloat %b, <vscale x 2 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf16_vf_nxv2f32:
; ZVFBFWMA: # %bb.0:
; ZVFBFWMA-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; ZVFBFWMA-NEXT: vfwmaccbf16.vf v8, fa0, v9
; ZVFBFWMA-NEXT: ret
;
; ZVFBFMIN-LABEL: vfwmaccbf16_vf_nxv2f32:
; ZVFBFMIN: # %bb.0:
; ZVFBFMIN-NEXT: fmv.x.w a0, fa0
; ZVFBFMIN-NEXT: slli a0, a0, 16
; ZVFBFMIN-NEXT: fmv.w.x fa5, a0
; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFBFMIN-NEXT: vfmacc.vf v8, fa5, v10
; ZVFBFMIN-NEXT: ret
  %b.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
  %b.splat = shufflevector <vscale x 2 x bfloat> %b.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
  %b.ext = fpext <vscale x 2 x bfloat> %b.splat to <vscale x 2 x float>
  %c.ext = fpext <vscale x 2 x bfloat> %c to <vscale x 2 x float>
  %res = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %b.ext, <vscale x 2 x float> %c.ext, <vscale x 2 x float> %a)
  ret <vscale x 2 x float> %res
}

define <vscale x 4 x float> @vfwmaccbf16_vv_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf16_vv_nxv4f32:
; ZVFBFWMA: # %bb.0:
; ZVFBFWMA-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v10, v11
; ZVFBFWMA-NEXT: ret
;
; ZVFBFMIN-LABEL: vfwmaccbf16_vv_nxv4f32:
; ZVFBFMIN: # %bb.0:
; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v10
; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v14, v11
; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFBFMIN-NEXT: vfmacc.vv v8, v12, v14
; ZVFBFMIN-NEXT: ret
  %b.ext = fpext <vscale x 4 x bfloat> %b to <vscale x 4 x float>
  %c.ext = fpext <vscale x 4 x bfloat> %c to <vscale x 4 x float>
  %res = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> %b.ext, <vscale x 4 x float> %c.ext, <vscale x 4 x float> %a)
  ret <vscale x 4 x float> %res
}

define <vscale x 4 x float> @vfwmaccbf16_vf_nxv4f32(<vscale x 4 x float> %a, bfloat %b, <vscale x 4 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf16_vf_nxv4f32:
; ZVFBFWMA: # %bb.0:
; ZVFBFWMA-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVFBFWMA-NEXT: vfwmaccbf16.vf v8, fa0, v10
; ZVFBFWMA-NEXT: ret
;
; ZVFBFMIN-LABEL: vfwmaccbf16_vf_nxv4f32:
; ZVFBFMIN: # %bb.0:
; ZVFBFMIN-NEXT: fmv.x.w a0, fa0
; ZVFBFMIN-NEXT: slli a0, a0, 16
; ZVFBFMIN-NEXT: fmv.w.x fa5, a0
; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v10
; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFBFMIN-NEXT: vfmacc.vf v8, fa5, v12
; ZVFBFMIN-NEXT: ret
  %b.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
  %b.splat = shufflevector <vscale x 4 x bfloat> %b.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
  %b.ext = fpext <vscale x 4 x bfloat> %b.splat to <vscale x 4 x float>
  %c.ext = fpext <vscale x 4 x bfloat> %c to <vscale x 4 x float>
  %res = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> %b.ext, <vscale x 4 x float> %c.ext, <vscale x 4 x float> %a)
  ret <vscale x 4 x float> %res
}

define <vscale x 8 x float> @vfwmaccbf16_vv_nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf16_vv_nxv8f32:
; ZVFBFWMA: # %bb.0:
; ZVFBFWMA-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v12, v14
; ZVFBFWMA-NEXT: ret
;
; ZVFBFMIN-LABEL: vfwmaccbf16_vv_nxv8f32:
; ZVFBFMIN: # %bb.0:
; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v20, v14
; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFBFMIN-NEXT: vfmacc.vv v8, v16, v20
; ZVFBFMIN-NEXT: ret
  %b.ext = fpext <vscale x 8 x bfloat> %b to <vscale x 8 x float>
  %c.ext = fpext <vscale x 8 x bfloat> %c to <vscale x 8 x float>
  %res = call <vscale x 8 x float> @llvm.fma.nxv8f32(<vscale x 8 x float> %b.ext, <vscale x 8 x float> %c.ext, <vscale x 8 x float> %a)
  ret <vscale x 8 x float> %res
}

define <vscale x 8 x float> @vfwmaccbf16_vf_nxv8f32(<vscale x 8 x float> %a, bfloat %b, <vscale x 8 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf16_vf_nxv8f32:
; ZVFBFWMA: # %bb.0:
; ZVFBFWMA-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; ZVFBFWMA-NEXT: vfwmaccbf16.vf v8, fa0, v12
; ZVFBFWMA-NEXT: ret
;
; ZVFBFMIN-LABEL: vfwmaccbf16_vf_nxv8f32:
; ZVFBFMIN: # %bb.0:
; ZVFBFMIN-NEXT: fmv.x.w a0, fa0
; ZVFBFMIN-NEXT: slli a0, a0, 16
; ZVFBFMIN-NEXT: fmv.w.x fa5, a0
; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFBFMIN-NEXT: vfmacc.vf v8, fa5, v16
; ZVFBFMIN-NEXT: ret
  %b.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
  %b.splat = shufflevector <vscale x 8 x bfloat> %b.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
  %b.ext = fpext <vscale x 8 x bfloat> %b.splat to <vscale x 8 x float>
  %c.ext = fpext <vscale x 8 x bfloat> %c to <vscale x 8 x float>
  %res = call <vscale x 8 x float> @llvm.fma.nxv8f32(<vscale x 8 x float> %b.ext, <vscale x 8 x float> %c.ext, <vscale x 8 x float> %a)
  ret <vscale x 8 x float> %res
}

define <vscale x 16 x float> @vfwmaccbf16_vv_nxv16f32(<vscale x 16 x float> %a, <vscale x 16 x bfloat> %b, <vscale x 16 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf16_vv_nxv16f32:
; ZVFBFWMA: # %bb.0:
; ZVFBFWMA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v16, v20
; ZVFBFWMA-NEXT: ret
;
; ZVFBFMIN-LABEL: vfwmaccbf16_vv_nxv16f32:
; ZVFBFMIN: # %bb.0:
; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v16
; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v0, v20
; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFBFMIN-NEXT: vfmacc.vv v8, v24, v0
; ZVFBFMIN-NEXT: ret
  %b.ext = fpext <vscale x 16 x bfloat> %b to <vscale x 16 x float>
  %c.ext = fpext <vscale x 16 x bfloat> %c to <vscale x 16 x float>
  %res = call <vscale x 16 x float> @llvm.fma.nxv16f32(<vscale x 16 x float> %b.ext, <vscale x 16 x float> %c.ext, <vscale x 16 x float> %a)
  ret <vscale x 16 x float> %res
}

define <vscale x 16 x float> @vfwmaccbf16_vf_nxv16f32(<vscale x 16 x float> %a, bfloat %b, <vscale x 16 x bfloat> %c) {
; ZVFBFWMA-LABEL: vfwmaccbf16_vf_nxv16f32:
; ZVFBFWMA: # %bb.0:
; ZVFBFWMA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFBFWMA-NEXT: vfwmaccbf16.vf v8, fa0, v16
; ZVFBFWMA-NEXT: ret
;
; ZVFBFMIN-LABEL: vfwmaccbf16_vf_nxv16f32:
; ZVFBFMIN: # %bb.0:
; ZVFBFMIN-NEXT: fmv.x.w a0, fa0
; ZVFBFMIN-NEXT: slli a0, a0, 16
; ZVFBFMIN-NEXT: fmv.w.x fa5, a0
; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v16
; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFBFMIN-NEXT: vfmacc.vf v8, fa5, v24
; ZVFBFMIN-NEXT: ret
  %b.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
  %b.splat = shufflevector <vscale x 16 x bfloat> %b.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
  %b.ext = fpext <vscale x 16 x bfloat> %b.splat to <vscale x 16 x float>
  %c.ext = fpext <vscale x 16 x bfloat> %c to <vscale x 16 x float>
  %res = call <vscale x 16 x float> @llvm.fma.nxv16f32(<vscale x 16 x float> %b.ext, <vscale x 16 x float> %c.ext, <vscale x 16 x float> %a)
  ret <vscale x 16 x float> %res
}
