
Commit 8ce81f1

[LegalizeVectorOps][RISCV] Use VP_FP_EXTEND/ROUND when promoting VP_FP* operations. (#122784)
This preserves the original VL, leading to more reuse of VL for vsetvli. The VLOptimizer can also clean up a lot of this, but I'm not sure it catches all of it. There are some regressions in here from propagating the mask as well, though I'm not sure that's a concern.
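As a concrete illustration, adapted from the tests updated in this commit (the declare line is added here for readability), the affected pattern is a VP intrinsic on f16 vectors that a Zvfhmin-only target promotes to f32; the extend/round nodes created by that promotion now reuse the original mask %m and EVL %evl instead of dropping them:

```llvm
; Under the ZVFHMIN configuration, the <2 x half> operation is promoted to
; <2 x float>. With this change the widening and narrowing conversions become
; VP_FP_EXTEND/VP_FP_ROUND, so they inherit %m and %evl.
declare <2 x half> @llvm.vp.ceil.v2f16(<2 x half>, <2 x i1>, i32)

define <2 x half> @vp_ceil_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) {
  %v = call <2 x half> @llvm.vp.ceil.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
  ret <2 x half> %v
}
```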
1 parent cd264f0 · commit 8ce81f1

33 files changed: +10284 −9783 lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

Lines changed: 20 additions & 3 deletions
@@ -737,7 +737,17 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
               .getVectorElementType()
               .isFloatingPoint() &&
           NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
-        Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
+        if (ISD::isVPOpcode(Node->getOpcode())) {
+          unsigned EVLIdx =
+              *ISD::getVPExplicitVectorLengthIdx(Node->getOpcode());
+          unsigned MaskIdx = *ISD::getVPMaskIdx(Node->getOpcode());
+          Operands[j] =
+              DAG.getNode(ISD::VP_FP_EXTEND, dl, NVT, Node->getOperand(j),
+                          Node->getOperand(MaskIdx), Node->getOperand(EVLIdx));
+        } else {
+          Operands[j] =
+              DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
+        }
       else
         Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j));
     else
@@ -750,8 +760,15 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
   if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
       (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
        NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
-    Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res,
-                      DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
+    if (ISD::isVPOpcode(Node->getOpcode())) {
+      unsigned EVLIdx = *ISD::getVPExplicitVectorLengthIdx(Node->getOpcode());
+      unsigned MaskIdx = *ISD::getVPMaskIdx(Node->getOpcode());
+      Res = DAG.getNode(ISD::VP_FP_ROUND, dl, VT, Res,
+                        Node->getOperand(MaskIdx), Node->getOperand(EVLIdx));
+    } else {
+      Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res,
+                        DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
+    }
   else
     Res = DAG.getNode(ISD::BITCAST, dl, VT, Res);

llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll

Lines changed: 310 additions & 234 deletions
Large diffs are not rendered by default.
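The diff for this file is omitted above. It covers the scalable-vector form of the same pattern; a sketch of the kind of test it contains is below (an assumed example for illustration, not copied verbatim from the file):

```llvm
; Illustrative only (assumed shape of the scalable-vector tests, not verbatim
; from ceil-vp.ll): the same promotion applies to <vscale x N x half> vectors.
declare <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)

define <vscale x 1 x half> @vp_ceil_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
  %v = call <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x half> %v
}
```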

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll

Lines changed: 74 additions & 62 deletions
@@ -30,23 +30,27 @@ define <2 x half> @vp_ceil_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl)
 ;
 ; ZVFHMIN-LABEL: vp_ceil_v2f16:
 ; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: lui a1, 307200
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t
-; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v9, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: vmv1r.v v8, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t
+; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t
 ; ZVFHMIN-NEXT: fsrmi a0, 3
+; ZVFHMIN-NEXT: vmv1r.v v0, v8
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t
 ; ZVFHMIN-NEXT: fsrm a0
-; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v11, v11, v0.t
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vfsgnj.vv v10, v11, v10, v0.t
+; ZVFHMIN-NEXT: vmv1r.v v0, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
 ; ZVFHMIN-NEXT: ret
   %v = call <2 x half> @llvm.vp.ceil.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
   ret <2 x half> %v
@@ -70,20 +74,20 @@ define <2 x half> @vp_ceil_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
 ;
 ; ZVFHMIN-LABEL: vp_ceil_v2f16_unmasked:
 ; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: lui a1, 307200
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
 ; ZVFHMIN-NEXT: vfabs.v v8, v9
-; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
 ; ZVFHMIN-NEXT: fsrmi a0, 3
 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT: fsrm a0
 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
 ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
   %v = call <2 x half> @llvm.vp.ceil.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
@@ -112,23 +116,27 @@ define <4 x half> @vp_ceil_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl)
 ;
 ; ZVFHMIN-LABEL: vp_ceil_v4f16:
 ; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: lui a1, 307200
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v8, v9, v0.t
-; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v9, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: vmv1r.v v8, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t
+; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t
 ; ZVFHMIN-NEXT: fsrmi a0, 3
+; ZVFHMIN-NEXT: vmv.v.v v0, v8
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t
 ; ZVFHMIN-NEXT: fsrm a0
-; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v11, v11, v0.t
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vfsgnj.vv v10, v11, v10, v0.t
+; ZVFHMIN-NEXT: vmv1r.v v0, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
 ; ZVFHMIN-NEXT: ret
   %v = call <4 x half> @llvm.vp.ceil.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
   ret <4 x half> %v
@@ -152,20 +160,20 @@ define <4 x half> @vp_ceil_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
 ;
 ; ZVFHMIN-LABEL: vp_ceil_v4f16_unmasked:
 ; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: lui a1, 307200
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
 ; ZVFHMIN-NEXT: vfabs.v v8, v9
-; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
 ; ZVFHMIN-NEXT: fsrmi a0, 3
 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
 ; ZVFHMIN-NEXT: fsrm a0
 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
 ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
   %v = call <4 x half> @llvm.vp.ceil.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
@@ -194,25 +202,27 @@ define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
 ;
 ; ZVFHMIN-LABEL: vp_ceil_v8f16:
 ; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT: vmv1r.v v9, v0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: lui a1, 307200
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: vmv1r.v v8, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t
-; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t
+; ZVFHMIN-NEXT: vmflt.vf v8, v12, fa5, v0.t
 ; ZVFHMIN-NEXT: fsrmi a0, 3
-; ZVFHMIN-NEXT: vmv1r.v v0, v9
+; ZVFHMIN-NEXT: vmv1r.v v0, v8
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t
 ; ZVFHMIN-NEXT: fsrm a0
 ; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
 ; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: vmv1r.v v0, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
 ; ZVFHMIN-NEXT: ret
   %v = call <8 x half> @llvm.vp.ceil.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
   ret <8 x half> %v
@@ -236,20 +246,20 @@ define <8 x half> @vp_ceil_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
 ;
 ; ZVFHMIN-LABEL: vp_ceil_v8f16_unmasked:
 ; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT: lui a1, 307200
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT: vfabs.v v8, v10
-; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
 ; ZVFHMIN-NEXT: fsrmi a0, 3
 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT: fsrm a0
 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
 ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT: ret
   %v = call <8 x half> @llvm.vp.ceil.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl)
@@ -280,25 +290,27 @@ define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e
 ;
 ; ZVFHMIN-LABEL: vp_ceil_v16f16:
 ; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT: vmv1r.v v10, v0
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: lui a1, 307200
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: vmv1r.v v8, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t
-; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
-; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t
+; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t
 ; ZVFHMIN-NEXT: fsrmi a0, 3
-; ZVFHMIN-NEXT: vmv1r.v v0, v10
+; ZVFHMIN-NEXT: vmv1r.v v0, v8
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t
 ; ZVFHMIN-NEXT: fsrm a0
 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
 ; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: vmv1r.v v0, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
 ; ZVFHMIN-NEXT: ret
   %v = call <16 x half> @llvm.vp.ceil.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
   ret <16 x half> %v
@@ -322,20 +334,20 @@ define <16 x half> @vp_ceil_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
 ;
 ; ZVFHMIN-LABEL: vp_ceil_v16f16_unmasked:
 ; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT: lui a1, 307200
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT: vfabs.v v8, v12
-; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
 ; ZVFHMIN-NEXT: fsrmi a0, 3
 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t
 ; ZVFHMIN-NEXT: fsrm a0
 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
 ; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT: ret
   %v = call <16 x half> @llvm.vp.ceil.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl)
