Skip to content

Commit ac16d96

Browse files
committed
[RISCV] Remove hasSideEffects=1 for vsetvli pseudos
In a similar vein to #90049, it looks like we currently model all of the effects of a vsetvli pseudo:
* VL and VTYPE are marked as defs.
* VL-preserving x0,x0 vsetvlis don't get emitted until RISCVInsertVSETVLI, and when they are, they have implicit uses of VL.
* Regular vector pseudos are fully modelled too: before RISCVInsertVSETVLI they can be moved between vsetvli pseudos because we will eventually insert vsetvlis to correct VL and VTYPE. Afterwards, they will have implicit uses of VL and VTYPE.
I may be missing something, but otherwise it seems OK to remove hasSideEffects=1. This gives us some improvements, like sinking in vsetvli-insert-crossbb.ll. We need to update RISCVDeadRegisterDefinitions to keep handling vsetvli pseudos, since it otherwise only operates on instructions that may load, may store, or have unmodelled side effects.
1 parent 48b6f4a commit ac16d96

37 files changed

+1118
-1175
lines changed

llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,9 @@ bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
7272
// are reserved for HINT instructions.
7373
const MCInstrDesc &Desc = MI.getDesc();
7474
if (!Desc.mayLoad() && !Desc.mayStore() &&
75-
!Desc.hasUnmodeledSideEffects())
75+
!Desc.hasUnmodeledSideEffects() &&
76+
MI.getOpcode() != RISCV::PseudoVSETVLI &&
77+
MI.getOpcode() != RISCV::PseudoVSETIVLI)
7678
continue;
7779
// For PseudoVSETVLIX0, Rd = X0 has special meaning.
7880
if (MI.getOpcode() == RISCV::PseudoVSETVLIX0)

llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6181,7 +6181,7 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 0,
61816181
//===----------------------------------------------------------------------===//
61826182

61836183
// Pseudos.
6184-
let hasSideEffects = 1, mayLoad = 0, mayStore = 0, Defs = [VL, VTYPE] in {
6184+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Defs = [VL, VTYPE] in {
61856185
// Due to rs1=X0 having special meaning, we need a GPRNoX0 register class for
61866186
// when we aren't using one of the special X0 encodings. Otherwise it could
61876187
// accidentally be made X0 by MachineIR optimizations. To satisfy the

llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -236,41 +236,53 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_param_nxv32i32_nxv32i32_nxv32i32
236236
; CHECK-NEXT: addi sp, sp, -16
237237
; CHECK-NEXT: .cfi_def_cfa_offset 16
238238
; CHECK-NEXT: csrr a1, vlenb
239-
; CHECK-NEXT: slli a1, a1, 4
239+
; CHECK-NEXT: li a3, 24
240+
; CHECK-NEXT: mul a1, a1, a3
240241
; CHECK-NEXT: sub sp, sp, a1
241-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
242+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
242243
; CHECK-NEXT: csrr a1, vlenb
243-
; CHECK-NEXT: slli a1, a1, 3
244+
; CHECK-NEXT: slli a1, a1, 4
244245
; CHECK-NEXT: add a1, sp, a1
245246
; CHECK-NEXT: addi a1, a1, 16
246247
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
247248
; CHECK-NEXT: vmv8r.v v24, v8
248249
; CHECK-NEXT: csrr a1, vlenb
249250
; CHECK-NEXT: slli a1, a1, 3
250251
; CHECK-NEXT: add a3, a2, a1
251-
; CHECK-NEXT: vl8re32.v v8, (a3)
252-
; CHECK-NEXT: addi a3, sp, 16
253-
; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
254252
; CHECK-NEXT: add a1, a0, a1
255-
; CHECK-NEXT: vl8re32.v v0, (a0)
256253
; CHECK-NEXT: vl8re32.v v8, (a1)
257-
; CHECK-NEXT: vl8re32.v v16, (a2)
254+
; CHECK-NEXT: csrr a1, vlenb
255+
; CHECK-NEXT: slli a1, a1, 3
256+
; CHECK-NEXT: add a1, sp, a1
257+
; CHECK-NEXT: addi a1, a1, 16
258+
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
259+
; CHECK-NEXT: vl8re32.v v0, (a0)
258260
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
261+
; CHECK-NEXT: vl8re32.v v8, (a3)
262+
; CHECK-NEXT: addi a0, sp, 16
263+
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
264+
; CHECK-NEXT: vl8re32.v v16, (a2)
259265
; CHECK-NEXT: vadd.vv v0, v24, v0
260266
; CHECK-NEXT: csrr a0, vlenb
261-
; CHECK-NEXT: slli a0, a0, 3
267+
; CHECK-NEXT: slli a0, a0, 4
262268
; CHECK-NEXT: add a0, sp, a0
263269
; CHECK-NEXT: addi a0, a0, 16
264270
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
265-
; CHECK-NEXT: vadd.vv v8, v24, v8
271+
; CHECK-NEXT: csrr a0, vlenb
272+
; CHECK-NEXT: slli a0, a0, 3
273+
; CHECK-NEXT: add a0, sp, a0
274+
; CHECK-NEXT: addi a0, a0, 16
275+
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
276+
; CHECK-NEXT: vadd.vv v24, v24, v8
266277
; CHECK-NEXT: addi a0, sp, 16
267-
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
268-
; CHECK-NEXT: vadd.vv v8, v8, v24
278+
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
279+
; CHECK-NEXT: vadd.vv v8, v24, v8
269280
; CHECK-NEXT: vadd.vv v24, v0, v16
270281
; CHECK-NEXT: vadd.vx v16, v8, a4
271282
; CHECK-NEXT: vadd.vx v8, v24, a4
272283
; CHECK-NEXT: csrr a0, vlenb
273-
; CHECK-NEXT: slli a0, a0, 4
284+
; CHECK-NEXT: li a1, 24
285+
; CHECK-NEXT: mul a0, a0, a1
274286
; CHECK-NEXT: add sp, sp, a0
275287
; CHECK-NEXT: addi sp, sp, 16
276288
; CHECK-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/calling-conv.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,11 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
3939
; RV32-NEXT: vs8r.v v8, (a0)
4040
; RV32-NEXT: csrr a1, vlenb
4141
; RV32-NEXT: slli a1, a1, 3
42-
; RV32-NEXT: add a0, a0, a1
43-
; RV32-NEXT: vs8r.v v16, (a0)
42+
; RV32-NEXT: add a1, a0, a1
4443
; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, ma
4544
; RV32-NEXT: vmv.v.i v8, 0
4645
; RV32-NEXT: addi a0, sp, 128
46+
; RV32-NEXT: vs8r.v v16, (a1)
4747
; RV32-NEXT: vmv.v.i v16, 0
4848
; RV32-NEXT: call callee_scalable_vector_split_indirect
4949
; RV32-NEXT: addi sp, s0, -144
@@ -70,11 +70,11 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
7070
; RV64-NEXT: vs8r.v v8, (a0)
7171
; RV64-NEXT: csrr a1, vlenb
7272
; RV64-NEXT: slli a1, a1, 3
73-
; RV64-NEXT: add a0, a0, a1
74-
; RV64-NEXT: vs8r.v v16, (a0)
73+
; RV64-NEXT: add a1, a0, a1
7574
; RV64-NEXT: vsetvli a0, zero, e32, m8, ta, ma
7675
; RV64-NEXT: vmv.v.i v8, 0
7776
; RV64-NEXT: addi a0, sp, 128
77+
; RV64-NEXT: vs8r.v v16, (a1)
7878
; RV64-NEXT: vmv.v.i v16, 0
7979
; RV64-NEXT: call callee_scalable_vector_split_indirect
8080
; RV64-NEXT: addi sp, s0, -144

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ define <2 x half> @nearbyint_v2f16(<2 x half> %v) strictfp {
1919
; CHECK-NEXT: frflags a0
2020
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
2121
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
22-
; CHECK-NEXT: fsflags a0
2322
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
2423
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
24+
; CHECK-NEXT: fsflags a0
2525
; CHECK-NEXT: ret
2626
%r = call <2 x half> @llvm.experimental.constrained.nearbyint.v2f16(<2 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
2727
ret <2 x half> %r
@@ -42,9 +42,9 @@ define <4 x half> @nearbyint_v4f16(<4 x half> %v) strictfp {
4242
; CHECK-NEXT: frflags a0
4343
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
4444
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
45-
; CHECK-NEXT: fsflags a0
4645
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
4746
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
47+
; CHECK-NEXT: fsflags a0
4848
; CHECK-NEXT: ret
4949
%r = call <4 x half> @llvm.experimental.constrained.nearbyint.v4f16(<4 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
5050
ret <4 x half> %r
@@ -65,9 +65,9 @@ define <8 x half> @nearbyint_v8f16(<8 x half> %v) strictfp {
6565
; CHECK-NEXT: frflags a0
6666
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
6767
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
68-
; CHECK-NEXT: fsflags a0
6968
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
7069
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
70+
; CHECK-NEXT: fsflags a0
7171
; CHECK-NEXT: ret
7272
%r = call <8 x half> @llvm.experimental.constrained.nearbyint.v8f16(<8 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
7373
ret <8 x half> %r
@@ -88,9 +88,9 @@ define <16 x half> @nearbyint_v16f16(<16 x half> %v) strictfp {
8888
; CHECK-NEXT: frflags a0
8989
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
9090
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
91-
; CHECK-NEXT: fsflags a0
9291
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
9392
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
93+
; CHECK-NEXT: fsflags a0
9494
; CHECK-NEXT: ret
9595
%r = call <16 x half> @llvm.experimental.constrained.nearbyint.v16f16(<16 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
9696
ret <16 x half> %r
@@ -112,9 +112,9 @@ define <32 x half> @nearbyint_v32f16(<32 x half> %v) strictfp {
112112
; CHECK-NEXT: frflags a0
113113
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
114114
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
115-
; CHECK-NEXT: fsflags a0
116115
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
117116
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
117+
; CHECK-NEXT: fsflags a0
118118
; CHECK-NEXT: ret
119119
%r = call <32 x half> @llvm.experimental.constrained.nearbyint.v32f16(<32 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
120120
ret <32 x half> %r
@@ -135,9 +135,9 @@ define <2 x float> @nearbyint_v2f32(<2 x float> %v) strictfp {
135135
; CHECK-NEXT: frflags a0
136136
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
137137
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
138-
; CHECK-NEXT: fsflags a0
139138
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
140139
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
140+
; CHECK-NEXT: fsflags a0
141141
; CHECK-NEXT: ret
142142
%r = call <2 x float> @llvm.experimental.constrained.nearbyint.v2f32(<2 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
143143
ret <2 x float> %r
@@ -158,9 +158,9 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %v) strictfp {
158158
; CHECK-NEXT: frflags a0
159159
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
160160
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
161-
; CHECK-NEXT: fsflags a0
162161
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
163162
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
163+
; CHECK-NEXT: fsflags a0
164164
; CHECK-NEXT: ret
165165
%r = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
166166
ret <4 x float> %r
@@ -181,9 +181,9 @@ define <8 x float> @nearbyint_v8f32(<8 x float> %v) strictfp {
181181
; CHECK-NEXT: frflags a0
182182
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
183183
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
184-
; CHECK-NEXT: fsflags a0
185184
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
186185
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
186+
; CHECK-NEXT: fsflags a0
187187
; CHECK-NEXT: ret
188188
%r = call <8 x float> @llvm.experimental.constrained.nearbyint.v8f32(<8 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
189189
ret <8 x float> %r
@@ -204,9 +204,9 @@ define <16 x float> @nearbyint_v16f32(<16 x float> %v) strictfp {
204204
; CHECK-NEXT: frflags a0
205205
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
206206
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
207-
; CHECK-NEXT: fsflags a0
208207
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
209208
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
209+
; CHECK-NEXT: fsflags a0
210210
; CHECK-NEXT: ret
211211
%r = call <16 x float> @llvm.experimental.constrained.nearbyint.v16f32(<16 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
212212
ret <16 x float> %r
@@ -227,9 +227,9 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %v) strictfp {
227227
; CHECK-NEXT: frflags a0
228228
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
229229
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
230-
; CHECK-NEXT: fsflags a0
231230
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
232231
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
232+
; CHECK-NEXT: fsflags a0
233233
; CHECK-NEXT: ret
234234
%r = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
235235
ret <2 x double> %r
@@ -250,9 +250,9 @@ define <4 x double> @nearbyint_v4f64(<4 x double> %v) strictfp {
250250
; CHECK-NEXT: frflags a0
251251
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
252252
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
253-
; CHECK-NEXT: fsflags a0
254253
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
255254
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
255+
; CHECK-NEXT: fsflags a0
256256
; CHECK-NEXT: ret
257257
%r = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
258258
ret <4 x double> %r
@@ -273,9 +273,9 @@ define <8 x double> @nearbyint_v8f64(<8 x double> %v) strictfp {
273273
; CHECK-NEXT: frflags a0
274274
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
275275
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
276-
; CHECK-NEXT: fsflags a0
277276
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
278277
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
278+
; CHECK-NEXT: fsflags a0
279279
; CHECK-NEXT: ret
280280
%r = call <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
281281
ret <8 x double> %r

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -359,13 +359,13 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
359359
; RV32-NEXT: feq.d a0, fa3, fa3
360360
; RV32-NEXT: fmax.d fa3, fa3, fa5
361361
; RV32-NEXT: fmin.d fa3, fa3, fa4
362-
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
363-
; RV32-NEXT: fld fa2, 40(sp)
364362
; RV32-NEXT: fcvt.w.d a2, fa3, rtz
363+
; RV32-NEXT: fld fa3, 40(sp)
365364
; RV32-NEXT: neg a0, a0
366365
; RV32-NEXT: and a0, a0, a2
367-
; RV32-NEXT: feq.d a2, fa2, fa2
368-
; RV32-NEXT: fmax.d fa3, fa2, fa5
366+
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
367+
; RV32-NEXT: feq.d a2, fa3, fa3
368+
; RV32-NEXT: fmax.d fa3, fa3, fa5
369369
; RV32-NEXT: fmin.d fa3, fa3, fa4
370370
; RV32-NEXT: fcvt.w.d a3, fa3, rtz
371371
; RV32-NEXT: fld fa3, 32(sp)
@@ -460,13 +460,13 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
460460
; RV64-NEXT: feq.d a0, fa3, fa3
461461
; RV64-NEXT: fmax.d fa3, fa3, fa5
462462
; RV64-NEXT: fmin.d fa3, fa3, fa4
463-
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
464-
; RV64-NEXT: fld fa2, 40(sp)
465463
; RV64-NEXT: fcvt.l.d a2, fa3, rtz
464+
; RV64-NEXT: fld fa3, 40(sp)
466465
; RV64-NEXT: neg a0, a0
467466
; RV64-NEXT: and a0, a0, a2
468-
; RV64-NEXT: feq.d a2, fa2, fa2
469-
; RV64-NEXT: fmax.d fa3, fa2, fa5
467+
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
468+
; RV64-NEXT: feq.d a2, fa3, fa3
469+
; RV64-NEXT: fmax.d fa3, fa3, fa5
470470
; RV64-NEXT: fmin.d fa3, fa3, fa4
471471
; RV64-NEXT: fcvt.l.d a3, fa3, rtz
472472
; RV64-NEXT: fld fa3, 32(sp)
@@ -557,7 +557,6 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
557557
; RV32-NEXT: vslidedown.vi v8, v8, 3
558558
; RV32-NEXT: vfmv.f.s fa4, v8
559559
; RV32-NEXT: fmax.d fa4, fa4, fa3
560-
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
561560
; RV32-NEXT: fld fa2, 40(sp)
562561
; RV32-NEXT: fmin.d fa4, fa4, fa5
563562
; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
@@ -566,9 +565,10 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
566565
; RV32-NEXT: fmin.d fa2, fa2, fa5
567566
; RV32-NEXT: fcvt.wu.d a2, fa2, rtz
568567
; RV32-NEXT: fmax.d fa4, fa4, fa3
569-
; RV32-NEXT: fld fa2, 48(sp)
570568
; RV32-NEXT: fmin.d fa4, fa4, fa5
569+
; RV32-NEXT: fld fa2, 48(sp)
571570
; RV32-NEXT: fcvt.wu.d a3, fa4, rtz
571+
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
572572
; RV32-NEXT: vslide1down.vx v8, v10, a0
573573
; RV32-NEXT: fmax.d fa4, fa2, fa3
574574
; RV32-NEXT: fmin.d fa4, fa4, fa5
@@ -633,7 +633,6 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
633633
; RV64-NEXT: vslidedown.vi v8, v8, 3
634634
; RV64-NEXT: vfmv.f.s fa4, v8
635635
; RV64-NEXT: fmax.d fa4, fa4, fa3
636-
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
637636
; RV64-NEXT: fld fa2, 40(sp)
638637
; RV64-NEXT: fmin.d fa4, fa4, fa5
639638
; RV64-NEXT: fcvt.lu.d a0, fa4, rtz
@@ -642,9 +641,10 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
642641
; RV64-NEXT: fmin.d fa2, fa2, fa5
643642
; RV64-NEXT: fcvt.lu.d a2, fa2, rtz
644643
; RV64-NEXT: fmax.d fa4, fa4, fa3
645-
; RV64-NEXT: fld fa2, 48(sp)
646644
; RV64-NEXT: fmin.d fa4, fa4, fa5
645+
; RV64-NEXT: fld fa2, 48(sp)
647646
; RV64-NEXT: fcvt.lu.d a3, fa4, rtz
647+
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
648648
; RV64-NEXT: vslide1down.vx v8, v10, a0
649649
; RV64-NEXT: fmax.d fa4, fa2, fa3
650650
; RV64-NEXT: fmin.d fa4, fa4, fa5

0 commit comments

Comments
 (0)