[RISCV] Remove hasSideEffects=1 for vsetvli pseudos #91319
Conversation
@llvm/pr-subscribers-backend-risc-v

Author: Luke Lau (lukel97)

Changes

In a similar vein to #90049, it looks like we currently model all of the effects of a vsetvli pseudo:

* VL and VTYPE are marked as defs.
* VL-preserving x0,x0 vsetvlis don't get emitted until RISCVInsertVSETVLI, and when they are they have implicit-defs on VL.
* Regular vector pseudos are fully modelled too: before RISCVInsertVSETVLI they can be moved between vsetvli pseudos because we will eventually insert vsetvlis to correct VL and VTYPE. Afterwards, they will have implicit-defs on VL and VTYPE.

I may be missing something, but otherwise it seems ok to remove hasSideEffects=1. This gives us some improvements like sinking in vsetvli-insert-crossbb.ll. We need to update RISCVDeadRegisterDefinitions to keep handling vsetvli pseudos since it only operates on instructions with unmodelled side effects.

Patch is 222.35 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91319.diff

38 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
index 5e6b7891449fe..7de48d8218f06 100644
--- a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
+++ b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
@@ -72,7 +72,9 @@ bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
// are reserved for HINT instructions.
const MCInstrDesc &Desc = MI.getDesc();
if (!Desc.mayLoad() && !Desc.mayStore() &&
- !Desc.hasUnmodeledSideEffects())
+ !Desc.hasUnmodeledSideEffects() &&
+ MI.getOpcode() != RISCV::PseudoVSETVLI &&
+ MI.getOpcode() != RISCV::PseudoVSETIVLI)
continue;
// For PseudoVSETVLIX0, Rd = X0 has special meaning.
if (MI.getOpcode() == RISCV::PseudoVSETVLIX0)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 4adc26f628914..317a6d7d4c52f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -6181,7 +6181,7 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 0,
//===----------------------------------------------------------------------===//
// Pseudos.
-let hasSideEffects = 1, mayLoad = 0, mayStore = 0, Defs = [VL, VTYPE] in {
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Defs = [VL, VTYPE] in {
// Due to rs1=X0 having special meaning, we need a GPRNoX0 register class for
// the when we aren't using one of the special X0 encodings. Otherwise it could
// be accidentally be made X0 by MachineIR optimizations. To satisfy the
diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
index 187f758b78020..0a7fa38b0c8ab 100644
--- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
@@ -236,11 +236,12 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_param_nxv32i32_nxv32i32_nxv32i32
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -248,29 +249,40 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_param_nxv32i32_nxv32i32_nxv32i32
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a3, a2, a1
-; CHECK-NEXT: vl8re32.v v8, (a3)
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: add a1, a0, a1
-; CHECK-NEXT: vl8re32.v v0, (a0)
; CHECK-NEXT: vl8re32.v v8, (a1)
-; CHECK-NEXT: vl8re32.v v16, (a2)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8re32.v v0, (a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT: vl8re32.v v8, (a3)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8re32.v v16, (a2)
; CHECK-NEXT: vadd.vv v0, v24, v0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vadd.vv v8, v24, v8
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vadd.vv v24, v24, v8
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vadd.vv v8, v8, v24
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vadd.vv v8, v24, v8
; CHECK-NEXT: vadd.vv v24, v0, v16
; CHECK-NEXT: vadd.vx v16, v8, a4
; CHECK-NEXT: vadd.vx v8, v24, a4
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
index 647d3158b6167..fa62143546df6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
@@ -39,11 +39,11 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
; RV32-NEXT: vs8r.v v8, (a0)
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: vs8r.v v16, (a0)
+; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.i v8, 0
; RV32-NEXT: addi a0, sp, 128
+; RV32-NEXT: vs8r.v v16, (a1)
; RV32-NEXT: vmv.v.i v16, 0
; RV32-NEXT: call callee_scalable_vector_split_indirect
; RV32-NEXT: addi sp, s0, -144
@@ -70,11 +70,11 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
; RV64-NEXT: vs8r.v v8, (a0)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vs8r.v v16, (a0)
+; RV64-NEXT: add a1, a0, a1
; RV64-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; RV64-NEXT: vmv.v.i v8, 0
; RV64-NEXT: addi a0, sp, 128
+; RV64-NEXT: vs8r.v v16, (a1)
; RV64-NEXT: vmv.v.i v16, 0
; RV64-NEXT: call callee_scalable_vector_split_indirect
; RV64-NEXT: addi sp, s0, -144
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll
index 1b50214bbf164..9e9a8b8a4b644 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll
@@ -19,9 +19,9 @@ define <2 x half> @nearbyint_v2f16(<2 x half> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <2 x half> @llvm.experimental.constrained.nearbyint.v2f16(<2 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <2 x half> %r
@@ -42,9 +42,9 @@ define <4 x half> @nearbyint_v4f16(<4 x half> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <4 x half> @llvm.experimental.constrained.nearbyint.v4f16(<4 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <4 x half> %r
@@ -65,9 +65,9 @@ define <8 x half> @nearbyint_v8f16(<8 x half> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <8 x half> @llvm.experimental.constrained.nearbyint.v8f16(<8 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <8 x half> %r
@@ -88,9 +88,9 @@ define <16 x half> @nearbyint_v16f16(<16 x half> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <16 x half> @llvm.experimental.constrained.nearbyint.v16f16(<16 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <16 x half> %r
@@ -112,9 +112,9 @@ define <32 x half> @nearbyint_v32f16(<32 x half> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <32 x half> @llvm.experimental.constrained.nearbyint.v32f16(<32 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <32 x half> %r
@@ -135,9 +135,9 @@ define <2 x float> @nearbyint_v2f32(<2 x float> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <2 x float> @llvm.experimental.constrained.nearbyint.v2f32(<2 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <2 x float> %r
@@ -158,9 +158,9 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <4 x float> %r
@@ -181,9 +181,9 @@ define <8 x float> @nearbyint_v8f32(<8 x float> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <8 x float> @llvm.experimental.constrained.nearbyint.v8f32(<8 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <8 x float> %r
@@ -204,9 +204,9 @@ define <16 x float> @nearbyint_v16f32(<16 x float> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <16 x float> @llvm.experimental.constrained.nearbyint.v16f32(<16 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <16 x float> %r
@@ -227,9 +227,9 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <2 x double> %r
@@ -250,9 +250,9 @@ define <4 x double> @nearbyint_v4f64(<4 x double> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <4 x double> %r
@@ -273,9 +273,9 @@ define <8 x double> @nearbyint_v8f64(<8 x double> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <8 x double> %r
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
index a8e4af2d7368e..6320b07125bb0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
@@ -359,13 +359,13 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: feq.d a0, fa3, fa3
; RV32-NEXT: fmax.d fa3, fa3, fa5
; RV32-NEXT: fmin.d fa3, fa3, fa4
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; RV32-NEXT: fld fa2, 40(sp)
; RV32-NEXT: fcvt.w.d a2, fa3, rtz
+; RV32-NEXT: fld fa3, 40(sp)
; RV32-NEXT: neg a0, a0
; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: feq.d a2, fa2, fa2
-; RV32-NEXT: fmax.d fa3, fa2, fa5
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; RV32-NEXT: feq.d a2, fa3, fa3
+; RV32-NEXT: fmax.d fa3, fa3, fa5
; RV32-NEXT: fmin.d fa3, fa3, fa4
; RV32-NEXT: fcvt.w.d a3, fa3, rtz
; RV32-NEXT: fld fa3, 32(sp)
@@ -460,13 +460,13 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: feq.d a0, fa3, fa3
; RV64-NEXT: fmax.d fa3, fa3, fa5
; RV64-NEXT: fmin.d fa3, fa3, fa4
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; RV64-NEXT: fld fa2, 40(sp)
; RV64-NEXT: fcvt.l.d a2, fa3, rtz
+; RV64-NEXT: fld fa3, 40(sp)
; RV64-NEXT: neg a0, a0
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: feq.d a2, fa2, fa2
-; RV64-NEXT: fmax.d fa3, fa2, fa5
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; RV64-NEXT: feq.d a2, fa3, fa3
+; RV64-NEXT: fmax.d fa3, fa3, fa5
; RV64-NEXT: fmin.d fa3, fa3, fa4
; RV64-NEXT: fcvt.l.d a3, fa3, rtz
; RV64-NEXT: fld fa3, 32(sp)
@@ -557,7 +557,6 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: vslidedown.vi v8, v8, 3
; RV32-NEXT: vfmv.f.s fa4, v8
; RV32-NEXT: fmax.d fa4, fa4, fa3
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT: fld fa2, 40(sp)
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
@@ -566,9 +565,10 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fmin.d fa2, fa2, fa5
; RV32-NEXT: fcvt.wu.d a2, fa2, rtz
; RV32-NEXT: fmax.d fa4, fa4, fa3
-; RV32-NEXT: fld fa2, 48(sp)
; RV32-NEXT: fmin.d fa4, fa4, fa5
+; RV32-NEXT: fld fa2, 48(sp)
; RV32-NEXT: fcvt.wu.d a3, fa4, rtz
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT: vslide1down.vx v8, v10, a0
; RV32-NEXT: fmax.d fa4, fa2, fa3
; RV32-NEXT: fmin.d fa4, fa4, fa5
@@ -633,7 +633,6 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: vslidedown.vi v8, v8, 3
; RV64-NEXT: vfmv.f.s fa4, v8
; RV64-NEXT: fmax.d fa4, fa4, fa3
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV64-NEXT: fld fa2, 40(sp)
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa4, rtz
@@ -642,9 +641,10 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: fmin.d fa2, fa2, fa5
; RV64-NEXT: fcvt.lu.d a2, fa2, rtz
; RV64-NEXT: fmax.d fa4, fa4, fa3
-; RV64-NEXT: fld fa2, 48(sp)
; RV64-NEXT: fmin.d fa4, fa4, fa5
+; RV64-NEXT: fld fa2, 48(sp)
; RV64-NEXT: fcvt.lu.d a3, fa4, rtz
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV64-NEXT: vslide1down.vx v8, v10, a0
; RV64-NEXT: fmax.d fa4, fa2, fa3
; RV64-NEXT: fmin.d fa4, fa4, fa5
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
index 6ffa6ac250ed7..9c76b83d0974a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
@@ -132,12 +132,12 @@ define <3 x float> @si2fp_v3i1_v3f32(<3 x i1> %x) {
define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
; ZVFH32-LABEL: si2fp_v3i7_v3f32:
; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: lw a1, 4(a0)
-; ZVFH32-NEXT: lw a2, 0(a0)
-; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFH32-NEXT: lw a1, 0(a0)
+; ZVFH32-NEXT: lw a2, 4(a0)
; ZVFH32-NEXT: lw a0, 8(a0)
-; ZVFH32-NEXT: vmv.v.x v8, a2
-; ZVFH32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFH32-NEXT: vmv.v.x v8, a1
+; ZVFH32-NEXT: vslide1down.vx v8, v8, a2
; ZVFH32-NEXT: vslide1down.vx v8, v8, a0
; ZVFH32-NEXT: vslidedown.vi v8, v8, 1
; ZVFH32-NEXT: vadd.vv v8, v8, v8
@@ -149,12 +149,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
;
; ZVFH64-LABEL: si2fp_v3i7_v3f32:
; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: ld a1, 8(a0)
-; ZVFH64-NEXT: ld a2, 0(a0)
-; ZVFH64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFH64-NEXT: ld a1, 0(a0)
+; ZVFH64-NEXT: ld a2, 8(a0)
; ZVFH64-NEXT: ld a0, 16(a0)
-; ZVFH64-NEXT: vmv.v.x v8, a2
-; ZVFH64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFH64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFH64-NEXT: vmv.v.x v8, a1
+; ZVFH64-NEXT: vslide1down.vx v8, v8, a2
; ZVFH64-NEXT: vslide1down.vx v8, v8, a0
; ZVFH64-NEXT: vslidedown.vi v8, v8, 1
; ZVFH64-NEXT: vadd.vv v8, v8, v8
@@ -166,12 +166,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
;
; ZVFHMIN32-LABEL: si2fp_v3i7_v3f32:
; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: lw a1, 4(a0)
-; ZVFHMIN32-NEXT: lw a2, 0(a0)
-; ZVFHMIN32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFHMIN32-NEXT: lw a1, 0(a0)
+; ZVFHMIN32-NEXT: lw a2, 4(a0)
; ZVFHMIN32-NEXT: lw a0, 8(a0)
-; ZVFHMIN32-NEXT: vmv.v.x v8, a2
-; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFHMIN32-NEXT: vmv.v.x v8, a1
+; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a2
; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a0
; ZVFHMIN32-NEXT: vslidedown.vi v8, v8, 1
; ZVFHMIN32-NEXT: vadd.vv v8, v8, v8
@@ -183,12 +183,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
;
; ZVFHMIN64-LABEL: si2fp_v3i7_v3f32:
; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: ld a1, 8(a0)
-; ZVFHMIN64-NEXT: ld a2, 0(a0)
-; ZVFHMIN64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFHMIN64-NEXT: ld a1, 0(a0)
+; ZVFHMIN64-NEXT: ld a2, 8(a0)
; ZVFHMIN64-NEXT: ld a0, 16(a0)
-; ZVFHMIN64-NEXT: vmv.v.x v8, a2
-; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFHMIN64-NEXT: vmv.v.x v8, a1
+; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a2
; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a0
; ZVFHMIN64-NEXT: vslidedown.vi v8, v8, 1
; ZVFHMIN64-NEXT: vadd.vv v8, v8, v8
@@ -205,12 +205,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) {
; ZVFH32-LABEL: ui2fp_v3i7_v3f32:
; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: lw a1, 4(a0)
-; ZVFH32-NEXT: lw a2, 0(a0)
-; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFH32-NEXT: lw a1, 0(a0)
+; ZVFH32-NEXT: lw a2, 4(a0)
; ZVFH32-NEXT: lw a0, 8(a0)
-; ZVFH32-NEXT: vmv.v.x v8, a2
-; ZVFH32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFH32-NEXT: vmv.v.x v8, a1
+; ZVFH32-NEXT: vslide1down.vx v8, v8, a2
; ZVFH32-NEXT: vslide1down.vx v8, v8, a0
; ZVFH32-NEXT: vslidedown.vi v8, v8, 1
; ZVFH32-NEXT: li a0, 127
@@ -222,12 +222,12 @@ define <3 x floa...
[truncated]
It was relying on the fact that vsetvlis have side effects to prevent reordering, but #91319 proposes to remove the side effects. This reworks it to use volatile loads and stores instead.
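As a rough illustration (hypothetical IR, not the test itself): LLVM does not reorder volatile operations relative to other volatile operations, so marking the surrounding loads and stores volatile keeps their ordering fixed even once vsetvli pseudos stop carrying unmodelled side effects.

define void @keep_order(ptr %src, ptr %dst) {
  ; the two volatile accesses keep their relative order regardless of how
  ; the vector instructions (and their vsetvlis) are scheduled around them
  %v = load volatile <vscale x 2 x i32>, ptr %src
  store volatile <vscale x 2 x i32> %v, ptr %dst
  ret void
}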
Ping
The middle end will remove the inner vsetvli otherwise, and it's more typical to set the AVL to the remaining VL. This also prevents the test from showing up as a regression in #91319.
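For context, a hypothetical sketch of the pattern described above (this is not the actual test, and it assumes the @llvm.riscv.vsetvli intrinsic with encoded SEW/LMUL operands): the inner vsetvli uses the remaining VL as its AVL, so the middle end cannot prove it redundant and delete it.

declare i64 @llvm.riscv.vsetvli.i64(i64, i64, i64)

define i64 @two_vsetvlis(i64 %avl) {
  ; outer vsetvli: e32, m1 (SEW encoding 2, LMUL encoding 0 -- an assumption
  ; of this sketch)
  %vl1 = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 2, i64 0)
  ; feed the leftover element count into the inner vsetvli as its AVL so it
  ; is not folded away as redundant
  %rem = sub i64 %avl, %vl1
  %vl2 = call i64 @llvm.riscv.vsetvli.i64(i64 %rem, i64 2, i64 0)
  ret i64 %vl2
}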
LGTM
In a similar vein to #90049, we currently model all of the effects of a vsetvli pseudo:

* VL and VTYPE are marked as defs.
* VL-preserving x0,x0 vsetvlis don't get emitted until RISCVInsertVSETVLI, and when they are they have implicit-defs on VL.
* Regular vector pseudos are fully modelled too: before RISCVInsertVSETVLI they can be moved between vsetvli pseudos because we will eventually insert vsetvlis to correct VL and VTYPE. Afterwards, they will have implicit-defs on VL and VTYPE.

Since we model everything, we can remove hasSideEffects=1. This gives us some improvements like sinking in vsetvli-insert-crossbb.ll.

We need to update RISCVDeadRegisterDefinitions to keep handling vsetvli pseudos, since it only operates on instructions with unmodelled side effects.
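For readers unfamiliar with the pseudos involved, a minimal hypothetical example (not taken from the test suite): compiling this with the V extension enabled makes the backend insert a vsetvli before the vector add, and that inserted pseudo is exactly what this patch stops marking with hasSideEffects=1.

define <vscale x 4 x i32> @vadd(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  ; RISCVInsertVSETVLI places a vsetvli here to set SEW/LMUL for the vadd.vv
  %c = add <vscale x 4 x i32> %a, %b
  ret <vscale x 4 x i32> %c
}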