[RISCV] Mark V0 regclasses as larger superclasses of non-V0 classes #70109
This allows the register coalescer to inflate virtual registers from e.g. vrnov0 -> vr after deleting copies, which in turn avoids some spills.
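For reference, a minimal sketch of the RISCVRegisterInfo::getLargestLegalSuperClass hook as it reads with this patch applied, reconstructed from the diff below (only the NoV0 cases are new). The register coalescer consults this hook, via MachineRegisterInfo::recomputeRegClass, when deciding whether a virtual register's class can be widened after copies are deleted.

// Sketch reconstructed from the RISCVRegisterInfo.cpp hunk in this PR.
// Each masked ("NoV0") vector register class reports its unrestricted
// counterpart as its largest legal superclass, so a virtual register
// constrained to vrnov0/vrm2nov0/vrm4nov0/vrm8nov0 can be inflated back
// to vr/vrm2/vrm4/vrm8 once the v0-exclusion constraint is gone.
const TargetRegisterClass *
RISCVRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                             const MachineFunction &) const {
  if (RC == &RISCV::VMV0RegClass)
    return &RISCV::VRRegClass;
  if (RC == &RISCV::VRNoV0RegClass)
    return &RISCV::VRRegClass;
  if (RC == &RISCV::VRM2NoV0RegClass)
    return &RISCV::VRM2RegClass;
  if (RC == &RISCV::VRM4NoV0RegClass)
    return &RISCV::VRM4RegClass;
  if (RC == &RISCV::VRM8NoV0RegClass)
    return &RISCV::VRM8RegClass;
  return RC;
}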
@llvm/pr-subscribers-backend-risc-v
Author: Luke Lau (lukel97)
Changes
This allows the register coalescer to inflate virtual registers from e.g. vrnov0 -> vr after deleting copies, which in turn avoids some spills.
Patch is 62.17 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70109.diff
16 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index fcfc5c7821ffe29..cd0bcb8e6cfc495 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -663,6 +663,14 @@ RISCVRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
const MachineFunction &) const {
if (RC == &RISCV::VMV0RegClass)
return &RISCV::VRRegClass;
+ if (RC == &RISCV::VRNoV0RegClass)
+ return &RISCV::VRRegClass;
+ if (RC == &RISCV::VRM2NoV0RegClass)
+ return &RISCV::VRM2RegClass;
+ if (RC == &RISCV::VRM4NoV0RegClass)
+ return &RISCV::VRM4RegClass;
+ if (RC == &RISCV::VRM8NoV0RegClass)
+ return &RISCV::VRM8RegClass;
return RC;
}
diff --git a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
index c15321057aeb86b..84ff1bf646280ef 100644
--- a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
+++ b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
@@ -20,17 +20,13 @@ define void @last_chance_recoloring_failure() {
; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: .cfi_offset s0, -16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 24 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 16 * vlenb
; CHECK-NEXT: li a0, 55
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vloxseg2ei32.v v16, (a0), v8
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
@@ -42,47 +38,39 @@ define void @last_chance_recoloring_failure() {
; CHECK-NEXT: vsetvli zero, s0, e16, m4, ta, ma
; CHECK-NEXT: vfwadd.vv v16, v8, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: call func@plt
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vrgather.vv v4, v8, v8, v0.t
+; CHECK-NEXT: vrgather.vv v16, v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, s0, e16, m4, ta, ma
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: add a1, a1, a2
-; CHECK-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vfwsub.wv v8, v16, v24
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwsub.wv v8, v0, v20
; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT: vssubu.vv v4, v4, v8, v0.t
+; CHECK-NEXT: vssubu.vv v16, v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, s0, e32, m8, tu, mu
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfdiv.vv v8, v16, v8, v0.t
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
index 7698f860589aaf1..f41a2a06c69bf44 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
@@ -2348,16 +2348,14 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v24, v24, a3, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: addi a4, sp, 16
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: slli a4, a4, 4
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 24, v0.t
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: slli a5, a5, 4
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
@@ -2369,15 +2367,13 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
-; RV32-NEXT: addi a5, sp, 16
-; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
-; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
@@ -2712,16 +2708,14 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v24, v24, a3, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: addi a4, sp, 16
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: slli a4, a4, 4
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 24, v0.t
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: slli a5, a5, 4
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
@@ -2733,15 +2727,13 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
-; RV32-NEXT: addi a5, sp, 16
-; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
-; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
index 6aac13a0bcbb873..9952e49116bfcc1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
@@ -1068,16 +1068,14 @@ define <vscale x 7 x i64> @vp_bswap_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v24, v24, a3, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: addi a4, sp, 16
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: slli a4, a4, 4
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 24, v0.t
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: slli a5, a5, 4
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
@@ -1089,15 +1087,13 @@ define <vscale x 7 x i64> @vp_bswap_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
@@ -1317,16 +1313,14 @@ define <vscale x 8 x i64> @vp_bswap_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v24, v24, a3, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: addi a4, sp, 16
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: slli a4, a4, 4
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 24, v0.t
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: slli a5, a5, 4
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
@@ -1338,15 +1332,13 @@ define <vscale x 8 x i64> @vp_bswap_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
index 9767ba4bbc3b7a6..d8689ecf05a5e3b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
@@ -2070,7 +2070,8 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
; RV32-NEXT: vmv1r.v v24, v0
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: li a2, 40
+; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -2089,35 +2090,30 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
; RV32-NEXT: addi a3, a3, -1
; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 40
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
-; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT: lui a3, 349525
; RV32-NEXT: addi a3, a3, 1365
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.x v16, a3
+; RV32-NEXT: vmv.v.x v8, a3
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 24
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vand.vv v8, v16, v8, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 40
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
-; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 5
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsub.vv v8, v16, v8, v0.t
; RV32-NEXT: csrr a3, vlenb
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
index 1dda8aa458d9069..dbd2a1fc6eac31f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
@@ -2439,13 +2439,13 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64(<vscale x 16 x i64> %va, <vscale x
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 40
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 24
; RV32-NEXT: mul a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
index 22f92fe48e22e71..91bf3e981e0a6c4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
@@ -1710,16 +1710,14 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v24, v24, a3, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: addi a4, sp, 48
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: slli a4, a4, 4
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 48
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 24, v0.t
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: slli a5, a5, 4
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 48
-; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
; RV32-NEXT: addi a5, sp, 16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
@@ -1731,15 +1729,13 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 48
; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
-; RV32-NEXT: addi a5, sp, 48
-; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
-; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
@@ -2080,16 +2076,14 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v24, v24, a3, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: addi a4, sp, 48
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: slli a4, a4, 4
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 48
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 24, v0.t
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: slli a5, a5, 4
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 48
-; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
; RV32-NEXT: addi a5, sp, 16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
@@ -2101,15 +2095,13 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 48
; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
-; RV32-NEXT: addi a5, sp, 48
-; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
-; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
index 22061040ddbc1c2..6308f73e219da10 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
@@ -796,16 +796,14 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v24, v24, a3, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: addi a4, sp, 16
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: slli a4, a4, 4
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 24, v0.t
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: slli a5, a5, 4
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
@@ -817,15 +815,13 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV3...
[truncated]
LGTM