Skip to content

Commit 77fc8da

Browse files
authored
[RISCV] Rematerialize vmv.v.x (#107993)
Even though vmv.v.x has a non-constant scalar operand, we can still rematerialize it because we have split register allocation between vectors and scalars. InlineSpiller will check to make sure that the scalar operand is live at the point where the rematerialization occurs, so this won't extend any scalar live ranges. However, this also means we may not be able to rematerialize in some cases, as shown in @vmv.v.x_needs_extended. It might be worthwhile to teach InlineSpiller to extend scalar live ranges in a future patch. I experimented with this locally and it reduced spills on 531.deepsjeng_r by a further 3%.
1 parent 68f31aa commit 77fc8da

File tree

5 files changed

+300
-196
lines changed

5 files changed

+300
-196
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
169169
bool RISCVInstrInfo::isReallyTriviallyReMaterializable(
170170
const MachineInstr &MI) const {
171171
switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
172+
case RISCV::VMV_V_X:
172173
case RISCV::VMV_V_I:
173174
case RISCV::VID_V:
174175
if (MI.getOperand(1).isUndef() &&

llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2475,6 +2475,7 @@ multiclass VPseudoUnaryVMV_V_X_I {
24752475
def "_V_" # mx : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
24762476
SchedUnary<"WriteVIMovV", "ReadVIMovV", mx,
24772477
forcePassthruRead=true>;
2478+
let isReMaterializable = 1 in
24782479
def "_X_" # mx : VPseudoUnaryNoMask<m.vrclass, GPR>,
24792480
SchedUnary<"WriteVIMovX", "ReadVIMovX", mx,
24802481
forcePassthruRead=true>;

llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll

Lines changed: 66 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -2022,14 +2022,9 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
20222022
; RV32-NEXT: mul a1, a1, a2
20232023
; RV32-NEXT: sub sp, sp, a1
20242024
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
2025-
; RV32-NEXT: vmv1r.v v24, v0
2025+
; RV32-NEXT: vmv1r.v v7, v0
20262026
; RV32-NEXT: csrr a1, vlenb
2027-
; RV32-NEXT: slli a1, a1, 5
2028-
; RV32-NEXT: add a1, sp, a1
2029-
; RV32-NEXT: addi a1, a1, 16
2030-
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
2031-
; RV32-NEXT: csrr a1, vlenb
2032-
; RV32-NEXT: li a2, 48
2027+
; RV32-NEXT: li a2, 40
20332028
; RV32-NEXT: mul a1, a1, a2
20342029
; RV32-NEXT: add a1, sp, a1
20352030
; RV32-NEXT: addi a1, a1, 16
@@ -2045,101 +2040,88 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
20452040
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
20462041
; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t
20472042
; RV32-NEXT: csrr a3, vlenb
2048-
; RV32-NEXT: li a4, 40
2043+
; RV32-NEXT: li a4, 48
20492044
; RV32-NEXT: mul a3, a3, a4
20502045
; RV32-NEXT: add a3, sp, a3
20512046
; RV32-NEXT: addi a3, a3, 16
20522047
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
20532048
; RV32-NEXT: lui a3, 349525
20542049
; RV32-NEXT: addi a3, a3, 1365
20552050
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
2056-
; RV32-NEXT: vmv.v.x v16, a3
2057-
; RV32-NEXT: csrr a3, vlenb
2058-
; RV32-NEXT: li a4, 24
2059-
; RV32-NEXT: mul a3, a3, a4
2060-
; RV32-NEXT: add a3, sp, a3
2061-
; RV32-NEXT: addi a3, a3, 16
2062-
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
2051+
; RV32-NEXT: vmv.v.x v8, a3
20632052
; RV32-NEXT: csrr a3, vlenb
2064-
; RV32-NEXT: li a4, 40
2065-
; RV32-NEXT: mul a3, a3, a4
2053+
; RV32-NEXT: slli a3, a3, 5
20662054
; RV32-NEXT: add a3, sp, a3
20672055
; RV32-NEXT: addi a3, a3, 16
2068-
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
2069-
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2070-
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
2056+
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
20712057
; RV32-NEXT: csrr a3, vlenb
20722058
; RV32-NEXT: slli a3, a3, 5
20732059
; RV32-NEXT: add a3, sp, a3
20742060
; RV32-NEXT: addi a3, a3, 16
2075-
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
2076-
; RV32-NEXT: vsub.vv v8, v16, v8, v0.t
2061+
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
20772062
; RV32-NEXT: csrr a3, vlenb
2078-
; RV32-NEXT: slli a3, a3, 5
2063+
; RV32-NEXT: li a4, 48
2064+
; RV32-NEXT: mul a3, a3, a4
20792065
; RV32-NEXT: add a3, sp, a3
20802066
; RV32-NEXT: addi a3, a3, 16
2081-
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
2067+
; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
2068+
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2069+
; RV32-NEXT: vand.vv v8, v24, v8, v0.t
2070+
; RV32-NEXT: vsub.vv v16, v16, v8, v0.t
20822071
; RV32-NEXT: lui a3, 209715
20832072
; RV32-NEXT: addi a3, a3, 819
20842073
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
2085-
; RV32-NEXT: vmv.v.x v16, a3
2086-
; RV32-NEXT: csrr a3, vlenb
2087-
; RV32-NEXT: slli a3, a3, 5
2088-
; RV32-NEXT: add a3, sp, a3
2089-
; RV32-NEXT: addi a3, a3, 16
2090-
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
2091-
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2092-
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
2074+
; RV32-NEXT: vmv.v.x v8, a3
20932075
; RV32-NEXT: csrr a3, vlenb
2094-
; RV32-NEXT: slli a3, a3, 4
2076+
; RV32-NEXT: li a4, 48
2077+
; RV32-NEXT: mul a3, a3, a4
20952078
; RV32-NEXT: add a3, sp, a3
20962079
; RV32-NEXT: addi a3, a3, 16
20972080
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
20982081
; RV32-NEXT: csrr a3, vlenb
2099-
; RV32-NEXT: slli a3, a3, 5
2082+
; RV32-NEXT: li a4, 48
2083+
; RV32-NEXT: mul a3, a3, a4
21002084
; RV32-NEXT: add a3, sp, a3
21012085
; RV32-NEXT: addi a3, a3, 16
21022086
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
2103-
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
2087+
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2088+
; RV32-NEXT: vand.vv v8, v16, v8, v0.t
2089+
; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t
21042090
; RV32-NEXT: csrr a3, vlenb
2105-
; RV32-NEXT: li a4, 40
2091+
; RV32-NEXT: li a4, 48
21062092
; RV32-NEXT: mul a3, a3, a4
21072093
; RV32-NEXT: add a3, sp, a3
21082094
; RV32-NEXT: addi a3, a3, 16
2109-
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
2110-
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
2111-
; RV32-NEXT: csrr a3, vlenb
2112-
; RV32-NEXT: slli a3, a3, 4
2113-
; RV32-NEXT: add a3, sp, a3
2114-
; RV32-NEXT: addi a3, a3, 16
2115-
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
2116-
; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
2095+
; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
2096+
; RV32-NEXT: vand.vv v16, v16, v24, v0.t
2097+
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
21172098
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
21182099
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
21192100
; RV32-NEXT: lui a3, 61681
21202101
; RV32-NEXT: addi a3, a3, -241
21212102
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
21222103
; RV32-NEXT: vmv.v.x v16, a3
21232104
; RV32-NEXT: csrr a3, vlenb
2124-
; RV32-NEXT: slli a3, a3, 5
2105+
; RV32-NEXT: li a4, 24
2106+
; RV32-NEXT: mul a3, a3, a4
21252107
; RV32-NEXT: add a3, sp, a3
21262108
; RV32-NEXT: addi a3, a3, 16
21272109
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
21282110
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2129-
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
2111+
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
21302112
; RV32-NEXT: lui a3, 4112
21312113
; RV32-NEXT: addi a3, a3, 257
21322114
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
2133-
; RV32-NEXT: vmv.v.x v8, a3
2115+
; RV32-NEXT: vmv.v.x v16, a3
21342116
; RV32-NEXT: csrr a3, vlenb
21352117
; RV32-NEXT: slli a3, a3, 4
21362118
; RV32-NEXT: add a3, sp, a3
21372119
; RV32-NEXT: addi a3, a3, 16
2138-
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
2120+
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
21392121
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2140-
; RV32-NEXT: vmul.vv v16, v16, v8, v0.t
2122+
; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
21412123
; RV32-NEXT: li a2, 56
2142-
; RV32-NEXT: vsrl.vx v8, v16, a2, v0.t
2124+
; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t
21432125
; RV32-NEXT: csrr a3, vlenb
21442126
; RV32-NEXT: slli a3, a3, 3
21452127
; RV32-NEXT: add a3, sp, a3
@@ -2149,8 +2131,8 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
21492131
; RV32-NEXT: # %bb.1:
21502132
; RV32-NEXT: mv a0, a1
21512133
; RV32-NEXT: .LBB46_2:
2152-
; RV32-NEXT: vmv1r.v v0, v24
2153-
; RV32-NEXT: li a3, 48
2134+
; RV32-NEXT: vmv1r.v v0, v7
2135+
; RV32-NEXT: li a3, 40
21542136
; RV32-NEXT: mul a1, a1, a3
21552137
; RV32-NEXT: add a1, sp, a1
21562138
; RV32-NEXT: addi a1, a1, 16
@@ -2160,71 +2142,64 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
21602142
; RV32-NEXT: addi a0, sp, 16
21612143
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
21622144
; RV32-NEXT: csrr a0, vlenb
2163-
; RV32-NEXT: li a1, 24
2164-
; RV32-NEXT: mul a0, a0, a1
2165-
; RV32-NEXT: add a0, sp, a0
2166-
; RV32-NEXT: addi a0, a0, 16
2167-
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
2168-
; RV32-NEXT: addi a0, sp, 16
2169-
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
2170-
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
2171-
; RV32-NEXT: csrr a0, vlenb
2172-
; RV32-NEXT: li a1, 48
2173-
; RV32-NEXT: mul a0, a0, a1
2145+
; RV32-NEXT: slli a0, a0, 5
21742146
; RV32-NEXT: add a0, sp, a0
21752147
; RV32-NEXT: addi a0, a0, 16
21762148
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
2177-
; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
2149+
; RV32-NEXT: addi a0, sp, 16
2150+
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
2151+
; RV32-NEXT: vand.vv v8, v16, v8, v0.t
21782152
; RV32-NEXT: csrr a0, vlenb
2179-
; RV32-NEXT: li a1, 48
2153+
; RV32-NEXT: li a1, 40
21802154
; RV32-NEXT: mul a0, a0, a1
21812155
; RV32-NEXT: add a0, sp, a0
21822156
; RV32-NEXT: addi a0, a0, 16
2183-
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
2157+
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
2158+
; RV32-NEXT: vsub.vv v8, v16, v8, v0.t
21842159
; RV32-NEXT: csrr a0, vlenb
21852160
; RV32-NEXT: li a1, 40
21862161
; RV32-NEXT: mul a0, a0, a1
21872162
; RV32-NEXT: add a0, sp, a0
21882163
; RV32-NEXT: addi a0, a0, 16
2189-
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
2164+
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
21902165
; RV32-NEXT: csrr a0, vlenb
21912166
; RV32-NEXT: li a1, 48
21922167
; RV32-NEXT: mul a0, a0, a1
21932168
; RV32-NEXT: add a0, sp, a0
21942169
; RV32-NEXT: addi a0, a0, 16
21952170
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
2196-
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
21972171
; RV32-NEXT: csrr a0, vlenb
2198-
; RV32-NEXT: li a1, 24
2172+
; RV32-NEXT: li a1, 40
21992173
; RV32-NEXT: mul a0, a0, a1
22002174
; RV32-NEXT: add a0, sp, a0
22012175
; RV32-NEXT: addi a0, a0, 16
2202-
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
2176+
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
2177+
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
22032178
; RV32-NEXT: csrr a0, vlenb
2204-
; RV32-NEXT: li a1, 48
2205-
; RV32-NEXT: mul a0, a0, a1
2179+
; RV32-NEXT: slli a0, a0, 5
22062180
; RV32-NEXT: add a0, sp, a0
22072181
; RV32-NEXT: addi a0, a0, 16
2208-
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
2209-
; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
2182+
; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
2183+
; RV32-NEXT: vmv8r.v v16, v8
22102184
; RV32-NEXT: csrr a0, vlenb
22112185
; RV32-NEXT: li a1, 40
22122186
; RV32-NEXT: mul a0, a0, a1
22132187
; RV32-NEXT: add a0, sp, a0
22142188
; RV32-NEXT: addi a0, a0, 16
22152189
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
2216-
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
2190+
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
2191+
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
22172192
; RV32-NEXT: csrr a0, vlenb
2218-
; RV32-NEXT: li a1, 24
2219-
; RV32-NEXT: mul a0, a0, a1
2193+
; RV32-NEXT: slli a0, a0, 5
22202194
; RV32-NEXT: add a0, sp, a0
22212195
; RV32-NEXT: addi a0, a0, 16
2222-
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
2223-
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
2196+
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
2197+
; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
22242198
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
22252199
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
22262200
; RV32-NEXT: csrr a0, vlenb
2227-
; RV32-NEXT: slli a0, a0, 5
2201+
; RV32-NEXT: li a1, 24
2202+
; RV32-NEXT: mul a0, a0, a1
22282203
; RV32-NEXT: add a0, sp, a0
22292204
; RV32-NEXT: addi a0, a0, 16
22302205
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
@@ -2386,23 +2361,23 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64_unmasked(<vscale x 16 x i64> %va,
23862361
; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill
23872362
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
23882363
; RV32-NEXT: vand.vv v24, v24, v0
2389-
; RV32-NEXT: vsub.vv v24, v16, v24
2364+
; RV32-NEXT: vsub.vv v16, v16, v24
23902365
; RV32-NEXT: lui a3, 209715
23912366
; RV32-NEXT: addi a3, a3, 819
23922367
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
23932368
; RV32-NEXT: vmv.v.x v0, a3
23942369
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2395-
; RV32-NEXT: vand.vv v16, v24, v0
2396-
; RV32-NEXT: vsrl.vi v24, v24, 2
2370+
; RV32-NEXT: vand.vv v24, v16, v0
2371+
; RV32-NEXT: vsrl.vi v16, v16, 2
23972372
; RV32-NEXT: csrr a3, vlenb
23982373
; RV32-NEXT: slli a3, a3, 4
23992374
; RV32-NEXT: add a3, sp, a3
24002375
; RV32-NEXT: addi a3, a3, 16
24012376
; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill
2402-
; RV32-NEXT: vand.vv v24, v24, v0
2403-
; RV32-NEXT: vadd.vv v24, v16, v24
2404-
; RV32-NEXT: vsrl.vi v16, v24, 4
2377+
; RV32-NEXT: vand.vv v16, v16, v0
24052378
; RV32-NEXT: vadd.vv v16, v24, v16
2379+
; RV32-NEXT: vsrl.vi v24, v16, 4
2380+
; RV32-NEXT: vadd.vv v16, v16, v24
24062381
; RV32-NEXT: lui a3, 61681
24072382
; RV32-NEXT: addi a3, a3, -241
24082383
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
@@ -2437,16 +2412,16 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64_unmasked(<vscale x 16 x i64> %va,
24372412
; RV32-NEXT: addi a0, a0, 16
24382413
; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
24392414
; RV32-NEXT: vand.vv v24, v24, v0
2440-
; RV32-NEXT: vsub.vv v24, v8, v24
2415+
; RV32-NEXT: vsub.vv v8, v8, v24
24412416
; RV32-NEXT: csrr a0, vlenb
24422417
; RV32-NEXT: slli a0, a0, 4
24432418
; RV32-NEXT: add a0, sp, a0
24442419
; RV32-NEXT: addi a0, a0, 16
24452420
; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
2446-
; RV32-NEXT: vand.vv v8, v24, v0
2447-
; RV32-NEXT: vsrl.vi v24, v24, 2
2448-
; RV32-NEXT: vand.vv v24, v24, v0
2449-
; RV32-NEXT: vadd.vv v8, v8, v24
2421+
; RV32-NEXT: vand.vv v24, v8, v0
2422+
; RV32-NEXT: vsrl.vi v8, v8, 2
2423+
; RV32-NEXT: vand.vv v8, v8, v0
2424+
; RV32-NEXT: vadd.vv v8, v24, v8
24502425
; RV32-NEXT: vsrl.vi v24, v8, 4
24512426
; RV32-NEXT: vadd.vv v8, v8, v24
24522427
; RV32-NEXT: csrr a0, vlenb

0 commit comments

Comments
 (0)