Skip to content

Commit f77d01d

Browse files
committed
[RISCV][VLOPT] Allow users that are passthrus if tail elements aren't demanded
1 parent 3f55135 commit f77d01d

File tree

4 files changed

+49
-46
lines changed

4 files changed

+49
-46
lines changed

llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1188,6 +1188,25 @@ RISCVVLOptimizer::getMinimumVLForUser(MachineOperand &UserOp) {
11881188
return std::nullopt;
11891189
}
11901190

1191+
unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
1192+
const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
1193+
// Looking for an immediate or a register VL that isn't X0.
1194+
assert((!VLOp.isReg() || VLOp.getReg() != RISCV::X0) &&
1195+
"Did not expect X0 VL");
1196+
1197+
// If the user is a passthru it will read the elements past VL, so
1198+
// abort if any of the elements past VL are demanded.
1199+
if (UserOp.isTied()) {
1200+
assert(UserOp.getOperandNo() == UserMI.getNumExplicitDefs() &&
1201+
RISCVII::isFirstDefTiedToFirstUse(UserMI.getDesc()));
1202+
auto DemandedVL = DemandedVLs[&UserMI];
1203+
if (!DemandedVL || !RISCV::isVLKnownLE(*DemandedVL, VLOp)) {
1204+
LLVM_DEBUG(dbgs() << " Abort because user is passthru in "
1205+
"instruction with demanded tail\n");
1206+
return std::nullopt;
1207+
}
1208+
}
1209+
11911210
// Instructions like reductions may use a vector register as a scalar
11921211
// register. In this case, we should treat it as only reading the first lane.
11931212
if (isVectorOpUsedAsScalarOp(UserOp)) {
@@ -1200,12 +1219,6 @@ RISCVVLOptimizer::getMinimumVLForUser(MachineOperand &UserOp) {
12001219
return MachineOperand::CreateImm(1);
12011220
}
12021221

1203-
unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
1204-
const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
1205-
// Looking for an immediate or a register VL that isn't X0.
1206-
assert((!VLOp.isReg() || VLOp.getReg() != RISCV::X0) &&
1207-
"Did not expect X0 VL");
1208-
12091222
// If we know the demanded VL of UserMI, then we can reduce the VL it
12101223
// requires.
12111224
if (auto DemandedVL = DemandedVLs[&UserMI]) {
@@ -1227,12 +1240,6 @@ std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
12271240
return std::nullopt;
12281241
}
12291242

1230-
// If used as a passthru, elements past VL will be read.
1231-
if (UserOp.isTied()) {
1232-
LLVM_DEBUG(dbgs() << " Abort because user used as tied operand\n");
1233-
return std::nullopt;
1234-
}
1235-
12361243
auto VLOp = getMinimumVLForUser(UserOp);
12371244
if (!VLOp)
12381245
return std::nullopt;

llvm/test/CodeGen/RISCV/rvv/vl-opt.ll

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -195,14 +195,22 @@ define <vscale x 4 x i32> @dont_optimize_tied_def(<vscale x 4 x i32> %a, <vscale
195195
}
196196

197197
define void @optimize_ternary_use(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, ptr %p, iXLen %vl) {
198-
; CHECK-LABEL: optimize_ternary_use:
199-
; CHECK: # %bb.0:
200-
; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
201-
; CHECK-NEXT: vzext.vf2 v14, v8
202-
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
203-
; CHECK-NEXT: vmadd.vv v14, v10, v12
204-
; CHECK-NEXT: vse32.v v14, (a0)
205-
; CHECK-NEXT: ret
198+
; NOVLOPT-LABEL: optimize_ternary_use:
199+
; NOVLOPT: # %bb.0:
200+
; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
201+
; NOVLOPT-NEXT: vzext.vf2 v14, v8
202+
; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
203+
; NOVLOPT-NEXT: vmadd.vv v14, v10, v12
204+
; NOVLOPT-NEXT: vse32.v v14, (a0)
205+
; NOVLOPT-NEXT: ret
206+
;
207+
; VLOPT-LABEL: optimize_ternary_use:
208+
; VLOPT: # %bb.0:
209+
; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
210+
; VLOPT-NEXT: vzext.vf2 v14, v8
211+
; VLOPT-NEXT: vmadd.vv v14, v10, v12
212+
; VLOPT-NEXT: vse32.v v14, (a0)
213+
; VLOPT-NEXT: ret
206214
%1 = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
207215
%2 = mul <vscale x 4 x i32> %b, %1
208216
%3 = add <vscale x 4 x i32> %2, %c

llvm/test/CodeGen/RISCV/rvv/vl-opt.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ name: passthru_not_demanded
216216
body: |
217217
bb.0:
218218
; CHECK-LABEL: name: passthru_not_demanded
219-
; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
219+
; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
220220
; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
221221
; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
222222
%x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
@@ -242,7 +242,7 @@ name: passthru_not_demanded_passthru_chain
242242
body: |
243243
bb.0:
244244
; CHECK-LABEL: name: passthru_not_demanded_passthru_chain
245-
; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
245+
; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
246246
; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 %x, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
247247
; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 %y, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
248248
; CHECK-NEXT: %a:vr = PseudoVADD_VV_M1 %z, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */

llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1638,9 +1638,8 @@ define <vscale x 1 x i64> @vmadd_vx_nxv1i64(<vscale x 1 x i64> %a, i64 %b, <vsca
16381638
; RV32-NEXT: sw a0, 8(sp)
16391639
; RV32-NEXT: sw a1, 12(sp)
16401640
; RV32-NEXT: addi a0, sp, 8
1641-
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
1642-
; RV32-NEXT: vlse64.v v10, (a0), zero
16431641
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1642+
; RV32-NEXT: vlse64.v v10, (a0), zero
16441643
; RV32-NEXT: vmadd.vv v10, v8, v9
16451644
; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, ma
16461645
; RV32-NEXT: vmerge.vvm v8, v8, v10, v0
@@ -1669,9 +1668,8 @@ define <vscale x 1 x i64> @vmadd_vx_nxv1i64_unmasked(<vscale x 1 x i64> %a, i64
16691668
; RV32-NEXT: sw a0, 8(sp)
16701669
; RV32-NEXT: sw a1, 12(sp)
16711670
; RV32-NEXT: addi a0, sp, 8
1672-
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
1673-
; RV32-NEXT: vlse64.v v10, (a0), zero
16741671
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1672+
; RV32-NEXT: vlse64.v v10, (a0), zero
16751673
; RV32-NEXT: vmadd.vv v10, v8, v9
16761674
; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, ma
16771675
; RV32-NEXT: vmv.v.v v8, v10
@@ -1713,9 +1711,8 @@ define <vscale x 1 x i64> @vmadd_vx_nxv1i64_ta(<vscale x 1 x i64> %a, i64 %b, <v
17131711
; RV32-NEXT: sw a0, 8(sp)
17141712
; RV32-NEXT: sw a1, 12(sp)
17151713
; RV32-NEXT: addi a0, sp, 8
1716-
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
1717-
; RV32-NEXT: vlse64.v v10, (a0), zero
17181714
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1715+
; RV32-NEXT: vlse64.v v10, (a0), zero
17191716
; RV32-NEXT: vmadd.vv v10, v8, v9
17201717
; RV32-NEXT: vmerge.vvm v8, v8, v10, v0
17211718
; RV32-NEXT: addi sp, sp, 16
@@ -1776,9 +1773,8 @@ define <vscale x 2 x i64> @vmadd_vx_nxv2i64(<vscale x 2 x i64> %a, i64 %b, <vsca
17761773
; RV32-NEXT: sw a0, 8(sp)
17771774
; RV32-NEXT: sw a1, 12(sp)
17781775
; RV32-NEXT: addi a0, sp, 8
1779-
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
1780-
; RV32-NEXT: vlse64.v v12, (a0), zero
17811776
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1777+
; RV32-NEXT: vlse64.v v12, (a0), zero
17821778
; RV32-NEXT: vmadd.vv v12, v8, v10
17831779
; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, ma
17841780
; RV32-NEXT: vmerge.vvm v8, v8, v12, v0
@@ -1807,9 +1803,8 @@ define <vscale x 2 x i64> @vmadd_vx_nxv2i64_unmasked(<vscale x 2 x i64> %a, i64
18071803
; RV32-NEXT: sw a0, 8(sp)
18081804
; RV32-NEXT: sw a1, 12(sp)
18091805
; RV32-NEXT: addi a0, sp, 8
1810-
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
1811-
; RV32-NEXT: vlse64.v v12, (a0), zero
18121806
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1807+
; RV32-NEXT: vlse64.v v12, (a0), zero
18131808
; RV32-NEXT: vmadd.vv v12, v8, v10
18141809
; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, ma
18151810
; RV32-NEXT: vmv.v.v v8, v12
@@ -1851,9 +1846,8 @@ define <vscale x 2 x i64> @vmadd_vx_nxv2i64_ta(<vscale x 2 x i64> %a, i64 %b, <v
18511846
; RV32-NEXT: sw a0, 8(sp)
18521847
; RV32-NEXT: sw a1, 12(sp)
18531848
; RV32-NEXT: addi a0, sp, 8
1854-
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
1855-
; RV32-NEXT: vlse64.v v12, (a0), zero
18561849
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1850+
; RV32-NEXT: vlse64.v v12, (a0), zero
18571851
; RV32-NEXT: vmadd.vv v12, v8, v10
18581852
; RV32-NEXT: vmerge.vvm v8, v8, v12, v0
18591853
; RV32-NEXT: addi sp, sp, 16
@@ -1914,9 +1908,8 @@ define <vscale x 4 x i64> @vmadd_vx_nxv4i64(<vscale x 4 x i64> %a, i64 %b, <vsca
19141908
; RV32-NEXT: sw a0, 8(sp)
19151909
; RV32-NEXT: sw a1, 12(sp)
19161910
; RV32-NEXT: addi a0, sp, 8
1917-
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
1918-
; RV32-NEXT: vlse64.v v16, (a0), zero
19191911
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
1912+
; RV32-NEXT: vlse64.v v16, (a0), zero
19201913
; RV32-NEXT: vmadd.vv v16, v8, v12
19211914
; RV32-NEXT: vsetvli zero, zero, e64, m4, tu, ma
19221915
; RV32-NEXT: vmerge.vvm v8, v8, v16, v0
@@ -1945,9 +1938,8 @@ define <vscale x 4 x i64> @vmadd_vx_nxv4i64_unmasked(<vscale x 4 x i64> %a, i64
19451938
; RV32-NEXT: sw a0, 8(sp)
19461939
; RV32-NEXT: sw a1, 12(sp)
19471940
; RV32-NEXT: addi a0, sp, 8
1948-
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
1949-
; RV32-NEXT: vlse64.v v16, (a0), zero
19501941
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
1942+
; RV32-NEXT: vlse64.v v16, (a0), zero
19511943
; RV32-NEXT: vmadd.vv v16, v8, v12
19521944
; RV32-NEXT: vsetvli zero, zero, e64, m4, tu, ma
19531945
; RV32-NEXT: vmv.v.v v8, v16
@@ -1989,9 +1981,8 @@ define <vscale x 4 x i64> @vmadd_vx_nxv4i64_ta(<vscale x 4 x i64> %a, i64 %b, <v
19891981
; RV32-NEXT: sw a0, 8(sp)
19901982
; RV32-NEXT: sw a1, 12(sp)
19911983
; RV32-NEXT: addi a0, sp, 8
1992-
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
1993-
; RV32-NEXT: vlse64.v v16, (a0), zero
19941984
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
1985+
; RV32-NEXT: vlse64.v v16, (a0), zero
19951986
; RV32-NEXT: vmadd.vv v16, v8, v12
19961987
; RV32-NEXT: vmerge.vvm v8, v8, v16, v0
19971988
; RV32-NEXT: addi sp, sp, 16
@@ -2054,9 +2045,8 @@ define <vscale x 8 x i64> @vmadd_vx_nxv8i64(<vscale x 8 x i64> %a, i64 %b, <vsca
20542045
; RV32-NEXT: sw a0, 8(sp)
20552046
; RV32-NEXT: sw a1, 12(sp)
20562047
; RV32-NEXT: addi a0, sp, 8
2057-
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
2058-
; RV32-NEXT: vlse64.v v24, (a0), zero
20592048
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2049+
; RV32-NEXT: vlse64.v v24, (a0), zero
20602050
; RV32-NEXT: vmadd.vv v24, v8, v16
20612051
; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma
20622052
; RV32-NEXT: vmerge.vvm v8, v8, v24, v0
@@ -2085,9 +2075,8 @@ define <vscale x 8 x i64> @vmadd_vx_nxv8i64_unmasked(<vscale x 8 x i64> %a, i64
20852075
; RV32-NEXT: sw a0, 8(sp)
20862076
; RV32-NEXT: sw a1, 12(sp)
20872077
; RV32-NEXT: addi a0, sp, 8
2088-
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
2089-
; RV32-NEXT: vlse64.v v24, (a0), zero
20902078
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2079+
; RV32-NEXT: vlse64.v v24, (a0), zero
20912080
; RV32-NEXT: vmadd.vv v24, v8, v16
20922081
; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma
20932082
; RV32-NEXT: vmv.v.v v8, v24
@@ -2130,9 +2119,8 @@ define <vscale x 8 x i64> @vmadd_vx_nxv8i64_ta(<vscale x 8 x i64> %a, i64 %b, <v
21302119
; RV32-NEXT: sw a0, 8(sp)
21312120
; RV32-NEXT: sw a1, 12(sp)
21322121
; RV32-NEXT: addi a0, sp, 8
2133-
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
2134-
; RV32-NEXT: vlse64.v v24, (a0), zero
21352122
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2123+
; RV32-NEXT: vlse64.v v24, (a0), zero
21362124
; RV32-NEXT: vmadd.vv v24, v8, v16
21372125
; RV32-NEXT: vmerge.vvm v8, v8, v24, v0
21382126
; RV32-NEXT: addi sp, sp, 16

0 commit comments

Comments (0)