Skip to content

Commit cb6f021

Browse files
authored
[RISCV][VLOPT] Remove unnecessary passthru restriction (#124549)
We currently check for passthrus in two places: on the instruction to reduce in isCandidate, and on the users in checkUsers. We cannot reduce the VL if an instruction has a user that's a passthru, because the user will read elements past VL in the tail. However, it's fine to reduce an instruction if it itself contains a non-undef passthru: since the VL can only be reduced, never increased, the previous tail will always remain the same.
1 parent f95f10c commit cb6f021

File tree

3 files changed

+69
-36
lines changed

3 files changed

+69
-36
lines changed

llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1143,27 +1143,8 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
11431143
if (MI.getNumDefs() != 1)
11441144
return false;
11451145

1146-
// If we're not using VLMAX, then we need to be careful whether we are using
1147-
// TA/TU when there is a non-undef Passthru. But when we are using VLMAX, it
1148-
// does not matter whether we are using TA/TU with a non-undef Passthru, since
1149-
// there are no tail elements to be preserved.
11501146
unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
11511147
const MachineOperand &VLOp = MI.getOperand(VLOpNum);
1152-
if (VLOp.isReg() || VLOp.getImm() != RISCV::VLMaxSentinel) {
1153-
// If MI has a non-undef passthru, we will not try to optimize it since
1154-
// that requires us to preserve tail elements according to TA/TU.
1155-
// Otherwise, The MI has an undef Passthru, so it doesn't matter whether we
1156-
// are using TA/TU.
1157-
bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc);
1158-
unsigned PassthruOpIdx = MI.getNumExplicitDefs();
1159-
if (HasPassthru &&
1160-
MI.getOperand(PassthruOpIdx).getReg() != RISCV::NoRegister) {
1161-
LLVM_DEBUG(
1162-
dbgs() << " Not a candidate because it uses non-undef passthru"
1163-
" with non-VLMAX VL\n");
1164-
return false;
1165-
}
1166-
}
11671148

11681149
// If the VL is 1, then there is no need to reduce it. This is an
11691150
// optimization, not needed to preserve correctness.
@@ -1247,7 +1228,7 @@ std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
12471228
return std::nullopt;
12481229
}
12491230

1250-
// Tied operands might pass through.
1231+
// If used as a passthru, elements past VL will be read.
12511232
if (UserOp.isTied()) {
12521233
LLVM_DEBUG(dbgs() << " Abort because user used as tied operand\n");
12531234
return std::nullopt;

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3919,11 +3919,12 @@ define void @trunc_v6bf16(ptr %x) {
39193919
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
39203920
; CHECK-NEXT: vfabs.v v8, v10
39213921
; CHECK-NEXT: vmflt.vf v0, v8, fa5
3922+
; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
39223923
; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t
39233924
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
39243925
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
39253926
; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
3926-
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3927+
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
39273928
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
39283929
; CHECK-NEXT: vse16.v v8, (a0)
39293930
; CHECK-NEXT: ret
@@ -4002,11 +4003,12 @@ define void @trunc_v6f16(ptr %x) {
40024003
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
40034004
; ZVFHMIN-NEXT: vfabs.v v8, v10
40044005
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
4006+
; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
40054007
; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t
40064008
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
40074009
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
40084010
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4009-
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4011+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
40104012
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
40114013
; ZVFHMIN-NEXT: vse16.v v8, (a0)
40124014
; ZVFHMIN-NEXT: ret
@@ -4098,12 +4100,13 @@ define void @ceil_v6bf16(ptr %x) {
40984100
; CHECK-NEXT: vfabs.v v8, v10
40994101
; CHECK-NEXT: vmflt.vf v0, v8, fa5
41004102
; CHECK-NEXT: fsrmi a1, 3
4103+
; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
41014104
; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
41024105
; CHECK-NEXT: fsrm a1
41034106
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
41044107
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
41054108
; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4106-
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4109+
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
41074110
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
41084111
; CHECK-NEXT: vse16.v v8, (a0)
41094112
; CHECK-NEXT: ret
@@ -4189,12 +4192,13 @@ define void @ceil_v6f16(ptr %x) {
41894192
; ZVFHMIN-NEXT: vfabs.v v8, v10
41904193
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
41914194
; ZVFHMIN-NEXT: fsrmi a1, 3
4195+
; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
41924196
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
41934197
; ZVFHMIN-NEXT: fsrm a1
41944198
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
41954199
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
41964200
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4197-
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4201+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
41984202
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
41994203
; ZVFHMIN-NEXT: vse16.v v8, (a0)
42004204
; ZVFHMIN-NEXT: ret
@@ -4290,12 +4294,13 @@ define void @floor_v6bf16(ptr %x) {
42904294
; CHECK-NEXT: vfabs.v v8, v10
42914295
; CHECK-NEXT: vmflt.vf v0, v8, fa5
42924296
; CHECK-NEXT: fsrmi a1, 2
4297+
; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
42934298
; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
42944299
; CHECK-NEXT: fsrm a1
42954300
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
42964301
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
42974302
; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4298-
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4303+
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
42994304
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
43004305
; CHECK-NEXT: vse16.v v8, (a0)
43014306
; CHECK-NEXT: ret
@@ -4381,12 +4386,13 @@ define void @floor_v6f16(ptr %x) {
43814386
; ZVFHMIN-NEXT: vfabs.v v8, v10
43824387
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
43834388
; ZVFHMIN-NEXT: fsrmi a1, 2
4389+
; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
43844390
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
43854391
; ZVFHMIN-NEXT: fsrm a1
43864392
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
43874393
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
43884394
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4389-
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4395+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
43904396
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
43914397
; ZVFHMIN-NEXT: vse16.v v8, (a0)
43924398
; ZVFHMIN-NEXT: ret
@@ -4482,12 +4488,13 @@ define void @round_v6bf16(ptr %x) {
44824488
; CHECK-NEXT: vfabs.v v8, v10
44834489
; CHECK-NEXT: vmflt.vf v0, v8, fa5
44844490
; CHECK-NEXT: fsrmi a1, 4
4491+
; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
44854492
; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
44864493
; CHECK-NEXT: fsrm a1
44874494
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
44884495
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
44894496
; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4490-
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4497+
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
44914498
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
44924499
; CHECK-NEXT: vse16.v v8, (a0)
44934500
; CHECK-NEXT: ret
@@ -4573,12 +4580,13 @@ define void @round_v6f16(ptr %x) {
45734580
; ZVFHMIN-NEXT: vfabs.v v8, v10
45744581
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
45754582
; ZVFHMIN-NEXT: fsrmi a1, 4
4583+
; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
45764584
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
45774585
; ZVFHMIN-NEXT: fsrm a1
45784586
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
45794587
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
45804588
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4581-
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4589+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
45824590
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
45834591
; ZVFHMIN-NEXT: vse16.v v8, (a0)
45844592
; ZVFHMIN-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/vl-opt.ll

Lines changed: 52 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,8 @@ define <vscale x 4 x i32> @different_vl_with_ta(<vscale x 4 x i32> %a, <vscale x
107107
ret <vscale x 4 x i32> %w
108108
}
109109

110-
; Test case to make sure VL won't propgate if using tail-undisturbed policy.
110+
; We can propagate VL to a tail-undisturbed policy, provided none of its users
111+
; are passthrus (i.e. read past VL).
111112
define <vscale x 4 x i32> @different_vl_with_tu(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
112113
; CHECK-LABEL: different_vl_with_tu:
113114
; CHECK: # %bb.0:
@@ -118,22 +119,65 @@ define <vscale x 4 x i32> @different_vl_with_tu(<vscale x 4 x i32> %passthru, <v
118119
; CHECK-NEXT: vadd.vv v8, v14, v10
119120
; CHECK-NEXT: ret
120121
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
121-
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a,iXLen %vl2)
122+
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl2)
122123
ret <vscale x 4 x i32> %w
123124
}
124125

125-
; Test case to make sure VL won't propgate if using tail-undisturbed policy.
126+
; We can propagate VL to a tail-undisturbed policy, provided none of its users
127+
; are passthrus (i.e. read past VL).
126128
define <vscale x 4 x i32> @different_imm_vl_with_tu(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
127-
; CHECK-LABEL: different_imm_vl_with_tu:
129+
; NOVLOPT-LABEL: different_imm_vl_with_tu:
130+
; NOVLOPT: # %bb.0:
131+
; NOVLOPT-NEXT: vsetivli zero, 5, e32, m2, tu, ma
132+
; NOVLOPT-NEXT: vmv2r.v v14, v10
133+
; NOVLOPT-NEXT: vadd.vv v14, v10, v12
134+
; NOVLOPT-NEXT: vsetivli zero, 4, e32, m2, tu, ma
135+
; NOVLOPT-NEXT: vadd.vv v8, v14, v10
136+
; NOVLOPT-NEXT: ret
137+
;
138+
; VLOPT-LABEL: different_imm_vl_with_tu:
139+
; VLOPT: # %bb.0:
140+
; VLOPT-NEXT: vsetivli zero, 4, e32, m2, tu, ma
141+
; VLOPT-NEXT: vmv2r.v v14, v10
142+
; VLOPT-NEXT: vadd.vv v14, v10, v12
143+
; VLOPT-NEXT: vadd.vv v8, v14, v10
144+
; VLOPT-NEXT: ret
145+
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
146+
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4)
147+
ret <vscale x 4 x i32> %w
148+
}
149+
150+
; We can't reduce the VL as %v is used as a passthru, i.e. the elements past VL
151+
; are demanded.
152+
define <vscale x 4 x i32> @different_vl_as_passthru(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
153+
; CHECK-LABEL: different_vl_as_passthru:
154+
; CHECK: # %bb.0:
155+
; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
156+
; CHECK-NEXT: vmv2r.v v12, v8
157+
; CHECK-NEXT: vadd.vv v12, v8, v10
158+
; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma
159+
; CHECK-NEXT: vadd.vv v12, v8, v10
160+
; CHECK-NEXT: vmv2r.v v8, v12
161+
; CHECK-NEXT: ret
162+
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
163+
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl2)
164+
ret <vscale x 4 x i32> %w
165+
}
166+
167+
; We can't reduce the VL as %v is used as a passthru, i.e. the elements past VL
168+
; are demanded.
169+
define <vscale x 4 x i32> @different_imm_vl_as_passthru(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
170+
; CHECK-LABEL: different_imm_vl_as_passthru:
128171
; CHECK: # %bb.0:
129172
; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
130-
; CHECK-NEXT: vmv2r.v v14, v10
131-
; CHECK-NEXT: vadd.vv v14, v10, v12
173+
; CHECK-NEXT: vmv2r.v v12, v8
174+
; CHECK-NEXT: vadd.vv v12, v8, v10
132175
; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma
133-
; CHECK-NEXT: vadd.vv v8, v14, v10
176+
; CHECK-NEXT: vadd.vv v12, v8, v10
177+
; CHECK-NEXT: vmv2r.v v8, v12
134178
; CHECK-NEXT: ret
135179
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
136-
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a,iXLen 4)
180+
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 4)
137181
ret <vscale x 4 x i32> %w
138182
}
139183

0 commit comments

Comments
 (0)