Skip to content

Commit c58011d

Browse files
authored
[RISCV][VLOPT] Peek through copies in checkUsers (#127656)
Currently, if a user of an instruction isn't a vector pseudo, we bail. For simple COPYs that define a virtual register and use no subregister indices, we can instead peek through to the uses of the COPY's result by using a worklist. This is extracted from a loop in TSVC2 (s273) that contains an fcmp + select, which produces a copy that doesn't seem to be coalesced away.
1 parent f8bdbed commit c58011d

File tree

4 files changed

+89
-3
lines changed

4 files changed

+89
-3
lines changed

llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1315,9 +1315,24 @@ RISCVVLOptimizer::getMinimumVLForUser(MachineOperand &UserOp) {
13151315

13161316
std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
13171317
std::optional<MachineOperand> CommonVL;
1318-
for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) {
1318+
SmallSetVector<MachineOperand *, 8> Worklist;
1319+
for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg()))
1320+
Worklist.insert(&UserOp);
1321+
1322+
while (!Worklist.empty()) {
1323+
MachineOperand &UserOp = *Worklist.pop_back_val();
13191324
const MachineInstr &UserMI = *UserOp.getParent();
13201325
LLVM_DEBUG(dbgs() << " Checking user: " << UserMI << "\n");
1326+
1327+
if (UserMI.isCopy() && UserMI.getOperand(0).getReg().isVirtual() &&
1328+
UserMI.getOperand(0).getSubReg() == RISCV::NoSubRegister &&
1329+
UserMI.getOperand(1).getSubReg() == RISCV::NoSubRegister) {
1330+
LLVM_DEBUG(dbgs() << " Peeking through uses of COPY\n");
1331+
for (auto &CopyUse : MRI->use_operands(UserMI.getOperand(0).getReg()))
1332+
Worklist.insert(&CopyUse);
1333+
continue;
1334+
}
1335+
13211336
if (mayReadPastVL(UserMI)) {
13221337
LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n");
13231338
return std::nullopt;

llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -901,9 +901,9 @@ define void @test_dag_loop() {
901901
; CHECK-NEXT: vssubu.vx v12, v8, zero, v0.t
902902
; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
903903
; CHECK-NEXT: vmseq.vv v0, v12, v8
904-
; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
904+
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
905905
; CHECK-NEXT: vmv.v.i v8, 0
906-
; CHECK-NEXT: vsetivli zero, 0, e16, m8, tu, mu
906+
; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, mu
907907
; CHECK-NEXT: vle16.v v8, (zero), v0.t
908908
; CHECK-NEXT: vse16.v v8, (zero)
909909
; CHECK-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/vl-opt.ll

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,3 +217,36 @@ define void @optimize_ternary_use(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b,
217217
call void @llvm.riscv.vse(<vscale x 4 x i32> %3, ptr %p, iXLen %vl)
218218
ret void
219219
}
220+
221+
; This function has a copy between two vrm2 virtual registers. Make sure we can
222+
; still reduce the vl when looking through that copy.
223+
define void @fadd_fcmp_select_copy(<vscale x 4 x float> %v, <vscale x 4 x i1> %c, ptr %p, iXLen %vl) {
224+
; NOVLOPT-LABEL: fadd_fcmp_select_copy:
225+
; NOVLOPT: # %bb.0:
226+
; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
227+
; NOVLOPT-NEXT: vfadd.vv v8, v8, v8
228+
; NOVLOPT-NEXT: fmv.w.x fa5, zero
229+
; NOVLOPT-NEXT: vmflt.vf v10, v8, fa5
230+
; NOVLOPT-NEXT: vmand.mm v10, v0, v10
231+
; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
232+
; NOVLOPT-NEXT: vse32.v v8, (a0)
233+
; NOVLOPT-NEXT: vsm.v v10, (a0)
234+
; NOVLOPT-NEXT: ret
235+
;
236+
; VLOPT-LABEL: fadd_fcmp_select_copy:
237+
; VLOPT: # %bb.0:
238+
; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
239+
; VLOPT-NEXT: vfadd.vv v8, v8, v8
240+
; VLOPT-NEXT: fmv.w.x fa5, zero
241+
; VLOPT-NEXT: vmflt.vf v10, v8, fa5
242+
; VLOPT-NEXT: vmand.mm v10, v0, v10
243+
; VLOPT-NEXT: vse32.v v8, (a0)
244+
; VLOPT-NEXT: vsm.v v10, (a0)
245+
; VLOPT-NEXT: ret
246+
%fadd = fadd <vscale x 4 x float> %v, %v
247+
%fcmp = fcmp olt <vscale x 4 x float> %fadd, zeroinitializer
248+
%select = select <vscale x 4 x i1> %c, <vscale x 4 x i1> %fcmp, <vscale x 4 x i1> zeroinitializer
249+
call void @llvm.riscv.vse(<vscale x 4 x float> %fadd, ptr %p, iXLen %vl)
250+
call void @llvm.riscv.vsm(<vscale x 4 x i1> %select, ptr %p, iXLen %vl)
251+
ret void
252+
}

llvm/test/CodeGen/RISCV/rvv/vl-opt.mir

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,3 +330,41 @@ body: |
330330
%x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def $vxsat
331331
%y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
332332
...
333+
---
334+
name: copy
335+
body: |
336+
bb.0:
337+
; CHECK-LABEL: name: copy
338+
; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
339+
; CHECK-NEXT: %y:vr = COPY %x
340+
; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
341+
%x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
342+
%y:vr = COPY %x
343+
%z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
344+
...
345+
---
346+
name: copy_multiple_users
347+
body: |
348+
bb.0:
349+
; CHECK-LABEL: name: copy_multiple_users
350+
; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 3, 3 /* e8 */, 0 /* tu, mu */
351+
; CHECK-NEXT: %y:vr = COPY %x
352+
; CHECK-NEXT: %z0:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
353+
; CHECK-NEXT: %z1:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 3, 3 /* e8 */, 0 /* tu, mu */
354+
%x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
355+
%y:vr = COPY %x
356+
%z0:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
357+
%z1:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 3, 3 /* e8 */, 0 /* tu, mu */
358+
...
359+
---
360+
name: copy_user_invalid_sew
361+
body: |
362+
bb.0:
363+
; CHECK-LABEL: name: copy_user_invalid_sew
364+
; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
365+
; CHECK-NEXT: %y:vr = COPY %x
366+
; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
367+
%x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
368+
%y:vr = COPY %x
369+
%z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
370+
...

0 commit comments

Comments
 (0)