-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV][VLOPT] Peek through copies in checkUsers #127656
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Currently, if a user of an instruction isn't a vector pseudo we bail. For simple non-subreg virtual COPYs, we can peek through their uses by using a worklist. This is extracted from a loop in TSVC2 (s273) that contains an fcmp + select, which produces a copy that doesn't seem to be coalesced away.
@llvm/pr-subscribers-backend-risc-v Author: Luke Lau (lukel97) ChangesCurrently, if a user of an instruction isn't a vector pseudo we bail. For simple non-subreg virtual COPYs, we can peek through their uses by using a worklist. This is extracted from a loop in TSVC2 (s273) that contains an fcmp + select, which produces a copy that doesn't seem to be coalesced away. Full diff: https://github.com/llvm/llvm-project/pull/127656.diff 4 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 1ba7f0b522a2b..67024d720ccfa 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1311,9 +1311,24 @@ RISCVVLOptimizer::getMinimumVLForUser(MachineOperand &UserOp) {
std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
std::optional<MachineOperand> CommonVL;
- for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) {
+ SmallSetVector<MachineOperand *, 8> Worklist;
+ for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg()))
+ Worklist.insert(&UserOp);
+
+ while (!Worklist.empty()) {
+ MachineOperand &UserOp = *Worklist.pop_back_val();
const MachineInstr &UserMI = *UserOp.getParent();
LLVM_DEBUG(dbgs() << " Checking user: " << UserMI << "\n");
+
+ if (UserMI.isCopy() && UserMI.getOperand(0).getReg().isVirtual() &&
+ UserMI.getOperand(0).getSubReg() == RISCV::NoSubRegister &&
+ UserMI.getOperand(1).getSubReg() == RISCV::NoSubRegister) {
+ LLVM_DEBUG(dbgs() << " Peeking through uses of COPY\n");
+ for (auto &CopyUse : MRI->use_operands(UserMI.getOperand(0).getReg()))
+ Worklist.insert(&CopyUse);
+ continue;
+ }
+
if (mayReadPastVL(UserMI)) {
LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n");
return std::nullopt;
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index 403cc0eb9dce1..c249b3c5376fc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -901,9 +901,9 @@ define void @test_dag_loop() {
; CHECK-NEXT: vssubu.vx v12, v8, zero, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
; CHECK-NEXT: vmseq.vv v0, v12, v8
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vsetivli zero, 0, e16, m8, tu, mu
+; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, mu
; CHECK-NEXT: vle16.v v8, (zero), v0.t
; CHECK-NEXT: vse16.v v8, (zero)
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
index f0b05d2420b1a..823c2bbd0c968 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
@@ -217,3 +217,36 @@ define void @optimize_ternary_use(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b,
call void @llvm.riscv.vse(<vscale x 4 x i32> %3, ptr %p, iXLen %vl)
ret void
}
+
+; This function has a copy between two vrm2 virtual registers, make sure we can
+; reduce vl between it.
+define void @fadd_fcmp_select_copy(<vscale x 4 x float> %v, <vscale x 4 x i1> %c, ptr %p, iXLen %vl) {
+; NOVLOPT-LABEL: fadd_fcmp_select_copy:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v8
+; NOVLOPT-NEXT: fmv.w.x fa5, zero
+; NOVLOPT-NEXT: vmflt.vf v10, v8, fa5
+; NOVLOPT-NEXT: vmand.mm v10, v0, v10
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vse32.v v8, (a0)
+; NOVLOPT-NEXT: vsm.v v10, (a0)
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: fadd_fcmp_select_copy:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v8
+; VLOPT-NEXT: fmv.w.x fa5, zero
+; VLOPT-NEXT: vmflt.vf v10, v8, fa5
+; VLOPT-NEXT: vmand.mm v10, v0, v10
+; VLOPT-NEXT: vse32.v v8, (a0)
+; VLOPT-NEXT: vsm.v v10, (a0)
+; VLOPT-NEXT: ret
+ %fadd = fadd <vscale x 4 x float> %v, %v
+ %fcmp = fcmp olt <vscale x 4 x float> %fadd, zeroinitializer
+ %select = select <vscale x 4 x i1> %c, <vscale x 4 x i1> %fcmp, <vscale x 4 x i1> zeroinitializer
+ call void @llvm.riscv.vse(<vscale x 4 x float> %fadd, ptr %p, iXLen %vl)
+ call void @llvm.riscv.vsm(<vscale x 4 x i1> %select, ptr %p, iXLen %vl)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
index 0475a988e9851..abf4faa59a98e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -290,3 +290,41 @@ body: |
%x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def $vxsat
%y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
...
+---
+name: copy
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: copy
+ ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+ ; CHECK-NEXT: %y:vr = COPY %x
+ ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+ %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+ %y:vr = COPY %x
+ %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+...
+---
+name: copy_multiple_users
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: copy_multiple_users
+ ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 3, 3 /* e8 */, 0 /* tu, mu */
+ ; CHECK-NEXT: %y:vr = COPY %x
+ ; CHECK-NEXT: %z0:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+ ; CHECK-NEXT: %z1:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 3, 3 /* e8 */, 0 /* tu, mu */
+ %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+ %y:vr = COPY %x
+ %z0:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+ %z1:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 3, 3 /* e8 */, 0 /* tu, mu */
+...
+---
+name: copy_user_invalid_sew
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: copy_user_invalid_sew
+ ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+ ; CHECK-NEXT: %y:vr = COPY %x
+ ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
+ %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+ %y:vr = COPY %x
+ %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
+...
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Currently if a user of an instruction isn't a vector pseudo we bail. For simple non-subreg virtual COPYs, we can peek through their uses by using a worklist.
This is extracted from a loop in TSVC2 (s273) that contains an fcmp + select, which produces a copy that doesn't seem to be coalesced away.