Commit 0d87bb7

[RISCV][VLOPT] Allow propagation even when VL isn't VLMAX

The original goal of this pass was to focus on vector operations with VLMAX. However, users often consume only part of the result, and such partial uses frequently come from the vectorizer. Relaxing the VLMAX constraint captures more optimization opportunities, such as non-power-of-2 code generation and sequences of vector operations with different VLs.

Co-authored-by: Kito Cheng <[email protected]>
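To illustrate the relaxed constraint, here is a minimal LLVM IR sketch modeled on the vl-opt.ll tests below (function name and VL values are illustrative only; assumes a riscv64 target with +v): the producing vadd runs at VL 4, its only user runs at VL 2 with a poison passthru (tail agnostic), so the pass can now shrink the producer's VL to 2 even though neither VL is VLMAX.

; Sketch only: %v is defined with VL 4, but its single use consumes it with
; VL 2, so the VL optimizer may rewrite the first intrinsic's VL from 4 to 2.
define <vscale x 4 x i32> @vl_prop_sketch(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 4)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, i64 2)
  ret <vscale x 4 x i32> %w
}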
1 parent 8a98642 commit 0d87bb7

File tree

2 files changed (+126, -38 lines)

llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 95 additions & 16 deletions
@@ -31,6 +31,44 @@ using namespace llvm;
 
 namespace {
 
+struct VLInfo {
+  VLInfo(const MachineOperand &VLOp) {
+    IsImm = VLOp.isImm();
+    if (IsImm)
+      Imm = VLOp.getImm();
+    else
+      Reg = VLOp.getReg();
+  }
+
+  Register Reg;
+  int64_t Imm;
+  bool IsImm;
+
+  bool isCompatible(const MachineOperand &VLOp) const {
+    if (IsImm != VLOp.isImm())
+      return false;
+    if (IsImm)
+      return Imm == VLOp.getImm();
+    return Reg == VLOp.getReg();
+  }
+
+  bool isValid() const { return IsImm || Reg.isVirtual(); }
+
+  bool hasBenefit(const MachineOperand &VLOp) const {
+    if (IsImm && Imm == RISCV::VLMaxSentinel)
+      return false;
+
+    if (!IsImm || !VLOp.isImm())
+      return true;
+
+    if (VLOp.getImm() == RISCV::VLMaxSentinel)
+      return true;
+
+    // No benefit if the current VL is already smaller than the new one.
+    return Imm < VLOp.getImm();
+  }
+};
+
 class RISCVVLOptimizer : public MachineFunctionPass {
   const MachineRegisterInfo *MRI;
   const MachineDominatorTree *MDT;
@@ -51,7 +89,7 @@ class RISCVVLOptimizer : public MachineFunctionPass {
   StringRef getPassName() const override { return PASS_NAME; }
 
 private:
-  bool checkUsers(std::optional<Register> &CommonVL, MachineInstr &MI);
+  bool checkUsers(std::optional<VLInfo> &CommonVL, MachineInstr &MI);
   bool tryReduceVL(MachineInstr &MI);
   bool isCandidate(const MachineInstr &MI) const;
 };
@@ -643,8 +681,34 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
 
   unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
   const MachineOperand &VLOp = MI.getOperand(VLOpNum);
-  if (!VLOp.isImm() || VLOp.getImm() != RISCV::VLMaxSentinel)
+  if (((VLOp.isImm() && VLOp.getImm() != RISCV::VLMaxSentinel) ||
+       VLOp.isReg())) {
+    bool UseTAPolicy = false;
+    bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc);
+    if (RISCVII::hasVecPolicyOp(Desc.TSFlags)) {
+      unsigned PolicyOpNum = RISCVII::getVecPolicyOpNum(Desc);
+      const MachineOperand &PolicyOp = MI.getOperand(PolicyOpNum);
+      uint64_t Policy = PolicyOp.getImm();
+      UseTAPolicy = (Policy & RISCVII::TAIL_AGNOSTIC) == RISCVII::TAIL_AGNOSTIC;
+      if (HasPassthru) {
+        unsigned PassthruOpIdx = MI.getNumExplicitDefs();
+        UseTAPolicy = UseTAPolicy || (MI.getOperand(PassthruOpIdx).getReg() ==
+                                      RISCV::NoRegister);
+      }
+    }
+    if (!UseTAPolicy) {
+      LLVM_DEBUG(
+          dbgs() << " Not a candidate because it uses tail-undisturbed policy"
+                 " with non-VLMAX VL\n");
+      return false;
+    }
+  }
+
+  // If the VL is 1, then there is no need to reduce it.
+  if (VLOp.isImm() && VLOp.getImm() == 1) {
+    LLVM_DEBUG(dbgs() << " Not a candidate because VL is already 1\n");
     return false;
+  }
 
   // Some instructions that produce vectors have semantics that make it more
   // difficult to determine whether the VL can be reduced. For example, some
@@ -667,7 +731,7 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
   return true;
 }
 
-bool RISCVVLOptimizer::checkUsers(std::optional<Register> &CommonVL,
+bool RISCVVLOptimizer::checkUsers(std::optional<VLInfo> &CommonVL,
                                   MachineInstr &MI) {
   // FIXME: Avoid visiting each user for each time we visit something on the
   // worklist, combined with an extra visit from the outer loop. Restructure
@@ -721,8 +785,9 @@ bool RISCVVLOptimizer::checkUsers(std::optional<Register> &CommonVL,
     }
 
     if (!CommonVL) {
-      CommonVL = VLOp.getReg();
-    } else if (*CommonVL != VLOp.getReg()) {
+      CommonVL = VLInfo(VLOp);
+      LLVM_DEBUG(dbgs() << " User VL is: " << VLOp << "\n");
+    } else if (!CommonVL->isCompatible(VLOp)) {
       LLVM_DEBUG(dbgs() << " Abort because users have different VL\n");
       CanReduceVL = false;
       break;
@@ -759,29 +824,43 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &OrigMI) {
     MachineInstr &MI = *Worklist.pop_back_val();
     LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI << "\n");
 
-    std::optional<Register> CommonVL;
+    std::optional<VLInfo> CommonVL;
     bool CanReduceVL = true;
     if (isVectorRegClass(MI.getOperand(0).getReg(), MRI))
       CanReduceVL = checkUsers(CommonVL, MI);
 
     if (!CanReduceVL || !CommonVL)
       continue;
 
-    if (!CommonVL->isVirtual()) {
-      LLVM_DEBUG(
-          dbgs() << " Abort due to new VL is not virtual register.\n");
+    if (!CommonVL->isValid()) {
+      LLVM_DEBUG(dbgs() << " Abort due to common VL is not valid.\n");
       continue;
     }
 
-    const MachineInstr *VLMI = MRI->getVRegDef(*CommonVL);
-    if (!MDT->dominates(VLMI, &MI))
-      continue;
-
-    // All our checks passed. We can reduce VL.
-    LLVM_DEBUG(dbgs() << " Reducing VL for: " << MI << "\n");
     unsigned VLOpNum = RISCVII::getVLOpNum(MI.getDesc());
     MachineOperand &VLOp = MI.getOperand(VLOpNum);
-    VLOp.ChangeToRegister(*CommonVL, false);
+
+    if (!CommonVL->hasBenefit(VLOp)) {
+      LLVM_DEBUG(dbgs() << " Abort due to no benefit.\n");
+      continue;
+    }
+
+    if (CommonVL->IsImm) {
+      LLVM_DEBUG(dbgs() << " Reduce VL from " << VLOp << " to "
+                        << CommonVL->Imm << " for " << MI << "\n");
+      VLOp.ChangeToImmediate(CommonVL->Imm);
+    } else {
+      const MachineInstr *VLMI = MRI->getVRegDef(CommonVL->Reg);
+      if (!MDT->dominates(VLMI, &MI))
+        continue;
+      LLVM_DEBUG(dbgs() << " Reduce VL from " << VLOp << " to "
+                        << printReg(CommonVL->Reg, MRI->getTargetRegisterInfo())
+                        << " for " << MI << "\n");
+
+      // All our checks passed. We can reduce VL.
+      VLOp.ChangeToRegister(CommonVL->Reg, false);
+    }
+
     MadeChange = true;
 
     // Now add all inputs to this instruction to the worklist.

llvm/test/CodeGen/RISCV/rvv/vl-opt.ll

Lines changed: 31 additions & 22 deletions
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs | \
 ; RUN: FileCheck %s -check-prefixes=CHECK,NOVLOPT
 ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | \
@@ -23,7 +23,7 @@ define <vscale x 4 x i32> @different_imm_vl_with_ta(<vscale x 4 x i32> %passthru
   ret <vscale x 4 x i32> %w
 }
 
-; No benificial to propagate VL since VL is larger in the use side.
+; Not beneficial to propagate VL since VL is larger in the use side.
 define <vscale x 4 x i32> @different_imm_vl_with_ta_larger_vl(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
 ; CHECK-LABEL: different_imm_vl_with_ta_larger_vl:
 ; CHECK: # %bb.0:
@@ -38,20 +38,26 @@ define <vscale x 4 x i32> @different_imm_vl_with_ta_larger_vl(<vscale x 4 x i32>
 }
 
 define <vscale x 4 x i32> @different_imm_reg_vl_with_ta(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
-; CHECK-LABEL: different_imm_reg_vl_with_ta:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v12
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vadd.vv v8, v8, v10
-; CHECK-NEXT: ret
+; NOVLOPT-LABEL: different_imm_reg_vl_with_ta:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetivli zero, 4, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: different_imm_reg_vl_with_ta:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v12
+; VLOPT-NEXT: vadd.vv v8, v8, v10
+; VLOPT-NEXT: ret
   %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 4)
   %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl1)
   ret <vscale x 4 x i32> %w
 }
 
-
-; No benificial to propagate VL since VL is already one.
+; Not beneficial to propagate VL since VL is already one.
 define <vscale x 4 x i32> @different_imm_vl_with_ta_1(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
 ; CHECK-LABEL: different_imm_vl_with_ta_1:
 ; CHECK: # %bb.0:
@@ -69,13 +75,20 @@ define <vscale x 4 x i32> @different_imm_vl_with_ta_1(<vscale x 4 x i32> %passth
 ; it's still safe even %vl2 is larger than %vl1, becuase rest of the vector are
 ; undefined value.
 define <vscale x 4 x i32> @different_vl_with_ta(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
-; CHECK-LABEL: different_vl_with_ta:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vadd.vv v10, v8, v10
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v8
-; CHECK-NEXT: ret
+; NOVLOPT-LABEL: different_vl_with_ta:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v10, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: different_vl_with_ta:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v10, v8, v10
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
   %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
   %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a,iXLen %vl2)
   ret <vscale x 4 x i32> %w
@@ -110,7 +123,3 @@ define <vscale x 4 x i32> @different_imm_vl_with_tu(<vscale x 4 x i32> %passthru
   %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a,iXLen 4)
   ret <vscale x 4 x i32> %w
 }
-
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; NOVLOPT: {{.*}}
-; VLOPT: {{.*}}

0 commit comments
