Skip to content

Commit 6e749f5

Browse files
authored
[RISCV] Adjust RISCVVectorMaskDAGMutation to look for copy to V0 (llvm#129296)
This mutation was introduced in 01a15dc with the goal of avoiding many copies from v1-v31 to v0 immediately before a mask-consuming instruction. I noticed in a workload that this was not applying to vmv.s.x (which we use for short vector masks). We had a whitelist of instructions. Instead, we can directly inspect the user of the current node to see if it's a copy to V0. This isn't quite precise (as the mask-producing instruction could already be scheduled fairly far from its single use), but is probably good enough. As with all scheduling changes, results are mixed. Some significant improvements, some regressions.
1 parent f39e81e commit 6e749f5

23 files changed

+939
-1086
lines changed

llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp

Lines changed: 21 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,8 @@
1313
// The reason why we need to do this:
1414
// 1. When tracking register pressure, we don't track physical registers.
1515
// 2. We have a RegisterClass for mask register (which is `VMV0`), but we don't
16-
// use it in most RVV pseudos (only used in inline asm constraint and add/sub
17-
// with carry instructions). Instead, we use physical register V0 directly
18-
// and insert a `$v0 = COPY ...` before the use. And, there is a fundamental
19-
// issue in register allocator when handling RegisterClass with only one
20-
// physical register, so we can't simply replace V0 with VMV0.
16+
// use it by the time we reach scheduling. Instead, we use physical
17+
// register V0 directly and insert a `$v0 = COPY ...` before the use.
2118
// 3. For mask producers, we are using VR RegisterClass (we can allocate V0-V31
2219
// to it). So if V0 is not available, there are still 31 available registers
2320
// out there.
@@ -43,66 +40,24 @@
4340

4441
namespace llvm {
4542

46-
static inline bool isVectorMaskProducer(const MachineInstr *MI) {
47-
switch (RISCV::getRVVMCOpcode(MI->getOpcode())) {
48-
// Vector Mask Instructions
49-
case RISCV::VMAND_MM:
50-
case RISCV::VMNAND_MM:
51-
case RISCV::VMANDN_MM:
52-
case RISCV::VMXOR_MM:
53-
case RISCV::VMOR_MM:
54-
case RISCV::VMNOR_MM:
55-
case RISCV::VMORN_MM:
56-
case RISCV::VMXNOR_MM:
57-
case RISCV::VMSBF_M:
58-
case RISCV::VMSIF_M:
59-
case RISCV::VMSOF_M:
60-
// Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
61-
case RISCV::VMADC_VV:
62-
case RISCV::VMADC_VX:
63-
case RISCV::VMADC_VI:
64-
case RISCV::VMADC_VVM:
65-
case RISCV::VMADC_VXM:
66-
case RISCV::VMADC_VIM:
67-
case RISCV::VMSBC_VV:
68-
case RISCV::VMSBC_VX:
69-
case RISCV::VMSBC_VVM:
70-
case RISCV::VMSBC_VXM:
71-
// Vector Integer Compare Instructions
72-
case RISCV::VMSEQ_VV:
73-
case RISCV::VMSEQ_VX:
74-
case RISCV::VMSEQ_VI:
75-
case RISCV::VMSNE_VV:
76-
case RISCV::VMSNE_VX:
77-
case RISCV::VMSNE_VI:
78-
case RISCV::VMSLT_VV:
79-
case RISCV::VMSLT_VX:
80-
case RISCV::VMSLTU_VV:
81-
case RISCV::VMSLTU_VX:
82-
case RISCV::VMSLE_VV:
83-
case RISCV::VMSLE_VX:
84-
case RISCV::VMSLE_VI:
85-
case RISCV::VMSLEU_VV:
86-
case RISCV::VMSLEU_VX:
87-
case RISCV::VMSLEU_VI:
88-
case RISCV::VMSGTU_VX:
89-
case RISCV::VMSGTU_VI:
90-
case RISCV::VMSGT_VX:
91-
case RISCV::VMSGT_VI:
92-
// Vector Floating-Point Compare Instructions
93-
case RISCV::VMFEQ_VV:
94-
case RISCV::VMFEQ_VF:
95-
case RISCV::VMFNE_VV:
96-
case RISCV::VMFNE_VF:
97-
case RISCV::VMFLT_VV:
98-
case RISCV::VMFLT_VF:
99-
case RISCV::VMFLE_VV:
100-
case RISCV::VMFLE_VF:
101-
case RISCV::VMFGT_VF:
102-
case RISCV::VMFGE_VF:
103-
return true;
104-
}
105-
return false;
43+
static bool isCopyToV0(const MachineInstr &MI) {
44+
return MI.isCopy() && MI.getOperand(0).getReg() == RISCV::V0 &&
45+
MI.getOperand(1).getReg().isVirtual() &&
46+
MI.getOperand(1).getSubReg() == RISCV::NoSubRegister;
47+
}
48+
49+
static bool isSoleUseCopyToV0(SUnit &SU) {
50+
if (SU.Succs.size() != 1)
51+
return false;
52+
SDep &Dep = SU.Succs[0];
53+
// Ignore dependencies other than data or strong ordering.
54+
if (Dep.isWeak())
55+
return false;
56+
57+
SUnit &DepSU = *Dep.getSUnit();
58+
if (DepSU.isBoundaryNode())
59+
return false;
60+
return isCopyToV0(*DepSU.getInstr());
10661
}
10762

10863
class RISCVVectorMaskDAGMutation : public ScheduleDAGMutation {
@@ -119,7 +74,7 @@ class RISCVVectorMaskDAGMutation : public ScheduleDAGMutation {
11974
if (MI->findRegisterUseOperand(RISCV::V0, TRI))
12075
NearestUseV0SU = &SU;
12176

122-
if (NearestUseV0SU && NearestUseV0SU != &SU && isVectorMaskProducer(MI) &&
77+
if (NearestUseV0SU && NearestUseV0SU != &SU && isSoleUseCopyToV0(SU) &&
12378
// For LMUL=8 cases, there will be more possibilities to spill.
12479
// FIXME: We should use RegPressureTracker to do fine-grained
12580
// controls.

0 commit comments

Comments
 (0)