Skip to content

Commit ac25c42

Browse files
jrbyrnes authored and bcahoon committed
[AMDGPU] Constrain use LiveMask by the operand's LaneMask for RP calculation.
For speculative RP queries, recede may calculate inaccurate masks for subreg uses. Previously, the calculation would look at every lane of the used register that is live in the LIS at the position of the MI. This also picks up lanes of any subregs which are live at the instruction but not used by it. By constraining the live mask against getSubRegIndexLaneMask for the operand's subreg, we are sure not to pick up these extra lanes. For current clients of recede this is not an issue, because (1) the current clients do not violate the program order in the LIS, and (2) the change to RP is based on the difference between the previous mask and the new mask. Since current clients are not exposed to this issue, this patch is effectively NFC. Co-authored-by: Valery Pykhtin [email protected] Change-Id: Iaed80271226b2587297e6fb78fe081afec1a9275 (cherry picked from commit 5cb6b15)
1 parent 1ff3a83 commit ac25c42

File tree

2 files changed

+40
-25
lines changed

2 files changed

+40
-25
lines changed

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp

Lines changed: 31 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -258,33 +258,40 @@ static void
258258
collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
259259
const MachineInstr &MI, const LiveIntervals &LIS,
260260
const MachineRegisterInfo &MRI) {
261-
SlotIndex InstrSI;
261+
262+
auto &TRI = *MRI.getTargetRegisterInfo();
262263
for (const auto &MO : MI.operands()) {
263264
if (!MO.isReg() || !MO.getReg().isVirtual())
264265
continue;
265266
if (!MO.isUse() || !MO.readsReg())
266267
continue;
267268

268269
Register Reg = MO.getReg();
269-
if (llvm::any_of(RegMaskPairs, [Reg](const RegisterMaskPair &RM) {
270-
return RM.RegUnit == Reg;
271-
}))
272-
continue;
270+
auto I = llvm::find_if(RegMaskPairs, [Reg](const RegisterMaskPair &RM) {
271+
return RM.RegUnit == Reg;
272+
});
273+
274+
auto &P = I == RegMaskPairs.end()
275+
? RegMaskPairs.emplace_back(Reg, LaneBitmask::getNone())
276+
: *I;
273277

274-
LaneBitmask UseMask;
275-
auto &LI = LIS.getInterval(Reg);
278+
P.LaneMask |= MO.getSubReg() ? TRI.getSubRegIndexLaneMask(MO.getSubReg())
279+
: MRI.getMaxLaneMaskForVReg(Reg);
280+
}
281+
282+
SlotIndex InstrSI;
283+
for (auto &P : RegMaskPairs) {
284+
auto &LI = LIS.getInterval(P.RegUnit);
276285
if (!LI.hasSubRanges())
277-
UseMask = MRI.getMaxLaneMaskForVReg(Reg);
278-
else {
279-
// For a tentative schedule LIS isn't updated yet but livemask should
280-
// remain the same on any schedule. Subreg defs can be reordered but they
281-
// all must dominate uses anyway.
282-
if (!InstrSI)
283-
InstrSI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex();
284-
UseMask = getLiveLaneMask(LI, InstrSI, MRI);
285-
}
286+
continue;
287+
288+
// For a tentative schedule LIS isn't updated yet but livemask should
289+
// remain the same on any schedule. Subreg defs can be reordered but they
290+
// all must dominate uses anyway.
291+
if (!InstrSI)
292+
InstrSI = LIS.getInstructionIndex(MI).getBaseIndex();
286293

287-
RegMaskPairs.emplace_back(Reg, UseMask);
294+
P.LaneMask = getLiveLaneMask(LI, InstrSI, MRI, P.LaneMask);
288295
}
289296
}
290297

@@ -293,22 +300,25 @@ collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
293300

294301
LaneBitmask llvm::getLiveLaneMask(unsigned Reg, SlotIndex SI,
295302
const LiveIntervals &LIS,
296-
const MachineRegisterInfo &MRI) {
297-
return getLiveLaneMask(LIS.getInterval(Reg), SI, MRI);
303+
const MachineRegisterInfo &MRI,
304+
LaneBitmask LaneMaskFilter) {
305+
return getLiveLaneMask(LIS.getInterval(Reg), SI, MRI, LaneMaskFilter);
298306
}
299307

300308
LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
301-
const MachineRegisterInfo &MRI) {
309+
const MachineRegisterInfo &MRI,
310+
LaneBitmask LaneMaskFilter) {
302311
LaneBitmask LiveMask;
303312
if (LI.hasSubRanges()) {
304313
for (const auto &S : LI.subranges())
305-
if (S.liveAt(SI)) {
314+
if ((S.LaneMask & LaneMaskFilter).any() && S.liveAt(SI)) {
306315
LiveMask |= S.LaneMask;
307316
assert(LiveMask == (LiveMask & MRI.getMaxLaneMaskForVReg(LI.reg())));
308317
}
309318
} else if (LI.liveAt(SI)) {
310319
LiveMask = MRI.getMaxLaneMaskForVReg(LI.reg());
311320
}
321+
LiveMask &= LaneMaskFilter;
312322
return LiveMask;
313323
}
314324

llvm/lib/Target/AMDGPU/GCNRegPressure.h

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -265,13 +265,18 @@ class GCNDownwardRPTracker : public GCNRPTracker {
265265
const LiveRegSet *LiveRegsCopy = nullptr);
266266
};
267267

268-
LaneBitmask getLiveLaneMask(unsigned Reg,
269-
SlotIndex SI,
268+
/// \returns the LaneMask of live lanes of \p Reg at position \p SI. Only the
269+
/// active lanes of \p LaneMaskFilter will be set in the return value. This is
270+
/// used, for example, to limit the live lanes to a specific subreg when
271+
/// calculating use masks.
272+
LaneBitmask getLiveLaneMask(unsigned Reg, SlotIndex SI,
270273
const LiveIntervals &LIS,
271-
const MachineRegisterInfo &MRI);
274+
const MachineRegisterInfo &MRI,
275+
LaneBitmask LaneMaskFilter = LaneBitmask::getAll());
272276

273277
LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
274-
const MachineRegisterInfo &MRI);
278+
const MachineRegisterInfo &MRI,
279+
LaneBitmask LaneMaskFilter = LaneBitmask::getAll());
275280

276281
GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
277282
const MachineRegisterInfo &MRI);

0 commit comments

Comments
 (0)