Skip to content

Commit 0f615fb

Browse files
authored
[AMDGPU] Simplify GCNRewritePartialRegUses pass. (#135199)
Clean up some leftover hacks that existed before #67245; more details are in #69957.
1 parent 33af951 commit 0f615fb

File tree

1 file changed

+37
-86
lines changed

1 file changed

+37
-86
lines changed

llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp

Lines changed: 37 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -56,20 +56,8 @@ class GCNRewritePartialRegUsesImpl {
5656
/// size. Return true if the change has been made.
5757
bool rewriteReg(Register Reg) const;
5858

59-
/// Value type for SubRegMap below.
60-
struct SubRegInfo {
61-
/// Register class required to hold the value stored in the SubReg.
62-
const TargetRegisterClass *RC;
63-
64-
/// Index for the right-shifted subregister. If 0 this is the "covering"
65-
/// subreg i.e. subreg that covers all others. Covering subreg becomes the
66-
/// whole register after the replacement.
67-
unsigned SubReg = AMDGPU::NoSubRegister;
68-
SubRegInfo(const TargetRegisterClass *RC_ = nullptr) : RC(RC_) {}
69-
};
70-
71-
/// Map OldSubReg -> { RC, NewSubReg }. Used as in/out container.
72-
using SubRegMap = SmallDenseMap<unsigned, SubRegInfo>;
59+
/// Map OldSubReg -> NewSubReg. Used as in/out container.
60+
using SubRegMap = SmallDenseMap<unsigned, unsigned>;
7361

7462
/// Given register class RC and the set of used subregs as keys in the SubRegs
7563
/// map return new register class and indexes of right-shifted subregs as
@@ -78,24 +66,22 @@ class GCNRewritePartialRegUsesImpl {
7866
const TargetRegisterClass *getMinSizeReg(const TargetRegisterClass *RC,
7967
SubRegMap &SubRegs) const;
8068

81-
/// Given regclass RC and pairs of [OldSubReg, SubRegRC] in SubRegs try to
69+
/// Given regclass RC and pairs of [OldSubReg, NewSubReg] in SubRegs try to
8270
/// find new regclass such that:
8371
/// 1. It has subregs obtained by shifting each OldSubReg by RShift number
8472
/// of bits to the right. Every "shifted" subreg should have the same
8573
/// SubRegRC. If CoverSubregIdx is not zero it's a subreg that "covers"
8674
/// all other subregs in pairs. Basically such subreg becomes a whole
8775
/// register.
88-
/// 2. Resulting register class contains registers of minimal size but not
89-
/// less than RegNumBits.
76+
/// 2. Resulting register class contains registers of minimal size.
9077
///
91-
/// SubRegs is map of OldSubReg -> [SubRegRC, NewSubReg] and is used as in/out
78+
/// SubRegs is map of OldSubReg -> NewSubReg and is used as in/out
9279
/// parameter:
9380
/// OldSubReg - input parameter,
94-
/// SubRegRC - input parameter (cannot be null),
9581
/// NewSubReg - output, contains shifted subregs on return.
9682
const TargetRegisterClass *
9783
getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift,
98-
unsigned RegNumBits, unsigned CoverSubregIdx,
84+
unsigned CoverSubregIdx,
9985
SubRegMap &SubRegs) const;
10086

10187
/// Update live intervals after rewriting OldReg to NewReg with SubRegs map
@@ -105,9 +91,6 @@ class GCNRewritePartialRegUsesImpl {
10591

10692
/// Helper methods.
10793

108-
/// Return reg class expected by a MO's parent instruction for a given MO.
109-
const TargetRegisterClass *getOperandRegClass(MachineOperand &MO) const;
110-
11194
/// Find right-shifted by RShift amount version of the SubReg if it exists,
11295
/// return 0 otherwise.
11396
unsigned shiftSubReg(unsigned SubReg, unsigned RShift) const;
@@ -221,20 +204,23 @@ GCNRewritePartialRegUsesImpl::getAllocatableAndAlignedRegClassMask(
221204

222205
const TargetRegisterClass *
223206
GCNRewritePartialRegUsesImpl::getRegClassWithShiftedSubregs(
224-
const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits,
225-
unsigned CoverSubregIdx, SubRegMap &SubRegs) const {
207+
const TargetRegisterClass *RC, unsigned RShift, unsigned CoverSubregIdx,
208+
SubRegMap &SubRegs) const {
226209

227210
unsigned RCAlign = TRI->getRegClassAlignmentNumBits(RC);
228211
LLVM_DEBUG(dbgs() << " Shift " << RShift << ", reg align " << RCAlign
229212
<< '\n');
230213

231214
BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign));
232-
for (auto &[OldSubReg, SRI] : SubRegs) {
233-
auto &[SubRegRC, NewSubReg] = SRI;
234-
assert(SubRegRC);
215+
for (auto &[OldSubReg, NewSubReg] : SubRegs) {
216+
LLVM_DEBUG(dbgs() << " " << TRI->getSubRegIndexName(OldSubReg) << ':');
235217

236-
LLVM_DEBUG(dbgs() << " " << TRI->getSubRegIndexName(OldSubReg) << ':'
237-
<< TRI->getRegClassName(SubRegRC)
218+
auto *SubRegRC = TRI->getSubRegisterClass(RC, OldSubReg);
219+
if (!SubRegRC) {
220+
LLVM_DEBUG(dbgs() << "couldn't find target regclass\n");
221+
return nullptr;
222+
}
223+
LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC)
238224
<< (SubRegRC->isAllocatable() ? "" : " not alloc")
239225
<< " -> ");
240226

@@ -266,27 +252,23 @@ GCNRewritePartialRegUsesImpl::getRegClassWithShiftedSubregs(
266252
// ClassMask is the set of all register classes such that each class is
267253
// allocatable, aligned, has all shifted subregs and each subreg has required
268254
// register class (see SubRegRC above). Now select first (that is largest)
269-
// register class with registers of minimal but not less than RegNumBits size.
270-
// We have to check register size because we may encounter classes of smaller
271-
// registers like VReg_1 in some situations.
255+
// register class with registers of minimal size.
272256
const TargetRegisterClass *MinRC = nullptr;
273257
unsigned MinNumBits = std::numeric_limits<unsigned>::max();
274258
for (unsigned ClassID : ClassMask.set_bits()) {
275259
auto *RC = TRI->getRegClass(ClassID);
276260
unsigned NumBits = TRI->getRegSizeInBits(*RC);
277-
if (NumBits < MinNumBits && NumBits >= RegNumBits) {
261+
if (NumBits < MinNumBits) {
278262
MinNumBits = NumBits;
279263
MinRC = RC;
280264
}
281-
if (MinNumBits == RegNumBits)
282-
break;
283265
}
284266
#ifndef NDEBUG
285267
if (MinRC) {
286268
assert(MinRC->isAllocatable() && TRI->isRegClassAligned(MinRC, RCAlign));
287-
for (auto [SubReg, SRI] : SubRegs)
288-
// Check that all registers in MinRC support SRI.SubReg subregister.
289-
assert(MinRC == TRI->getSubClassWithSubReg(MinRC, SRI.SubReg));
269+
for (auto [OldSubReg, NewSubReg] : SubRegs)
270+
// Check that all registers in MinRC support NewSubReg subregister.
271+
assert(MinRC == TRI->getSubClassWithSubReg(MinRC, NewSubReg));
290272
}
291273
#endif
292274
// There might be zero RShift - in this case we are just trying to find smaller
@@ -317,8 +299,7 @@ GCNRewritePartialRegUsesImpl::getMinSizeReg(const TargetRegisterClass *RC,
317299
// If covering subreg is found shift everything so the covering subreg would
318300
// be in the rightmost position.
319301
if (CoverSubreg != AMDGPU::NoSubRegister)
320-
return getRegClassWithShiftedSubregs(RC, Offset, End - Offset, CoverSubreg,
321-
SubRegs);
302+
return getRegClassWithShiftedSubregs(RC, Offset, CoverSubreg, SubRegs);
322303

323304
// Otherwise find subreg with maximum required alignment and shift it and all
324305
// other subregs to the rightmost possible position with respect to the
@@ -344,7 +325,7 @@ GCNRewritePartialRegUsesImpl::getMinSizeReg(const TargetRegisterClass *RC,
344325
llvm_unreachable("misaligned subreg");
345326

346327
unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg;
347-
return getRegClassWithShiftedSubregs(RC, RShift, End - RShift, 0, SubRegs);
328+
return getRegClassWithShiftedSubregs(RC, RShift, 0, SubRegs);
348329
}
349330

350331
// Only the subrange's lanemasks of the original interval need to be modified.
@@ -390,7 +371,7 @@ void GCNRewritePartialRegUsesImpl::updateLiveIntervals(
390371
return;
391372
}
392373

393-
if (unsigned NewSubReg = I->second.SubReg)
374+
if (unsigned NewSubReg = I->second)
394375
NewLI.createSubRangeFrom(Allocator,
395376
TRI->getSubRegIndexLaneMask(NewSubReg), SR);
396377
else // This is the covering subreg (0 index) - set it as main range.
@@ -404,53 +385,23 @@ void GCNRewritePartialRegUsesImpl::updateLiveIntervals(
404385
LIS->removeInterval(OldReg);
405386
}
406387

407-
const TargetRegisterClass *
408-
GCNRewritePartialRegUsesImpl::getOperandRegClass(MachineOperand &MO) const {
409-
MachineInstr *MI = MO.getParent();
410-
return TII->getRegClass(TII->get(MI->getOpcode()), MI->getOperandNo(&MO), TRI,
411-
*MI->getParent()->getParent());
412-
}
413-
414388
bool GCNRewritePartialRegUsesImpl::rewriteReg(Register Reg) const {
415-
auto Range = MRI->reg_nodbg_operands(Reg);
416-
if (Range.empty() || any_of(Range, [](MachineOperand &MO) {
417-
return MO.getSubReg() == AMDGPU::NoSubRegister; // Whole reg used. [1]
418-
}))
389+
390+
// Collect used subregs.
391+
SubRegMap SubRegs;
392+
for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
393+
if (MO.getSubReg() == AMDGPU::NoSubRegister)
394+
return false; // Whole reg used.
395+
SubRegs.try_emplace(MO.getSubReg());
396+
}
397+
398+
if (SubRegs.empty())
419399
return false;
420400

421401
auto *RC = MRI->getRegClass(Reg);
422402
LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI)
423403
<< ':' << TRI->getRegClassName(RC) << '\n');
424404

425-
// Collect used subregs and their reg classes inferred from instruction
426-
// operands.
427-
SubRegMap SubRegs;
428-
for (MachineOperand &MO : Range) {
429-
const unsigned SubReg = MO.getSubReg();
430-
assert(SubReg != AMDGPU::NoSubRegister); // Due to [1].
431-
LLVM_DEBUG(dbgs() << " " << TRI->getSubRegIndexName(SubReg) << ':');
432-
433-
const auto [I, Inserted] = SubRegs.try_emplace(SubReg);
434-
const TargetRegisterClass *&SubRegRC = I->second.RC;
435-
436-
if (Inserted)
437-
SubRegRC = TRI->getSubRegisterClass(RC, SubReg);
438-
439-
if (SubRegRC) {
440-
if (const TargetRegisterClass *OpDescRC = getOperandRegClass(MO)) {
441-
LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << " & "
442-
<< TRI->getRegClassName(OpDescRC) << " = ");
443-
SubRegRC = TRI->getCommonSubClass(SubRegRC, OpDescRC);
444-
}
445-
}
446-
447-
if (!SubRegRC) {
448-
LLVM_DEBUG(dbgs() << "couldn't find target regclass\n");
449-
return false;
450-
}
451-
LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << '\n');
452-
}
453-
454405
auto *NewRC = getMinSizeReg(RC, SubRegs);
455406
if (!NewRC) {
456407
LLVM_DEBUG(dbgs() << " No improvement achieved\n");
@@ -469,9 +420,9 @@ bool GCNRewritePartialRegUsesImpl::rewriteReg(Register Reg) const {
469420
// TODO: create some DI shift expression?
470421
if (MO.isDebug() && MO.getSubReg() == 0)
471422
continue;
472-
unsigned SubReg = SubRegs[MO.getSubReg()].SubReg;
473-
MO.setSubReg(SubReg);
474-
if (SubReg == AMDGPU::NoSubRegister && MO.isDef())
423+
unsigned NewSubReg = SubRegs[MO.getSubReg()];
424+
MO.setSubReg(NewSubReg);
425+
if (NewSubReg == AMDGPU::NoSubRegister && MO.isDef())
475426
MO.setIsUndef(false);
476427
}
477428

0 commit comments

Comments
 (0)