Skip to content

Commit f4cf15d

Browse files
authored
[RegAllocFast] Replace UsedInInstr with vector (#96323)
A SparseSet adds an avoidable layer of indirection and possibly looping control flow. Avoid this overhead by using a vector to store UsedInInstr and PhysRegUses. To avoid clearing the vector after every instruction, use a monotonically increasing counter. The two sets are now merged into a single vector, and the lowest bit of each entry indicates whether the use is a normal use (bit set) or is relevant only for the livethrough handling code (bit clear).
1 parent 131bc03 commit f4cf15d

File tree

1 file changed

+31
-20
lines changed

1 file changed

+31
-20
lines changed

llvm/lib/CodeGen/RegAllocFast.cpp

Lines changed: 31 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -253,11 +253,22 @@ class RegAllocFastImpl {
253253

254254
SmallVector<MachineInstr *, 32> Coalesced;
255255

256-
using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>;
257-
/// Set of register units that are used in the current instruction, and so
256+
/// Track register units that are used in the current instruction, and so
258257
/// cannot be allocated.
259-
RegUnitSet UsedInInstr;
260-
RegUnitSet PhysRegUses;
258+
///
259+
/// In the first phase (tied defs/early clobber), we also consider physical
260+
/// uses; afterwards, we don't. If the lowest bit isn't set, it's solely a
261+
/// physical use (markPhysRegUsedInInstr), otherwise, it's a normal use. To
262+
/// avoid resetting the entire vector after every instruction, we track the
263+
/// instruction "generation" in the remaining 31 bits -- this means that if
264+
/// UsedInInstr[Idx] < InstrGen, the register unit is unused. InstrGen is
265+
/// never zero and always incremented by two.
266+
///
267+
/// Don't allocate inline storage: the number of register units is typically
268+
/// quite large (e.g., AArch64 > 100, X86 > 200, AMDGPU > 1000).
269+
uint32_t InstrGen;
270+
SmallVector<unsigned, 0> UsedInInstr;
271+
261272
SmallVector<unsigned, 8> DefOperandIndexes;
262273
// Register masks attached to the current instruction.
263274
SmallVector<const uint32_t *> RegMasks;
@@ -271,7 +282,7 @@ class RegAllocFastImpl {
271282
/// Mark a physreg as used in this instruction.
272283
void markRegUsedInInstr(MCPhysReg PhysReg) {
273284
for (MCRegUnit Unit : TRI->regunits(PhysReg))
274-
UsedInInstr.insert(Unit);
285+
UsedInInstr[Unit] = InstrGen | 1;
275286
}
276287

277288
// Check if physreg is clobbered by instruction's regmask(s).
@@ -285,26 +296,25 @@ class RegAllocFastImpl {
285296
bool isRegUsedInInstr(MCPhysReg PhysReg, bool LookAtPhysRegUses) const {
286297
if (LookAtPhysRegUses && isClobberedByRegMasks(PhysReg))
287298
return true;
288-
for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
289-
if (UsedInInstr.count(Unit))
290-
return true;
291-
if (LookAtPhysRegUses && PhysRegUses.count(Unit))
299+
for (MCRegUnit Unit : TRI->regunits(PhysReg))
300+
if (UsedInInstr[Unit] >= (InstrGen | !LookAtPhysRegUses))
292301
return true;
293-
}
294302
return false;
295303
}
296304

297305
/// Mark physical register as being used in a register use operand.
298306
/// This is only used by the special livethrough handling code.
299307
void markPhysRegUsedInInstr(MCPhysReg PhysReg) {
300-
for (MCRegUnit Unit : TRI->regunits(PhysReg))
301-
PhysRegUses.insert(Unit);
308+
for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
309+
assert(UsedInInstr[Unit] <= InstrGen && "non-phys use before phys use?");
310+
UsedInInstr[Unit] = InstrGen;
311+
}
302312
}
303313

304314
/// Remove mark of physical register being used in the instruction.
305315
void unmarkRegUsedInInstr(MCPhysReg PhysReg) {
306316
for (MCRegUnit Unit : TRI->regunits(PhysReg))
307-
UsedInInstr.erase(Unit);
317+
UsedInInstr[Unit] = 0;
308318
}
309319

310320
enum : unsigned {
@@ -1382,7 +1392,12 @@ void RegAllocFastImpl::allocateInstruction(MachineInstr &MI) {
13821392
// - The "free def operands" step has to come last instead of first for tied
13831393
// operands and early-clobbers.
13841394

1385-
UsedInInstr.clear();
1395+
InstrGen += 2;
1396+
// In the event we ever get more than 2**31 instructions...
1397+
if (LLVM_UNLIKELY(InstrGen == 0)) {
1398+
UsedInInstr.assign(UsedInInstr.size(), 0);
1399+
InstrGen = 2;
1400+
}
13861401
RegMasks.clear();
13871402
BundleVirtRegsMap.clear();
13881403

@@ -1443,8 +1458,6 @@ void RegAllocFastImpl::allocateInstruction(MachineInstr &MI) {
14431458
// heuristic to figure out a good operand order before doing
14441459
// assignments.
14451460
if (NeedToAssignLiveThroughs) {
1446-
PhysRegUses.clear();
1447-
14481461
while (ReArrangedImplicitOps) {
14491462
ReArrangedImplicitOps = false;
14501463
findAndSortDefOperandIndexes(MI);
@@ -1769,10 +1782,8 @@ bool RegAllocFastImpl::runOnMachineFunction(MachineFunction &MF) {
17691782
MRI->freezeReservedRegs();
17701783
RegClassInfo.runOnMachineFunction(MF);
17711784
unsigned NumRegUnits = TRI->getNumRegUnits();
1772-
UsedInInstr.clear();
1773-
UsedInInstr.setUniverse(NumRegUnits);
1774-
PhysRegUses.clear();
1775-
PhysRegUses.setUniverse(NumRegUnits);
1785+
InstrGen = 0;
1786+
UsedInInstr.assign(NumRegUnits, 0);
17761787

17771788
// initialize the virtual->physical register map to have a 'null'
17781789
// mapping for all virtual registers

0 commit comments

Comments
 (0)