-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RFC][MC] Cache MCRegAliasIterator #93510
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -187,6 +187,9 @@ class MCRegisterInfo { | |
DenseMap<MCRegister, int> L2SEHRegs; // LLVM to SEH regs mapping | ||
DenseMap<MCRegister, int> L2CVRegs; // LLVM to CV regs mapping | ||
|
||
mutable std::vector<std::vector<MCPhysReg>> RegAliasesCache; | ||
ArrayRef<MCPhysReg> getCachedAliasesOf(MCPhysReg R) const; | ||
|
||
/// Iterator class that can traverse the differentially encoded values in | ||
/// DiffLists. Don't use this class directly, use one of the adaptors below. | ||
class DiffListIterator | ||
|
@@ -263,6 +266,7 @@ class MCRegisterInfo { | |
friend class MCRegUnitIterator; | ||
friend class MCRegUnitMaskIterator; | ||
friend class MCRegUnitRootIterator; | ||
friend class MCRegAliasIterator; | ||
|
||
/// Initialize MCRegisterInfo, called by TableGen | ||
/// auto-generated routines. *DO NOT USE*. | ||
|
@@ -298,6 +302,8 @@ class MCRegisterInfo { | |
EHDwarf2LRegsSize = 0; | ||
Dwarf2LRegs = nullptr; | ||
Dwarf2LRegsSize = 0; | ||
|
||
RegAliasesCache.resize(NumRegs); | ||
} | ||
|
||
/// Used to initialize LLVM register to Dwarf | ||
|
@@ -723,63 +729,30 @@ class MCRegUnitRootIterator { | |
} | ||
}; | ||
|
||
/// MCRegAliasIterator enumerates all registers aliasing Reg. If IncludeSelf is | ||
/// set, Reg itself is included in the list. This iterator does not guarantee | ||
/// any ordering or that entries are unique. | ||
/// MCRegAliasIterator enumerates all registers aliasing Reg. | ||
class MCRegAliasIterator { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm wondering if I should move the old implementation to a It makes things a bit cleaner (and makes the caching system easier to remove someday), but makes it very difficult to add a cache "killswitch" if we feel like one is necessary (like if we don't want caching on all targets). |
||
private: | ||
MCRegister Reg; | ||
const MCRegisterInfo *MCRI; | ||
bool IncludeSelf; | ||
|
||
MCRegUnitIterator RI; | ||
MCRegUnitRootIterator RRI; | ||
MCSuperRegIterator SI; | ||
const MCPhysReg *It = nullptr; | ||
const MCPhysReg *End = nullptr; | ||
|
||
public: | ||
MCRegAliasIterator(MCRegister Reg, const MCRegisterInfo *MCRI, | ||
bool IncludeSelf) | ||
: Reg(Reg), MCRI(MCRI), IncludeSelf(IncludeSelf) { | ||
// Initialize the iterators. | ||
for (RI = MCRegUnitIterator(Reg, MCRI); RI.isValid(); ++RI) { | ||
for (RRI = MCRegUnitRootIterator(*RI, MCRI); RRI.isValid(); ++RRI) { | ||
for (SI = MCSuperRegIterator(*RRI, MCRI, true); SI.isValid(); ++SI) { | ||
if (!(!IncludeSelf && Reg == *SI)) | ||
return; | ||
} | ||
} | ||
} | ||
} | ||
|
||
bool isValid() const { return RI.isValid(); } | ||
|
||
MCRegister operator*() const { | ||
assert(SI.isValid() && "Cannot dereference an invalid iterator."); | ||
return *SI; | ||
bool IncludeSelf) { | ||
ArrayRef<MCPhysReg> Cache = MCRI->getCachedAliasesOf(Reg); | ||
assert(Cache.back() == Reg); | ||
It = Cache.begin(); | ||
End = Cache.end(); | ||
if (!IncludeSelf) | ||
--End; | ||
} | ||
|
||
void advance() { | ||
// Assuming SI is valid. | ||
++SI; | ||
if (SI.isValid()) return; | ||
|
||
++RRI; | ||
if (RRI.isValid()) { | ||
SI = MCSuperRegIterator(*RRI, MCRI, true); | ||
return; | ||
} | ||
bool isValid() const { return It != End; } | ||
|
||
++RI; | ||
if (RI.isValid()) { | ||
RRI = MCRegUnitRootIterator(*RI, MCRI); | ||
SI = MCSuperRegIterator(*RRI, MCRI, true); | ||
} | ||
} | ||
MCRegister operator*() const { return *It; } | ||
|
||
MCRegAliasIterator &operator++() { | ||
assert(isValid() && "Cannot move off the end of the list."); | ||
do advance(); | ||
while (!IncludeSelf && isValid() && *SI == Reg); | ||
++It; | ||
return *this; | ||
} | ||
}; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
note: I wanted to use a vector of
unique_ptr
but it makes it too awkward to use. I can't even usereserve
in that case, so I didn't do that indirection in the end.The vector is resized exactly once upon init, and each vector is 3 pointers, so for AMDGPU we have 9000*24 bytes so about 216KB for this map, which is worth it IMO.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can reduce the size by using
SmallVector<, 0>
instead ofstd::vector
. (Using SmallVector is generally always preferred in LLVM, even if you don't use the inline storage.)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You could use std::optional in place of unique_ptr.
Sentinel looks fine, too.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@nikic SmallVector is 32 bytes, it has one more pointer for some reason.
Though it means I can use
SmallVector<MCPhysReg, 4>
and store small alias sets inline, so I think it's worth the size increase.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, but SmallVector doesn't have a shrink_to_fit, so it may end up using quite a bit more memory. I don't think it's worth it then.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That doesn't sound right. SmallVector should be 16 bytes large if the element type is 4 bytes... uh, I just saw that MCPhyReg is only 2 bytes, which increases SmallVector size to 24 bytes. And of course there's no way to disable that heuristic. So yeah, SmallVector won't make things smaller here (but it also shouldn't be 32 bytes either...)
But this also means that you allocate those 4 inline elements for all of the registers that are not actually queried. It's probably not worthwhile in this context.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why does it get bigger when the element type gets smaller? Is it worth fixing that?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is because
SmallVector<char>
sometimes needs to hold more than 4GB of data. Use of 64-bit sizes should probably be limited to just that case and not also extend toSmallVector<uint16_t>
...There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
#95536