Skip to content

Commit 9c31155

Browse files
authored
TableGen: Optimize super-register class computation (#134865)
Inferring super-register classes naively requires checking every register class against every other register class and sub-register index. Each of those checks is itself a non-trivial operation on register sets. Culling as many (RC, RC, SubIdx) triples as possible is important for the running time of TableGen for architectures with complex sub-register relations. Use transitivity to cull many (RC, RC, SubIdx) triples. This unfortunately requires us to complete the transitive closure of super-register classes explicitly, but it still cuts down the running time on AMDGPU substantially -- in some upcoming work in the backend by more than half (in very rough measurements). This changes the names of some of the inferred register classes, since the order in which they are inferred changes. The names of the inferred register classes become shorter, which reduces the size of the generated files. Replacing some uses of SmallPtrSet by DenseSet shaves off a few more percent; there are hundreds of register classes in AMDGPU. Tweaking the topological signature check to skip registers without super-registers further helps skip register classes that have "pseudo" registers in them whose sub- and super-register structure is trivial.
1 parent ca53463 commit 9c31155

File tree

3 files changed

+225
-33
lines changed

3 files changed

+225
-33
lines changed

llvm/utils/TableGen/Common/CodeGenRegisters.cpp

Lines changed: 182 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
#include "llvm/ADT/BitVector.h"
1717
#include "llvm/ADT/DenseMap.h"
1818
#include "llvm/ADT/IntEqClasses.h"
19+
#include "llvm/ADT/PointerUnion.h"
20+
#include "llvm/ADT/PostOrderIterator.h"
1921
#include "llvm/ADT/STLExtras.h"
2022
#include "llvm/ADT/SetVector.h"
2123
#include "llvm/ADT/SmallPtrSet.h"
@@ -765,7 +767,8 @@ static void sortAndUniqueRegisters(CodeGenRegister::Vec &M) {
765767
CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank,
766768
const Record *R)
767769
: TheDef(R), Name(std::string(R->getName())),
768-
TopoSigs(RegBank.getNumTopoSigs()), EnumValue(-1), TSFlags(0) {
770+
RegsWithSuperRegsTopoSigs(RegBank.getNumTopoSigs()), EnumValue(-1),
771+
TSFlags(0) {
769772
GeneratePressureSet = R->getValueAsBit("GeneratePressureSet");
770773
std::vector<const Record *> TypeList = R->getValueAsListOfDefs("RegTypes");
771774
if (TypeList.empty())
@@ -791,7 +794,8 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank,
791794
const CodeGenRegister *Reg = RegBank.getReg((*Elements)[i]);
792795
Members.push_back(Reg);
793796
Artificial &= Reg->Artificial;
794-
TopoSigs.set(Reg->getTopoSig());
797+
if (!Reg->getSuperRegs().empty())
798+
RegsWithSuperRegsTopoSigs.set(Reg->getTopoSig());
795799
}
796800
sortAndUniqueRegisters(Members);
797801

@@ -849,13 +853,14 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank,
849853
CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank,
850854
StringRef Name, Key Props)
851855
: Members(*Props.Members), TheDef(nullptr), Name(std::string(Name)),
852-
TopoSigs(RegBank.getNumTopoSigs()), EnumValue(-1), RSI(Props.RSI),
853-
CopyCost(0), Allocatable(true), AllocationPriority(0),
856+
RegsWithSuperRegsTopoSigs(RegBank.getNumTopoSigs()), EnumValue(-1),
857+
RSI(Props.RSI), CopyCost(0), Allocatable(true), AllocationPriority(0),
854858
GlobalPriority(false), TSFlags(0) {
855859
Artificial = true;
856860
GeneratePressureSet = false;
857861
for (const auto R : Members) {
858-
TopoSigs.set(R->getTopoSig());
862+
if (!R->getSuperRegs().empty())
863+
RegsWithSuperRegsTopoSigs.set(R->getTopoSig());
859864
Artificial &= R->Artificial;
860865
}
861866
}
@@ -1173,6 +1178,28 @@ void CodeGenRegisterClass::buildRegUnitSet(
11731178
std::back_inserter(RegUnits));
11741179
}
11751180

1181+
// Combine our super classes of the given sub-register index with all of their
1182+
// super classes in turn.
1183+
void CodeGenRegisterClass::extendSuperRegClasses(CodeGenSubRegIndex *SubIdx) {
1184+
auto It = SuperRegClasses.find(SubIdx);
1185+
if (It == SuperRegClasses.end())
1186+
return;
1187+
1188+
SmallVector<CodeGenRegisterClass *> MidRCs;
1189+
MidRCs.insert(MidRCs.end(), It->second.begin(), It->second.end());
1190+
1191+
for (CodeGenRegisterClass *MidRC : MidRCs) {
1192+
for (auto &Pair : MidRC->SuperRegClasses) {
1193+
CodeGenSubRegIndex *ComposedSubIdx = Pair.first->compose(SubIdx);
1194+
if (!ComposedSubIdx)
1195+
continue;
1196+
1197+
for (CodeGenRegisterClass *SuperRC : Pair.second)
1198+
addSuperRegClass(ComposedSubIdx, SuperRC);
1199+
}
1200+
}
1201+
}
1202+
11761203
//===----------------------------------------------------------------------===//
11771204
// CodeGenRegisterCategory
11781205
//===----------------------------------------------------------------------===//
@@ -1290,6 +1317,8 @@ CodeGenRegBank::CodeGenRegBank(const RecordKeeper &Records,
12901317
}
12911318
}
12921319

1320+
computeSubRegIndicesRPOT();
1321+
12931322
// Native register units are associated with a leaf register. They've all been
12941323
// discovered now.
12951324
NumNativeRegUnits = RegUnits.size();
@@ -1364,20 +1393,20 @@ void CodeGenRegBank::addToMaps(CodeGenRegisterClass *RC) {
13641393
}
13651394

13661395
// Create a synthetic sub-class if it is missing.
1367-
CodeGenRegisterClass *
1396+
std::pair<CodeGenRegisterClass *, bool>
13681397
CodeGenRegBank::getOrCreateSubClass(const CodeGenRegisterClass *RC,
13691398
const CodeGenRegister::Vec *Members,
13701399
StringRef Name) {
13711400
// Synthetic sub-class has the same size and alignment as RC.
13721401
CodeGenRegisterClass::Key K(Members, RC->RSI);
13731402
RCKeyMap::const_iterator FoundI = Key2RC.find(K);
13741403
if (FoundI != Key2RC.end())
1375-
return FoundI->second;
1404+
return {FoundI->second, false};
13761405

13771406
// Sub-class doesn't exist, create a new one.
13781407
RegClasses.emplace_back(*this, Name, K);
13791408
addToMaps(&RegClasses.back());
1380-
return &RegClasses.back();
1409+
return {&RegClasses.back(), true};
13811410
}
13821411

13831412
CodeGenRegisterClass *CodeGenRegBank::getRegClass(const Record *Def) const {
@@ -1694,6 +1723,81 @@ void CodeGenRegBank::computeSubRegLaneMasks() {
16941723

16951724
namespace {
16961725

1726+
// A directed graph on sub-register indices with a virtual source node that
1727+
// has an arc to all other nodes, and an arc from A to B if sub-register index
1728+
// B can be obtained by composing A with some other sub-register index.
1729+
struct SubRegIndexCompositionGraph {
1730+
std::deque<CodeGenSubRegIndex> &SubRegIndices;
1731+
CodeGenSubRegIndex::CompMap EntryNode;
1732+
1733+
SubRegIndexCompositionGraph(std::deque<CodeGenSubRegIndex> &SubRegIndices)
1734+
: SubRegIndices(SubRegIndices) {
1735+
for (CodeGenSubRegIndex &Idx : SubRegIndices) {
1736+
EntryNode.try_emplace(&Idx, &Idx);
1737+
}
1738+
}
1739+
};
1740+
1741+
} // namespace
1742+
1743+
template <> struct llvm::GraphTraits<SubRegIndexCompositionGraph> {
1744+
using NodeRef =
1745+
PointerUnion<CodeGenSubRegIndex *, const CodeGenSubRegIndex::CompMap *>;
1746+
1747+
// Using a reverse iterator causes sub-register indices to appear in their
1748+
// more natural order in RPOT.
1749+
using CompMapIt = CodeGenSubRegIndex::CompMap::const_reverse_iterator;
1750+
struct ChildIteratorType
1751+
: public iterator_adaptor_base<
1752+
ChildIteratorType, CompMapIt,
1753+
typename std::iterator_traits<CompMapIt>::iterator_category,
1754+
NodeRef> {
1755+
ChildIteratorType(CompMapIt I)
1756+
: ChildIteratorType::iterator_adaptor_base(I) {}
1757+
1758+
NodeRef operator*() const { return wrapped()->second; }
1759+
};
1760+
1761+
static NodeRef getEntryNode(const SubRegIndexCompositionGraph &G) {
1762+
return &G.EntryNode;
1763+
}
1764+
1765+
static const CodeGenSubRegIndex::CompMap *children(NodeRef N) {
1766+
if (auto *Idx = dyn_cast<CodeGenSubRegIndex *>(N))
1767+
return &Idx->getComposites();
1768+
return cast<const CodeGenSubRegIndex::CompMap *>(N);
1769+
}
1770+
1771+
static ChildIteratorType child_begin(NodeRef N) {
1772+
return ChildIteratorType(children(N)->rbegin());
1773+
}
1774+
static ChildIteratorType child_end(NodeRef N) {
1775+
return ChildIteratorType(children(N)->rend());
1776+
}
1777+
1778+
static auto nodes_begin(SubRegIndexCompositionGraph *G) {
1779+
return G->SubRegIndices.begin();
1780+
}
1781+
static auto nodes_end(SubRegIndexCompositionGraph *G) {
1782+
return G->SubRegIndices.end();
1783+
}
1784+
1785+
static unsigned size(SubRegIndexCompositionGraph *G) {
1786+
return G->SubRegIndices.size();
1787+
}
1788+
};
1789+
1790+
void CodeGenRegBank::computeSubRegIndicesRPOT() {
1791+
SubRegIndexCompositionGraph G(SubRegIndices);
1792+
ReversePostOrderTraversal<SubRegIndexCompositionGraph> RPOT(G);
1793+
for (const auto N : RPOT) {
1794+
if (auto *Idx = dyn_cast<CodeGenSubRegIndex *>(N))
1795+
SubRegIndicesRPOT.push_back(Idx);
1796+
}
1797+
}
1798+
1799+
namespace {
1800+
16971801
// UberRegSet is a helper class for computeRegUnitWeights. Each UberRegSet is
16981802
// the transitive closure of the union of overlapping register
16991803
// classes. Together, the UberRegSets form a partition of the registers. If we
@@ -2323,8 +2427,10 @@ void CodeGenRegBank::inferSubClassWithSubReg(CodeGenRegisterClass *RC) {
23232427
if (SubIdx.Artificial)
23242428
continue;
23252429
// This is a real subset. See if we have a matching class.
2326-
CodeGenRegisterClass *SubRC = getOrCreateSubClass(
2327-
RC, &I->second, RC->getName() + "_with_" + I->first->getName());
2430+
CodeGenRegisterClass *SubRC =
2431+
getOrCreateSubClass(RC, &I->second,
2432+
RC->getName() + "_with_" + I->first->getName())
2433+
.first;
23282434
RC->setSubClassWithSubReg(&SubIdx, SubRC);
23292435
}
23302436
}
@@ -2339,24 +2445,30 @@ void CodeGenRegBank::inferSubClassWithSubReg(CodeGenRegisterClass *RC) {
23392445
void CodeGenRegBank::inferMatchingSuperRegClass(
23402446
CodeGenRegisterClass *RC,
23412447
std::list<CodeGenRegisterClass>::iterator FirstSubRegRC) {
2448+
DenseSet<const CodeGenSubRegIndex *> ImpliedSubRegIndices;
23422449
std::vector<std::pair<const CodeGenRegister *, const CodeGenRegister *>>
23432450
SubToSuperRegs;
23442451
BitVector TopoSigs(getNumTopoSigs());
23452452

2346-
// Iterate in SubRegIndex numerical order to visit synthetic indices last.
2347-
for (auto &SubIdx : SubRegIndices) {
2453+
// Iterate subregister indices in topological order to visit larger indices
2454+
// first. This allows us to skip the smaller indices in many cases because
2455+
// their inferred super-register classes are implied.
2456+
for (auto *SubIdx : SubRegIndicesRPOT) {
23482457
// Skip indexes that aren't fully supported by RC's registers. This was
23492458
// computed by inferSubClassWithSubReg() above which should have been
23502459
// called first.
2351-
if (RC->getSubClassWithSubReg(&SubIdx) != RC)
2460+
if (RC->getSubClassWithSubReg(SubIdx) != RC)
2461+
continue;
2462+
2463+
if (ImpliedSubRegIndices.count(SubIdx))
23522464
continue;
23532465

23542466
// Build list of (Sub, Super) pairs for this SubIdx, sorted by Sub. Note
23552467
// that the list may contain entries with the same Sub but different Supers.
23562468
SubToSuperRegs.clear();
23572469
TopoSigs.reset();
23582470
for (const auto Super : RC->getMembers()) {
2359-
const CodeGenRegister *Sub = Super->getSubRegs().find(&SubIdx)->second;
2471+
const CodeGenRegister *Sub = Super->getSubRegs().find(SubIdx)->second;
23602472
assert(Sub && "Missing sub-register");
23612473
SubToSuperRegs.emplace_back(Sub, Super);
23622474
TopoSigs.set(Sub->getTopoSig());
@@ -2374,7 +2486,7 @@ void CodeGenRegBank::inferMatchingSuperRegClass(
23742486
if (SubRC.Artificial)
23752487
continue;
23762488
// Topological shortcut: SubRC members have the wrong shape.
2377-
if (!TopoSigs.anyCommon(SubRC.getTopoSigs()))
2489+
if (!TopoSigs.anyCommon(SubRC.getRegsWithSuperRegsTopoSigs()))
23782490
continue;
23792491
// Compute the subset of RC that maps into SubRC with a single linear scan
23802492
// through SubToSuperRegs and the members of SubRC.
@@ -2395,15 +2507,54 @@ void CodeGenRegBank::inferMatchingSuperRegClass(
23952507
// RC injects completely into SubRC.
23962508
sortAndUniqueRegisters(SubSetVec);
23972509
if (SubSetVec.size() == RC->getMembers().size()) {
2398-
SubRC.addSuperRegClass(&SubIdx, RC);
2510+
SubRC.addSuperRegClass(SubIdx, RC);
2511+
2512+
// We can skip checking subregister indices that can be composed from
2513+
// the current SubIdx.
2514+
//
2515+
// Proof sketch: Let SubRC' be another register class and SubSubIdx
2516+
// a subregister index that can be composed from SubIdx.
2517+
//
2518+
// Calling this function with SubRC in place of RC ensures the existence
2519+
// of a subclass X of SubRC with the registers that have subregisters in
2520+
// SubRC'.
2521+
//
2522+
// The set of registers in RC with SubSubIdx in SubRC' is equal to the
2523+
// set of registers in RC with SubIdx in X (because every register in
2524+
// RC has a corresponding subregister in SubRC), and so checking the
2525+
// pair (SubSubIdx, SubRC') is redundant with checking (SubIdx, X).
2526+
for (const auto &SubSubIdx : SubIdx->getComposites())
2527+
ImpliedSubRegIndices.insert(SubSubIdx.second);
2528+
23992529
continue;
24002530
}
24012531

24022532
// Only a subset of RC maps into SubRC. Make sure it is represented by a
24032533
// class.
2404-
getOrCreateSubClass(RC, &SubSetVec,
2405-
RC->getName() + "_with_" + SubIdx.getName() + "_in_" +
2406-
SubRC.getName());
2534+
//
2535+
// The name of the inferred register class follows the template
2536+
// "<RC>_with_<SubIdx>_in_<SubRC>".
2537+
//
2538+
// When SubRC is already an inferred class, prefer a name of the form
2539+
// "<RC>_with_<CompositeSubIdx>_in_<SubSubRC>" over a chain of the form
2540+
// "<RC>_with_<SubIdx>_in_<OtherRC>_with_<SubSubIdx>_in_<SubSubRC>".
2541+
CodeGenSubRegIndex *CompositeSubIdx = SubIdx;
2542+
CodeGenRegisterClass *CompositeSubRC = &SubRC;
2543+
if (CodeGenSubRegIndex *SubSubIdx = SubRC.getInferredFromSubRegIdx()) {
2544+
auto It = SubIdx->getComposites().find(SubSubIdx);
2545+
if (It != SubIdx->getComposites().end()) {
2546+
CompositeSubIdx = It->second;
2547+
CompositeSubRC = SubRC.getInferredFromRC();
2548+
}
2549+
}
2550+
2551+
auto [SubSetRC, Inserted] = getOrCreateSubClass(
2552+
RC, &SubSetVec,
2553+
RC->getName() + "_with_" + CompositeSubIdx->getName() + "_in_" +
2554+
CompositeSubRC->getName());
2555+
2556+
if (Inserted)
2557+
SubSetRC->setInferredFrom(CompositeSubIdx, CompositeSubRC);
24072558
}
24082559
}
24092560
}
@@ -2438,7 +2589,7 @@ void CodeGenRegBank::computeInferredRegisterClasses() {
24382589
inferMatchingSuperRegClass(RC);
24392590

24402591
// New register classes are created while this loop is running, and we need
2441-
// to visit all of them. I particular, inferMatchingSuperRegClass needs
2592+
// to visit all of them. In particular, inferMatchingSuperRegClass needs
24422593
// to match old super-register classes with sub-register classes created
24432594
// after inferMatchingSuperRegClass was called. At this point,
24442595
// inferMatchingSuperRegClass has checked SuperRC = [0..rci] with SubRC =
@@ -2451,6 +2602,17 @@ void CodeGenRegBank::computeInferredRegisterClasses() {
24512602
FirstNewRC = NextNewRC;
24522603
}
24532604
}
2605+
2606+
// Compute the transitive closure for super-register classes.
2607+
//
2608+
// By iterating over sub-register indices in topological order, we only ever
2609+
// add super-register classes for sub-register indices that have not already
2610+
// been visited. That allows computing the transitive closure in a single
2611+
// pass.
2612+
for (CodeGenSubRegIndex *SubIdx : SubRegIndicesRPOT) {
2613+
for (CodeGenRegisterClass &SubRC : RegClasses)
2614+
SubRC.extendSuperRegClasses(SubIdx);
2615+
}
24542616
}
24552617

24562618
/// getRegisterClassForRegister - Find the register class that contains the

0 commit comments

Comments
 (0)