Skip to content

Commit b42f4a7

Browse files
authored
Merge pull request #72874 from al45tair/eng/PR-125739630-6.0
[Remangler] Improve performance by caching hashes.
2 parents 95e568b + 6e4d97a commit b42f4a7

File tree

4 files changed

+129
-43
lines changed

4 files changed

+129
-43
lines changed

lib/Demangling/OldRemangler.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ bool Remangler::trySubstitution(Node *node, SubstitutionEntry &entry) {
242242
return true;
243243

244244
// Go ahead and initialize the substitution entry.
245-
entry.setNode(node, /*treatAsIdentifier=*/ false);
245+
entry = entryForNode(node);
246246

247247
int Idx = findSubstitution(entry);
248248
if (Idx < 0)

lib/Demangling/Remangler.cpp

Lines changed: 100 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -59,36 +59,6 @@ bool SubstitutionEntry::identifierEquals(Node *lhs, Node *rhs) {
5959
return true;
6060
}
6161

62-
void SubstitutionEntry::deepHash(Node *node) {
63-
if (treatAsIdentifier) {
64-
combineHash((size_t) Node::Kind::Identifier);
65-
assert(node->hasText());
66-
switch (node->getKind()) {
67-
case Node::Kind::InfixOperator:
68-
case Node::Kind::PrefixOperator:
69-
case Node::Kind::PostfixOperator:
70-
for (char c : node->getText()) {
71-
combineHash((unsigned char)translateOperatorChar(c));
72-
}
73-
return;
74-
default:
75-
break;
76-
}
77-
} else {
78-
combineHash((size_t) node->getKind());
79-
}
80-
if (node->hasIndex()) {
81-
combineHash(node->getIndex());
82-
} else if (node->hasText()) {
83-
for (char c : node->getText()) {
84-
combineHash((unsigned char) c);
85-
}
86-
}
87-
for (Node *child : *node) {
88-
deepHash(child);
89-
}
90-
}
91-
9262
bool SubstitutionEntry::deepEquals(Node *lhs, Node *rhs) const {
9363
if (lhs->getKind() != rhs->getKind())
9464
return false;
@@ -114,10 +84,108 @@ bool SubstitutionEntry::deepEquals(Node *lhs, Node *rhs) const {
11484
if (!deepEquals(*li, *ri))
11585
return false;
11686
}
117-
87+
11888
return true;
11989
}
12090

91+
/// Fold \p newValue into a running hash: the previous hash is multiplied by
/// 33 and the new value is added. Arithmetic is on size_t, so it wraps.
static inline size_t combineHash(size_t currentHash, size_t newValue) {
  // (h << 5) + h is h * 33 under unsigned wraparound.
  const size_t scaled = (currentHash << 5) + currentHash;
  return scaled + newValue;
}
94+
95+
/// Calculate the hash for a node.
96+
size_t RemanglerBase::hashForNode(Node *node,
97+
bool treatAsIdentifier) {
98+
size_t hash = 0;
99+
100+
if (treatAsIdentifier) {
101+
hash = combineHash(hash, (size_t)Node::Kind::Identifier);
102+
assert(node->hasText());
103+
switch (node->getKind()) {
104+
case Node::Kind::InfixOperator:
105+
case Node::Kind::PrefixOperator:
106+
case Node::Kind::PostfixOperator:
107+
for (char c : node->getText()) {
108+
hash = combineHash(hash, (unsigned char)translateOperatorChar(c));
109+
}
110+
return hash;
111+
default:
112+
break;
113+
}
114+
} else {
115+
hash = combineHash(hash, (size_t) node->getKind());
116+
}
117+
if (node->hasIndex()) {
118+
hash = combineHash(hash, node->getIndex());
119+
} else if (node->hasText()) {
120+
for (char c : node->getText()) {
121+
hash = combineHash(hash, (unsigned char) c);
122+
}
123+
}
124+
for (Node *child : *node) {
125+
SubstitutionEntry entry = entryForNode(child, treatAsIdentifier);
126+
hash = combineHash(hash, entry.hash());
127+
}
128+
129+
return hash;
130+
}
131+
132+
/// Rotate a size_t right by \p shift bits.
///
/// The shift count is reduced modulo the bit width of size_t, so a count of
/// zero (or any multiple of the width) returns \p value unchanged instead of
/// performing a full-width shift, which is undefined behavior in C++.
static inline size_t rotate(size_t value, size_t shift) {
  const size_t bits = sizeof(size_t) * 8;
  shift %= bits;
  if (shift == 0)
    return value;
  return (value >> shift) | (value << (bits - shift));
}
137+
138+
/// Compute a hash value from a node *pointer*.
139+
/// Used for look-ups in HashHash. The numbers in here were determined
140+
/// experimentally.
141+
static inline size_t nodeHash(Node *node) {
142+
// Multiply by a magic number
143+
const size_t nodePrime = ((size_t)node) * 2043;
144+
145+
// We rotate by a different amount because the alignment of Node
146+
// changes depending on the machine's pointer size
147+
switch (sizeof(size_t)) {
148+
case 4:
149+
return rotate(nodePrime, 11);
150+
case 8:
151+
return rotate(nodePrime, 12);
152+
case 16:
153+
return rotate(nodePrime, 13);
154+
default:
155+
return rotate(nodePrime, 12);
156+
}
157+
}
158+
159+
/// Construct a SubstitutionEntry for a given node.
160+
/// This will look in the HashHash to see if we already know the hash
161+
/// (which avoids recursive hashing on the Node tree).
162+
SubstitutionEntry RemanglerBase::entryForNode(Node *node,
163+
bool treatAsIdentifier) {
164+
const size_t ident = treatAsIdentifier ? 4 : 0;
165+
const size_t hash = nodeHash(node) + ident;
166+
167+
// Use linear probing with a limit
168+
for (size_t n = 0; n < HashHashMaxProbes; ++n) {
169+
const size_t ndx = (hash + n) & (HashHashCapacity - 1);
170+
SubstitutionEntry entry = HashHash[ndx];
171+
172+
if (entry.isEmpty()) {
173+
size_t entryHash = hashForNode(node, treatAsIdentifier);
174+
entry.setNode(node, treatAsIdentifier, entryHash);
175+
HashHash[ndx] = entry;
176+
return entry;
177+
} else if (entry.matches(node, treatAsIdentifier)) {
178+
return entry;
179+
}
180+
}
181+
182+
// Hash table is full at this hash value
183+
SubstitutionEntry entry;
184+
size_t entryHash = hashForNode(node, treatAsIdentifier);
185+
entry.setNode(node, treatAsIdentifier, entryHash);
186+
return entry;
187+
}
188+
121189
// Find a substitution and return its index.
122190
// Returns -1 if no substitution is found.
123191
int RemanglerBase::findSubstitution(const SubstitutionEntry &entry) {
@@ -356,7 +424,7 @@ bool Remangler::trySubstitution(Node *node, SubstitutionEntry &entry,
356424
return true;
357425

358426
// Go ahead and initialize the substitution entry.
359-
entry.setNode(node, treatAsIdentifier);
427+
entry = entryForNode(node, treatAsIdentifier);
360428

361429
int Idx = findSubstitution(entry);
362430
if (Idx < 0)

lib/Demangling/RemanglerBase.h

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,10 @@ class SubstitutionEntry {
4242
bool treatAsIdentifier = false;
4343

4444
public:
45-
void setNode(Node *node, bool treatAsIdentifier) {
45+
void setNode(Node *node, bool treatAsIdentifier, size_t hash) {
4646
this->treatAsIdentifier = treatAsIdentifier;
4747
TheNode = node;
48-
deepHash(node);
48+
StoredHash = hash;
4949
}
5050

5151
struct Hasher {
@@ -54,6 +54,14 @@ class SubstitutionEntry {
5454
}
5555
};
5656

57+
/// True when no node has been stored in this entry.
bool isEmpty() const {
  return TheNode == nullptr;
}
58+
59+
/// True when this entry was stored for exactly this node pointer with the
/// same treatAsIdentifier flag. Compares identity, not node contents.
bool matches(Node *node, bool treatAsIdentifier) const {
  if (node != TheNode)
    return false;
  return this->treatAsIdentifier == treatAsIdentifier;
}
62+
63+
/// The hash value recorded by setNode().
size_t hash() const {
  return StoredHash;
}
64+
5765
private:
5866
friend bool operator==(const SubstitutionEntry &lhs,
5967
const SubstitutionEntry &rhs) {
@@ -69,12 +77,6 @@ class SubstitutionEntry {
6977

7078
static bool identifierEquals(Node *lhs, Node *rhs);
7179

72-
void combineHash(size_t newValue) {
73-
StoredHash = 33 * StoredHash + newValue;
74-
}
75-
76-
void deepHash(Node *node);
77-
7880
bool deepEquals(Node *lhs, Node *rhs) const;
7981
};
8082

@@ -131,6 +133,13 @@ class RemanglerBase {
131133
// Used to allocate temporary nodes and the output string (in Buffer).
132134
NodeFactory &Factory;
133135

136+
// Recursively calculating the node hashes can be expensive if the node tree
137+
// is deep, so we keep a hash table mapping (Node *, treatAsIdentifier) pairs
138+
// to hashes.
139+
static const size_t HashHashCapacity = 512; // Must be a power of 2
140+
static const size_t HashHashMaxProbes = 8;
141+
SubstitutionEntry HashHash[HashHashCapacity] = {};
142+
134143
// An efficient hash-map implementation in the spirit of llvm's SmallPtrSet:
135144
// The first 16 substitutions are stored in an inline-allocated array to avoid
136145
// malloc calls in the common case.
@@ -148,7 +157,16 @@ class RemanglerBase {
148157
RemanglerBuffer Buffer;
149158

150159
protected:
151-
RemanglerBase(NodeFactory &Factory) : Factory(Factory), Buffer(Factory) { }
160+
RemanglerBase(NodeFactory &Factory)
161+
: Factory(Factory), Buffer(Factory) { }
162+
163+
/// Compute the hash for a node.
164+
size_t hashForNode(Node *node, bool treatAsIdentifier = false);
165+
166+
/// Construct a SubstitutionEntry for a given node.
167+
/// This will look in the HashHash to see if we already know the hash,
168+
/// to avoid having to walk the entire subtree.
169+
SubstitutionEntry entryForNode(Node *node, bool treatAsIdentifier = false);
152170

153171
/// Find a substitution and return its index.
154172
/// Returns -1 if no substitution is found.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
Error: (3:340) unable to re-mangle $sBf32__t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t
_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_tN
1+
Error: (3:408) unable to re-mangle $sBf32__t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t
_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_tN

0 commit comments

Comments
 (0)