Skip to content

[Remangler] Improve performance by caching hashes. #72874

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/Demangling/OldRemangler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ bool Remangler::trySubstitution(Node *node, SubstitutionEntry &entry) {
return true;

// Go ahead and initialize the substitution entry.
entry.setNode(node, /*treatAsIdentifier=*/ false);
entry = entryForNode(node);

int Idx = findSubstitution(entry);
if (Idx < 0)
Expand Down
132 changes: 100 additions & 32 deletions lib/Demangling/Remangler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,36 +59,6 @@ bool SubstitutionEntry::identifierEquals(Node *lhs, Node *rhs) {
return true;
}

/// Fold \p node and, recursively, all of its children into the stored hash.
///
/// When the entry is treated as an identifier, the Identifier kind is hashed
/// in place of the node's own kind. For operator nodes in that mode only the
/// translated operator characters are hashed and the function returns without
/// looking at the index, the raw text or the children.
void SubstitutionEntry::deepHash(Node *node) {
  if (!treatAsIdentifier) {
    combineHash((size_t) node->getKind());
  } else {
    combineHash((size_t) Node::Kind::Identifier);
    assert(node->hasText());

    const auto kind = node->getKind();
    const bool isOperator = kind == Node::Kind::InfixOperator ||
                            kind == Node::Kind::PrefixOperator ||
                            kind == Node::Kind::PostfixOperator;
    if (isOperator) {
      // Operators are hashed through their translated spelling.
      for (char ch : node->getText())
        combineHash((unsigned char)translateOperatorChar(ch));
      return;
    }
  }

  // Payload: an index takes precedence over text.
  if (node->hasIndex()) {
    combineHash(node->getIndex());
  } else if (node->hasText()) {
    for (char ch : node->getText())
      combineHash((unsigned char) ch);
  }

  // Finally descend into every child in order.
  for (Node *kid : *node)
    deepHash(kid);
}

bool SubstitutionEntry::deepEquals(Node *lhs, Node *rhs) const {
if (lhs->getKind() != rhs->getKind())
return false;
Expand All @@ -114,10 +84,108 @@ bool SubstitutionEntry::deepEquals(Node *lhs, Node *rhs) const {
if (!deepEquals(*li, *ri))
return false;
}

return true;
}

/// Mix \p newValue into \p currentHash: the running hash is multiplied by 33
/// (written as a shift plus an add) and the new value is added. All
/// arithmetic is unsigned and wraps around.
static inline size_t combineHash(size_t currentHash, size_t newValue) {
  const size_t scaled = (currentHash << 5) + currentHash; // == 33 * currentHash
  return scaled + newValue;
}

/// Compute the substitution hash of \p node.
///
/// The hash covers the node's kind (or the Identifier kind when
/// \p treatAsIdentifier is set), then its index or text, then the hash of
/// each child as reported by entryForNode(). Operator nodes that are treated
/// as identifiers are hashed from their translated characters only.
size_t RemanglerBase::hashForNode(Node *node,
                                  bool treatAsIdentifier) {
  size_t result = 0;

  if (!treatAsIdentifier) {
    result = combineHash(result, static_cast<size_t>(node->getKind()));
  } else {
    result = combineHash(result, static_cast<size_t>(Node::Kind::Identifier));
    assert(node->hasText());

    const auto kind = node->getKind();
    const bool isOperator = kind == Node::Kind::InfixOperator ||
                            kind == Node::Kind::PrefixOperator ||
                            kind == Node::Kind::PostfixOperator;
    if (isOperator) {
      // Operators: hash the translated spelling and stop here.
      for (char ch : node->getText()) {
        result = combineHash(
            result, static_cast<unsigned char>(translateOperatorChar(ch)));
      }
      return result;
    }
  }

  // Payload: an index takes precedence over text.
  if (node->hasIndex()) {
    result = combineHash(result, node->getIndex());
  } else if (node->hasText()) {
    for (char ch : node->getText())
      result = combineHash(result, static_cast<unsigned char>(ch));
  }

  // Children contribute their own (possibly cached) entry hash.
  for (Node *kid : *node) {
    SubstitutionEntry kidEntry = entryForNode(kid, treatAsIdentifier);
    result = combineHash(result, kidEntry.hash());
  }

  return result;
}

/// Rotate a size_t right by \p shift bits.
///
/// The shift is reduced modulo the width of size_t, so a shift of zero (or
/// any multiple of the width) returns \p value unchanged instead of
/// evaluating `value << bits`, which would be a shift by the full width of
/// the type and therefore undefined behaviour.
static inline size_t rotate(size_t value, size_t shift) {
  const size_t bits = sizeof(size_t) * 8;
  const size_t amount = shift % bits;
  if (amount == 0)
    return value;
  return (value >> amount) | (value << (bits - amount));
}

/// Hash a node by its *address* rather than by its contents.
/// The result is used as the starting slot for look-ups in HashHash. The
/// multiplier and rotation amounts were determined experimentally.
static inline size_t nodeHash(Node *node) {
  // Scale the pointer value by a magic number.
  const size_t scaled = reinterpret_cast<size_t>(node) * 2043;

  // The rotation amount depends on the pointer size because the alignment
  // of Node changes with it; 12 is used for 8-byte and any unlisted size.
  size_t amount = 12;
  if (sizeof(size_t) == 4)
    amount = 11;
  else if (sizeof(size_t) == 16)
    amount = 13;

  return rotate(scaled, amount);
}

/// Build the SubstitutionEntry for \p node.
///
/// HashHash is consulted first: if it already holds an entry for this
/// (node, treatAsIdentifier) pair, that entry is returned and the node tree
/// is not hashed again. Otherwise the hash is computed with hashForNode()
/// and, if an empty slot is found within the probe limit, remembered.
SubstitutionEntry RemanglerBase::entryForNode(Node *node,
                                              bool treatAsIdentifier) {
  // Identifier look-ups start probing 4 slots after the plain look-up.
  const size_t start = nodeHash(node) + (treatAsIdentifier ? 4 : 0);

  // Bounded linear probing.
  for (size_t probe = 0; probe != HashHashMaxProbes; ++probe) {
    const size_t slot = (start + probe) & (HashHashCapacity - 1);
    SubstitutionEntry cached = HashHash[slot];

    if (cached.isEmpty()) {
      // Free slot: compute the hash once and remember it here.
      const size_t computed = hashForNode(node, treatAsIdentifier);
      cached.setNode(node, treatAsIdentifier, computed);
      HashHash[slot] = cached;
      return cached;
    }

    if (cached.matches(node, treatAsIdentifier))
      return cached;
  }

  // Every probed slot is taken by other nodes: return an uncached entry.
  SubstitutionEntry uncached;
  const size_t computed = hashForNode(node, treatAsIdentifier);
  uncached.setNode(node, treatAsIdentifier, computed);
  return uncached;
}

// Find a substitution and return its index.
// Returns -1 if no substitution is found.
int RemanglerBase::findSubstitution(const SubstitutionEntry &entry) {
Expand Down Expand Up @@ -356,7 +424,7 @@ bool Remangler::trySubstitution(Node *node, SubstitutionEntry &entry,
return true;

// Go ahead and initialize the substitution entry.
entry.setNode(node, treatAsIdentifier);
entry = entryForNode(node, treatAsIdentifier);

int Idx = findSubstitution(entry);
if (Idx < 0)
Expand Down
36 changes: 27 additions & 9 deletions lib/Demangling/RemanglerBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ class SubstitutionEntry {
bool treatAsIdentifier = false;

public:
void setNode(Node *node, bool treatAsIdentifier) {
void setNode(Node *node, bool treatAsIdentifier, size_t hash) {
this->treatAsIdentifier = treatAsIdentifier;
TheNode = node;
deepHash(node);
StoredHash = hash;
}

struct Hasher {
Expand All @@ -54,6 +54,14 @@ class SubstitutionEntry {
}
};

/// True when no node has been assigned to this entry.
bool isEmpty() const {
  return TheNode == nullptr;
}

/// True when this entry refers to exactly the same node pointer and was
/// created with the same treatAsIdentifier flag. Node contents are not
/// compared.
bool matches(Node *node, bool treatAsIdentifier) const {
  if (TheNode != node)
    return false;
  return this->treatAsIdentifier == treatAsIdentifier;
}

/// The hash value currently stored in this entry.
size_t hash() const {
  return StoredHash;
}

private:
friend bool operator==(const SubstitutionEntry &lhs,
const SubstitutionEntry &rhs) {
Expand All @@ -69,12 +77,6 @@ class SubstitutionEntry {

static bool identifierEquals(Node *lhs, Node *rhs);

/// Fold \p newValue into StoredHash: the stored hash is multiplied by 33 and
/// the new value is added.
void combineHash(size_t newValue) {
  StoredHash = newValue + 33 * StoredHash;
}

void deepHash(Node *node);

bool deepEquals(Node *lhs, Node *rhs) const;
};

Expand Down Expand Up @@ -131,6 +133,13 @@ class RemanglerBase {
// Used to allocate temporary nodes and the output string (in Buffer).
NodeFactory &Factory;

// Recursively calculating the node hashes can be expensive if the node tree
// is deep, so we keep a hash table mapping (Node *, treatAsIdentifier) pairs
// to hashes.
static const size_t HashHashCapacity = 512; // Must be a power of 2
static const size_t HashHashMaxProbes = 8;
SubstitutionEntry HashHash[HashHashCapacity] = {};

// An efficient hash-map implementation in the spirit of llvm's SmallPtrSet:
// The first 16 substitutions are stored in an inline-allocated array to avoid
// malloc calls in the common case.
Expand All @@ -148,7 +157,16 @@ class RemanglerBase {
RemanglerBuffer Buffer;

protected:
RemanglerBase(NodeFactory &Factory) : Factory(Factory), Buffer(Factory) { }
RemanglerBase(NodeFactory &Factory)
: Factory(Factory), Buffer(Factory) { }

/// Compute the hash for a node.
size_t hashForNode(Node *node, bool treatAsIdentifier = false);

/// Construct a SubstitutionEntry for a given node.
/// This will look in the HashHash to see if we already know the hash,
/// to avoid having to walk the entire subtree.
SubstitutionEntry entryForNode(Node *node, bool treatAsIdentifier = false);

/// Find a substitution and return its index.
/// Returns -1 if no substitution is found.
Expand Down
2 changes: 1 addition & 1 deletion test/Demangle/Inputs/bigtype-remangle.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
Error: (3:340) unable to re-mangle $sBf32__t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t
_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_tN
Error: (3:408) unable to re-mangle $sBf32__t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t
_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_t_tN