-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Clang] Improve EmitClangAttrSpellingListIndex #114899
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang Author: Chinmay Deshpande (chinmaydd) Changes
New and old Previous discussion can be found here: #114285. Full diff: https://github.com/llvm/llvm-project/pull/114899.diff 3 Files Affected:
diff --git a/clang/include/clang/Basic/AttributeCommonInfo.h b/clang/include/clang/Basic/AttributeCommonInfo.h
index 5f024b4b5fd782..834b3ca62adced 100644
--- a/clang/include/clang/Basic/AttributeCommonInfo.h
+++ b/clang/include/clang/Basic/AttributeCommonInfo.h
@@ -67,6 +67,16 @@ class AttributeCommonInfo {
IgnoredAttribute,
UnknownAttribute,
};
+ enum Scope {
+ SC_NONE,
+ SC_CLANG,
+ SC_GNU,
+ SC_MSVC,
+ SC_OMP,
+ SC_HLSL,
+ SC_GSL,
+ SC_RISCV
+ };
private:
const IdentifierInfo *AttrName = nullptr;
diff --git a/clang/lib/Basic/Attributes.cpp b/clang/lib/Basic/Attributes.cpp
index 867d241a2cf847..6f5a70674cbd4b 100644
--- a/clang/lib/Basic/Attributes.cpp
+++ b/clang/lib/Basic/Attributes.cpp
@@ -16,6 +16,7 @@
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/ParsedAttrInfo.h"
#include "clang/Basic/TargetInfo.h"
+#include "llvm/ADT/StringMap.h"
using namespace clang;
@@ -153,12 +154,35 @@ std::string AttributeCommonInfo::getNormalizedFullName() const {
normalizeName(getAttrName(), getScopeName(), getSyntax()));
}
+static const llvm::StringMap<AttributeCommonInfo::Scope> ScopeMap = {
+ {"", AttributeCommonInfo::SC_NONE},
+ {"clang", AttributeCommonInfo::SC_CLANG},
+ {"gnu", AttributeCommonInfo::SC_GNU},
+ {"msvc", AttributeCommonInfo::SC_MSVC},
+ {"omp", AttributeCommonInfo::SC_OMP},
+ {"hlsl", AttributeCommonInfo::SC_HLSL},
+ {"gsl", AttributeCommonInfo::SC_GSL},
+ {"riscv", AttributeCommonInfo::SC_RISCV}};
+
+AttributeCommonInfo::Scope
+getScopeFromNormalizedScopeName(const StringRef ScopeName) {
+ auto It = ScopeMap.find(ScopeName);
+ if (It == ScopeMap.end()) {
+ llvm_unreachable("Unknown normalized scope name. Shouldn't get here");
+ }
+
+ return It->second;
+}
+
unsigned AttributeCommonInfo::calculateAttributeSpellingListIndex() const {
// Both variables will be used in tablegen generated
// attribute spell list index matching code.
auto Syntax = static_cast<AttributeCommonInfo::Syntax>(getSyntax());
- StringRef Scope = normalizeAttrScopeName(getScopeName(), Syntax);
- StringRef Name = normalizeAttrName(getAttrName(), Scope, Syntax);
+ StringRef ScopeName = normalizeAttrScopeName(getScopeName(), Syntax);
+ StringRef Name = normalizeAttrName(getAttrName(), ScopeName, Syntax);
+
+ AttributeCommonInfo::Scope ComputedScope =
+ getScopeFromNormalizedScopeName(ScopeName);
#include "clang/Sema/AttrSpellingListIndex.inc"
}
diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp
index 3031d81b3df731..e38cd33b81fcbc 100644
--- a/clang/utils/TableGen/ClangAttrEmitter.cpp
+++ b/clang/utils/TableGen/ClangAttrEmitter.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/StringSwitch.h"
@@ -3841,11 +3842,95 @@ void EmitClangAttrSpellingListIndex(const RecordKeeper &Records,
const Record &R = *I.second;
std::vector<FlattenedSpelling> Spellings = GetFlattenedSpellings(R);
OS << " case AT_" << I.first << ": {\n";
- for (unsigned I = 0; I < Spellings.size(); ++ I) {
- OS << " if (Name == \"" << Spellings[I].name() << "\" && "
- << "getSyntax() == AttributeCommonInfo::AS_" << Spellings[I].variety()
- << " && Scope == \"" << Spellings[I].nameSpace() << "\")\n"
- << " return " << I << ";\n";
+
+ // If there are none or one spelling to check, resort to the default
+ // behavior of returning index as 0.
+ if (Spellings.size() <= 1) {
+ OS << " return 0;\n";
+ OS << " break;\n";
+ OS << " }\n";
+ continue;
+ }
+
+ bool HasSingleUniqueSpellingName = true;
+ StringMap<std::vector<const FlattenedSpelling *>> SpellingMap;
+
+ StringRef FirstName = Spellings.front().name();
+ for (const auto &S : Spellings) {
+ StringRef Name = S.name();
+ if (Name != FirstName)
+ HasSingleUniqueSpellingName = false;
+ SpellingMap[Name].push_back(&S);
+ }
+
+ // If parsed attribute has only one possible spelling name, only compare
+ // syntax and scope.
+ if (HasSingleUniqueSpellingName) {
+ for (const auto &[Idx, S] : enumerate(SpellingMap[FirstName])) {
+ OS << " if (getSyntax() == AttributeCommonInfo::AS_" << S->variety();
+
+ std::string ScopeStr = "AttributeCommonInfo::SC_";
+ if (S->nameSpace() == "")
+ ScopeStr += "NONE";
+ else
+ ScopeStr += S->nameSpace().upper();
+
+ OS << " && ComputedScope == " << ScopeStr << ")\n"
+ << " return " << Idx << ";\n";
+ }
+ } else {
+ size_t Idx = 0;
+ for (const auto &MapEntry : SpellingMap) {
+ StringRef Name = MapEntry.first();
+ const std::vector<const FlattenedSpelling *> &Cases = SpellingMap[Name];
+
+ if (Cases.size() > 1) {
+ // For names with multiple possible cases, emit an enclosing if such
+ // that the name is compared against only once. Eg:
+ //
+ // if (Name == "always_inline") {
+ // if (getSyntax() == AttributeCommonInfo::AS_GNU &&
+ // ComputedScope == AttributeCommonInfo::SC_None)
+ // return 0;
+ // ...
+ // }
+ OS << " if (Name == \"" << Name << "\") {\n";
+ for (const auto &S : SpellingMap[Name]) {
+ OS << " if (getSyntax() == AttributeCommonInfo::AS_"
+ << S->variety();
+ std::string ScopeStr = "AttributeCommonInfo::SC_";
+ if (S->nameSpace() == "")
+ ScopeStr += "NONE";
+ else
+ ScopeStr += S->nameSpace().upper();
+
+ OS << " && ComputedScope == " << ScopeStr << ")\n"
+ << " return " << Idx << ";\n";
+ Idx++;
+ }
+ OS << " }\n";
+ } else {
+ // If there is only possible case for the spelling name, no need of
+ // enclosing if. Eg.
+ //
+ // if (Name == "__forceinline" &&
+ // getSyntax() == AttributeCommonInfo::AS_Keyword
+ // && ComputedScope == AttributeCommonInfo::SC_NONE)
+ // return 5;
+ const FlattenedSpelling *S = Cases.front();
+ OS << " if (Name == \"" << Name << "\"";
+ OS << " && getSyntax() == AttributeCommonInfo::AS_" << S->variety();
+ std::string ScopeStr = "AttributeCommonInfo::SC_";
+ if (S->nameSpace() == "")
+ ScopeStr += "NONE";
+ else
+ ScopeStr += S->nameSpace().upper();
+
+ OS << " && ComputedScope == " << ScopeStr << ")\n"
+ << " return " << Idx << ";\n";
+ Idx++;
+ }
+ }
}
OS << " break;\n";
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Show sample output?
NEW:
OLD:
NEW:
OLD:
Entire file at : https://gist.github.com/chinmaydd/1733a9a84932a45678c47f31be4d64d7 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems to have missed a bunch, see alloc_size
and allocating
and alloc_align
, all of which are the same spellings, but you're checking the name anyway (in the .inc file you linked).
There are actually quite a lot of cases you're not covering there, and the loop you did to generate this is a bit out of hand. I've suggested some simplifications.
bool HasSingleUniqueSpellingName = true; | ||
StringMap<std::vector<const FlattenedSpelling *>> SpellingMap; | ||
|
||
StringRef FirstName = Spellings.front().name(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This does a ton of work in a way that is pretty difficult. Really what you're trying to find is that all of the names are the same, plus information about the names. I think something like:
std::vector<StringRef> Names;
llvm::transform(Spellings, std::back_inserter(Names), [](const FlattenedSpelling &FS) { return FS.name(); });
llvm::sort(Names);
llvm::erase(llvm::unique(Names), Names.end());
Would give you a lot of information that would be necessary/useful later.
First, you can check:
Names.size() ==1
<= Means that all of the names are the same name, so you can skip the name check entirely.
Names.end() == std::adjacent_find(Names.begin(), Names.end(), [](StringRef LHS, StringRef RHS){ return LHS.size() == RHS.size(); });
If THAT is true, all you have to do is check 'size', since all of them have a unique length.
THOUGH, you probably want to do this on a 'size' of name basis, so if you do that in the loop of the current FlattenedSpelling
(and add to the condition in the adjacent_find
that the size is the current FS.name().size()
), if you don't find one, you can do just the size check.
Alternatively, you can do just a copy_if
from that based on the current FlattenedSpelling
name's size, which would give you the bits to tell which actual characters to check. So something like:
for (const FlattenedSpelling &FS : Spellings) {
std::vector<StringRef> SameLenNames;
llvm::copy_if(Names, std::back_inserter(SameLenNames), [&](StringRef N) { return N.size() == FS.name().size(); });
// insert print size check here
-- Since Names.size() > 1 above, we actually have to check the sizes. THOUGH, IMO, you can be a little smarter about combining it into here too, and only doing the copy_if that is necessary (so we only do the loop on Spellings once).
if (SameLenNames.size() > 1) {
// if size > 1 we have to check individual characters, else we could just do size + scope/etc
// print with just the size check.
for(StringRef SLN : SameLenNames) {
if (SLN == FS.name()) continue; // don't check same name!
auto [CurItr, OtherItr] = std::mismatch(FS.name().begin(), FS.name().end(), SLN.begin());
// NOW you know which character to check to make sure we're unique. so something like:
OS << "Name[" << std::distance(FS.name().begin(), CurItr) << "] == '" << *CurItr << "'';
}
}
}
THOUGH, like i said, you can probably combine the Names.size() ==1
condition inside of that loop as well. And probably the adjacent_find
test can be skipped with the loop I just wrote above.
EmitClangAttrSpellingListIndex() performs a lot of unnecessary string comparisons which is wasteful in time and space. This commit attempts to refactor this method to be more performant.
1be266c
to
95b7569
Compare
Hi @erichkeane, thanks for your detailed review and comments. I had some thoughts about your optimized approach which I would like to share. I agree that falling back to full string comparisons is "safe" but non-optimal. However, performing "first different character" comparison might get complicated if you have more than 2 different spelling names with the same size. An example case is I'm happy to add the FIXME note, but would like to hear what you think. To address a previous comment -
I'm confident that my earlier approach did not perform string comparisons for "alloc_size", "allocating" and "alloc_align". The 2 tests that were failing were because I assumed that spellings with the same name would have consecutive indices. This failed for Gist contains updated output. |
So the conditional actually doesn't have to be all that difficult. You need N-1 comparisons to differentiate (though you m ight end up with duplicates). For example, in your 3 strings example if differentiating the first one, my algorithm would generate Str.size() == 5 && Str[3] == 'b' && Str[3] == 'b' (since that is the difference character for each). It is simple enough join with an 'and' and a loop, as long as we don't care about duplicates. As far as debugability/readability of emitted code: it isn't really a concern here. Once we get the tablegen 'right' (which traditionally doesn't take much effort/testing), no one ever gets into here again. The spelling-identification one uses a trie, and it is a giant mess of undebugability :)
Got it! It wasn't clear based on the old implementation what was happening, just from the GIST that something wasn't working.
That GIST looks about right. I see as you mentioned, ~6 different comparisons, which isn't too bad. Definitely an improvement. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just 1 nit/simplification here, else this LGTM!
Thanks @erichkeane and @arsenm ! |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/78/builds/9236 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/162/builds/10003 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/12/builds/9158 Here is the relevant piece of the build log for the reference
|
Buildbot errors fixed by dd1c99b |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/27/builds/1695 Here is the relevant piece of the build log for the reference
|
StringRef Value) { return Element.first < Value; }); | ||
assert(It != std::end(ScopeList) && It->first == ScopeName); | ||
|
||
return It->second; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks like exactly what StringSwitch
is for...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ooof, yeah, you're right. This should jsut be a StringSwitch
...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Interesting, I had no idea about StringSwitch
. I'll raise a new PR to fix that.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/12164 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/56/builds/11711 Here is the relevant piece of the build log for the reference
|
`EmitClangAttrSpellingListIndex()` performs a lot of unnecessary string comparisons which is wasteful in time and stack space. This commit attempts to refactor this method to be more performant.
EmitClangAttrSpellingListIndex()
performs a lot of unnecessary string comparisons which is wasteful in time and stack space. This commit attempts to refactor this method to be more performant.New and old
AttrSpellingListIndex.inc
files can be found here - https://gist.github.com/chinmaydd/1733a9a84932a45678c47f31be4d64d7. (CTRL-F for NEW_INC_FILE and OLD_INC_FILE)Previous discussion can be found here: #114285.