Skip to content

[lld-macho] Always store symbol name length eagerly (NFC) #106906

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 3, 2024

Conversation

BertalanD
Copy link
Member

The only instance where we weren't already passing a StringRef with a known length to Symbol's constructor is where the argument is a string literal. Even in that case, lazy strlen calls don't make sense, as the compiler can constant-evaluate the StringRef(const char*) constructor.

For symbols that go into the symbol table we need the length when calculating the hash anyway. We could get away with not calling getName() for local symbols, but the total contribution of strlen to the run time is already below 1%, so that would just complicate the code for a negligible benefit.

The only instance where we weren't already passing a `StringRef` with a
known length to `Symbol`'s constructor is where the argument is a string
literal. Even in that case, lazy `strlen` calls don't make sense, as the
compiler can constant-evaluate the `StringRef(const char*)` constructor.

For symbols that go into the symbol table we need the length when
calculating the hash anyway. We could get away with not calling
`getName()` for local symbols, but the total contribution of `strlen` to
the run time is already below 1%, so that would just complicate the code
for a negligible benefit.
@llvmbot
Copy link
Member

llvmbot commented Sep 1, 2024

@llvm/pr-subscribers-lld

@llvm/pr-subscribers-lld-macho

Author: Daniel Bertalan (BertalanD)

Changes

The only instance where we weren't already passing a StringRef with a known length to Symbol's constructor is where the argument is a string literal. Even in that case, lazy strlen calls don't make sense, as the compiler can constant-evaluate the StringRef(const char*) constructor.

For symbols that go into the symbol table we need the length when calculating the hash anyway. We could get away with not calling getName() for local symbols, but the total contribution of strlen to the run time is already below 1%, so that would just complicate the code for a negligible benefit.


Full diff: https://github.com/llvm/llvm-project/pull/106906.diff

2 Files Affected:

  • (modified) lld/MachO/Symbols.cpp (+1-1)
  • (modified) lld/MachO/Symbols.h (+9-20)
diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
index 7bac78a59e4fec..f52da4f48aafba 100644
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -52,7 +52,7 @@ uint64_t Symbol::getLazyPtrVA() const {
 uint64_t Symbol::getGotVA() const { return in.got->getVA(gotIndex); }
 uint64_t Symbol::getTlvVA() const { return in.tlvPointers->getVA(gotIndex); }
 
-Defined::Defined(StringRefZ name, InputFile *file, InputSection *isec,
+Defined::Defined(StringRef name, InputFile *file, InputSection *isec,
                  uint64_t value, uint64_t size, bool isWeakDef, bool isExternal,
                  bool isPrivateExtern, bool includeInSymtab,
                  bool isReferencedDynamically, bool noDeadStrip,
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index be5d135b8cd54f..bd60226f38ad30 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -21,14 +21,6 @@ namespace macho {
 
 class MachHeaderSection;
 
-struct StringRefZ {
-  StringRefZ(const char *s) : data(s), size(-1) {}
-  StringRefZ(StringRef s) : data(s.data()), size(s.size()) {}
-
-  const char *data;
-  const uint32_t size;
-};
-
 class Symbol {
 public:
   enum Kind {
@@ -45,11 +37,7 @@ class Symbol {
 
   Kind kind() const { return symbolKind; }
 
-  StringRef getName() const {
-    if (nameSize == (uint32_t)-1)
-      nameSize = strlen(nameData);
-    return {nameData, nameSize};
-  }
+  StringRef getName() const { return {nameData, nameSize}; }
 
   bool isLive() const { return used; }
   bool isLazy() const {
@@ -96,15 +84,15 @@ class Symbol {
   InputFile *getFile() const { return file; }
 
 protected:
-  Symbol(Kind k, StringRefZ name, InputFile *file)
-      : symbolKind(k), nameData(name.data), file(file), nameSize(name.size),
+  Symbol(Kind k, StringRef name, InputFile *file)
+      : symbolKind(k), nameData(name.data()), file(file), nameSize(name.size()),
         isUsedInRegularObj(!file || isa<ObjFile>(file)),
         used(!config->deadStrip) {}
 
   Kind symbolKind;
   const char *nameData;
   InputFile *file;
-  mutable uint32_t nameSize;
+  uint32_t nameSize;
 
 public:
   // True if this symbol was referenced by a regular (non-bitcode) object.
@@ -116,7 +104,7 @@ class Symbol {
 
 class Defined : public Symbol {
 public:
-  Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value,
+  Defined(StringRef name, InputFile *file, InputSection *isec, uint64_t value,
           uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern,
           bool includeInSymtab, bool isReferencedDynamically, bool noDeadStrip,
           bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false,
@@ -206,7 +194,7 @@ enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 };
 
 class Undefined : public Symbol {
 public:
-  Undefined(StringRefZ name, InputFile *file, RefState refState,
+  Undefined(StringRef name, InputFile *file, RefState refState,
             bool wasBitcodeSymbol)
       : Symbol(UndefinedKind, name, file), refState(refState),
         wasBitcodeSymbol(wasBitcodeSymbol) {
@@ -238,7 +226,7 @@ class Undefined : public Symbol {
 // to regular defined symbols in a __common section.
 class CommonSymbol : public Symbol {
 public:
-  CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align,
+  CommonSymbol(StringRef name, InputFile *file, uint64_t size, uint32_t align,
                bool isPrivateExtern)
       : Symbol(CommonKind, name, file), size(size),
         align(align != 1 ? align : llvm::PowerOf2Ceil(size)),
@@ -255,7 +243,7 @@ class CommonSymbol : public Symbol {
 
 class DylibSymbol : public Symbol {
 public:
-  DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef,
+  DylibSymbol(DylibFile *file, StringRef name, bool isWeakDef,
               RefState refState, bool isTlv)
       : Symbol(DylibKind, name, file), shouldReexport(false),
         refState(refState), weakDef(isWeakDef), tlv(isTlv) {
@@ -301,6 +289,7 @@ class DylibSymbol : public Symbol {
   }
 
   bool shouldReexport : 1;
+
 private:
   RefState refState : 2;
   const bool weakDef : 1;

@BertalanD BertalanD requested review from int3 and smeenai and removed request for int3 September 3, 2024 08:41
@BertalanD BertalanD merged commit b24a304 into llvm:main Sep 3, 2024
11 checks passed
@BertalanD BertalanD deleted the eager-stringref branch September 3, 2024 23:05
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants