Skip to content

Commit 79cc728

Browse files
SharonXSharonSharon Xuellishg
authored
[lld][macho] Strip .__uniq. and .llvm. hashes in -order_file (#140670)
``` /// Symbols can be appended with "(.__uniq.xxxx)?.llvm.yyyy" where "xxxx" and /// "yyyy" are numbers that could change between builds. We need to use the root /// symbol name before this suffix so these symbols can be matched with profiles /// which may have different suffixes. ``` Just as we already do in BP (https://github.com/llvm/llvm-project/blob/main/lld/MachO/BPSectionOrderer.cpp#L127), this patch removes these suffixes both when parsing the order file and when looking up a symbol's priority, so that symbols are matched more reliably. --------- Co-authored-by: Sharon Xu <[email protected]> Co-authored-by: Ellis Hoag <[email protected]>
1 parent 95ce58b commit 79cc728

File tree

8 files changed

+157
-20
lines changed

8 files changed

+157
-20
lines changed

lld/Common/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ add_lld_library(lldCommon
3434
Strings.cpp
3535
TargetOptionsCommandFlags.cpp
3636
Timer.cpp
37+
Utils.cpp
3738
VCSVersion.inc
3839
Version.cpp
3940

lld/Common/Utils.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//===- Utils.cpp ------------------------------------------------*- C++-*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//===----------------------------------------------------------------------===//
7+
//
8+
// This file defines utility functions that can be shared across archs.
9+
//
10+
//===----------------------------------------------------------------------===//
11+
12+
#include "lld/Common/Utils.h"
13+
14+
using namespace llvm;
15+
using namespace lld;
16+
17+
// Returns the root of a symbol name by stripping the build-specific suffixes
// "(.__uniq.xxxx)?(.llvm.yyyy)?(.Tgm)?". A name that carries none of these
// suffixes should come back unchanged (relies on StringRef::rsplit returning
// the whole string as the first element when the separator is absent).
StringRef lld::utils::getRootSymbol(StringRef name) {
18+
// Remove a trailing ".Tgm" first, if there is one, so the splits below only
// have to deal with the ".llvm." and ".__uniq." parts.
name.consume_back(".Tgm");
19+
// P0 is the text before the last ".llvm."; the tail S0 is not used.
auto [P0, S0] = name.rsplit(".llvm.");
20+
// P1 is the text before the last ".__uniq." within P0; the tail S1 is not used.
auto [P1, S1] = P0.rsplit(".__uniq.");
21+
return P1;
22+
}

lld/ELF/BPSectionOrderer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,8 @@ DenseMap<const InputSectionBase *, int> elf::runBalancedPartitioning(
8181
if (!sec || sec->size == 0 || !sec->isLive() || sec->repl != sec ||
8282
!orderer.secToSym.try_emplace(sec, d).second)
8383
return;
84-
rootSymbolToSectionIdxs[CachedHashStringRef(getRootSymbol(sym.getName()))]
84+
rootSymbolToSectionIdxs[CachedHashStringRef(
85+
lld::utils::getRootSymbol(sym.getName()))]
8586
.insert(sections.size());
8687
sections.emplace_back(sec);
8788
};

lld/MachO/BPSectionOrderer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ DenseMap<const InputSection *, int> lld::macho::runBalancedPartitioning(
124124
size_t idx = sections.size();
125125
sections.emplace_back(isec);
126126
for (auto *sym : BPOrdererMachO::getSymbols(*isec)) {
127-
auto rootName = getRootSymbol(sym->getName());
127+
auto rootName = lld::utils::getRootSymbol(sym->getName());
128128
rootSymbolToSectionIdxs[CachedHashStringRef(rootName)].insert(idx);
129129
if (auto linkageName =
130130
BPOrdererMachO::getResolvedLinkageName(rootName))

lld/MachO/SectionPriorities.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "lld/Common/Args.h"
2222
#include "lld/Common/CommonLinkerContext.h"
2323
#include "lld/Common/ErrorHandler.h"
24+
#include "lld/Common/Utils.h"
2425
#include "llvm/ADT/DenseMap.h"
2526
#include "llvm/ADT/MapVector.h"
2627
#include "llvm/Support/Path.h"
@@ -250,7 +251,7 @@ macho::PriorityBuilder::getSymbolPriority(const Defined *sym) {
250251
if (sym->isAbsolute())
251252
return std::nullopt;
252253

253-
auto it = priorities.find(sym->getName());
254+
auto it = priorities.find(utils::getRootSymbol(sym->getName()));
254255
if (it == priorities.end())
255256
return std::nullopt;
256257
const SymbolPriorityEntry &entry = it->second;
@@ -330,7 +331,7 @@ void macho::PriorityBuilder::parseOrderFile(StringRef path) {
330331
break;
331332
}
332333
}
333-
symbol = line.trim();
334+
symbol = utils::getRootSymbol(line.trim());
334335

335336
if (!symbol.empty()) {
336337
SymbolPriorityEntry &entry = priorities[symbol];

lld/include/lld/Common/BPSectionOrdererBase.inc

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
//===----------------------------------------------------------------------===//
2121

2222
#include "lld/Common/ErrorHandler.h"
23+
#include "lld/Common/Utils.h"
2324
#include "llvm/ADT/CachedHashString.h"
2425
#include "llvm/ADT/DenseMap.h"
2526
#include "llvm/ADT/MapVector.h"
@@ -147,19 +148,6 @@ static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
147148
return sectionUns;
148149
}
149150

150-
/// Symbols can be appended with "(.__uniq.xxxx)?(.llvm.yyyy)?(.Tgm)?" where
151-
/// "xxxx" and "yyyy" are numbers that could change between builds, and .Tgm is
152-
/// the global merge functions suffix
153-
/// (see GlobalMergeFunc::MergingInstanceSuffix). We need to use the root symbol
154-
/// name before this suffix so these symbols can be matched with profiles which
155-
/// may have different suffixes.
156-
inline StringRef getRootSymbol(StringRef name) {
157-
name.consume_back(".Tgm");
158-
auto [P0, S0] = name.rsplit(".llvm.");
159-
auto [P1, S1] = P0.rsplit(".__uniq.");
160-
return P1;
161-
}
162-
163151
template <class D>
164152
auto BPOrderer<D>::computeOrder(
165153
StringRef profilePath, bool forFunctionCompression, bool forDataCompression,
@@ -197,7 +185,7 @@ auto BPOrderer<D>::computeOrder(
197185
for (size_t timestamp = 0; timestamp < trace.size(); timestamp++) {
198186
auto [_, parsedFuncName] = getParsedIRPGOName(
199187
reader->getSymtab().getFuncOrVarName(trace[timestamp]));
200-
parsedFuncName = getRootSymbol(parsedFuncName);
188+
parsedFuncName = lld::utils::getRootSymbol(parsedFuncName);
201189

202190
auto sectionIdxsIt =
203191
rootSymbolToSectionIdxs.find(CachedHashStringRef(parsedFuncName));
@@ -375,7 +363,7 @@ auto BPOrderer<D>::computeOrder(
375363
// 4?
376364
uint64_t lastPage = endAddress / pageSize;
377365
StringRef rootSymbol = D::getSymName(*sym);
378-
rootSymbol = getRootSymbol(rootSymbol);
366+
rootSymbol = lld::utils::getRootSymbol(rootSymbol);
379367
symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
380368
if (auto resolvedLinkageName = D::getResolvedLinkageName(rootSymbol))
381369
symbolToPageNumbers.try_emplace(resolvedLinkageName.value(),
@@ -393,7 +381,7 @@ auto BPOrderer<D>::computeOrder(
393381
auto traceId = trace.FunctionNameRefs[step];
394382
auto [Filename, ParsedFuncName] =
395383
getParsedIRPGOName(reader->getSymtab().getFuncOrVarName(traceId));
396-
ParsedFuncName = getRootSymbol(ParsedFuncName);
384+
ParsedFuncName = lld::utils::getRootSymbol(ParsedFuncName);
397385
auto it = symbolToPageNumbers.find(ParsedFuncName);
398386
if (it != symbolToPageNumbers.end()) {
399387
auto &[firstPage, lastPage] = it->getValue();

lld/include/lld/Common/Utils.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
//===- Utils.h ------------------------------------------------*- C++-*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//===----------------------------------------------------------------------===//
7+
//
8+
// This file declares utility functions that can be shared across archs.
9+
//
10+
//===----------------------------------------------------------------------===//
11+
12+
#ifndef LLD_UTILS_H
13+
#define LLD_UTILS_H
14+
15+
#include "llvm/ADT/StringRef.h"
16+
17+
namespace lld {
18+
namespace utils {
19+
20+
/// Symbols can be appended with "(.__uniq.xxxx)?(.llvm.yyyy)?(.Tgm)?" where
21+
/// "xxxx" and "yyyy" are numbers that could change between builds, and .Tgm is
22+
/// the global merge functions suffix
23+
/// (see GlobalMergeFunc::MergingInstanceSuffix). We need to use the root symbol
24+
/// name before this suffix so these symbols can be matched with profiles which
25+
/// may have different suffixes.
26+
llvm::StringRef getRootSymbol(llvm::StringRef Name);
27+
} // namespace utils
28+
} // namespace lld
29+
30+
#endif
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# RUN: rm -rf %t && split-file %s %t
2+
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o
3+
4+
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o -order_file %t/ord-1
5+
# RUN: llvm-nm --numeric-sort --format=just-symbols %t/a.out | FileCheck %s
6+
7+
#--- a.s
8+
.text
9+
.globl _main, A, _B, C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
10+
11+
_main:
12+
ret
13+
A:
14+
ret
15+
F:
16+
add w0, w0, #3
17+
bl C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
18+
ret
19+
C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222:
20+
add w0, w0, #2
21+
bl A
22+
ret
23+
D:
24+
add w0, w0, #2
25+
bl B
26+
ret
27+
B:
28+
add w0, w0, #1
29+
bl A
30+
ret
31+
E:
32+
add w0, w0, #2
33+
bl C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
34+
ret
35+
36+
.section __DATA,__objc_const
37+
# Test multiple symbols at the same address, which will be sorted alphabetically by symbol name.
38+
_OBJC_$_CATEGORY_CLASS_METHODS_Foo_$_Cat2:
39+
.quad 789
40+
41+
_OBJC_$_CATEGORY_SOME_$_FOLDED:
42+
_OBJC_$_CATEGORY_Foo_$_Cat1:
43+
_ALPHABETIC_SORT_FIRST:
44+
.quad 123
45+
46+
_OBJC_$_CATEGORY_Foo_$_Cat2:
47+
.quad 222
48+
49+
_OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1:
50+
.quad 456
51+
52+
.section __DATA,__objc_data
53+
_OBJC_CLASS_$_Foo:
54+
.quad 123
55+
56+
_OBJC_CLASS_$_Bar.llvm.1234:
57+
.quad 456
58+
59+
_OBJC_CLASS_$_Baz:
60+
.quad 789
61+
62+
_OBJC_CLASS_$_Baz2:
63+
.quad 999
64+
65+
.section __DATA,__objc_classrefs
66+
.quad _OBJC_CLASS_$_Foo
67+
.quad _OBJC_CLASS_$_Bar.llvm.1234
68+
.quad _OBJC_CLASS_$_Baz
69+
70+
.subsections_via_symbols
71+
72+
73+
#--- ord-1
74+
# change order, partially covered
75+
A
76+
B
77+
C.__uniq.555555555555555555555555555555555555555.llvm.6666666666666666666
78+
_OBJC_CLASS_$_Baz
79+
_OBJC_CLASS_$_Bar.__uniq.12345
80+
_OBJC_CLASS_$_Foo.__uniq.123.llvm.123456789
81+
_OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1
82+
_OBJC_$_CATEGORY_Foo_$_Cat1.llvm.1234567
83+
84+
# .text
85+
# CHECK: A
86+
# CHECK: B
87+
# CHECK: C
88+
# .section __DATA,__objc_const
89+
# CHECK: _OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1
90+
# CHECK: _OBJC_$_CATEGORY_Foo_$_Cat1
91+
# .section __DATA,__objc_data
92+
# CHECK: _OBJC_CLASS_$_Baz
93+
# CHECK: _OBJC_CLASS_$_Bar
94+
# CHECK: _OBJC_CLASS_$_Foo

0 commit comments

Comments
 (0)