Skip to content

Commit 57e5b82

Browse files
authored
[lld-macho] Fix STABS entries for --icf=safe_thunks and --keep-icf-stabs (#133179)
When the linker flags `--icf=safe_thunks` and `--keep-icf-stabs` are used together, the STABS debugging entries in the linked output are wrong for functions that are folded via ICF using thunks. For instance, if `func1` is merged into `func2` through a thunk, the STABS entry for `func1` incorrectly points to the object file of `func2`. Each function’s STABS entry should instead consistently point to its own original object file (e.g., the STABS entry for `func1` should reference `func1`’s object file). As a result of this bug, `dsymutil` cannot retrieve the debug information for the affected function. This patch corrects the behavior so that STABS entries always point to the correct object file.
1 parent 7c3ecff commit 57e5b82

File tree

2 files changed

+166
-25
lines changed

2 files changed

+166
-25
lines changed

lld/MachO/SyntheticSections.cpp

Lines changed: 31 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1205,18 +1205,6 @@ void SymtabSection::emitEndFunStab(Defined *defined) {
12051205
stabs.emplace_back(std::move(stab));
12061206
}
12071207

1208-
// Given a pointer to a function symbol, return the symbol that points to the
1209-
// actual function body that will go in the final binary. Generally this is the
1210-
// symbol itself, but if the symbol was folded using a thunk, we retrieve the
1211-
// target function body from the thunk.
1212-
Defined *SymtabSection::getFuncBodySym(Defined *originalSym) {
1213-
if (originalSym->identicalCodeFoldingKind == Symbol::ICFFoldKind::None ||
1214-
originalSym->identicalCodeFoldingKind == Symbol::ICFFoldKind::Body)
1215-
return originalSym;
1216-
1217-
return macho::getBodyForThunkFoldedSym(originalSym);
1218-
}
1219-
12201208
void SymtabSection::emitStabs() {
12211209
if (config->omitDebugInfo)
12221210
return;
@@ -1252,10 +1240,11 @@ void SymtabSection::emitStabs() {
12521240
if (!file || !file->compileUnit)
12531241
continue;
12541242

1255-
// We use 'originalIsec' to get the file id of the symbol since 'isec()'
1256-
// might point to the merged ICF symbol's file
1257-
symbolsNeedingStabs.emplace_back(
1258-
defined, getFuncBodySym(defined)->originalIsec->getFile()->id);
1243+
// We use the symbol's original InputSection to get the file id,
1244+
// even for ICF folded symbols, to ensure STABS entries point to the
1245+
// correct object file where the symbol was originally defined
1246+
symbolsNeedingStabs.emplace_back(defined,
1247+
defined->originalIsec->getFile()->id);
12591248
}
12601249
}
12611250

@@ -1270,10 +1259,12 @@ void SymtabSection::emitStabs() {
12701259
InputFile *lastFile = nullptr;
12711260
for (SortingPair &pair : symbolsNeedingStabs) {
12721261
Defined *defined = pair.first;
1273-
// We use 'originalIsec' of the symbol since we care about the actual origin
1274-
// of the symbol, not the canonical location returned by `isec()`.
1275-
Defined *funcBodySym = getFuncBodySym(defined);
1276-
InputSection *isec = funcBodySym->originalIsec;
1262+
// When emitting STABS entries for a symbol, always use the original
1263+
// InputSection of the defined symbol, not the section of the function body
1264+
// (which might be a different function entirely if ICF folded this
1265+
// function). This ensures STABS entries point back to the original object
1266+
// file.
1267+
InputSection *isec = defined->originalIsec;
12771268
ObjFile *file = cast<ObjFile>(isec->getFile());
12781269

12791270
if (lastFile == nullptr || lastFile != file) {
@@ -1288,12 +1279,30 @@ void SymtabSection::emitStabs() {
12881279
StabsEntry symStab;
12891280
symStab.sect = isec->parent->index;
12901281
symStab.strx = stringTableSection.addString(defined->getName());
1291-
symStab.value = funcBodySym->getVA();
1282+
1283+
// When using --keep-icf-stabs, we need to use the VA of the actual function
1284+
// body that the linker will place in the binary. This is the function that
1285+
// the symbol refers to after ICF folding.
1286+
if (defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Thunk) {
1287+
// For thunks, we need to get the function they point to
1288+
Defined *target = getBodyForThunkFoldedSym(defined);
1289+
symStab.value = target->getVA();
1290+
} else {
1291+
symStab.value = defined->getVA();
1292+
}
12921293

12931294
if (isCodeSection(isec)) {
12941295
symStab.type = N_FUN;
12951296
stabs.emplace_back(std::move(symStab));
1296-
emitEndFunStab(funcBodySym);
1297+
// For the end function marker in STABS, we need to use the size of the
1298+
// actual function body that exists in the output binary
1299+
if (defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Thunk) {
1300+
// For thunks, we use the target's size
1301+
Defined *target = getBodyForThunkFoldedSym(defined);
1302+
emitEndFunStab(target);
1303+
} else {
1304+
emitEndFunStab(defined);
1305+
}
12971306
} else {
12981307
symStab.type = defined->isExternal() ? N_GSYM : N_STSYM;
12991308
stabs.emplace_back(std::move(symStab));

lld/test/MachO/icf-safe-thunks-dwarf.ll

Lines changed: 135 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
; RUN: rm -rf %t && split-file %s %t
55

6+
; Test single object file case
67
; RUN: llc -filetype=obj %t/a.ll -O3 -o %t/a.o -enable-machine-outliner=never -mtriple arm64-apple-macos -addrsig
78
; RUN: %lld -arch arm64 -lSystem --icf=safe_thunks -dylib -o %t/a.dylib %t/a.o
89

@@ -26,6 +27,42 @@
2627
; RUN: %lld -arch arm64 -lSystem --icf=safe_thunks --keep-icf-stabs -dylib -o %t/a_thunks.dylib %t/a.o
2728
; RUN: dsymutil -s %t/a_thunks.dylib > %t/a_thunks.txt
2829

30+
;;;;;;;;;;;;;;;;;;;;;;;;;;;; Test multiple object files with identical functions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;
31+
32+
; RUN: llc -filetype=obj %t/b.ll -O3 -o %t/b.o -enable-machine-outliner=never -mtriple arm64-apple-macos -addrsig
33+
; RUN: %lld -arch arm64 -lSystem --icf=safe_thunks --keep-icf-stabs -dylib -o %t/multi_thunks.dylib %t/a.o %t/b.o
34+
; RUN: dsymutil -s %t/multi_thunks.dylib | FileCheck %s --check-prefix=VERIFY-MULTI-STABS
35+
# Check that STABS entries correctly associate functions with their originating object files
36+
# VERIFY-MULTI-STABS-LABEL: Symbol table for: '{{.*}}/multi_thunks.dylib'
37+
38+
# First object file's source and object file entries
39+
# VERIFY-MULTI-STABS: N_SO{{.*}}a.cpp
40+
# VERIFY-MULTI-STABS-NEXT: N_OSO{{.*}}a.o
41+
42+
# Functions from the first object file - all functions share the same address but belong to a.o
43+
# VERIFY-MULTI-STABS: N_FUN{{.*}}[[FUNC_ADDR:[0-9a-f]+]] '_func_A'
44+
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}00 0000 {{.*}}
45+
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}[[FUNC_ADDR]] '_func_B'
46+
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}00 0000 {{.*}}
47+
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}[[FUNC_ADDR]] '_func_C'
48+
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}00 0000 {{.*}}
49+
# VERIFY-MULTI-STABS-NEXT-NEXT: N_FUN{{.*}}[0-9a-f]+ '_take_func_addr'
50+
51+
# End of first object file's entries
52+
# VERIFY-MULTI-STABS: N_SO{{.*}}01 0000 0000000000000000
53+
54+
# Second object file's source and object file entries
55+
# VERIFY-MULTI-STABS: N_SO{{.*}}b.cpp
56+
# VERIFY-MULTI-STABS-NEXT: N_OSO{{.*}}b.o
57+
58+
# Functions from the second object file - same addresses but different object file
59+
# VERIFY-MULTI-STABS: N_FUN{{.*}}[[FUNC_ADDR]] '_func_D'
60+
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}00 0000 {{.*}}
61+
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}[[FUNC_ADDR]] '_func_E'
62+
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}00 0000 {{.*}}
63+
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}[[FUNC_ADDR]] '_func_F'
64+
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}00 0000 {{.*}}
65+
# VERIFY-MULTI-STABS-NEXT-NEXT: N_FUN{{.*}}[0-9a-f]+ '_take_func_addr_b'
2966

3067
; RUN: dsymutil --flat --verify-dwarf=none %t/a_thunks.dylib -o %t/a_thunks.dSYM
3168
; RUN: dsymutil -s %t/a_thunks.dSYM >> %t/a_thunks.txt
@@ -73,6 +110,9 @@
73110
# VERIFY-THUNK-NEXT: {{ +}}DW_AT_low_pc (0x[[MERGED_FUN_ADDR]])
74111
# VERIFY-THUNK-NEXT-NEXT-NEXT-NEXT-NEXT: {{ +}}DW_AT_name ("func_C")
75112

113+
114+
115+
76116
;--- a.cpp
77117
#define ATTR __attribute__((noinline)) extern "C"
78118
typedef unsigned long long ULL;
@@ -89,40 +129,64 @@ ATTR ULL take_func_addr() {
89129
return val;
90130
}
91131

132+
;--- b.cpp
133+
#define ATTR __attribute__((noinline)) extern "C"
134+
typedef unsigned long long ULL;
135+
136+
// Identical functions in a different object file
137+
ATTR int func_D() { return 1; }
138+
ATTR int func_E() { return 1; }
139+
ATTR int func_F() { return 1; }
140+
141+
ATTR ULL take_func_addr_b() {
142+
ULL val = 0;
143+
val += (ULL)(void*)func_D;
144+
val += (ULL)(void*)func_E;
145+
val += (ULL)(void*)func_F;
146+
return val;
147+
}
148+
92149
;--- gen
93-
clang -target arm64-apple-macos11.0 -S -emit-llvm a.cpp -O3 -g -o -
150+
clang -target arm64-apple-macos11.0 -S -emit-llvm a.cpp -O3 -g -fdebug-compilation-dir=/proc/self/cwd -o -
151+
echo ""
152+
echo ";--- b.ll"
153+
clang -target arm64-apple-macos11.0 -S -emit-llvm b.cpp -O3 -g -fdebug-compilation-dir=/proc/self/cwd -o -
94154

95155
;--- a.ll
96156
; ModuleID = 'a.cpp'
97157
source_filename = "a.cpp"
98-
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
158+
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
99159
target triple = "arm64-apple-macosx11.0.0"
100160

101161
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
102162
define noundef i32 @func_A() #0 !dbg !12 {
163+
entry:
103164
ret i32 1, !dbg !16
104165
}
105166

106167
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
107168
define noundef i32 @func_B() #0 !dbg !17 {
169+
entry:
108170
ret i32 1, !dbg !18
109171
}
110172

111173
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
112174
define noundef i32 @func_C() #0 !dbg !19 {
175+
entry:
113176
ret i32 1, !dbg !20
114177
}
115178

116179
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
117180
define noundef i64 @take_func_addr() local_unnamed_addr #0 !dbg !21 {
181+
entry:
118182
#dbg_value(i64 0, !25, !DIExpression(), !26)
119183
#dbg_value(i64 ptrtoint (ptr @func_A to i64), !25, !DIExpression(), !26)
120184
#dbg_value(i64 add (i64 ptrtoint (ptr @func_A to i64), i64 ptrtoint (ptr @func_B to i64)), !25, !DIExpression(), !26)
121185
#dbg_value(i64 add (i64 add (i64 ptrtoint (ptr @func_A to i64), i64 ptrtoint (ptr @func_B to i64)), i64 ptrtoint (ptr @func_C to i64)), !25, !DIExpression(), !26)
122186
ret i64 add (i64 add (i64 ptrtoint (ptr @func_A to i64), i64 ptrtoint (ptr @func_B to i64)), i64 ptrtoint (ptr @func_C to i64)), !dbg !27
123187
}
124188

125-
attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }
189+
attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+ccpp,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }
126190

127191
!llvm.dbg.cu = !{!0}
128192
!llvm.module.flags = !{!6, !7, !8, !9, !10, !11}
@@ -155,3 +219,71 @@ attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind ssp wil
155219
!25 = !DILocalVariable(name: "val", scope: !21, file: !1, line: 9, type: !3)
156220
!26 = !DILocation(line: 0, scope: !21)
157221
!27 = !DILocation(line: 13, column: 5, scope: !21)
222+
223+
;--- b.ll
224+
; ModuleID = 'b.cpp'
225+
source_filename = "b.cpp"
226+
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
227+
target triple = "arm64-apple-macosx11.0.0"
228+
229+
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
230+
define noundef i32 @func_D() #0 !dbg !12 {
231+
entry:
232+
ret i32 1, !dbg !16
233+
}
234+
235+
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
236+
define noundef i32 @func_E() #0 !dbg !17 {
237+
entry:
238+
ret i32 1, !dbg !18
239+
}
240+
241+
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
242+
define noundef i32 @func_F() #0 !dbg !19 {
243+
entry:
244+
ret i32 1, !dbg !20
245+
}
246+
247+
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
248+
define noundef i64 @take_func_addr_b() local_unnamed_addr #0 !dbg !21 {
249+
entry:
250+
#dbg_value(i64 0, !25, !DIExpression(), !26)
251+
#dbg_value(i64 ptrtoint (ptr @func_D to i64), !25, !DIExpression(), !26)
252+
#dbg_value(i64 add (i64 ptrtoint (ptr @func_D to i64), i64 ptrtoint (ptr @func_E to i64)), !25, !DIExpression(), !26)
253+
#dbg_value(i64 add (i64 add (i64 ptrtoint (ptr @func_D to i64), i64 ptrtoint (ptr @func_E to i64)), i64 ptrtoint (ptr @func_F to i64)), !25, !DIExpression(), !26)
254+
ret i64 add (i64 add (i64 ptrtoint (ptr @func_D to i64), i64 ptrtoint (ptr @func_E to i64)), i64 ptrtoint (ptr @func_F to i64)), !dbg !27
255+
}
256+
257+
attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+ccpp,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }
258+
259+
!llvm.dbg.cu = !{!0}
260+
!llvm.module.flags = !{!6, !7, !8, !9, !10, !11}
261+
262+
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, retainedTypes: !2, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/")
263+
!1 = !DIFile(filename: "b.cpp", directory: "/proc/self/cwd")
264+
!2 = !{!3, !5}
265+
!3 = !DIDerivedType(tag: DW_TAG_typedef, name: "ULL", file: !1, line: 2, baseType: !4)
266+
!4 = !DIBasicType(name: "unsigned long long", size: 64, encoding: DW_ATE_unsigned)
267+
!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)
268+
!6 = !{i32 7, !"Dwarf Version", i32 4}
269+
!7 = !{i32 2, !"Debug Info Version", i32 3}
270+
!8 = !{i32 1, !"wchar_size", i32 4}
271+
!9 = !{i32 8, !"PIC Level", i32 2}
272+
!10 = !{i32 7, !"uwtable", i32 1}
273+
!11 = !{i32 7, !"frame-pointer", i32 1}
274+
!12 = distinct !DISubprogram(name: "func_D", scope: !1, file: !1, line: 5, type: !13, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
275+
!13 = !DISubroutineType(types: !14)
276+
!14 = !{!15}
277+
!15 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
278+
!16 = !DILocation(line: 5, column: 21, scope: !12)
279+
!17 = distinct !DISubprogram(name: "func_E", scope: !1, file: !1, line: 6, type: !13, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
280+
!18 = !DILocation(line: 6, column: 21, scope: !17)
281+
!19 = distinct !DISubprogram(name: "func_F", scope: !1, file: !1, line: 7, type: !13, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
282+
!20 = !DILocation(line: 7, column: 21, scope: !19)
283+
!21 = distinct !DISubprogram(name: "take_func_addr_b", scope: !1, file: !1, line: 9, type: !22, scopeLine: 9, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !24)
284+
!22 = !DISubroutineType(types: !23)
285+
!23 = !{!3}
286+
!24 = !{!25}
287+
!25 = !DILocalVariable(name: "val", scope: !21, file: !1, line: 10, type: !3)
288+
!26 = !DILocation(line: 0, scope: !21)
289+
!27 = !DILocation(line: 14, column: 5, scope: !21)

0 commit comments

Comments
 (0)