Skip to content

[lld-macho] Fix STABS entries for --icf=safe_thunks and --keep-icf-stabs #133179

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 31 additions & 22 deletions lld/MachO/SyntheticSections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1205,18 +1205,6 @@ void SymtabSection::emitEndFunStab(Defined *defined) {
stabs.emplace_back(std::move(stab));
}

// Given a pointer to a function symbol, return the symbol that points to the
// actual function body that will go in the final binary. Generally this is the
// symbol itself, but if the symbol was folded using a thunk, we retrieve the
// target function body from the thunk.
Defined *SymtabSection::getFuncBodySym(Defined *originalSym) {
if (originalSym->identicalCodeFoldingKind == Symbol::ICFFoldKind::None ||
originalSym->identicalCodeFoldingKind == Symbol::ICFFoldKind::Body)
return originalSym;

return macho::getBodyForThunkFoldedSym(originalSym);
}

void SymtabSection::emitStabs() {
if (config->omitDebugInfo)
return;
Expand Down Expand Up @@ -1252,10 +1240,11 @@ void SymtabSection::emitStabs() {
if (!file || !file->compileUnit)
continue;

// We use 'originalIsec' to get the file id of the symbol since 'isec()'
// might point to the merged ICF symbol's file
symbolsNeedingStabs.emplace_back(
defined, getFuncBodySym(defined)->originalIsec->getFile()->id);
// We use the symbol's original InputSection to get the file id,
// even for ICF folded symbols, to ensure STABS entries point to the
// correct object file where the symbol was originally defined
symbolsNeedingStabs.emplace_back(defined,
defined->originalIsec->getFile()->id);
}
}

Expand All @@ -1270,10 +1259,12 @@ void SymtabSection::emitStabs() {
InputFile *lastFile = nullptr;
for (SortingPair &pair : symbolsNeedingStabs) {
Defined *defined = pair.first;
// We use 'originalIsec' of the symbol since we care about the actual origin
// of the symbol, not the canonical location returned by `isec()`.
Defined *funcBodySym = getFuncBodySym(defined);
InputSection *isec = funcBodySym->originalIsec;
// When emitting STABS entries for a symbol, always use the original
// InputSection of the defined symbol, not the section of the function body
// (which might be a different function entirely if ICF folded this
// function). This ensures STABS entries point back to the original object
// file.
InputSection *isec = defined->originalIsec;
ObjFile *file = cast<ObjFile>(isec->getFile());

if (lastFile == nullptr || lastFile != file) {
Expand All @@ -1288,12 +1279,30 @@ void SymtabSection::emitStabs() {
StabsEntry symStab;
symStab.sect = isec->parent->index;
symStab.strx = stringTableSection.addString(defined->getName());
symStab.value = funcBodySym->getVA();

// When using --keep-icf-stabs, we need to use the VA of the actual function
// body that the linker will place in the binary. This is the function that
// the symbol refers to after ICF folding.
if (defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Thunk) {
// For thunks, we need to get the function they point to
Defined *target = getBodyForThunkFoldedSym(defined);
symStab.value = target->getVA();
} else {
symStab.value = defined->getVA();
}

if (isCodeSection(isec)) {
symStab.type = N_FUN;
stabs.emplace_back(std::move(symStab));
emitEndFunStab(funcBodySym);
// For the end function marker in STABS, we need to use the size of the
// actual function body that exists in the output binary
if (defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Thunk) {
// For thunks, we use the target's size
Defined *target = getBodyForThunkFoldedSym(defined);
emitEndFunStab(target);
} else {
emitEndFunStab(defined);
}
} else {
symStab.type = defined->isExternal() ? N_GSYM : N_STSYM;
stabs.emplace_back(std::move(symStab));
Expand Down
138 changes: 135 additions & 3 deletions lld/test/MachO/icf-safe-thunks-dwarf.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

; RUN: rm -rf %t && split-file %s %t

; Test single object file case
; RUN: llc -filetype=obj %t/a.ll -O3 -o %t/a.o -enable-machine-outliner=never -mtriple arm64-apple-macos -addrsig
; RUN: %lld -arch arm64 -lSystem --icf=safe_thunks -dylib -o %t/a.dylib %t/a.o

Expand All @@ -26,6 +27,42 @@
; RUN: %lld -arch arm64 -lSystem --icf=safe_thunks --keep-icf-stabs -dylib -o %t/a_thunks.dylib %t/a.o
; RUN: dsymutil -s %t/a_thunks.dylib > %t/a_thunks.txt

;;;;;;;;;;;;;;;;;;;;;;;;;;;; Test multiple object files with identical functions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; RUN: llc -filetype=obj %t/b.ll -O3 -o %t/b.o -enable-machine-outliner=never -mtriple arm64-apple-macos -addrsig
; RUN: %lld -arch arm64 -lSystem --icf=safe_thunks --keep-icf-stabs -dylib -o %t/multi_thunks.dylib %t/a.o %t/b.o
; RUN: dsymutil -s %t/multi_thunks.dylib | FileCheck %s --check-prefix=VERIFY-MULTI-STABS
# Check that STABS entries correctly associate functions with their originating object files
# VERIFY-MULTI-STABS-LABEL: Symbol table for: '{{.*}}/multi_thunks.dylib'

# First object file's source and object file entries
# VERIFY-MULTI-STABS: N_SO{{.*}}a.cpp
# VERIFY-MULTI-STABS-NEXT: N_OSO{{.*}}a.o

# Functions from the first object file - all functions share the same address but belong to a.o
# VERIFY-MULTI-STABS: N_FUN{{.*}}[[FUNC_ADDR:[0-9a-f]+]] '_func_A'
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}00 0000 {{.*}}
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}[[FUNC_ADDR]] '_func_B'
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}00 0000 {{.*}}
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}[[FUNC_ADDR]] '_func_C'
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}00 0000 {{.*}}
# VERIFY-MULTI-STABS-NEXT-NEXT: N_FUN{{.*}}[0-9a-f]+ '_take_func_addr'

# End of first object file's entries
# VERIFY-MULTI-STABS: N_SO{{.*}}01 0000 0000000000000000

# Second object file's source and object file entries
# VERIFY-MULTI-STABS: N_SO{{.*}}b.cpp
# VERIFY-MULTI-STABS-NEXT: N_OSO{{.*}}b.o

# Functions from the second object file - same addresses but different object file
# VERIFY-MULTI-STABS: N_FUN{{.*}}[[FUNC_ADDR]] '_func_D'
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}00 0000 {{.*}}
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}[[FUNC_ADDR]] '_func_E'
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}00 0000 {{.*}}
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}[[FUNC_ADDR]] '_func_F'
# VERIFY-MULTI-STABS-NEXT: N_FUN{{.*}}00 0000 {{.*}}
# VERIFY-MULTI-STABS-NEXT-NEXT: N_FUN{{.*}}[0-9a-f]+ '_take_func_addr_b'

; RUN: dsymutil --flat --verify-dwarf=none %t/a_thunks.dylib -o %t/a_thunks.dSYM
; RUN: dsymutil -s %t/a_thunks.dSYM >> %t/a_thunks.txt
Expand Down Expand Up @@ -73,6 +110,9 @@
# VERIFY-THUNK-NEXT: {{ +}}DW_AT_low_pc (0x[[MERGED_FUN_ADDR]])
# VERIFY-THUNK-NEXT-NEXT-NEXT-NEXT-NEXT: {{ +}}DW_AT_name ("func_C")




;--- a.cpp
#define ATTR __attribute__((noinline)) extern "C"
typedef unsigned long long ULL;
Expand All @@ -89,40 +129,64 @@ ATTR ULL take_func_addr() {
return val;
}

;--- b.cpp
#define ATTR __attribute__((noinline)) extern "C"
typedef unsigned long long ULL;

// Identical functions in a different object file
ATTR int func_D() { return 1; }
ATTR int func_E() { return 1; }
ATTR int func_F() { return 1; }

ATTR ULL take_func_addr_b() {
ULL val = 0;
val += (ULL)(void*)func_D;
val += (ULL)(void*)func_E;
val += (ULL)(void*)func_F;
return val;
}

;--- gen
clang -target arm64-apple-macos11.0 -S -emit-llvm a.cpp -O3 -g -o -
clang -target arm64-apple-macos11.0 -S -emit-llvm a.cpp -O3 -g -fdebug-compilation-dir=/proc/self/cwd -o -
echo ""
echo ";--- b.ll"
clang -target arm64-apple-macos11.0 -S -emit-llvm b.cpp -O3 -g -fdebug-compilation-dir=/proc/self/cwd -o -

;--- a.ll
; ModuleID = 'a.cpp'
source_filename = "a.cpp"
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "arm64-apple-macosx11.0.0"

; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
define noundef i32 @func_A() #0 !dbg !12 {
entry:
ret i32 1, !dbg !16
}

; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
define noundef i32 @func_B() #0 !dbg !17 {
entry:
ret i32 1, !dbg !18
}

; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
define noundef i32 @func_C() #0 !dbg !19 {
entry:
ret i32 1, !dbg !20
}

; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
define noundef i64 @take_func_addr() local_unnamed_addr #0 !dbg !21 {
entry:
#dbg_value(i64 0, !25, !DIExpression(), !26)
#dbg_value(i64 ptrtoint (ptr @func_A to i64), !25, !DIExpression(), !26)
#dbg_value(i64 add (i64 ptrtoint (ptr @func_A to i64), i64 ptrtoint (ptr @func_B to i64)), !25, !DIExpression(), !26)
#dbg_value(i64 add (i64 add (i64 ptrtoint (ptr @func_A to i64), i64 ptrtoint (ptr @func_B to i64)), i64 ptrtoint (ptr @func_C to i64)), !25, !DIExpression(), !26)
ret i64 add (i64 add (i64 ptrtoint (ptr @func_A to i64), i64 ptrtoint (ptr @func_B to i64)), i64 ptrtoint (ptr @func_C to i64)), !dbg !27
}

attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }
attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+ccpp,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!6, !7, !8, !9, !10, !11}
Expand Down Expand Up @@ -155,3 +219,71 @@ attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind ssp wil
!25 = !DILocalVariable(name: "val", scope: !21, file: !1, line: 9, type: !3)
!26 = !DILocation(line: 0, scope: !21)
!27 = !DILocation(line: 13, column: 5, scope: !21)

;--- b.ll
; ModuleID = 'b.cpp'
source_filename = "b.cpp"
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "arm64-apple-macosx11.0.0"

; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
define noundef i32 @func_D() #0 !dbg !12 {
entry:
ret i32 1, !dbg !16
}

; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
define noundef i32 @func_E() #0 !dbg !17 {
entry:
ret i32 1, !dbg !18
}

; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
define noundef i32 @func_F() #0 !dbg !19 {
entry:
ret i32 1, !dbg !20
}

; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
define noundef i64 @take_func_addr_b() local_unnamed_addr #0 !dbg !21 {
entry:
#dbg_value(i64 0, !25, !DIExpression(), !26)
#dbg_value(i64 ptrtoint (ptr @func_D to i64), !25, !DIExpression(), !26)
#dbg_value(i64 add (i64 ptrtoint (ptr @func_D to i64), i64 ptrtoint (ptr @func_E to i64)), !25, !DIExpression(), !26)
#dbg_value(i64 add (i64 add (i64 ptrtoint (ptr @func_D to i64), i64 ptrtoint (ptr @func_E to i64)), i64 ptrtoint (ptr @func_F to i64)), !25, !DIExpression(), !26)
ret i64 add (i64 add (i64 ptrtoint (ptr @func_D to i64), i64 ptrtoint (ptr @func_E to i64)), i64 ptrtoint (ptr @func_F to i64)), !dbg !27
}

attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+ccpp,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!6, !7, !8, !9, !10, !11}

!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, retainedTypes: !2, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/")
!1 = !DIFile(filename: "b.cpp", directory: "/proc/self/cwd")
!2 = !{!3, !5}
!3 = !DIDerivedType(tag: DW_TAG_typedef, name: "ULL", file: !1, line: 2, baseType: !4)
!4 = !DIBasicType(name: "unsigned long long", size: 64, encoding: DW_ATE_unsigned)
!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)
!6 = !{i32 7, !"Dwarf Version", i32 4}
!7 = !{i32 2, !"Debug Info Version", i32 3}
!8 = !{i32 1, !"wchar_size", i32 4}
!9 = !{i32 8, !"PIC Level", i32 2}
!10 = !{i32 7, !"uwtable", i32 1}
!11 = !{i32 7, !"frame-pointer", i32 1}
!12 = distinct !DISubprogram(name: "func_D", scope: !1, file: !1, line: 5, type: !13, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
!13 = !DISubroutineType(types: !14)
!14 = !{!15}
!15 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!16 = !DILocation(line: 5, column: 21, scope: !12)
!17 = distinct !DISubprogram(name: "func_E", scope: !1, file: !1, line: 6, type: !13, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
!18 = !DILocation(line: 6, column: 21, scope: !17)
!19 = distinct !DISubprogram(name: "func_F", scope: !1, file: !1, line: 7, type: !13, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
!20 = !DILocation(line: 7, column: 21, scope: !19)
!21 = distinct !DISubprogram(name: "take_func_addr_b", scope: !1, file: !1, line: 9, type: !22, scopeLine: 9, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !24)
!22 = !DISubroutineType(types: !23)
!23 = !{!3}
!24 = !{!25}
!25 = !DILocalVariable(name: "val", scope: !21, file: !1, line: 10, type: !3)
!26 = !DILocation(line: 0, scope: !21)
!27 = !DILocation(line: 14, column: 5, scope: !21)