-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[ELF] Improve undefined symbol message w/ DW_TAG_variable of the enclosing symbol but w/o line number information #70854
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
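@llvm/pr-subscribers-lld-elf @llvm/pr-subscribers-lld Author: Fangrui Song (MaskRay) Changes: The undefined symbol message suggests the source line when line number information is available (see https://reviews.llvm.org/D31481). When the undefined symbol is referenced from a global variable, no line number information is available.
This patch refactors `getEnclosingFunction` into the more general `getEnclosingSymbol` and uses it to locate the enclosing symbol, so that its DW_TAG_variable can supply the source location. Full diff: https://github.com/llvm/llvm-project/pull/70854.diff 6 Files Affected: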
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 02394cbae95d557..e4ce050a789dfbb 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -241,12 +241,12 @@ InputSection *InputSectionBase::getLinkOrderDep() const {
return cast<InputSection>(file->getSections()[link]);
}
-// Find a function symbol that encloses a given location.
-Defined *InputSectionBase::getEnclosingFunction(uint64_t offset) {
+// Find a symbol that encloses a given location.
+Defined *InputSectionBase::getEnclosingSymbol(uint64_t offset, uint8_t type) {
for (Symbol *b : file->getSymbols())
if (Defined *d = dyn_cast<Defined>(b))
- if (d->section == this && d->type == STT_FUNC && d->value <= offset &&
- offset < d->value + d->size)
+ if (d->section == this && d->value <= offset &&
+ offset < d->value + d->size && (type == 0 || type == d->type))
return d;
return nullptr;
}
@@ -296,10 +296,8 @@ std::string InputSectionBase::getObjMsg(uint64_t off) {
// Find a symbol that encloses a given location. getObjMsg may be called
// before ObjFile::initSectionsAndLocalSyms where local symbols are
// initialized.
- for (Symbol *b : file->getSymbols())
- if (auto *d = dyn_cast_or_null<Defined>(b))
- if (d->section == this && d->value <= off && off < d->value + d->size)
- return filename + ":(" + toString(*d) + ")" + archive;
+ if (Defined *d = getEnclosingSymbol(off))
+ return filename + ":(" + toString(*d) + ")" + archive;
// If there's no symbol, print out the offset in the section.
return (filename + ":(" + name + "+0x" + utohexstr(off) + ")" + archive)
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index 2b91711abba3d14..7570901b4ef9425 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -189,9 +189,12 @@ class InputSectionBase : public SectionBase {
InputSection *getLinkOrderDep() const;
- // Get the function symbol that encloses this offset from within the
- // section.
- Defined *getEnclosingFunction(uint64_t offset);
+ // Get a symbol that encloses this offset from within the section. If type is
+ // not zero, return a symbol with the specified type.
+ Defined *getEnclosingSymbol(uint64_t offset, uint8_t type = 0);
+ Defined *getEnclosingFunction(uint64_t offset) {
+ return getEnclosingSymbol(offset, llvm::ELF::STT_FUNC);
+ }
// Returns a source location string. Used to construct an error message.
std::string getLocation(uint64_t offset);
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index f3fb0c71a8b3064..62e80521c4558ee 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -739,7 +739,8 @@ static void reportUndefinedSymbol(const UndefinedDiag &undef,
uint64_t offset = l.offset;
msg += "\n>>> referenced by ";
- std::string src = sec.getSrcMsg(sym, offset);
+ Symbol *enclosing = sec.getEnclosingSymbol(offset);
+ std::string src = sec.getSrcMsg(enclosing ? *enclosing : sym, offset);
if (!src.empty())
msg += src + "\n>>> ";
msg += sec.getObjMsg(offset);
diff --git a/lld/test/ELF/Inputs/undef-debug.s b/lld/test/ELF/Inputs/undef-debug.s
index 46c1c92d2b1f6e8..016a0cac60e5eaf 100644
--- a/lld/test/ELF/Inputs/undef-debug.s
+++ b/lld/test/ELF/Inputs/undef-debug.s
@@ -1,3 +1,4 @@
+## Variables with line number information
.file 1 "dir/undef-debug.s"
.loc 1 3
.quad zed3
diff --git a/lld/test/ELF/Inputs/undef-debug2.s b/lld/test/ELF/Inputs/undef-debug2.s
new file mode 100644
index 000000000000000..e176969dc4e3ffc
--- /dev/null
+++ b/lld/test/ELF/Inputs/undef-debug2.s
@@ -0,0 +1,200 @@
+## Generate from:
+##
+## extern int zed9;
+## namespace ns {
+## int *var[] = {
+## &zed9
+## };
+## }
+ .text
+ .file "undef-debug2.cc"
+ .file 0 "dir" "undef-debug2.cc" md5 0xd7caefb836c47f6c56303f19e96f2587
+ .type _ZN2ns3varE,@object # @_ZN2ns3varE
+ .data
+ .globl _ZN2ns3varE
+ .p2align 3, 0x0
+_ZN2ns3varE:
+ .quad zed9
+ .size _ZN2ns3varE, 8
+
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .byte 37 # DW_FORM_strx1
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 114 # DW_AT_str_offsets_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 37 # DW_FORM_strx1
+ .byte 115 # DW_AT_addr_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 57 # DW_TAG_namespace
+ .byte 1 # DW_CHILDREN_yes
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 3 # Abbreviation Code
+ .byte 52 # DW_TAG_variable
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 2 # DW_AT_location
+ .byte 24 # DW_FORM_exprloc
+ .byte 110 # DW_AT_linkage_name
+ .byte 37 # DW_FORM_strx1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 4 # Abbreviation Code
+ .byte 1 # DW_TAG_array_type
+ .byte 1 # DW_CHILDREN_yes
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 5 # Abbreviation Code
+ .byte 33 # DW_TAG_subrange_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 55 # DW_AT_count
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 6 # Abbreviation Code
+ .byte 15 # DW_TAG_pointer_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 7 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 8 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 1 # Abbrev [1] 0xc:0x3b DW_TAG_compile_unit
+ .byte 0 # DW_AT_producer
+ .short 33 # DW_AT_language
+ .byte 1 # DW_AT_name
+ .long .Lstr_offsets_base0 # DW_AT_str_offsets_base
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .byte 2 # DW_AT_comp_dir
+ .long .Laddr_table_base0 # DW_AT_addr_base
+ .byte 2 # Abbrev [2] 0x1e:0xf DW_TAG_namespace
+ .byte 3 # DW_AT_name
+ .byte 3 # Abbrev [3] 0x20:0xc DW_TAG_variable
+ .byte 4 # DW_AT_name
+ .long 45 # DW_AT_type
+ # DW_AT_external
+ .byte 0 # DW_AT_decl_file
+ .byte 3 # DW_AT_decl_line
+ .byte 2 # DW_AT_location
+ .byte 161
+ .byte 0
+ .byte 7 # DW_AT_linkage_name
+ .byte 0 # End Of Children Mark
+ .byte 4 # Abbrev [4] 0x2d:0xc DW_TAG_array_type
+ .long 57 # DW_AT_type
+ .byte 5 # Abbrev [5] 0x32:0x6 DW_TAG_subrange_type
+ .long 66 # DW_AT_type
+ .byte 1 # DW_AT_count
+ .byte 0 # End Of Children Mark
+ .byte 6 # Abbrev [6] 0x39:0x5 DW_TAG_pointer_type
+ .long 62 # DW_AT_type
+ .byte 7 # Abbrev [7] 0x3e:0x4 DW_TAG_base_type
+ .byte 5 # DW_AT_name
+ .byte 5 # DW_AT_encoding
+ .byte 4 # DW_AT_byte_size
+ .byte 8 # Abbrev [8] 0x42:0x4 DW_TAG_base_type
+ .byte 6 # DW_AT_name
+ .byte 8 # DW_AT_byte_size
+ .byte 7 # DW_AT_encoding
+ .byte 0 # End Of Children Mark
+.Ldebug_info_end0:
+ .section .debug_str_offsets,"",@progbits
+ .long 36 # Length of String Offsets Set
+ .short 5
+ .short 0
+.Lstr_offsets_base0:
+ .section .debug_str,"MS",@progbits,1
+.Linfo_string0:
+ .asciz "clang version 18.0.0" # string offset=0
+.Linfo_string1:
+ .asciz "undef-debug2.cc" # string offset=21
+.Linfo_string2:
+ .asciz "dir" # string offset=37
+.Linfo_string3:
+ .asciz "ns" # string offset=44
+.Linfo_string4:
+ .asciz "var" # string offset=47
+.Linfo_string5:
+ .asciz "int" # string offset=51
+.Linfo_string6:
+ .asciz "__ARRAY_SIZE_TYPE__" # string offset=55
+.Linfo_string7:
+ .asciz "_ZN2ns3varE" # string offset=75
+ .section .debug_str_offsets,"",@progbits
+ .long .Linfo_string0
+ .long .Linfo_string1
+ .long .Linfo_string2
+ .long .Linfo_string3
+ .long .Linfo_string4
+ .long .Linfo_string5
+ .long .Linfo_string6
+ .long .Linfo_string7
+ .section .debug_addr,"",@progbits
+ .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+.Ldebug_addr_start0:
+ .short 5 # DWARF version number
+ .byte 8 # Address size
+ .byte 0 # Segment selector size
+.Laddr_table_base0:
+ .quad _ZN2ns3varE
+.Ldebug_addr_end0:
+ .section .debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/lld/test/ELF/undef.s b/lld/test/ELF/undef.s
index 2b42ae12be2c129..009b8bf6d23b410 100644
--- a/lld/test/ELF/undef.s
+++ b/lld/test/ELF/undef.s
@@ -3,11 +3,12 @@
# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/undef.s -o %t2.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/undef-debug.s -o %t3.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/undef-bad-debug.s -o %t4.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/undef-debug2.s -o %t5.o
# RUN: rm -f %t2.a
# RUN: llvm-ar rc %t2.a %t2.o
-# RUN: not ld.lld --threads=1 %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \
+# RUN: not ld.lld --threads=1 %t.o %t2.a %t3.o %t4.o %t5.o -o /dev/null 2>&1 \
# RUN: | FileCheck %s --implicit-check-not="error:" --implicit-check-not="warning:"
-# RUN: not ld.lld --threads=1 -pie %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \
+# RUN: not ld.lld --threads=1 -pie %t.o %t2.a %t3.o %t4.o %t5.o -o /dev/null 2>&1 \
# RUN: | FileCheck %s --implicit-check-not="error:" --implicit-check-not="warning:"
# CHECK: error: undefined symbol: foo
@@ -82,6 +83,10 @@
# CHECK-NEXT: >>> referenced by undef-bad-debug2.s:11 (dir2{{/|\\}}undef-bad-debug2.s:11)
# CHECK-NEXT: >>> {{.*}}tmp4.o:(.text+0x18)
+# CHECK: error: undefined symbol: zed9
+# CHECK-NEXT: >>> referenced by undef-debug2.cc:3 (dir{{/|\\}}undef-debug2.cc:3)
+# CHECK-NEXT: >>> {{.*}}tmp5.o:(ns::var)
+
# RUN: not ld.lld %t.o %t2.a -o /dev/null -no-demangle 2>&1 | \
# RUN: FileCheck -check-prefix=NO-DEMANGLE %s
# NO-DEMANGLE: error: undefined symbol: _Z3fooi
|
4ddc9f9
to
4000572
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, it accomplishes the same thing as the original PR I submitted.
4000572
to
6dfcab2
Compare
Rebase on top of |
…osing symbol but w/o line number information

The undefined symbol message suggests the source line when line number information is available (see https://reviews.llvm.org/D31481). When the undefined symbol is from a global variable, we won't get the line information.

```
extern int undef;
namespace ns {
int *var[] = { &undef };
// DW_TAG_variable(DW_AT_decl_file/DW_AT_decl_line) is available while
// line number information is unavailable.
}

ld.lld: error: undefined symbol: undef
>>> referenced by undef-debug2.cc
>>>               undef-debug2.o:(ns::var)
```

This patch utilizes `getEnclosingSymbol` to locate `var` and find DW_TAG_variable for `var`:

```
ld.lld: error: undefined symbol: undef
>>> referenced by undef-debug2.cc:3 (/tmp/c/undef-debug2.cc:3)
>>>               undef-debug2.o:(ns::var)
```
6dfcab2
to
c012418
Compare
The undefined symbol message suggests the source line when line number
information is available (see https://reviews.llvm.org/D31481).
When the undefined symbol is from a global variable, we won't get the
line information.
This patch utilizes `getEnclosingSymbol` to locate `var` and find DW_TAG_variable for `var`: