Skip to content

Commit 8b0f47b

Browse files
authored
[Object][Wasm] Use file offset for section addresses in linked wasm files (#80529)
Wasm has no unified virtual memory space as other object formats and architectures do, so previously WasmObjectFile reported 0 for all section addresses, and until 428cf71 used section offsets for function symbols. Now we use file offsets for function symbols, and this change switches section addresses to do the same (in linked files). The main result of this is that objdump now reports VMAs in section listings, and also uses file offsets rather than section offsets when disassembling linked binaries (matching the behavior of other disassemblers and stack traces produced by browsers). To make this work, this PR also updates objdump's generation of synthetic fallback symbols to match lib/Object and also correctly plumbs symbol types for regular and dummy symbols through to the backend to avoid needing special knowledge of address 0. This also paves the way for generating symbols from name sections rather than symbol tables or imports (see #76107) by allowing the disassembler's synthetic fallback symbols to match the name-section generated symbols (in a followup PR).
1 parent 2ecf608 commit 8b0f47b

File tree

9 files changed

+56
-39
lines changed

9 files changed

+56
-39
lines changed

lld/test/wasm/build-id.test

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,18 +43,18 @@ foo:
4343

4444

4545
# DEFAULT: Contents of section build_id:
46-
# DEFAULT-NEXT: 0000 10299168 1e3c845a 3c8f80ae 2f16cc22 .).h.<.Z<.../.."
47-
# DEFAULT-NEXT: 0010 2d
46+
# DEFAULT-NEXT: 0079 10299168 1e3c845a 3c8f80ae 2f16cc22 .).h.<.Z<.../.."
47+
# DEFAULT-NEXT: 0089 2d
4848

4949
# SHA1: Contents of section build_id:
50-
# SHA1-NEXT: 0000 145abdda 387a9bc4 e3aed3c3 3319cd37 .Z..8z......3..7
51-
# SHA1-NEXT: 0010 0212237c e4 ..#|.
50+
# SHA1-NEXT: 0079 145abdda 387a9bc4 e3aed3c3 3319cd37 .Z..8z......3..7
51+
# SHA1-NEXT: 0089 0212237c e4 ..#|.
5252

5353
# UUID: Contents of section build_id:
54-
# UUID-NEXT: 0000 10
54+
# UUID-NEXT: 0079 10
5555

5656
# HEX: Contents of section build_id:
57-
# HEX-NEXT: 0000 04123456 78 ..4Vx
57+
# HEX-NEXT: 0079 04123456 78 ..4Vx
5858

5959

6060
# NONE-NOT: Contents of section build_id:

lld/test/wasm/merge-string-debug.s

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,13 @@
2929

3030
# CHECK: Hex dump of section '.debug_str':
3131

32-
# CHECK-O0: 0x00000000 636c616e 67207665 7273696f 6e203133 clang version 13
33-
# CHECK-O0: 0x00000010 2e302e30 00666f6f 62617200 636c616e .0.0.foobar.clan
34-
# CHECK-O0: 0x00000020 67207665 7273696f 6e203133 2e302e30 g version 13.0.0
35-
# CHECK-O0: 0x00000030 00626172 00666f6f 00 .bar.foo.
32+
# CHECK-O0: 0x00000025 636c616e 67207665 7273696f 6e203133 clang version 13
33+
# CHECK-O0: 0x00000035 2e302e30 00666f6f 62617200 636c616e .0.0.foobar.clan
34+
# CHECK-O0: 0x00000045 67207665 7273696f 6e203133 2e302e30 g version 13.0.0
35+
# CHECK-O0: 0x00000055 00626172 00666f6f 00 .bar.foo.
3636

37-
# CHECK-O1: 0x00000000 666f6f62 61720066 6f6f0063 6c616e67 foobar.foo.clang
38-
# CHECK-O1: 0x00000010 20766572 73696f6e 2031332e 302e3000 version 13.0.0.
37+
# CHECK-O1: 0x00000025 666f6f62 61720066 6f6f0063 6c616e67 foobar.foo.clang
38+
# CHECK-O1: 0x00000035 20766572 73696f6e 2031332e 302e3000 version 13.0.0.
3939

4040
# CHECK-OFFSETS: Hex dump of section '.debug_str_offsets':
41-
# CHECK-OFFSETS: 0x00000000 00000000 00000000 00000000 ............
41+
# CHECK-OFFSETS: 0x0000007e 00000000 00000000 00000000 ............

lld/test/wasm/startstop.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,12 @@ entry:
3434
; CHECK-NEXT: Value: 1024
3535
; CHECK-NEXT: Content: 03000000040000002A0000002B000000
3636

37-
; ASM: 00000001 <get_start>:
37+
; ASM: 0000006e <get_start>:
3838
; ASM-EMPTY:
39-
; ASM-NEXT: 3: i32.const 1024
40-
; ASM-NEXT: 9: end
39+
; ASM-NEXT: 70: i32.const 1024
40+
; ASM-NEXT: 76: end
4141

42-
; ASM: 0000000a <get_end>:
42+
; ASM: 00000077 <get_end>:
4343
; ASM-EMPTY:
44-
; ASM-NEXT: c: i32.const 1040
45-
; ASM-NEXT: 12: end
44+
; ASM-NEXT: 79: i32.const 1040
45+
; ASM-NEXT: 7f: end

llvm/lib/Object/WasmObjectFile.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1906,7 +1906,13 @@ Expected<StringRef> WasmObjectFile::getSectionName(DataRefImpl Sec) const {
19061906
return wasm::sectionTypeToString(S.Type);
19071907
}
19081908

1909-
uint64_t WasmObjectFile::getSectionAddress(DataRefImpl Sec) const { return 0; }
1909+
uint64_t WasmObjectFile::getSectionAddress(DataRefImpl Sec) const {
1910+
// For object files, use 0 for section addresses, and section offsets for
1911+
// symbol addresses. For linked files, use file offsets.
1912+
// See also getSymbolAddress.
1913+
return isRelocatableObject() || isSharedObject() ? 0
1914+
: Sections[Sec.d.a].Offset;
1915+
}
19101916

19111917
uint64_t WasmObjectFile::getSectionIndex(DataRefImpl Sec) const {
19121918
return Sec.d.a;

llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include "MCTargetDesc/WebAssemblyMCTypeUtilities.h"
1818
#include "TargetInfo/WebAssemblyTargetInfo.h"
19+
#include "llvm/BinaryFormat/Wasm.h"
1920
#include "llvm/MC/MCContext.h"
2021
#include "llvm/MC/MCDecoderOps.h"
2122
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -127,7 +128,7 @@ WebAssemblyDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
127128
uint64_t Address,
128129
raw_ostream &CStream) const {
129130
Size = 0;
130-
if (Address == 0) {
131+
if (Symbol.Type == wasm::WASM_SYMBOL_TYPE_SECTION) {
131132
// Start of a code section: we're parsing only the function count.
132133
int64_t FunctionCount;
133134
if (!nextLEB(FunctionCount, Bytes, Size, false))

llvm/test/tools/llvm-objdump/wasm/executable-without-symbols-debugnames.test

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,14 @@ Sections:
3636

3737
# CHECK: Disassembly of section CODE:
3838
# CHECK-EMPTY:
39-
# CHECK-NEXT: 00000000 <CODE>:
39+
# CHECK-NEXT: 00000026 <CODE>:
4040
# CHECK-NEXT: # 2 functions in section.
4141
# CHECK-EMPTY:
42-
# CHECK-NEXT: 00000001 <f>:
42+
# CHECK-NEXT: 00000027 <f>:
4343
# CHECK-EMPTY:
44-
# CHECK-NEXT: 3: 0b end
44+
# CHECK-NEXT: 29: 0b end
4545
# CHECK-EMPTY:
46-
# CHECK-NEXT: 00000004 <g>:
46+
# CHECK-NEXT: 0000002a <g>:
4747
# CHECK-EMPTY:
48-
# CHECK-NEXT: 6: 20 00 local.get 0
49-
# CHECK-NEXT: 8: 0b end
48+
# CHECK-NEXT: 2c: 20 00 local.get 0
49+
# CHECK-NEXT: 2e: 0b end

llvm/test/tools/llvm-objdump/wasm/executable-without-symbols.test

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,14 @@ Sections:
2929

3030
# CHECK: Disassembly of section CODE:
3131
# CHECK-EMPTY:
32-
# CHECK-NEXT: 00000000 <CODE>:
32+
# CHECK-NEXT: 00000026 <CODE>:
3333
# CHECK-NEXT: # 2 functions in section.
3434
# CHECK-EMPTY:
35-
# CHECK-NEXT: 00000001 <>:
35+
# CHECK-NEXT: 00000027 <>:
3636
# CHECK-EMPTY:
37-
# CHECK-NEXT: 3: 0b end
37+
# CHECK-NEXT: 29: 0b end
3838
# CHECK-EMPTY:
39-
# CHECK-NEXT: 00000004 <>:
39+
# CHECK-NEXT: 0000002a <>:
4040
# CHECK-EMPTY:
41-
# CHECK-NEXT: 6: 20 00 local.get 0
42-
# CHECK-NEXT: 8: 0b end
41+
# CHECK-NEXT: 2c: 20 00 local.get 0
42+
# CHECK-NEXT: 2e: 0b end

llvm/test/tools/llvm-objdump/wasm/no-codesec.test

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44

55
# CHECK: Sections:
66
# CHECK-NEXT: Idx Name Size VMA Type
7-
# CHECK-NEXT: 0 TYPE 00000004 00000000
8-
# CHECK-NEXT: 1 FUNCTION 00000002 00000000
9-
# CHECK-NEXT: 2 name 00000008 00000000
7+
# CHECK-NEXT: 0 TYPE 00000004 0000000e
8+
# CHECK-NEXT: 1 FUNCTION 00000002 00000018
9+
# CHECK-NEXT: 2 name 00000008 00000020
1010

1111
--- !WASM
1212
FileHeader:

llvm/tools/llvm-objdump/llvm-objdump.cpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "llvm/ADT/StringExtras.h"
3030
#include "llvm/ADT/StringSet.h"
3131
#include "llvm/ADT/Twine.h"
32+
#include "llvm/BinaryFormat/Wasm.h"
3233
#include "llvm/DebugInfo/BTF/BTFParser.h"
3334
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
3435
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
@@ -1149,7 +1150,11 @@ addMissingWasmCodeSymbols(const WasmObjectFile &Obj,
11491150
SymbolAddresses.insert(Sym.Addr);
11501151

11511152
for (const wasm::WasmFunction &Function : Obj.functions()) {
1152-
uint64_t Address = Function.CodeSectionOffset;
1153+
// This adjustment mirrors the one in WasmObjectFile::getSymbolAddress.
1154+
uint32_t Adjustment = Obj.isRelocatableObject() || Obj.isSharedObject()
1155+
? 0
1156+
: Section->getAddress();
1157+
uint64_t Address = Function.CodeSectionOffset + Adjustment;
11531158
// Only add fallback symbols for functions not already present in the symbol
11541159
// table.
11551160
if (SymbolAddresses.count(Address))
@@ -1354,6 +1359,10 @@ SymbolInfoTy objdump::createSymbolInfo(const ObjectFile &Obj,
13541359
const SymbolRef::Type SymType = unwrapOrError(Symbol.getType(), FileName);
13551360
return SymbolInfoTy(Addr, Name, SymType, /*IsMappingSymbol=*/false,
13561361
/*IsXCOFF=*/true);
1362+
} else if (Obj.isWasm()) {
1363+
uint8_t SymType =
1364+
cast<WasmObjectFile>(&Obj)->getWasmSymbol(Symbol).Info.Kind;
1365+
return SymbolInfoTy(Addr, Name, SymType, false);
13571366
} else {
13581367
uint8_t Type =
13591368
Obj.isELF() ? getElfSymbolType(Obj, Symbol) : (uint8_t)ELF::STT_NOTYPE;
@@ -1366,8 +1375,9 @@ static SymbolInfoTy createDummySymbolInfo(const ObjectFile &Obj,
13661375
uint8_t Type) {
13671376
if (Obj.isXCOFF() && (SymbolDescription || TracebackTable))
13681377
return SymbolInfoTy(std::nullopt, Addr, Name, std::nullopt, false);
1369-
else
1370-
return SymbolInfoTy(Addr, Name, Type);
1378+
if (Obj.isWasm())
1379+
return SymbolInfoTy(Addr, Name, wasm::WASM_SYMBOL_TYPE_SECTION);
1380+
return SymbolInfoTy(Addr, Name, Type);
13711381
}
13721382

13731383
static void collectBBAddrMapLabels(

0 commit comments

Comments
 (0)