Skip to content

Commit 6c9f681

Browse files
committed
[lld-macho] Support EH frame pointer encodings that use sdata4
Previously we only supported the system pointer size (aka the `absptr` encoding) because `llvm-mc`'s CFI directives always generate EH frames with that encoding. But libffi uses 4-byte-encoded, hand-rolled EH frames, so this patch adds support for them. Fixes llvm#56576. Reviewed By: #lld-macho, oontvoo Differential Revision: https://reviews.llvm.org/D130804
1 parent 773d51c commit 6c9f681

File tree

4 files changed

+117
-25
lines changed

4 files changed

+117
-25
lines changed

lld/MachO/EhFrame.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,17 +58,17 @@ uint32_t EhReader::readU32(size_t *off) const {
5858
return v;
5959
}
6060

61-
uint64_t EhReader::readPointer(size_t *off) const {
62-
if (*off + wordSize > data.size())
61+
uint64_t EhReader::readPointer(size_t *off, uint8_t size) const {
62+
if (*off + size > data.size())
6363
failOn(*off, "unexpected end of CIE/FDE");
6464
uint64_t v;
65-
if (wordSize == 8)
65+
if (size == 8)
6666
v = read64le(data.data() + *off);
6767
else {
68-
assert(wordSize == 4);
68+
assert(size == 4);
6969
v = read32le(data.data() + *off);
7070
}
71-
*off += wordSize;
71+
*off += size;
7272
return v;
7373
}
7474

lld/MachO/EhFrame.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,17 +55,16 @@ namespace macho {
5555

5656
class EhReader {
5757
public:
58-
EhReader(const ObjFile *file, ArrayRef<uint8_t> data, size_t dataOff,
59-
size_t wordSize)
60-
: file(file), data(data), dataOff(dataOff), wordSize(wordSize) {}
58+
EhReader(const ObjFile *file, ArrayRef<uint8_t> data, size_t dataOff)
59+
: file(file), data(data), dataOff(dataOff) {}
6160
size_t size() const { return data.size(); }
6261
// Read and validate the length field.
6362
uint64_t readLength(size_t *off) const;
6463
// Skip the length field without doing validation.
6564
void skipValidLength(size_t *off) const;
6665
uint8_t readByte(size_t *off) const;
6766
uint32_t readU32(size_t *off) const;
68-
uint64_t readPointer(size_t *off) const;
67+
uint64_t readPointer(size_t *off, uint8_t size) const;
6968
StringRef readString(size_t *off) const;
7069
void skipLeb128(size_t *off) const;
7170
void failOn(size_t errOff, const Twine &msg) const;
@@ -76,7 +75,6 @@ class EhReader {
7675
// The offset of the data array within its section. Used only for error
7776
// reporting.
7877
const size_t dataOff;
79-
size_t wordSize;
8078
};
8179

8280
// The EH frame format, when emitted by llvm-mc, consists of a number of

lld/MachO/InputFiles.cpp

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,7 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
385385
}
386386

387387
void ObjFile::splitEhFrames(ArrayRef<uint8_t> data, Section &ehFrameSection) {
388-
EhReader reader(this, data, /*dataOff=*/0, target->wordSize);
388+
EhReader reader(this, data, /*dataOff=*/0);
389389
size_t off = 0;
390390
while (off < reader.size()) {
391391
uint64_t frameOff = off;
@@ -1293,19 +1293,32 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
12931293

12941294
struct CIE {
12951295
macho::Symbol *personalitySymbol = nullptr;
1296-
bool fdesHaveLsda = false;
12971296
bool fdesHaveAug = false;
1297+
uint8_t lsdaPtrSize = 0; // 0 => no LSDA
1298+
uint8_t funcPtrSize = 0;
12981299
};
12991300

1301+
static uint8_t pointerEncodingToSize(uint8_t enc) {
1302+
switch (enc & 0xf) {
1303+
case dwarf::DW_EH_PE_absptr:
1304+
return target->wordSize;
1305+
case dwarf::DW_EH_PE_sdata4:
1306+
return 4;
1307+
case dwarf::DW_EH_PE_sdata8:
1308+
// ld64 doesn't actually support sdata8, but this seems simple enough...
1309+
return 8;
1310+
default:
1311+
return 0;
1312+
};
1313+
}
1314+
13001315
static CIE parseCIE(const InputSection *isec, const EhReader &reader,
13011316
size_t off) {
13021317
// Handling the full generality of possible DWARF encodings would be a major
13031318
// pain. We instead take advantage of our knowledge of how llvm-mc encodes
13041319
// DWARF and handle just that.
13051320
constexpr uint8_t expectedPersonalityEnc =
13061321
dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_sdata4;
1307-
constexpr uint8_t expectedPointerEnc =
1308-
dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_absptr;
13091322

13101323
CIE cie;
13111324
uint8_t version = reader.readByte(&off);
@@ -1332,16 +1345,17 @@ static CIE parseCIE(const InputSection *isec, const EhReader &reader,
13321345
break;
13331346
}
13341347
case 'L': {
1335-
cie.fdesHaveLsda = true;
13361348
uint8_t lsdaEnc = reader.readByte(&off);
1337-
if (lsdaEnc != expectedPointerEnc)
1349+
cie.lsdaPtrSize = pointerEncodingToSize(lsdaEnc);
1350+
if (cie.lsdaPtrSize == 0)
13381351
reader.failOn(off, "unexpected LSDA encoding 0x" +
13391352
Twine::utohexstr(lsdaEnc));
13401353
break;
13411354
}
13421355
case 'R': {
13431356
uint8_t pointerEnc = reader.readByte(&off);
1344-
if (pointerEnc != expectedPointerEnc)
1357+
cie.funcPtrSize = pointerEncodingToSize(pointerEnc);
1358+
if (cie.funcPtrSize == 0 || !(pointerEnc & dwarf::DW_EH_PE_pcrel))
13451359
reader.failOn(off, "unexpected pointer encoding 0x" +
13461360
Twine::utohexstr(pointerEnc));
13471361
break;
@@ -1471,7 +1485,7 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
14711485
else if (isec->symbols[0]->value != 0)
14721486
fatal("found symbol at unexpected offset in __eh_frame");
14731487

1474-
EhReader reader(this, isec->data, subsec.offset, target->wordSize);
1488+
EhReader reader(this, isec->data, subsec.offset);
14751489
size_t dataOff = 0; // Offset from the start of the EH frame.
14761490
reader.skipValidLength(&dataOff); // readLength() already validated this.
14771491
// cieOffOff is the offset from the start of the EH frame to the cieOff
@@ -1510,20 +1524,20 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
15101524
continue;
15111525
}
15121526

1527+
assert(cieMap.count(cieIsec));
1528+
const CIE &cie = cieMap[cieIsec];
15131529
// Offset of the function address within the EH frame.
15141530
const size_t funcAddrOff = dataOff;
1515-
uint64_t funcAddr = reader.readPointer(&dataOff) + ehFrameSection.addr +
1516-
isecOff + funcAddrOff;
1517-
uint32_t funcLength = reader.readPointer(&dataOff);
1531+
uint64_t funcAddr = reader.readPointer(&dataOff, cie.funcPtrSize) +
1532+
ehFrameSection.addr + isecOff + funcAddrOff;
1533+
uint32_t funcLength = reader.readPointer(&dataOff, cie.funcPtrSize);
15181534
size_t lsdaAddrOff = 0; // Offset of the LSDA address within the EH frame.
1519-
assert(cieMap.count(cieIsec));
1520-
const CIE &cie = cieMap[cieIsec];
15211535
Optional<uint64_t> lsdaAddrOpt;
15221536
if (cie.fdesHaveAug) {
15231537
reader.skipLeb128(&dataOff);
15241538
lsdaAddrOff = dataOff;
1525-
if (cie.fdesHaveLsda) {
1526-
uint64_t lsdaOff = reader.readPointer(&dataOff);
1539+
if (cie.lsdaPtrSize != 0) {
1540+
uint64_t lsdaOff = reader.readPointer(&dataOff, cie.lsdaPtrSize);
15271541
if (lsdaOff != 0) // FIXME possible to test this?
15281542
lsdaAddrOpt = ehFrameSection.addr + isecOff + lsdaAddrOff + lsdaOff;
15291543
}

lld/test/MachO/eh-frame-sdata4.s

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# REQUIRES: x86
2+
# RUN: rm -rf %t; split-file %s %t
3+
4+
## Test that we correctly handle the sdata4 DWARF pointer encoding. llvm-mc's
5+
## CFI directives always generate EH frames using the absptr (i.e. system
6+
## pointer size) encoding, but it is possible to hand-roll your own EH frames
7+
## that use the sdata4 encoding. For instance, libffi does this.
8+
9+
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %t/sdata4.s -o %t/sdata4.o
10+
# RUN: %lld -lSystem %t/sdata4.o -o %t/sdata4
11+
# RUN: llvm-objdump --macho --syms --dwarf=frames %t/sdata4 | FileCheck %s
12+
13+
# CHECK: SYMBOL TABLE:
14+
# CHECK: [[#%.16x,MAIN:]] g F __TEXT,__text _main
15+
16+
# CHECK: .eh_frame contents:
17+
# CHECK: 00000000 00000010 00000000 CIE
18+
# CHECK: Format: DWARF32
19+
# CHECK: Version: 1
20+
# CHECK: Augmentation: "zR"
21+
# CHECK: Code alignment factor: 1
22+
# CHECK: Data alignment factor: 1
23+
# CHECK: Return address column: 1
24+
# CHECK: Augmentation data: 1B
25+
# CHECK: DW_CFA_def_cfa: reg7 +8
26+
# CHECK: CFA=reg7+8
27+
28+
# CHECK: 00000014 00000010 00000018 FDE cie=00000000 pc=[[#%x,MAIN]]...[[#%x,MAIN+1]]
29+
# CHECK: Format: DWARF32
30+
# CHECK: DW_CFA_GNU_args_size: +16
31+
# CHECK: DW_CFA_nop:
32+
# CHECK: 0x[[#%x,MAIN]]: CFA=reg7+8
33+
34+
#--- sdata4.s
35+
.globl _main
36+
_main:
37+
retq
38+
LmainEnd:
39+
40+
.balign 4
41+
.section __TEXT,__eh_frame
42+
# Although we don't reference this EhFrame symbol directly, we must have at
43+
# least one non-local symbol in this section, otherwise llvm-mc generates bogus
44+
# subtractor relocations.
45+
EhFrame:
46+
LCieHdr:
47+
.long LCieEnd - LCieStart
48+
LCieStart:
49+
.long 0 # CIE ID
50+
.byte 1 # CIE version
51+
.ascii "zR\0"
52+
.byte 1 # Code alignment
53+
.byte 1 # Data alignment
54+
.byte 1 # RA column
55+
.byte 1 # Augmentation size
56+
.byte 0x1b # FDE pointer encoding (pcrel | sdata4)
57+
.byte 0xc, 7, 8 # DW_CFA_def_cfa reg7 +8
58+
.balign 4
59+
LCieEnd:
60+
61+
LFdeHdr:
62+
.long LFdeEnd - LFdeStart
63+
LFdeStart:
64+
.long LFdeStart - LCieHdr
65+
# The next two fields are longs instead of quads because of the sdata4
66+
# encoding.
67+
.long _main - . # Function address
68+
.long LmainEnd - _main # Function length
69+
.byte 0
70+
## Insert DW_CFA_GNU_args_size to prevent ld64 from creating a compact unwind
71+
## entry to replace this FDE. Makes it easier for us to cross-check behavior
72+
## across the two linkers (LLD never bothers trying to synthesize compact
73+
## unwind if it is not already present).
74+
.byte 0x2e, 0x10 # DW_CFA_GNU_args_size
75+
.balign 4
76+
LFdeEnd:
77+
78+
.long 0 # terminator
79+
80+
.subsections_via_symbols

0 commit comments

Comments
 (0)