Skip to content

Commit d6e5bfc

Browse files
int3 authored and tstellar committed
[lld-macho] Support EH frame pointer encodings that use sdata4
Previously we only supported the system pointer size (aka the `absptr` encoding) because `llvm-mc`'s CFI directives always generate EH frames with that encoding. But libffi uses 4-byte-encoded, hand-rolled EH frames, so this patch adds support for the `sdata4` encoding. Fixes llvm#56576. Reviewed By: #lld-macho, oontvoo Differential Revision: https://reviews.llvm.org/D130804 (cherry picked from commit 6c9f681)
1 parent 2fb8f67 commit d6e5bfc

File tree

4 files changed

+117
-25
lines changed

4 files changed

+117
-25
lines changed

lld/MachO/EhFrame.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,17 +58,17 @@ uint32_t EhReader::readU32(size_t *off) const {
5858
return v;
5959
}
6060

61-
uint64_t EhReader::readPointer(size_t *off) const {
62-
if (*off + wordSize > data.size())
61+
uint64_t EhReader::readPointer(size_t *off, uint8_t size) const {
62+
if (*off + size > data.size())
6363
failOn(*off, "unexpected end of CIE/FDE");
6464
uint64_t v;
65-
if (wordSize == 8)
65+
if (size == 8)
6666
v = read64le(data.data() + *off);
6767
else {
68-
assert(wordSize == 4);
68+
assert(size == 4);
6969
v = read32le(data.data() + *off);
7070
}
71-
*off += wordSize;
71+
*off += size;
7272
return v;
7373
}
7474

lld/MachO/EhFrame.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,17 +55,16 @@ namespace macho {
5555

5656
class EhReader {
5757
public:
58-
EhReader(const ObjFile *file, ArrayRef<uint8_t> data, size_t dataOff,
59-
size_t wordSize)
60-
: file(file), data(data), dataOff(dataOff), wordSize(wordSize) {}
58+
EhReader(const ObjFile *file, ArrayRef<uint8_t> data, size_t dataOff)
59+
: file(file), data(data), dataOff(dataOff) {}
6160
size_t size() const { return data.size(); }
6261
// Read and validate the length field.
6362
uint64_t readLength(size_t *off) const;
6463
// Skip the length field without doing validation.
6564
void skipValidLength(size_t *off) const;
6665
uint8_t readByte(size_t *off) const;
6766
uint32_t readU32(size_t *off) const;
68-
uint64_t readPointer(size_t *off) const;
67+
uint64_t readPointer(size_t *off, uint8_t size) const;
6968
StringRef readString(size_t *off) const;
7069
void skipLeb128(size_t *off) const;
7170
void failOn(size_t errOff, const Twine &msg) const;
@@ -76,7 +75,6 @@ class EhReader {
7675
// The offset of the data array within its section. Used only for error
7776
// reporting.
7877
const size_t dataOff;
79-
size_t wordSize;
8078
};
8179

8280
// The EH frame format, when emitted by llvm-mc, consists of a number of

lld/MachO/InputFiles.cpp

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,7 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
385385
}
386386

387387
void ObjFile::splitEhFrames(ArrayRef<uint8_t> data, Section &ehFrameSection) {
388-
EhReader reader(this, data, /*dataOff=*/0, target->wordSize);
388+
EhReader reader(this, data, /*dataOff=*/0);
389389
size_t off = 0;
390390
while (off < reader.size()) {
391391
uint64_t frameOff = off;
@@ -1290,19 +1290,32 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
12901290

12911291
struct CIE {
12921292
macho::Symbol *personalitySymbol = nullptr;
1293-
bool fdesHaveLsda = false;
12941293
bool fdesHaveAug = false;
1294+
uint8_t lsdaPtrSize = 0; // 0 => no LSDA
1295+
uint8_t funcPtrSize = 0;
12951296
};
12961297

1298+
static uint8_t pointerEncodingToSize(uint8_t enc) {
1299+
switch (enc & 0xf) {
1300+
case dwarf::DW_EH_PE_absptr:
1301+
return target->wordSize;
1302+
case dwarf::DW_EH_PE_sdata4:
1303+
return 4;
1304+
case dwarf::DW_EH_PE_sdata8:
1305+
// ld64 doesn't actually support sdata8, but this seems simple enough...
1306+
return 8;
1307+
default:
1308+
return 0;
1309+
};
1310+
}
1311+
12971312
static CIE parseCIE(const InputSection *isec, const EhReader &reader,
12981313
size_t off) {
12991314
// Handling the full generality of possible DWARF encodings would be a major
13001315
// pain. We instead take advantage of our knowledge of how llvm-mc encodes
13011316
// DWARF and handle just that.
13021317
constexpr uint8_t expectedPersonalityEnc =
13031318
dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_sdata4;
1304-
constexpr uint8_t expectedPointerEnc =
1305-
dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_absptr;
13061319

13071320
CIE cie;
13081321
uint8_t version = reader.readByte(&off);
@@ -1329,16 +1342,17 @@ static CIE parseCIE(const InputSection *isec, const EhReader &reader,
13291342
break;
13301343
}
13311344
case 'L': {
1332-
cie.fdesHaveLsda = true;
13331345
uint8_t lsdaEnc = reader.readByte(&off);
1334-
if (lsdaEnc != expectedPointerEnc)
1346+
cie.lsdaPtrSize = pointerEncodingToSize(lsdaEnc);
1347+
if (cie.lsdaPtrSize == 0)
13351348
reader.failOn(off, "unexpected LSDA encoding 0x" +
13361349
Twine::utohexstr(lsdaEnc));
13371350
break;
13381351
}
13391352
case 'R': {
13401353
uint8_t pointerEnc = reader.readByte(&off);
1341-
if (pointerEnc != expectedPointerEnc)
1354+
cie.funcPtrSize = pointerEncodingToSize(pointerEnc);
1355+
if (cie.funcPtrSize == 0 || !(pointerEnc & dwarf::DW_EH_PE_pcrel))
13421356
reader.failOn(off, "unexpected pointer encoding 0x" +
13431357
Twine::utohexstr(pointerEnc));
13441358
break;
@@ -1468,7 +1482,7 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
14681482
else if (isec->symbols[0]->value != 0)
14691483
fatal("found symbol at unexpected offset in __eh_frame");
14701484

1471-
EhReader reader(this, isec->data, subsec.offset, target->wordSize);
1485+
EhReader reader(this, isec->data, subsec.offset);
14721486
size_t dataOff = 0; // Offset from the start of the EH frame.
14731487
reader.skipValidLength(&dataOff); // readLength() already validated this.
14741488
// cieOffOff is the offset from the start of the EH frame to the cieOff
@@ -1507,20 +1521,20 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
15071521
continue;
15081522
}
15091523

1524+
assert(cieMap.count(cieIsec));
1525+
const CIE &cie = cieMap[cieIsec];
15101526
// Offset of the function address within the EH frame.
15111527
const size_t funcAddrOff = dataOff;
1512-
uint64_t funcAddr = reader.readPointer(&dataOff) + ehFrameSection.addr +
1513-
isecOff + funcAddrOff;
1514-
uint32_t funcLength = reader.readPointer(&dataOff);
1528+
uint64_t funcAddr = reader.readPointer(&dataOff, cie.funcPtrSize) +
1529+
ehFrameSection.addr + isecOff + funcAddrOff;
1530+
uint32_t funcLength = reader.readPointer(&dataOff, cie.funcPtrSize);
15151531
size_t lsdaAddrOff = 0; // Offset of the LSDA address within the EH frame.
1516-
assert(cieMap.count(cieIsec));
1517-
const CIE &cie = cieMap[cieIsec];
15181532
Optional<uint64_t> lsdaAddrOpt;
15191533
if (cie.fdesHaveAug) {
15201534
reader.skipLeb128(&dataOff);
15211535
lsdaAddrOff = dataOff;
1522-
if (cie.fdesHaveLsda) {
1523-
uint64_t lsdaOff = reader.readPointer(&dataOff);
1536+
if (cie.lsdaPtrSize != 0) {
1537+
uint64_t lsdaOff = reader.readPointer(&dataOff, cie.lsdaPtrSize);
15241538
if (lsdaOff != 0) // FIXME possible to test this?
15251539
lsdaAddrOpt = ehFrameSection.addr + isecOff + lsdaAddrOff + lsdaOff;
15261540
}

lld/test/MachO/eh-frame-sdata4.s

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# REQUIRES: x86
2+
# RUN: rm -rf %t; split-file %s %t
3+
4+
## Test that we correctly handle the sdata4 DWARF pointer encoding. llvm-mc's
5+
## CFI directives always generate EH frames using the absptr (i.e. system
6+
## pointer size) encoding, but it is possible to hand-roll your own EH frames
7+
## that use the sdata4 encoding. For instance, libffi does this.
8+
9+
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %t/sdata4.s -o %t/sdata4.o
10+
# RUN: %lld -lSystem %t/sdata4.o -o %t/sdata4
11+
# RUN: llvm-objdump --macho --syms --dwarf=frames %t/sdata4 | FileCheck %s
12+
13+
# CHECK: SYMBOL TABLE:
14+
# CHECK: [[#%.16x,MAIN:]] g F __TEXT,__text _main
15+
16+
# CHECK: .eh_frame contents:
17+
# CHECK: 00000000 00000010 00000000 CIE
18+
# CHECK: Format: DWARF32
19+
# CHECK: Version: 1
20+
# CHECK: Augmentation: "zR"
21+
# CHECK: Code alignment factor: 1
22+
# CHECK: Data alignment factor: 1
23+
# CHECK: Return address column: 1
24+
# CHECK: Augmentation data: 1B
25+
# CHECK: DW_CFA_def_cfa: reg7 +8
26+
# CHECK: CFA=reg7+8
27+
28+
# CHECK: 00000014 00000010 00000018 FDE cie=00000000 pc=[[#%x,MAIN]]...[[#%x,MAIN+1]]
29+
# CHECK: Format: DWARF32
30+
# CHECK: DW_CFA_GNU_args_size: +16
31+
# CHECK: DW_CFA_nop:
32+
# CHECK: 0x[[#%x,MAIN]]: CFA=reg7+8
33+
34+
#--- sdata4.s
35+
.globl _main
36+
_main:
37+
retq
38+
LmainEnd:
39+
40+
.balign 4
41+
.section __TEXT,__eh_frame
42+
# Although we don't reference this EhFrame symbol directly, we must have at
43+
# least one non-local symbol in this section, otherwise llvm-mc generates bogus
44+
# subtractor relocations.
45+
EhFrame:
46+
LCieHdr:
47+
.long LCieEnd - LCieStart
48+
LCieStart:
49+
.long 0 # CIE ID
50+
.byte 1 # CIE version
51+
.ascii "zR\0"
52+
.byte 1 # Code alignment
53+
.byte 1 # Data alignment
54+
.byte 1 # RA column
55+
.byte 1 # Augmentation size
56+
.byte 0x1b # FDE pointer encoding (pcrel | sdata4)
57+
.byte 0xc, 7, 8 # DW_CFA_def_cfa reg7 +8
58+
.balign 4
59+
LCieEnd:
60+
61+
LFdeHdr:
62+
.long LFdeEnd - LFdeStart
63+
LFdeStart:
64+
.long LFdeStart - LCieHdr
65+
# The next two fields are longs instead of quads because of the sdata4
66+
# encoding.
67+
.long _main - . # Function address
68+
.long LmainEnd - _main # Function length
69+
.byte 0
70+
## Insert DW_CFA_GNU_args_size to prevent ld64 from creating a compact unwind
71+
## entry to replace this FDE. Makes it easier for us to cross-check behavior
72+
## across the two linkers (LLD never bothers trying to synthesize compact
73+
## unwind if it is not already present).
74+
.byte 0x2e, 0x10 # DW_CFA_GNU_args_size
75+
.balign 4
76+
LFdeEnd:
77+
78+
.long 0 # terminator
79+
80+
.subsections_via_symbols

0 commit comments

Comments
 (0)