Skip to content

Commit 5d3ee68

Browse files
committed
swift-reflection-dump: Base ObjectMemoryReader address space on image vm address.
Use each image's own preferred VM address mappings as the base of the "remote" address space that ObjectMemoryReader presents for that image. If multiple images are loaded, differentiate them by using the top 16 bits of the remote address as an index into the array of images. This should make absolute pointers in the file Just Work without sliding in most cases; we would only need to mix in the image index to obtain a value that is also a valid remote address.
1 parent 3afbe31 commit 5d3ee68

File tree

4 files changed

+203
-97
lines changed

4 files changed

+203
-97
lines changed

include/swift/Reflection/ReflectionContext.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,8 @@ class ReflectionContext
393393
auto SecBuf = this->getReader().readBytes(
394394
RemoteAddress(SectionHdrAddress + (I * SectionEntrySize)),
395395
SectionEntrySize);
396+
if (!SecBuf)
397+
return false;
396398
auto SecHdr =
397399
reinterpret_cast<const typename T::Section *>(SecBuf.get());
398400
SecHdrVec.push_back(SecHdr);

test/Reflection/Inputs/main.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
// a dummy main.swift we can use to build executables to test reflection on
2+
// intentionally left blank
3+

test/Reflection/typeref_decoding.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
// REQUIRES: no_asan
22
// RUN: %empty-directory(%t)
3+
34
// RUN: %target-build-swift -Xfrontend -enable-anonymous-context-mangled-names %S/Inputs/ConcreteTypes.swift %S/Inputs/GenericTypes.swift %S/Inputs/Protocols.swift %S/Inputs/Extensions.swift %S/Inputs/Closures.swift -parse-as-library -emit-module -emit-library -module-name TypesToReflect -o %t/%target-library-name(TypesToReflect)
5+
// RUN: %target-build-swift -Xfrontend -enable-anonymous-context-mangled-names %S/Inputs/ConcreteTypes.swift %S/Inputs/GenericTypes.swift %S/Inputs/Protocols.swift %S/Inputs/Extensions.swift %S/Inputs/Closures.swift %S/Inputs/main.swift -emit-module -emit-executable -module-name TypesToReflect -o %t/TypesToReflect
6+
47
// RUN: %target-swift-reflection-dump -binary-filename %t/%target-library-name(TypesToReflect) | %FileCheck %s
8+
// RUN: %target-swift-reflection-dump -binary-filename %t/TypesToReflect | %FileCheck %s
59

610
// CHECK: FIELDS:
711
// CHECK: =======

tools/swift-reflection-dump/swift-reflection-dump.cpp

Lines changed: 194 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -81,128 +81,208 @@ template <typename T> static T unwrap(llvm::Expected<T> value) {
8181
exit(EXIT_FAILURE);
8282
}
8383

84-
static void reportError(std::error_code EC) {
85-
assert(EC);
86-
llvm::errs() << "swift-reflection-test error: " << EC.message() << ".\n";
87-
exit(EXIT_FAILURE);
88-
}
89-
9084
using NativeReflectionContext =
9185
swift::reflection::ReflectionContext<External<RuntimeTarget<sizeof(uintptr_t)>>>;
9286

9387
using ReadBytesResult = swift::remote::MemoryReader::ReadBytesResult;
9488

95-
static uint64_t getSectionAddress(SectionRef S) {
96-
// See COFFObjectFile.cpp for the implementation of
97-
// COFFObjectFile::getSectionAddress. The image base address is added
98-
// to all the addresses of the sections, thus the behavior is slightly different from
99-
// the other platforms.
100-
if (auto C = dyn_cast<COFFObjectFile>(S.getObject()))
101-
return S.getAddress() - C->getImageBase();
102-
return S.getAddress();
103-
}
104-
105-
static bool needToRelocate(SectionRef S) {
106-
if (!getSectionAddress(S))
107-
return false;
108-
109-
if (auto EO = dyn_cast<ELFObjectFileBase>(S.getObject())) {
110-
static const llvm::StringSet<> ELFSectionsList = {
111-
".data", ".rodata", "swift5_protocols", "swift5_protocol_conformances",
112-
"swift5_typeref", "swift5_reflstr", "swift5_assocty", "swift5_replace",
113-
"swift5_type_metadata", "swift5_fieldmd", "swift5_capture", "swift5_builtin"
114-
};
115-
StringRef Name;
116-
if (auto EC = S.getName(Name))
117-
reportError(EC);
118-
return ELFSectionsList.count(Name);
119-
}
120-
121-
return true;
122-
}
89+
// Since ObjectMemoryReader maintains ownership of the ObjectFiles and their
90+
// raw data, we can vend ReadBytesResults with no-op destructors.
91+
static void no_op_destructor(const void*) {}
12392

12493

12594
class Image {
126-
const ObjectFile *O;
127-
uint64_t VASize;
128-
129-
struct RelocatedRegion {
130-
uint64_t Start, Size;
131-
const char *Base;
95+
private:
96+
struct Segment {
97+
uint64_t Addr;
98+
StringRef Contents;
13299
};
100+
101+
uint64_t HeaderAddress;
102+
std::vector<Segment> Segments;
103+
104+
void scanMachO(const MachOObjectFile *O) {
105+
using namespace llvm::MachO;
106+
107+
HeaderAddress = UINT64_MAX;
108+
109+
for (const auto &Load : O->load_commands()) {
110+
if (Load.C.cmd == LC_SEGMENT_64) {
111+
auto Seg = O->getSegment64LoadCommand(Load);
112+
if (Seg.filesize == 0)
113+
continue;
114+
115+
auto contents = O->getData().slice(Seg.fileoff,
116+
Seg.fileoff + Seg.filesize);
117+
118+
if (contents.empty() || contents.size() != Seg.filesize)
119+
continue;
120+
121+
Segments.push_back({Seg.vmaddr, contents});
122+
HeaderAddress = std::min(HeaderAddress, Seg.vmaddr);
123+
} else if (Load.C.cmd == LC_SEGMENT) {
124+
auto Seg = O->getSegmentLoadCommand(Load);
125+
if (Seg.filesize == 0)
126+
continue;
127+
128+
auto contents = O->getData().slice(Seg.fileoff,
129+
Seg.fileoff + Seg.filesize);
130+
131+
if (contents.empty() || contents.size() != Seg.filesize)
132+
continue;
133+
134+
Segments.push_back({Seg.vmaddr, contents});
135+
HeaderAddress = std::min(HeaderAddress, (uint64_t)Seg.vmaddr);
136+
}
137+
}
138+
}
139+
140+
template<typename ELFT>
141+
void scanELFType(const ELFObjectFile<ELFT> *O) {
142+
using namespace llvm::ELF;
133143

134-
std::vector<RelocatedRegion> RelocatedRegions;
135-
136-
public:
137-
explicit Image(const ObjectFile *O) : O(O), VASize(O->getData().size()) {
138-
for (SectionRef S : O->sections()) {
139-
if (!needToRelocate(S))
140-
continue;
141-
auto SectionAddr = getSectionAddress(S);
142-
if (SectionAddr)
143-
VASize = std::max(VASize, SectionAddr + S.getSize());
144+
HeaderAddress = UINT64_MAX;
144145

145-
llvm::Expected<llvm::StringRef> Content = S.getContents();
146-
if (!Content)
147-
reportError(errorToErrorCode(Content.takeError()));
146+
auto phdrs = O->getELFFile()->program_headers();
147+
if (!phdrs) {
148+
llvm::consumeError(phdrs.takeError());
149+
return;
150+
}
148151

149-
auto PhysOffset = (uintptr_t)Content->data() - (uintptr_t)O->getData().data();
152+
for (auto &ph : *phdrs) {
153+
if (ph.p_filesz == 0)
154+
continue;
150155

151-
if (PhysOffset == SectionAddr) {
156+
auto contents = O->getData().slice(ph.p_offset,
157+
ph.p_offset + ph.p_filesz);
158+
if (contents.empty() || contents.size() != ph.p_filesz)
152159
continue;
153-
}
154-
155-
RelocatedRegions.push_back(RelocatedRegion{
156-
SectionAddr,
157-
Content->size(),
158-
Content->data()});
160+
161+
Segments.push_back({ph.p_vaddr, contents});
162+
HeaderAddress = std::min(HeaderAddress, (uint64_t)ph.p_vaddr);
159163
}
160164
}
161-
162-
RemoteAddress getStartAddress() const {
163-
return RemoteAddress((uintptr_t)O->getData().data());
165+
166+
void scanELF(const ELFObjectFileBase *O) {
167+
if (auto le32 = dyn_cast<ELFObjectFile<ELF32LE>>(O)) {
168+
scanELFType(le32);
169+
} else if (auto be32 = dyn_cast<ELFObjectFile<ELF32BE>>(O)) {
170+
scanELFType(be32);
171+
} else if (auto le64 = dyn_cast<ELFObjectFile<ELF64LE>>(O)) {
172+
scanELFType(le64);
173+
} else if (auto be64 = dyn_cast<ELFObjectFile<ELF64BE>>(O)) {
174+
scanELFType(be64);
175+
}
176+
177+
// FIXME: ReflectionContext tries to read bits of the ELF structure that
178+
// aren't normally mapped by a phdr. Until that's fixed,
179+
// allow access to the whole file 1:1 in address space that isn't otherwise
180+
// mapped.
181+
Segments.push_back({HeaderAddress, O->getData()});
164182
}
165-
166-
bool isAddressValid(RemoteAddress Addr, uint64_t Size) const {
167-
auto start = getStartAddress().getAddressData();
168-
return start <= Addr.getAddressData()
169-
&& Addr.getAddressData() + Size <= start + VASize;
183+
184+
void scanCOFF(const COFFObjectFile *O) {
185+
HeaderAddress = O->getImageBase();
186+
187+
for (auto SectionRef : O->sections()) {
188+
auto Section = O->getCOFFSection(SectionRef);
189+
190+
if (Section->SizeOfRawData == 0)
191+
continue;
192+
193+
auto SectionBase = O->getImageBase() + Section->VirtualAddress;
194+
auto SectionContent =
195+
O->getData().slice(Section->PointerToRawData,
196+
Section->PointerToRawData + Section->SizeOfRawData);
197+
if (SectionContent.empty()
198+
|| SectionContent.size() != Section->SizeOfRawData)
199+
continue;
200+
201+
Segments.push_back({SectionBase, SectionContent});
202+
}
203+
204+
Segments.push_back({HeaderAddress, O->getData()});
170205
}
171206

172-
ReadBytesResult readBytes(RemoteAddress Addr, uint64_t Size) {
173-
if (!isAddressValid(Addr, Size))
174-
return ReadBytesResult(nullptr, [](const void *) {});
207+
public:
208+
explicit Image(const ObjectFile *O) {
209+
// Unfortunately llvm doesn't provide a uniform interface for iterating
210+
// loadable segments or dynamic relocations in executable images yet.
211+
if (auto macho = dyn_cast<MachOObjectFile>(O)) {
212+
scanMachO(macho);
213+
} else if (auto elf = dyn_cast<ELFObjectFileBase>(O)) {
214+
scanELF(elf);
215+
} else if (auto coff = dyn_cast<COFFObjectFile>(O)) {
216+
scanCOFF(coff);
217+
} else {
218+
fputs("unsupported image format\n", stderr);
219+
abort();
220+
}
175221

176-
auto addrValue = Addr.getAddressData();
177-
auto base = O->getData().data();
178-
auto offset = addrValue - (uint64_t)base;
179-
for (auto &region : RelocatedRegions) {
180-
if (region.Start <= offset && offset < region.Start + region.Size) {
181-
// Read shouldn't need to straddle section boundaries.
182-
if (offset + Size > region.Start + region.Size)
183-
return ReadBytesResult(nullptr, [](const void *) {});
184-
185-
offset -= region.Start;
186-
base = region.Base;
187-
break;
222+
// ObjectMemoryReader uses the most significant 16 bits of the address to
223+
// index multiple images, so if an object maps stuff out of that range
224+
// we won't be able to read it. 2**48 of virtual address space ought to be
225+
// enough for anyone, but warn if we blow that limit.
226+
for (auto Segment : Segments) {
227+
if (Segment.Addr >= 0xFFFFFFFFFFFFull) {
228+
fputs("warning: segment mapped at address above 2**48\n", stderr);
229+
continue;
188230
}
189231
}
232+
}
190233

191-
return ReadBytesResult(base + offset, [](const void *) {});
234+
uint64_t getStartAddress() const {
235+
return HeaderAddress;
236+
}
237+
238+
StringRef getContentsAtAddress(uint64_t Addr, uint64_t Size) const {
239+
for (auto &Segment : Segments) {
240+
auto addrInSegment = Segment.Addr <= Addr
241+
&& Addr + Size <= Segment.Addr + Segment.Contents.size();
242+
243+
if (!addrInSegment)
244+
continue;
245+
246+
auto offset = Addr - Segment.Addr;
247+
248+
return Segment.Contents.drop_front(offset);
249+
}
250+
return {};
192251
}
193252
};
194253

254+
/// MemoryReader that reads from the on-disk representation of an executable
255+
/// or dynamic library image.
256+
///
257+
/// This reader uses a remote addressing scheme where the most significant
258+
/// 16 bits of the address value serve as an index into the array of loaded images,
259+
/// and the low 48 bits correspond to the preferred virtual address mapping of
260+
/// the image.
195261
class ObjectMemoryReader : public MemoryReader {
196262
std::vector<Image> Images;
197263

264+
StringRef getContentsAtAddress(uint64_t Addr, uint64_t Size) {
265+
auto imageIndex = Addr >> 48;
266+
if (imageIndex >= Images.size())
267+
return StringRef();
268+
269+
return Images[imageIndex]
270+
.getContentsAtAddress(Addr & 0xFFFFFFFFFFFFull, Size);
271+
}
272+
198273
public:
199274
explicit ObjectMemoryReader(
200275
const std::vector<const ObjectFile *> &ObjectFiles) {
276+
// We use a 16-bit index for images, so can't take more than 64K at once.
277+
if (ObjectFiles.size() >= 0x10000) {
278+
fputs("can't dump more than 65,536 images at once", stderr);
279+
abort();
280+
}
201281
for (const ObjectFile *O : ObjectFiles)
202282
Images.emplace_back(O);
203283
}
204284

205-
const std::vector<Image> &getImages() const { return Images; }
285+
ArrayRef<Image> getImages() const { return Images; }
206286

207287
bool queryDataLayout(DataLayoutQueryType type, void *inBuffer,
208288
void *outBuffer) override {
@@ -221,25 +301,41 @@ class ObjectMemoryReader : public MemoryReader {
221301

222302
return false;
223303
}
304+
305+
RemoteAddress getImageStartAddress(unsigned i) const {
306+
assert(i < Images.size());
307+
308+
return RemoteAddress(Images[i].getStartAddress() | ((uint64_t)i << 48));
309+
}
224310

311+
// TODO: We could consult the dynamic symbol tables of the images to
312+
// implement this.
225313
RemoteAddress getSymbolAddress(const std::string &name) override {
226314
return RemoteAddress(nullptr);
227315
}
228316

229317
ReadBytesResult readBytes(RemoteAddress Addr, uint64_t Size) override {
230-
auto I = std::find_if(Images.begin(), Images.end(), [=](const Image &I) {
231-
return I.isAddressValid(Addr, Size);
232-
});
233-
return I == Images.end() ? ReadBytesResult(nullptr, [](const void *) {})
234-
: I->readBytes(Addr, Size);
318+
auto addrValue = Addr.getAddressData();
319+
auto resultBuffer = getContentsAtAddress(addrValue, Size);
320+
return ReadBytesResult(resultBuffer.data(), no_op_destructor);
235321
}
236322

237323
bool readString(RemoteAddress Addr, std::string &Dest) override {
238-
ReadBytesResult R = readBytes(Addr, 1);
239-
if (!R)
324+
auto addrValue = Addr.getAddressData();
325+
auto resultBuffer = getContentsAtAddress(addrValue, 1);
326+
if (resultBuffer.empty())
240327
return false;
241-
StringRef Str((const char *)R.get());
242-
Dest.append(Str.begin(), Str.end());
328+
329+
// Make sure there's a null terminator somewhere in the contents.
330+
unsigned i = 0;
331+
for (unsigned e = resultBuffer.size(); i < e; ++i) {
332+
if (resultBuffer[i] == 0)
333+
goto found_terminator;
334+
}
335+
return false;
336+
337+
found_terminator:
338+
Dest.append(resultBuffer.begin(), resultBuffer.begin() + i);
243339
return true;
244340
}
245341
};
@@ -275,8 +371,9 @@ static int doDumpReflectionSections(ArrayRef<std::string> BinaryFilenames,
275371

276372
auto Reader = std::make_shared<ObjectMemoryReader>(ObjectFiles);
277373
NativeReflectionContext Context(Reader);
278-
for (const Image &I : Reader->getImages())
279-
Context.addImage(I.getStartAddress());
374+
for (unsigned i = 0, e = Reader->getImages().size(); i < e; ++i) {
375+
Context.addImage(Reader->getImageStartAddress(i));
376+
}
280377

281378
switch (Action) {
282379
case ActionType::DumpReflectionSections:

0 commit comments

Comments
 (0)