Skip to content

Commit 65a9e7d

Browse files
committed
Implement heuristic to prioritize ReflectionInfos in field descriptor search
Currently, when looking for field descriptors, we parse the reflection metadata in whatever order it was registered. This patch implements a heuristic: each ReflectionInfo gets a new optional list of potential module names, and we first search the ReflectionInfos whose list contains the module name of the type whose field descriptor we're looking for. rdar://87889973
1 parent 43032e8 commit 65a9e7d

File tree

4 files changed

+165
-87
lines changed

4 files changed

+165
-87
lines changed

include/swift/Reflection/ReflectionContext.h

Lines changed: 69 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,10 @@ class ReflectionContext
227227
return sizeof(StoredPointer) * 2;
228228
}
229229

230-
template <typename T> bool readMachOSections(RemoteAddress ImageStart) {
230+
template <typename T>
231+
bool readMachOSections(
232+
RemoteAddress ImageStart,
233+
llvm::SmallVector<llvm::StringRef, 1> PotentialModuleNames = {}) {
231234
auto Buf =
232235
this->getReader().readBytes(ImageStart, sizeof(typename T::Header));
233236
if (!Buf)
@@ -337,15 +340,15 @@ class ReflectionContext
337340
MPEnumMdSec.first == nullptr)
338341
return false;
339342

340-
ReflectionInfo info = {
341-
{FieldMdSec.first, FieldMdSec.second},
342-
{AssocTySec.first, AssocTySec.second},
343-
{BuiltinTySec.first, BuiltinTySec.second},
344-
{CaptureSec.first, CaptureSec.second},
345-
{TypeRefMdSec.first, TypeRefMdSec.second},
346-
{ReflStrMdSec.first, ReflStrMdSec.second},
347-
{ConformMdSec.first, ConformMdSec.second},
348-
{MPEnumMdSec.first, MPEnumMdSec.second}};
343+
ReflectionInfo info = {{FieldMdSec.first, FieldMdSec.second},
344+
{AssocTySec.first, AssocTySec.second},
345+
{BuiltinTySec.first, BuiltinTySec.second},
346+
{CaptureSec.first, CaptureSec.second},
347+
{TypeRefMdSec.first, TypeRefMdSec.second},
348+
{ReflStrMdSec.first, ReflStrMdSec.second},
349+
{ConformMdSec.first, ConformMdSec.second},
350+
{MPEnumMdSec.first, MPEnumMdSec.second},
351+
PotentialModuleNames};
349352

350353
this->addReflectionInfo(info);
351354

@@ -374,7 +377,9 @@ class ReflectionContext
374377
return true;
375378
}
376379

377-
bool readPECOFFSections(RemoteAddress ImageStart) {
380+
bool readPECOFFSections(
381+
RemoteAddress ImageStart,
382+
llvm::SmallVector<llvm::StringRef, 1> PotentialModuleNames = {}) {
378383
auto DOSHdrBuf = this->getReader().readBytes(
379384
ImageStart, sizeof(llvm::object::dos_header));
380385
if (!DOSHdrBuf)
@@ -463,20 +468,21 @@ class ReflectionContext
463468
MPEnumMdSec.first == nullptr)
464469
return false;
465470

466-
ReflectionInfo Info = {
467-
{FieldMdSec.first, FieldMdSec.second},
468-
{AssocTySec.first, AssocTySec.second},
469-
{BuiltinTySec.first, BuiltinTySec.second},
470-
{CaptureSec.first, CaptureSec.second},
471-
{TypeRefMdSec.first, TypeRefMdSec.second},
472-
{ReflStrMdSec.first, ReflStrMdSec.second},
473-
{ConformMdSec.first, ConformMdSec.second},
474-
{MPEnumMdSec.first, MPEnumMdSec.second}};
471+
ReflectionInfo Info = {{FieldMdSec.first, FieldMdSec.second},
472+
{AssocTySec.first, AssocTySec.second},
473+
{BuiltinTySec.first, BuiltinTySec.second},
474+
{CaptureSec.first, CaptureSec.second},
475+
{TypeRefMdSec.first, TypeRefMdSec.second},
476+
{ReflStrMdSec.first, ReflStrMdSec.second},
477+
{ConformMdSec.first, ConformMdSec.second},
478+
{MPEnumMdSec.first, MPEnumMdSec.second},
479+
PotentialModuleNames};
475480
this->addReflectionInfo(Info);
476481
return true;
477482
}
478483

479-
bool readPECOFF(RemoteAddress ImageStart) {
484+
bool readPECOFF(RemoteAddress ImageStart,
485+
llvm::SmallVector<llvm::StringRef, 1> PotentialModuleNames = {}) {
480486
auto Buf = this->getReader().readBytes(ImageStart,
481487
sizeof(llvm::object::dos_header));
482488
if (!Buf)
@@ -495,12 +501,14 @@ class ReflectionContext
495501
if (memcmp(Buf.get(), llvm::COFF::PEMagic, sizeof(llvm::COFF::PEMagic)))
496502
return false;
497503

498-
return readPECOFFSections(ImageStart);
504+
return readPECOFFSections(ImageStart, PotentialModuleNames);
499505
}
500506

501507
template <typename T>
502-
bool readELFSections(RemoteAddress ImageStart,
503-
llvm::Optional<llvm::sys::MemoryBlock> FileBuffer) {
508+
bool readELFSections(
509+
RemoteAddress ImageStart,
510+
llvm::Optional<llvm::sys::MemoryBlock> FileBuffer,
511+
llvm::SmallVector<llvm::StringRef, 1> PotentialModuleNames = {}) {
504512
// When reading from the FileBuffer we can simply return a pointer to
505513
// the underlying data.
506514
// When reading from the process, we need to keep the memory around
@@ -655,15 +663,15 @@ class ReflectionContext
655663
MPEnumMdSec.first == nullptr)
656664
return false;
657665

658-
ReflectionInfo info = {
659-
{FieldMdSec.first, FieldMdSec.second},
660-
{AssocTySec.first, AssocTySec.second},
661-
{BuiltinTySec.first, BuiltinTySec.second},
662-
{CaptureSec.first, CaptureSec.second},
663-
{TypeRefMdSec.first, TypeRefMdSec.second},
664-
{ReflStrMdSec.first, ReflStrMdSec.second},
665-
{ConformMdSec.first, ConformMdSec.second},
666-
{MPEnumMdSec.first, MPEnumMdSec.second}};
666+
ReflectionInfo info = {{FieldMdSec.first, FieldMdSec.second},
667+
{AssocTySec.first, AssocTySec.second},
668+
{BuiltinTySec.first, BuiltinTySec.second},
669+
{CaptureSec.first, CaptureSec.second},
670+
{TypeRefMdSec.first, TypeRefMdSec.second},
671+
{ReflStrMdSec.first, ReflStrMdSec.second},
672+
{ConformMdSec.first, ConformMdSec.second},
673+
{MPEnumMdSec.first, MPEnumMdSec.second},
674+
PotentialModuleNames};
667675

668676
this->addReflectionInfo(info);
669677
return true;
@@ -687,7 +695,10 @@ class ReflectionContext
687695
/// \return
688696
/// \b True if the metadata information was parsed successfully,
689697
/// \b false otherwise.
690-
bool readELF(RemoteAddress ImageStart, llvm::Optional<llvm::sys::MemoryBlock> FileBuffer) {
698+
bool
699+
readELF(RemoteAddress ImageStart,
700+
llvm::Optional<llvm::sys::MemoryBlock> FileBuffer,
701+
llvm::SmallVector<llvm::StringRef, 1> PotentialModuleNames = {}) {
691702
auto Buf =
692703
this->getReader().readBytes(ImageStart, sizeof(llvm::ELF::Elf64_Ehdr));
693704
if (!Buf)
@@ -703,16 +714,18 @@ class ReflectionContext
703714
unsigned char FileClass = Hdr->getFileClass();
704715
if (FileClass == llvm::ELF::ELFCLASS64) {
705716
return readELFSections<ELFTraits<llvm::ELF::ELFCLASS64>>(
706-
ImageStart, FileBuffer);
717+
ImageStart, FileBuffer, PotentialModuleNames);
707718
} else if (FileClass == llvm::ELF::ELFCLASS32) {
708719
return readELFSections<ELFTraits<llvm::ELF::ELFCLASS32>>(
709-
ImageStart, FileBuffer);
720+
ImageStart, FileBuffer, PotentialModuleNames);
710721
} else {
711722
return false;
712723
}
713724
}
714725

715-
bool addImage(RemoteAddress ImageStart) {
726+
bool
727+
addImage(RemoteAddress ImageStart,
728+
llvm::SmallVector<llvm::StringRef, 1> PotentialModuleNames = {}) {
716729
// Read the first few bytes to look for a magic header.
717730
auto Magic = this->getReader().readBytes(ImageStart, sizeof(uint32_t));
718731
if (!Magic)
@@ -723,18 +736,18 @@ class ReflectionContext
723736

724737
// 32- and 64-bit Mach-O.
725738
if (MagicWord == llvm::MachO::MH_MAGIC) {
726-
return readMachOSections<MachOTraits<4>>(ImageStart);
739+
return readMachOSections<MachOTraits<4>>(ImageStart, PotentialModuleNames);
727740
}
728741

729742
if (MagicWord == llvm::MachO::MH_MAGIC_64) {
730-
return readMachOSections<MachOTraits<8>>(ImageStart);
743+
return readMachOSections<MachOTraits<8>>(ImageStart, PotentialModuleNames);
731744
}
732745

733746
// PE. (This just checks for the DOS header; `readPECOFF` will further
734747
// validate the existence of the PE header.)
735748
auto MagicBytes = (const char*)Magic.get();
736749
if (MagicBytes[0] == 'M' && MagicBytes[1] == 'Z') {
737-
return readPECOFF(ImageStart);
750+
return readPECOFF(ImageStart, PotentialModuleNames);
738751
}
739752

740753

@@ -743,7 +756,8 @@ class ReflectionContext
743756
&& MagicBytes[1] == llvm::ELF::ElfMagic[1]
744757
&& MagicBytes[2] == llvm::ELF::ElfMagic[2]
745758
&& MagicBytes[3] == llvm::ELF::ElfMagic[3]) {
746-
return readELF(ImageStart, llvm::Optional<llvm::sys::MemoryBlock>());
759+
return readELF(ImageStart, llvm::Optional<llvm::sys::MemoryBlock>(),
760+
PotentialModuleNames);
747761
}
748762

749763
// We don't recognize the format.
@@ -758,9 +772,11 @@ class ReflectionContext
758772
/// \return
759773
/// \b True if any of the reflection sections were registered,
760774
/// \b false otherwise.
761-
bool addImage(llvm::function_ref<
762-
std::pair<RemoteRef<void>, uint64_t>(ReflectionSectionKind)>
763-
FindSection) {
775+
bool
776+
addImage(llvm::function_ref<
777+
std::pair<RemoteRef<void>, uint64_t>(ReflectionSectionKind)>
778+
FindSection,
779+
llvm::SmallVector<llvm::StringRef, 1> PotentialModuleNames = {}) {
764780
auto Sections = {
765781
ReflectionSectionKind::fieldmd, ReflectionSectionKind::assocty,
766782
ReflectionSectionKind::builtin, ReflectionSectionKind::capture,
@@ -784,11 +800,15 @@ class ReflectionContext
784800
if (llvm::all_of(Pairs, [](const auto &Pair) { return !Pair.first; }))
785801
return false;
786802

787-
ReflectionInfo Info = {
788-
{Pairs[0].first, Pairs[0].second}, {Pairs[1].first, Pairs[1].second},
789-
{Pairs[2].first, Pairs[2].second}, {Pairs[3].first, Pairs[3].second},
790-
{Pairs[4].first, Pairs[4].second}, {Pairs[5].first, Pairs[5].second},
791-
{Pairs[6].first, Pairs[6].second}, {Pairs[7].first, Pairs[7].second}};
803+
ReflectionInfo Info = {{Pairs[0].first, Pairs[0].second},
804+
{Pairs[1].first, Pairs[1].second},
805+
{Pairs[2].first, Pairs[2].second},
806+
{Pairs[3].first, Pairs[3].second},
807+
{Pairs[4].first, Pairs[4].second},
808+
{Pairs[5].first, Pairs[5].second},
809+
{Pairs[6].first, Pairs[6].second},
810+
{Pairs[7].first, Pairs[7].second},
811+
PotentialModuleNames};
792812
this->addReflectionInfo(Info);
793813
return true;
794814
}

include/swift/Reflection/TypeRefBuilder.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "swift/Reflection/TypeLowering.h"
2525
#include "swift/Reflection/TypeRef.h"
2626
#include "llvm/ADT/Optional.h"
27+
#include "llvm/ADT/SmallVector.h"
2728
#include <iomanip>
2829
#include <iostream>
2930
#include <ostream>
@@ -264,6 +265,7 @@ struct ReflectionInfo {
264265
GenericSection ReflectionString;
265266
GenericSection Conformance;
266267
MultiPayloadEnumSection MultiPayloadEnum;
268+
llvm::SmallVector<llvm::StringRef, 1> PotentialModuleNames;
267269
};
268270

269271
struct ClosureContextInfo {
@@ -926,13 +928,15 @@ class TypeRefBuilder {
926928
private:
927929
std::vector<ReflectionInfo> ReflectionInfos;
928930

929-
/// Index of the next Reflection Info that should be processed.
930-
/// This assumes that Reflection Infos are never removed from the vector.
931-
size_t FirstUnprocessedReflectionInfoIndex = 0;
932-
931+
/// Indexes of Reflection Infos we've already processed.
932+
llvm::DenseSet<size_t> ProcessedReflectionInfoIndexes;
933+
933934
llvm::Optional<std::string> normalizeReflectionName(RemoteRef<char> name);
934935
bool reflectionNameMatches(RemoteRef<char> reflectionName,
935936
StringRef searchName);
937+
void populateFieldTypeInfoCacheWithReflectionAtIndex(size_t Index);
938+
llvm::Optional<RemoteRef<FieldDescriptor>>
939+
findFieldDescriptorAtIndex(size_t Index, const std::string &MangledName);
936940

937941
public:
938942
RemoteRef<char> readTypeRef(uint64_t remoteAddr);
@@ -1871,7 +1875,6 @@ class TypeRefBuilder {
18711875
mangledTypeName};
18721876
}
18731877
};
1874-
18751878
public:
18761879
template <template <typename Runtime> class ObjCInteropKind,
18771880
unsigned PointerSize>

stdlib/public/Reflection/TypeRefBuilder.cpp

Lines changed: 75 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -193,40 +193,93 @@ const TypeRef *TypeRefBuilder::lookupSuperclass(const TypeRef *TR) {
193193
return Unsubstituted->subst(*this, *SubstMap);
194194
}
195195

196-
RemoteRef<FieldDescriptor>
197-
TypeRefBuilder::getFieldTypeInfo(const TypeRef *TR) {
196+
static llvm::Optional<StringRef> FindOutermostModuleName(NodePointer Node) {
197+
if (!Node)
198+
return {};
199+
// Breadth first search until we find the module name so we find the outermost
200+
// one.
201+
llvm::SmallVector<NodePointer, 8> Queue;
202+
Queue.push_back(Node);
203+
// Instead of removing items from the front of the queue we just iterate over
204+
// them.
205+
for (size_t i = 0; i < Queue.size(); ++i) {
206+
NodePointer Current = Queue[i];
207+
if (Current->getKind() == Node::Kind::Module) {
208+
if (Current->hasText())
209+
return Current->getText();
210+
else
211+
return {};
212+
}
213+
for (auto Child : *Current)
214+
Queue.push_back(Child);
215+
}
216+
return {};
217+
}
218+
219+
void TypeRefBuilder::populateFieldTypeInfoCacheWithReflectionAtIndex(
220+
size_t Index) {
221+
if (ProcessedReflectionInfoIndexes.contains(Index))
222+
return;
223+
224+
const auto &Info = ReflectionInfos[Index];
225+
for (auto FD : Info.Field) {
226+
if (!FD->hasMangledTypeName())
227+
continue;
228+
auto CandidateMangledName = readTypeRef(FD, FD->MangledTypeName);
229+
if (auto NormalizedName = normalizeReflectionName(CandidateMangledName)) {
230+
FieldTypeInfoCache[std::move(*NormalizedName)] = FD;
231+
}
232+
}
233+
234+
ProcessedReflectionInfoIndexes.insert(Index);
235+
}
236+
237+
llvm::Optional<RemoteRef<FieldDescriptor>>
238+
TypeRefBuilder::findFieldDescriptorAtIndex(size_t Index,
239+
const std::string &MangledName) {
240+
populateFieldTypeInfoCacheWithReflectionAtIndex(Index);
241+
auto Found = FieldTypeInfoCache.find(MangledName);
242+
if (Found != FieldTypeInfoCache.end()) {
243+
return Found->second;
244+
}
245+
return llvm::None;
246+
}
247+
248+
RemoteRef<FieldDescriptor> TypeRefBuilder::getFieldTypeInfo(const TypeRef *TR) {
198249
const std::string *MangledName;
199-
if (auto N = dyn_cast<NominalTypeRef>(TR))
250+
NodePointer Node;
251+
Demangler Dem;
252+
if (auto N = dyn_cast<NominalTypeRef>(TR)) {
253+
Node = N->getDemangling(Dem);
200254
MangledName = &N->getMangledName();
201-
else if (auto BG = dyn_cast<BoundGenericTypeRef>(TR))
255+
} else if (auto BG = dyn_cast<BoundGenericTypeRef>(TR)) {
256+
Node = BG->getDemangling(Dem);
202257
MangledName = &BG->getMangledName();
203-
else
258+
} else
204259
return nullptr;
205260

206261
// Try the cache.
207262
auto Found = FieldTypeInfoCache.find(*MangledName);
208263
if (Found != FieldTypeInfoCache.end())
209264
return Found->second;
210265

266+
// Heuristic: find the outermost Module node available, and try to parse the
267+
// ReflectionInfos with a matching name first.
268+
auto ModuleName = FindOutermostModuleName(Node);
269+
// If we couldn't find a module name or the type is imported (__C module) we
270+
// don't have any useful information on which image to look in for the type.
271+
if (ModuleName && ModuleName != llvm::StringRef("__C"))
272+
for (size_t i = 0; i < ReflectionInfos.size(); ++i)
273+
if (llvm::is_contained(ReflectionInfos[i].PotentialModuleNames,
274+
ModuleName))
275+
if (auto FD = findFieldDescriptorAtIndex(i, *MangledName))
276+
return *FD;
277+
211278
// On failure, fill out the cache, ReflectionInfo by ReflectionInfo,
212279
// until we find the field descriptor we're looking for.
213-
while (FirstUnprocessedReflectionInfoIndex < ReflectionInfos.size()) {
214-
auto &Info = ReflectionInfos[FirstUnprocessedReflectionInfoIndex];
215-
for (auto FD : Info.Field) {
216-
if (!FD->hasMangledTypeName())
217-
continue;
218-
auto CandidateMangledName = readTypeRef(FD, FD->MangledTypeName);
219-
if (auto NormalizedName = normalizeReflectionName(CandidateMangledName))
220-
FieldTypeInfoCache[std::move(*NormalizedName)] = FD;
221-
}
222-
223-
// Since we're done with the current ReflectionInfo, increment early in
224-
// case we get a cache hit.
225-
++FirstUnprocessedReflectionInfoIndex;
226-
Found = FieldTypeInfoCache.find(*MangledName);
227-
if (Found != FieldTypeInfoCache.end())
228-
return Found->second;
229-
}
280+
for (size_t i = 0; i < ReflectionInfos.size(); ++i)
281+
if (auto FD = findFieldDescriptorAtIndex(i, *MangledName))
282+
return *FD;
230283

231284
return nullptr;
232285
}

0 commit comments

Comments
 (0)