Skip to content

Commit 0277971

Browse files
authored
Merge pull request #59802 from augusto2112/heuristic-get-field-descriptor
Implement heuristic to prioritize reflection info in field descriptor search
2 parents 1123a19 + 65a9e7d commit 0277971

File tree

4 files changed

+165
-87
lines changed

4 files changed

+165
-87
lines changed

include/swift/Reflection/ReflectionContext.h

Lines changed: 69 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,10 @@ class ReflectionContext
227227
return sizeof(StoredPointer) * 2;
228228
}
229229

230-
template <typename T> bool readMachOSections(RemoteAddress ImageStart) {
230+
template <typename T>
231+
bool readMachOSections(
232+
RemoteAddress ImageStart,
233+
llvm::SmallVector<llvm::StringRef, 1> PotentialModuleNames = {}) {
231234
auto Buf =
232235
this->getReader().readBytes(ImageStart, sizeof(typename T::Header));
233236
if (!Buf)
@@ -337,15 +340,15 @@ class ReflectionContext
337340
MPEnumMdSec.first == nullptr)
338341
return false;
339342

340-
ReflectionInfo info = {
341-
{FieldMdSec.first, FieldMdSec.second},
342-
{AssocTySec.first, AssocTySec.second},
343-
{BuiltinTySec.first, BuiltinTySec.second},
344-
{CaptureSec.first, CaptureSec.second},
345-
{TypeRefMdSec.first, TypeRefMdSec.second},
346-
{ReflStrMdSec.first, ReflStrMdSec.second},
347-
{ConformMdSec.first, ConformMdSec.second},
348-
{MPEnumMdSec.first, MPEnumMdSec.second}};
343+
ReflectionInfo info = {{FieldMdSec.first, FieldMdSec.second},
344+
{AssocTySec.first, AssocTySec.second},
345+
{BuiltinTySec.first, BuiltinTySec.second},
346+
{CaptureSec.first, CaptureSec.second},
347+
{TypeRefMdSec.first, TypeRefMdSec.second},
348+
{ReflStrMdSec.first, ReflStrMdSec.second},
349+
{ConformMdSec.first, ConformMdSec.second},
350+
{MPEnumMdSec.first, MPEnumMdSec.second},
351+
PotentialModuleNames};
349352

350353
this->addReflectionInfo(info);
351354

@@ -374,7 +377,9 @@ class ReflectionContext
374377
return true;
375378
}
376379

377-
bool readPECOFFSections(RemoteAddress ImageStart) {
380+
bool readPECOFFSections(
381+
RemoteAddress ImageStart,
382+
llvm::SmallVector<llvm::StringRef, 1> PotentialModuleNames = {}) {
378383
auto DOSHdrBuf = this->getReader().readBytes(
379384
ImageStart, sizeof(llvm::object::dos_header));
380385
if (!DOSHdrBuf)
@@ -463,20 +468,21 @@ class ReflectionContext
463468
MPEnumMdSec.first == nullptr)
464469
return false;
465470

466-
ReflectionInfo Info = {
467-
{FieldMdSec.first, FieldMdSec.second},
468-
{AssocTySec.first, AssocTySec.second},
469-
{BuiltinTySec.first, BuiltinTySec.second},
470-
{CaptureSec.first, CaptureSec.second},
471-
{TypeRefMdSec.first, TypeRefMdSec.second},
472-
{ReflStrMdSec.first, ReflStrMdSec.second},
473-
{ConformMdSec.first, ConformMdSec.second},
474-
{MPEnumMdSec.first, MPEnumMdSec.second}};
471+
ReflectionInfo Info = {{FieldMdSec.first, FieldMdSec.second},
472+
{AssocTySec.first, AssocTySec.second},
473+
{BuiltinTySec.first, BuiltinTySec.second},
474+
{CaptureSec.first, CaptureSec.second},
475+
{TypeRefMdSec.first, TypeRefMdSec.second},
476+
{ReflStrMdSec.first, ReflStrMdSec.second},
477+
{ConformMdSec.first, ConformMdSec.second},
478+
{MPEnumMdSec.first, MPEnumMdSec.second},
479+
PotentialModuleNames};
475480
this->addReflectionInfo(Info);
476481
return true;
477482
}
478483

479-
bool readPECOFF(RemoteAddress ImageStart) {
484+
bool readPECOFF(RemoteAddress ImageStart,
485+
llvm::SmallVector<llvm::StringRef, 1> PotentialModuleNames = {}) {
480486
auto Buf = this->getReader().readBytes(ImageStart,
481487
sizeof(llvm::object::dos_header));
482488
if (!Buf)
@@ -495,12 +501,14 @@ class ReflectionContext
495501
if (memcmp(Buf.get(), llvm::COFF::PEMagic, sizeof(llvm::COFF::PEMagic)))
496502
return false;
497503

498-
return readPECOFFSections(ImageStart);
504+
return readPECOFFSections(ImageStart, PotentialModuleNames);
499505
}
500506

501507
template <typename T>
502-
bool readELFSections(RemoteAddress ImageStart,
503-
llvm::Optional<llvm::sys::MemoryBlock> FileBuffer) {
508+
bool readELFSections(
509+
RemoteAddress ImageStart,
510+
llvm::Optional<llvm::sys::MemoryBlock> FileBuffer,
511+
llvm::SmallVector<llvm::StringRef, 1> PotentialModuleNames = {}) {
504512
// When reading from the FileBuffer we can simply return a pointer to
505513
// the underlying data.
506514
// When reading from the process, we need to keep the memory around
@@ -655,15 +663,15 @@ class ReflectionContext
655663
MPEnumMdSec.first == nullptr)
656664
return false;
657665

658-
ReflectionInfo info = {
659-
{FieldMdSec.first, FieldMdSec.second},
660-
{AssocTySec.first, AssocTySec.second},
661-
{BuiltinTySec.first, BuiltinTySec.second},
662-
{CaptureSec.first, CaptureSec.second},
663-
{TypeRefMdSec.first, TypeRefMdSec.second},
664-
{ReflStrMdSec.first, ReflStrMdSec.second},
665-
{ConformMdSec.first, ConformMdSec.second},
666-
{MPEnumMdSec.first, MPEnumMdSec.second}};
666+
ReflectionInfo info = {{FieldMdSec.first, FieldMdSec.second},
667+
{AssocTySec.first, AssocTySec.second},
668+
{BuiltinTySec.first, BuiltinTySec.second},
669+
{CaptureSec.first, CaptureSec.second},
670+
{TypeRefMdSec.first, TypeRefMdSec.second},
671+
{ReflStrMdSec.first, ReflStrMdSec.second},
672+
{ConformMdSec.first, ConformMdSec.second},
673+
{MPEnumMdSec.first, MPEnumMdSec.second},
674+
PotentialModuleNames};
667675

668676
this->addReflectionInfo(info);
669677
return true;
@@ -687,7 +695,10 @@ class ReflectionContext
687695
/// \return
688696
/// \b True if the metadata information was parsed successfully,
689697
/// \b false otherwise.
690-
bool readELF(RemoteAddress ImageStart, llvm::Optional<llvm::sys::MemoryBlock> FileBuffer) {
698+
bool
699+
readELF(RemoteAddress ImageStart,
700+
llvm::Optional<llvm::sys::MemoryBlock> FileBuffer,
701+
llvm::SmallVector<llvm::StringRef, 1> PotentialModuleNames = {}) {
691702
auto Buf =
692703
this->getReader().readBytes(ImageStart, sizeof(llvm::ELF::Elf64_Ehdr));
693704
if (!Buf)
@@ -703,16 +714,18 @@ class ReflectionContext
703714
unsigned char FileClass = Hdr->getFileClass();
704715
if (FileClass == llvm::ELF::ELFCLASS64) {
705716
return readELFSections<ELFTraits<llvm::ELF::ELFCLASS64>>(
706-
ImageStart, FileBuffer);
717+
ImageStart, FileBuffer, PotentialModuleNames);
707718
} else if (FileClass == llvm::ELF::ELFCLASS32) {
708719
return readELFSections<ELFTraits<llvm::ELF::ELFCLASS32>>(
709-
ImageStart, FileBuffer);
720+
ImageStart, FileBuffer, PotentialModuleNames);
710721
} else {
711722
return false;
712723
}
713724
}
714725

715-
bool addImage(RemoteAddress ImageStart) {
726+
bool
727+
addImage(RemoteAddress ImageStart,
728+
llvm::SmallVector<llvm::StringRef, 1> PotentialModuleNames = {}) {
716729
// Read the first few bytes to look for a magic header.
717730
auto Magic = this->getReader().readBytes(ImageStart, sizeof(uint32_t));
718731
if (!Magic)
@@ -723,18 +736,18 @@ class ReflectionContext
723736

724737
// 32- and 64-bit Mach-O.
725738
if (MagicWord == llvm::MachO::MH_MAGIC) {
726-
return readMachOSections<MachOTraits<4>>(ImageStart);
739+
return readMachOSections<MachOTraits<4>>(ImageStart, PotentialModuleNames);
727740
}
728741

729742
if (MagicWord == llvm::MachO::MH_MAGIC_64) {
730-
return readMachOSections<MachOTraits<8>>(ImageStart);
743+
return readMachOSections<MachOTraits<8>>(ImageStart, PotentialModuleNames);
731744
}
732745

733746
// PE. (This just checks for the DOS header; `readPECOFF` will further
734747
// validate the existence of the PE header.)
735748
auto MagicBytes = (const char*)Magic.get();
736749
if (MagicBytes[0] == 'M' && MagicBytes[1] == 'Z') {
737-
return readPECOFF(ImageStart);
750+
return readPECOFF(ImageStart, PotentialModuleNames);
738751
}
739752

740753

@@ -743,7 +756,8 @@ class ReflectionContext
743756
&& MagicBytes[1] == llvm::ELF::ElfMagic[1]
744757
&& MagicBytes[2] == llvm::ELF::ElfMagic[2]
745758
&& MagicBytes[3] == llvm::ELF::ElfMagic[3]) {
746-
return readELF(ImageStart, llvm::Optional<llvm::sys::MemoryBlock>());
759+
return readELF(ImageStart, llvm::Optional<llvm::sys::MemoryBlock>(),
760+
PotentialModuleNames);
747761
}
748762

749763
// We don't recognize the format.
@@ -758,9 +772,11 @@ class ReflectionContext
758772
/// \return
759773
/// \b True if any of the reflection sections were registered,
760774
/// \b false otherwise.
761-
bool addImage(llvm::function_ref<
762-
std::pair<RemoteRef<void>, uint64_t>(ReflectionSectionKind)>
763-
FindSection) {
775+
bool
776+
addImage(llvm::function_ref<
777+
std::pair<RemoteRef<void>, uint64_t>(ReflectionSectionKind)>
778+
FindSection,
779+
llvm::SmallVector<llvm::StringRef, 1> PotentialModuleNames = {}) {
764780
auto Sections = {
765781
ReflectionSectionKind::fieldmd, ReflectionSectionKind::assocty,
766782
ReflectionSectionKind::builtin, ReflectionSectionKind::capture,
@@ -784,11 +800,15 @@ class ReflectionContext
784800
if (llvm::all_of(Pairs, [](const auto &Pair) { return !Pair.first; }))
785801
return false;
786802

787-
ReflectionInfo Info = {
788-
{Pairs[0].first, Pairs[0].second}, {Pairs[1].first, Pairs[1].second},
789-
{Pairs[2].first, Pairs[2].second}, {Pairs[3].first, Pairs[3].second},
790-
{Pairs[4].first, Pairs[4].second}, {Pairs[5].first, Pairs[5].second},
791-
{Pairs[6].first, Pairs[6].second}, {Pairs[7].first, Pairs[7].second}};
803+
ReflectionInfo Info = {{Pairs[0].first, Pairs[0].second},
804+
{Pairs[1].first, Pairs[1].second},
805+
{Pairs[2].first, Pairs[2].second},
806+
{Pairs[3].first, Pairs[3].second},
807+
{Pairs[4].first, Pairs[4].second},
808+
{Pairs[5].first, Pairs[5].second},
809+
{Pairs[6].first, Pairs[6].second},
810+
{Pairs[7].first, Pairs[7].second},
811+
PotentialModuleNames};
792812
this->addReflectionInfo(Info);
793813
return true;
794814
}

include/swift/Reflection/TypeRefBuilder.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "swift/Reflection/TypeLowering.h"
2525
#include "swift/Reflection/TypeRef.h"
2626
#include "llvm/ADT/Optional.h"
27+
#include "llvm/ADT/SmallVector.h"
2728
#include <iomanip>
2829
#include <iostream>
2930
#include <ostream>
@@ -264,6 +265,7 @@ struct ReflectionInfo {
264265
GenericSection ReflectionString;
265266
GenericSection Conformance;
266267
MultiPayloadEnumSection MultiPayloadEnum;
268+
llvm::SmallVector<llvm::StringRef, 1> PotentialModuleNames;
267269
};
268270

269271
struct ClosureContextInfo {
@@ -926,13 +928,15 @@ class TypeRefBuilder {
926928
private:
927929
std::vector<ReflectionInfo> ReflectionInfos;
928930

929-
/// Index of the next Reflection Info that should be processed.
930-
/// This assumes that Reflection Infos are never removed from the vector.
931-
size_t FirstUnprocessedReflectionInfoIndex = 0;
932-
931+
/// Indexes of Reflection Infos we've already processed.
932+
llvm::DenseSet<size_t> ProcessedReflectionInfoIndexes;
933+
933934
llvm::Optional<std::string> normalizeReflectionName(RemoteRef<char> name);
934935
bool reflectionNameMatches(RemoteRef<char> reflectionName,
935936
StringRef searchName);
937+
void populateFieldTypeInfoCacheWithReflectionAtIndex(size_t Index);
938+
llvm::Optional<RemoteRef<FieldDescriptor>>
939+
findFieldDescriptorAtIndex(size_t Index, const std::string &MangledName);
936940

937941
public:
938942
RemoteRef<char> readTypeRef(uint64_t remoteAddr);
@@ -1871,7 +1875,6 @@ class TypeRefBuilder {
18711875
mangledTypeName};
18721876
}
18731877
};
1874-
18751878
public:
18761879
template <template <typename Runtime> class ObjCInteropKind,
18771880
unsigned PointerSize>

stdlib/public/Reflection/TypeRefBuilder.cpp

Lines changed: 75 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -193,40 +193,93 @@ const TypeRef *TypeRefBuilder::lookupSuperclass(const TypeRef *TR) {
193193
return Unsubstituted->subst(*this, *SubstMap);
194194
}
195195

196-
RemoteRef<FieldDescriptor>
197-
TypeRefBuilder::getFieldTypeInfo(const TypeRef *TR) {
196+
static llvm::Optional<StringRef> FindOutermostModuleName(NodePointer Node) {
197+
if (!Node)
198+
return {};
199+
// Breadth first search until we find the module name so we find the outermost
200+
// one.
201+
llvm::SmallVector<NodePointer, 8> Queue;
202+
Queue.push_back(Node);
203+
// Instead of removing items from the front of the queue we just iterate over
204+
// them.
205+
for (size_t i = 0; i < Queue.size(); ++i) {
206+
NodePointer Current = Queue[i];
207+
if (Current->getKind() == Node::Kind::Module) {
208+
if (Current->hasText())
209+
return Current->getText();
210+
else
211+
return {};
212+
}
213+
for (auto Child : *Current)
214+
Queue.push_back(Child);
215+
}
216+
return {};
217+
}
218+
219+
void TypeRefBuilder::populateFieldTypeInfoCacheWithReflectionAtIndex(
220+
size_t Index) {
221+
if (ProcessedReflectionInfoIndexes.contains(Index))
222+
return;
223+
224+
const auto &Info = ReflectionInfos[Index];
225+
for (auto FD : Info.Field) {
226+
if (!FD->hasMangledTypeName())
227+
continue;
228+
auto CandidateMangledName = readTypeRef(FD, FD->MangledTypeName);
229+
if (auto NormalizedName = normalizeReflectionName(CandidateMangledName)) {
230+
FieldTypeInfoCache[std::move(*NormalizedName)] = FD;
231+
}
232+
}
233+
234+
ProcessedReflectionInfoIndexes.insert(Index);
235+
}
236+
237+
llvm::Optional<RemoteRef<FieldDescriptor>>
238+
TypeRefBuilder::findFieldDescriptorAtIndex(size_t Index,
239+
const std::string &MangledName) {
240+
populateFieldTypeInfoCacheWithReflectionAtIndex(Index);
241+
auto Found = FieldTypeInfoCache.find(MangledName);
242+
if (Found != FieldTypeInfoCache.end()) {
243+
return Found->second;
244+
}
245+
return llvm::None;
246+
}
247+
248+
RemoteRef<FieldDescriptor> TypeRefBuilder::getFieldTypeInfo(const TypeRef *TR) {
198249
const std::string *MangledName;
199-
if (auto N = dyn_cast<NominalTypeRef>(TR))
250+
NodePointer Node;
251+
Demangler Dem;
252+
if (auto N = dyn_cast<NominalTypeRef>(TR)) {
253+
Node = N->getDemangling(Dem);
200254
MangledName = &N->getMangledName();
201-
else if (auto BG = dyn_cast<BoundGenericTypeRef>(TR))
255+
} else if (auto BG = dyn_cast<BoundGenericTypeRef>(TR)) {
256+
Node = BG->getDemangling(Dem);
202257
MangledName = &BG->getMangledName();
203-
else
258+
} else
204259
return nullptr;
205260

206261
// Try the cache.
207262
auto Found = FieldTypeInfoCache.find(*MangledName);
208263
if (Found != FieldTypeInfoCache.end())
209264
return Found->second;
210265

266+
// Heuristic: find the outermost Module node available, and try to parse the
267+
// ReflectionInfos with a matching name first.
268+
auto ModuleName = FindOutermostModuleName(Node);
269+
// If we couldn't find a module name or the type is imported (__C module) we
270+
// don't have any useful information on which image to look for the type.
271+
if (ModuleName && ModuleName != llvm::StringRef("__C"))
272+
for (size_t i = 0; i < ReflectionInfos.size(); ++i)
273+
if (llvm::is_contained(ReflectionInfos[i].PotentialModuleNames,
274+
ModuleName))
275+
if (auto FD = findFieldDescriptorAtIndex(i, *MangledName))
276+
return *FD;
277+
211278
// On failure, fill out the cache, ReflectionInfo by ReflectionInfo,
212279
// until we find the field descriptor we're looking for.
213-
while (FirstUnprocessedReflectionInfoIndex < ReflectionInfos.size()) {
214-
auto &Info = ReflectionInfos[FirstUnprocessedReflectionInfoIndex];
215-
for (auto FD : Info.Field) {
216-
if (!FD->hasMangledTypeName())
217-
continue;
218-
auto CandidateMangledName = readTypeRef(FD, FD->MangledTypeName);
219-
if (auto NormalizedName = normalizeReflectionName(CandidateMangledName))
220-
FieldTypeInfoCache[std::move(*NormalizedName)] = FD;
221-
}
222-
223-
// Since we're done with the current ReflectionInfo, increment early in
224-
// case we get a cache hit.
225-
++FirstUnprocessedReflectionInfoIndex;
226-
Found = FieldTypeInfoCache.find(*MangledName);
227-
if (Found != FieldTypeInfoCache.end())
228-
return Found->second;
229-
}
280+
for (size_t i = 0; i < ReflectionInfos.size(); ++i)
281+
if (auto FD = findFieldDescriptorAtIndex(i, *MangledName))
282+
return *FD;
230283

231284
return nullptr;
232285
}

0 commit comments

Comments
 (0)