[pdb] Correctly parse the hash adjusters table from TPI stream.

Zachary Turner · Zachary Turner · commit 29da5db7a0bb · 2017-01-25T21:17:40.000Z
The hash adjusters table is not a list of pairs; it is a hash table data structure. We now parse it correctly and dump it from llvm-pdbdump. We still need to understand the conditions that lead to a type getting an entry in the hash adjusters table; that will be done in a follow-up investigation / patch. Differential Revision: https://reviews.llvm.org/D29090 llvm-svn: 293090
diff --git a/llvm/include/llvm/DebugInfo/PDB/Raw/TpiStream.h b/llvm/include/llvm/DebugInfo/PDB/Raw/TpiStream.h
@@ -13,6 +13,7 @@
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
 #include "llvm/DebugInfo/MSF/StreamArray.h"
 #include "llvm/DebugInfo/PDB/PDBTypes.h"
+#include "llvm/DebugInfo/PDB/Raw/HashTable.h"
 #include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
 #include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
 #include "llvm/Support/raw_ostream.h"
@@ -47,7 +48,7 @@ class TpiStream {
   uint32_t NumHashBuckets() const;
   msf::FixedStreamArray<support::ulittle32_t> getHashValues() const;
   msf::FixedStreamArray<TypeIndexOffset> getTypeIndexOffsets() const;
-  msf::FixedStreamArray<TypeIndexOffset> getHashAdjustments() const;
+  HashTable &getHashAdjusters();
 
   iterator_range<codeview::CVTypeArray::Iterator> types(bool *HadError) const;
 
@@ -64,7 +65,7 @@ class TpiStream {
   std::unique_ptr<msf::ReadableStream> HashStream;
   msf::FixedStreamArray<support::ulittle32_t> HashValues;
   msf::FixedStreamArray<TypeIndexOffset> TypeIndexOffsets;
-  msf::FixedStreamArray<TypeIndexOffset> HashAdjustments;
+  HashTable HashAdjusters;
 
   const TpiStreamHeader *Header;
 };
diff --git a/llvm/lib/DebugInfo/PDB/Raw/TpiStream.cpp b/llvm/lib/DebugInfo/PDB/Raw/TpiStream.cpp
@@ -113,11 +113,11 @@ Error TpiStream::reload() {
     if (auto EC = HSR.readArray(TypeIndexOffsets, NumTypeIndexOffsets))
       return EC;
 
-    HSR.setOffset(Header->HashAdjBuffer.Off);
-    uint32_t NumHashAdjustments =
-        Header->HashAdjBuffer.Length / sizeof(TypeIndexOffset);
-    if (auto EC = HSR.readArray(HashAdjustments, NumHashAdjustments))
-      return EC;
+    if (Header->HashAdjBuffer.Length > 0) {
+      HSR.setOffset(Header->HashAdjBuffer.Off);
+      if (auto EC = HashAdjusters.load(HSR))
+        return EC;
+    }
 
     HashStream = std::move(HS);
 
@@ -164,10 +164,7 @@ TpiStream::getTypeIndexOffsets() const {
   return TypeIndexOffsets;
 }
 
-FixedStreamArray<TypeIndexOffset>
-TpiStream::getHashAdjustments() const {
-  return HashAdjustments;
-}
+HashTable &TpiStream::getHashAdjusters() { return HashAdjusters; }
 
 iterator_range<CVTypeArray::Iterator>
 TpiStream::types(bool *HadError) const {
diff --git a/llvm/test/DebugInfo/PDB/pdbdump-headers.test b/llvm/test/DebugInfo/PDB/pdbdump-headers.test
@@ -152,9 +152,11 @@
 ; EMPTY-NEXT:       Number of Hash Buckets: 262143
 ; EMPTY-NEXT:       Hash Key Size: 4
 ; EMPTY-NEXT:       Values: [205956, 163561, 59811, 208239, 16377, 247078, 194342, 254156, 194536, 167492, 185421, 119540, 261871, 198119, 48056, 251486, 134580, 148190, 113636, 53336, 55779, 220695, 198114, 148734, 81128, 60158, 217249, 174209, 159978, 249504, 141941, 238785, 6214, 94935, 151449, 135589, 73373, 96512, 254299, 17744, 239514, 173189, 130544, 204437, 238560, 144673, 115151, 197306, 256035, 101096, 231280, 52156, 48854, 170035, 177041, 102745, 16947, 183703, 98548, 35693, 171328, 203640, 139292, 49018, 43821, 202555, 165040, 215835, 142625, 52534, 44186, 103930, 110942, 17991, 213215]
-; EMPTY-NEXT:       Type Index Offsets: [{4096, 0}]
 ; EMPTY-NEXT:       Hash Adjustments: []
 ; EMPTY-NEXT:     }
+; EMPTY-NEXT:     TypeIndexOffsets [
+; EMPTY-NEXT:       Index: 0x1000, Offset: 0
+; EMPTY-NEXT:     ]
 ; EMPTY:        Type Info Stream (IPI) {
 ; EMPTY-NEXT:     IPI Version: 20040203
 ; EMPTY-NEXT:     Record count: 15
@@ -231,13 +233,9 @@
 ; EMPTY-NEXT:           0000: 42100000 01000000 6C000000 0100F2F1  |B.......l.......|
 ; EMPTY-NEXT:         )
 ; EMPTY-NEXT:       }
-; EMPTY:          Hash {
-; EMPTY-NEXT:       Number of Hash Buckets: 262143
-; EMPTY-NEXT:       Hash Key Size: 4
-; EMPTY-NEXT:       Values: [7186, 7198, 7180, 7191, 7201, 7241, 7249, 80727, 154177, 75189, 253662, 193467, 222705, 186099, 257108]
-; EMPTY-NEXT:       Type Index Offsets: [{4096, 0}]
-; EMPTY-NEXT:       Hash Adjustments: []
-; EMPTY-NEXT:     }
+; EMPTY:          TypeIndexOffsets [
+; EMPTY-NEXT:       Index: 0x1000, Offset: 0
+; EMPTY-NEXT:     ]
 ; EMPTY:      DBI Stream {
 ; EMPTY-NEXT:   Dbi Version: 19990903
 ; EMPTY-NEXT:   Age: 1
diff --git a/llvm/tools/llvm-pdbdump/LLVMOutputStyle.cpp b/llvm/tools/llvm-pdbdump/LLVMOutputStyle.cpp
@@ -510,24 +510,27 @@ static void printTypeIndexOffset(raw_ostream &OS,
   OS << "{" << TIOff.Type.getIndex() << ", " << TIOff.Offset << "}";
 }
 
-static void dumpTpiHash(ScopedPrinter &P, TpiStream &Tpi) {
-  if (!opts::raw::DumpTpiHash)
-    return;
-  DictScope DD(P, "Hash");
-  P.printNumber("Number of Hash Buckets", Tpi.NumHashBuckets());
-  P.printNumber("Hash Key Size", Tpi.getHashKeySize());
-  P.printList("Values", Tpi.getHashValues());
-  P.printList("Type Index Offsets", Tpi.getTypeIndexOffsets(),
-              printTypeIndexOffset);
-  P.printList("Hash Adjustments", Tpi.getHashAdjustments(),
-              printTypeIndexOffset);
+namespace {
+class RecordBytesVisitor : public TypeVisitorCallbacks {
+public:
+  explicit RecordBytesVisitor(ScopedPrinter &P) : P(P) {}
+
+  Error visitTypeEnd(CVType &Record) override {
+    P.printBinaryBlock("Bytes", Record.content());
+    return Error::success();
+  }
+
+private:
+  ScopedPrinter &P;
+};
 }
 
 Error LLVMOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
   assert(StreamIdx == StreamTPI || StreamIdx == StreamIPI);
 
   bool DumpRecordBytes = false;
   bool DumpRecords = false;
+  bool DumpTpiHash = false;
   StringRef Label;
   StringRef VerLabel;
   if (StreamIdx == StreamTPI) {
@@ -537,6 +540,7 @@ Error LLVMOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
     }
     DumpRecordBytes = opts::raw::DumpTpiRecordBytes;
     DumpRecords = opts::raw::DumpTpiRecords;
+    DumpTpiHash = opts::raw::DumpTpiHash;
     Label = "Type Info Stream (TPI)";
     VerLabel = "TPI Version";
   } else if (StreamIdx == StreamIPI) {
@@ -549,80 +553,98 @@ Error LLVMOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
     Label = "Type Info Stream (IPI)";
     VerLabel = "IPI Version";
   }
-  if (!DumpRecordBytes && !DumpRecords && !opts::raw::DumpModuleSyms)
+  if (!DumpRecordBytes && !DumpRecords && !DumpTpiHash &&
+      !opts::raw::DumpModuleSyms)
     return Error::success();
 
+  bool IsSilentDatabaseBuild = !DumpRecordBytes && !DumpRecords && !DumpTpiHash;
+
   auto Tpi = (StreamIdx == StreamTPI) ? File.getPDBTpiStream()
                                       : File.getPDBIpiStream();
   if (!Tpi)
     return Tpi.takeError();
 
-  // Even if the user doesn't want to dump type records, we still need to
-  // iterate them in order to build the type database. So when they want to
-  // dump symbols but not types, don't stick a dumper on the end, just build
-  // the type database.
+  std::unique_ptr<DictScope> StreamScope;
+  std::unique_ptr<ListScope> RecordScope;
+
+  if (!IsSilentDatabaseBuild) {
+    StreamScope = llvm::make_unique<DictScope>(P, Label);
+    P.printNumber(VerLabel, Tpi->getTpiVersion());
+    P.printNumber("Record count", Tpi->NumTypeRecords());
+  }
+
   TypeDatabaseVisitor DBV(TypeDB);
   CompactTypeDumpVisitor CTDV(TypeDB, &P);
   TypeDumpVisitor TDV(TypeDB, &P, false);
+  RecordBytesVisitor RBV(P);
   TypeDeserializer Deserializer;
+
+  // We always need to deserialize each record and add it to the type
+  // database, even if we're not dumping anything, because the type database
+  // may be needed for the purposes of dumping symbols.
   TypeVisitorCallbackPipeline Pipeline;
   Pipeline.addCallbackToPipeline(Deserializer);
   Pipeline.addCallbackToPipeline(DBV);
 
-  CVTypeVisitor Visitor(Pipeline);
-
-  if (DumpRecords || DumpRecordBytes) {
-    DictScope D(P, Label);
-
-    P.printNumber(VerLabel, Tpi->getTpiVersion());
-    P.printNumber("Record count", Tpi->NumTypeRecords());
-    ListScope L(P, "Records");
-
-    bool HadError = false;
+  // If we're in dump mode, add a dumper with the appropriate detail level.
+  if (DumpRecords) {
     if (opts::raw::CompactRecords)
       Pipeline.addCallbackToPipeline(CTDV);
     else
       Pipeline.addCallbackToPipeline(TDV);
+  }
+  if (DumpRecordBytes)
+    Pipeline.addCallbackToPipeline(RBV);
 
-    for (auto Type : Tpi->types(&HadError)) {
-      std::unique_ptr<DictScope> Scope;
-      if (!opts::raw::CompactRecords)
-        Scope.reset(new DictScope(P, ""));
+  CVTypeVisitor Visitor(Pipeline);
 
-      if (DumpRecords) {
-        if (auto EC = Visitor.visitTypeRecord(Type))
-          return EC;
-      }
+  if (DumpRecords || DumpRecordBytes)
+    RecordScope = llvm::make_unique<ListScope>(P, "Records");
 
-      if (DumpRecordBytes)
-        P.printBinaryBlock("Bytes", Type.content());
-    }
-    dumpTpiHash(P, *Tpi);
-    if (HadError)
-      return make_error<RawError>(raw_error_code::corrupt_file,
-                                  "TPI stream contained corrupt record");
-    {
-      ListScope L(P, "TypeIndexOffsets");
-      for (const auto &IO : Tpi->getTypeIndexOffsets()) {
-        P.printString(formatv("Index: {0:x}, Offset: {1:N}", IO.Type.getIndex(),
-                              (uint32_t)IO.Offset)
-                          .str());
-      }
-    }
+  bool HadError = false;
+
+  TypeIndex T(TypeIndex::FirstNonSimpleIndex);
+  for (auto Type : Tpi->types(&HadError)) {
+    std::unique_ptr<DictScope> OneRecordScope;
 
-  } else if (opts::raw::DumpModuleSyms) {
+    if ((DumpRecords || DumpRecordBytes) && !opts::raw::CompactRecords)
+      OneRecordScope = llvm::make_unique<DictScope>(P, "");
 
-    bool HadError = false;
-    for (auto Type : Tpi->types(&HadError)) {
-      if (auto EC = Visitor.visitTypeRecord(Type))
-        return EC;
+    if (auto EC = Visitor.visitTypeRecord(Type))
+      return EC;
+  }
+  if (HadError)
+    return make_error<RawError>(raw_error_code::corrupt_file,
+                                "TPI stream contained corrupt record");
+
+  if (DumpTpiHash) {
+    DictScope DD(P, "Hash");
+    P.printNumber("Number of Hash Buckets", Tpi->NumHashBuckets());
+    P.printNumber("Hash Key Size", Tpi->getHashKeySize());
+    P.printList("Values", Tpi->getHashValues());
+
+    ListScope LHA(P, "Adjusters");
+    auto ExpectedST = File.getStringTable();
+    if (!ExpectedST)
+      return ExpectedST.takeError();
+    const auto &ST = *ExpectedST;
+    for (const auto &E : Tpi->getHashAdjusters()) {
+      DictScope DHA(P);
+      StringRef Name = ST.getStringForID(E.first);
+      P.printString("Type", Name);
+      P.printHex("TI", E.second);
     }
+  }
 
-    dumpTpiHash(P, *Tpi);
-    if (HadError)
-      return make_error<RawError>(raw_error_code::corrupt_file,
-                                  "TPI stream contained corrupt record");
+  if (!IsSilentDatabaseBuild) {
+    ListScope L(P, "TypeIndexOffsets");
+    for (const auto &IO : Tpi->getTypeIndexOffsets()) {
+      P.printString(formatv("Index: {0:x}, Offset: {1:N}", IO.Type.getIndex(),
+                            (uint32_t)IO.Offset)
+                        .str());
+    }
   }
+
   P.flush();
   return Error::success();
 }