Skip to content

Commit 73ac8d3

Browse files
committed
Replace llvm::MD5 with StableHasher
1 parent f5cb08e commit 73ac8d3

File tree

11 files changed

+53
-42
lines changed

11 files changed

+53
-42
lines changed

include/swift/AST/ParseRequests.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ class ParseAbstractFunctionBodyRequest :
8787
struct SourceFileParsingResult {
8888
ArrayRef<Decl *> TopLevelDecls;
8989
Optional<ArrayRef<Token>> CollectedTokens;
90-
Optional<llvm::MD5> InterfaceHash;
90+
Optional<StableHasher> InterfaceHasher;
9191
Optional<syntax::SourceFileSyntax> SyntaxRoot;
9292
};
9393

include/swift/AST/SourceFile.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,10 @@ class SourceFile final : public FileUnit {
105105
SourceLoc MainDeclDiagLoc;
106106

107107
/// A hasher fed every interface-contributing token that has been lexed for
108-
/// this source file so far.
108+
/// this source file.
109+
///
109110
/// We only collect interface hash for primary input files.
110-
llvm::Optional<llvm::MD5> InterfaceHash;
111+
llvm::Optional<StableHasher> InterfaceHasher;
111112

112113
/// The ID for the memory buffer containing this file's source.
113114
///

include/swift/Basic/Fingerprint.h

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@
1313
#ifndef SWIFT_BASIC_FINGERPRINT_H
1414
#define SWIFT_BASIC_FINGERPRINT_H
1515

16+
#include "swift/Basic/StableHasher.h"
1617
#include "llvm/ADT/Hashing.h"
1718
#include "llvm/ADT/SmallString.h"
1819
#include "llvm/ADT/StringRef.h"
19-
#include "llvm/Support/MD5.h"
2020

2121
#include <string>
2222

@@ -52,11 +52,6 @@ namespace swift {
5252
/// iterable decl contexts to detect when the tokens in their bodies have
5353
/// changed. This makes them a coarse - yet safe - overapproximation for when a
5454
/// decl has changed semantically.
55-
///
56-
/// \c Fingerprints are currently implemented as a thin wrapper around an MD5
57-
/// hash. MD5 is known to be neither the fastest nor the most
58-
/// cryptographically capable algorithm, but it does afford us the avalanche
59-
/// effect we desire. We should revisit the modeling decision here.
6055
class Fingerprint final {
6156
public:
6257
/// The size (in bytes) of the raw value of all fingerprints.
@@ -66,6 +61,8 @@ class Fingerprint final {
6661
private:
6762
Core core;
6863

64+
friend struct StableHasher::Combiner<swift::Fingerprint>;
65+
6966
public:
7067
/// Creates a fingerprint value from a pair of 64-bit integers.
7168
explicit Fingerprint(Fingerprint::Core value) : core(value) {}
@@ -76,9 +73,9 @@ class Fingerprint final {
7673
/// Strings that violate this invariant will return a null optional.
7774
static llvm::Optional<Fingerprint> fromString(llvm::StringRef value);
7875

79-
/// Creates a fingerprint value by consuming the given \c MD5Result from LLVM.
80-
explicit Fingerprint(llvm::MD5::MD5Result &&MD5Value)
81-
: core{MD5Value.words()} {}
76+
/// Creates a fingerprint value by consuming the given \c StableHasher.
77+
explicit Fingerprint(StableHasher &&stableHasher)
78+
: core{std::move(stableHasher).finalize()} {}
8279

8380
public:
8481
/// Retrieve the raw underlying bytes of this fingerprint.
@@ -100,7 +97,7 @@ class Fingerprint final {
10097
public:
10198
/// The fingerprint value consisting of 32 bytes of zeroes.
10299
///
103-
/// This fingerprint is a perfectly fine value for an MD5 hash, but it is
100+
/// This fingerprint is a perfectly fine value for a hash, but it is
104101
/// completely arbitrary.
105102
static Fingerprint ZERO() {
106103
return Fingerprint(Fingerprint::Core{0, 0});
@@ -118,6 +115,22 @@ class Fingerprint final {
118115
void simple_display(llvm::raw_ostream &out, const Fingerprint &fp);
119116
}; // namespace swift
120117

118+
namespace swift {
119+
120+
template <> struct StableHasher::Combiner<Fingerprint> {
121+
static void combine(StableHasher &hasher, const Fingerprint &Val) {
122+
// Our underlying buffer is already byte-swapped. Combine the
123+
// raw bytes from the core by hand.
124+
uint8_t buffer[8];
125+
memcpy(buffer, &Val.core.first, sizeof(buffer));
126+
hasher.combine(buffer);
127+
memcpy(buffer, &Val.core.second, sizeof(buffer));
128+
hasher.combine(buffer);
129+
}
130+
};
131+
132+
}; // namespace swift
133+
121134
namespace llvm {
122135
class raw_ostream;
123136
raw_ostream &operator<<(raw_ostream &OS, const swift::Fingerprint &fp);

include/swift/Parse/Parser.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ class Parser {
127127

128128
/// The current token hash, or \c None if the parser isn't computing a hash
129129
/// for the token stream.
130-
Optional<llvm::MD5> CurrentTokenHash;
130+
Optional<StableHasher> CurrentTokenHash;
131131

132132
void recordTokenHash(const Token Tok) {
133133
if (!Tok.getText().empty())

lib/AST/Module.cpp

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1092,22 +1092,21 @@ Fingerprint SourceFile::getInterfaceHash() const {
10921092
assert(hasInterfaceHash() && "Interface hash not enabled");
10931093
auto &eval = getASTContext().evaluator;
10941094
auto *mutableThis = const_cast<SourceFile *>(this);
1095-
auto md5 = *evaluateOrDefault(eval, ParseSourceFileRequest{mutableThis}, {})
1096-
.InterfaceHash;
1097-
llvm::MD5::MD5Result result;
1098-
md5.final(result);
1099-
return Fingerprint{std::move(result)};
1095+
Optional<StableHasher> interfaceHasher =
1096+
evaluateOrDefault(eval, ParseSourceFileRequest{mutableThis}, {})
1097+
.InterfaceHasher;
1098+
return Fingerprint{StableHasher{interfaceHasher.getValue()}.finalize()};
11001099
}
11011100

11021101
Fingerprint SourceFile::getInterfaceHashIncludingTypeMembers() const {
11031102
/// FIXME: Gross. Hashing multiple "hash" values.
1104-
llvm::MD5 hash;
1105-
hash.update(getInterfaceHash().getRawValue());
1103+
auto hash = StableHasher::defaultHasher();
1104+
hash.combine(getInterfaceHash());
11061105

11071106
std::function<void(IterableDeclContext *)> hashTypeBodyFingerprints =
11081107
[&](IterableDeclContext *IDC) {
11091108
if (auto fp = IDC->getBodyFingerprint())
1110-
hash.update(fp->getRawValue());
1109+
hash.combine(*fp);
11111110
for (auto *member : IDC->getParsedMembers())
11121111
if (auto *childIDC = dyn_cast<IterableDeclContext>(member))
11131112
hashTypeBodyFingerprints(childIDC);
@@ -1118,9 +1117,7 @@ Fingerprint SourceFile::getInterfaceHashIncludingTypeMembers() const {
11181117
hashTypeBodyFingerprints(IDC);
11191118
}
11201119

1121-
llvm::MD5::MD5Result result;
1122-
hash.final(result);
1123-
return Fingerprint{std::move(result)};
1120+
return Fingerprint{std::move(hash)};
11241121
}
11251122

11261123
syntax::SourceFileSyntax SourceFile::getSyntaxRoot() const {

lib/Basic/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ add_swift_host_library(swiftBasic STATIC
6464
Program.cpp
6565
QuotedString.cpp
6666
SourceLoc.cpp
67+
StableHasher.cpp
6768
Statistic.cpp
6869
StringExtras.cpp
6970
TaskQueue.cpp

lib/Parse/ParseDecl.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4800,7 +4800,8 @@ Parser::parseDeclList(SourceLoc LBLoc, SourceLoc &RBLoc, Diag<> ErrorDiag,
48004800

48014801
// If we're hashing the type body separately, record the curly braces but
48024802
// nothing inside for the interface hash.
4803-
llvm::SaveAndRestore<Optional<llvm::MD5>> MemberHashingScope{CurrentTokenHash, llvm::MD5()};
4803+
llvm::SaveAndRestore<Optional<StableHasher>> MemberHashingScope{
4804+
CurrentTokenHash, StableHasher::defaultHasher()};
48044805
recordTokenHash("{");
48054806
recordTokenHash("}");
48064807

@@ -4833,9 +4834,9 @@ Parser::parseDeclList(SourceLoc LBLoc, SourceLoc &RBLoc, Diag<> ErrorDiag,
48334834
if (RBLoc.isInvalid())
48344835
hadError = true;
48354836

4836-
llvm::MD5::MD5Result result;
4837-
CurrentTokenHash->final(result);
4838-
return std::make_pair(decls, Fingerprint{std::move(result)});
4837+
// Clone the current hasher and extract a Fingerprint.
4838+
StableHasher currentHash{*CurrentTokenHash};
4839+
return std::make_pair(decls, Fingerprint{std::move(currentHash)});
48394840
}
48404841

48414842
bool Parser::canDelayMemberDeclParsing(bool &HasOperatorDeclarations,
@@ -6725,7 +6726,7 @@ void Parser::parseAbstractFunctionBody(AbstractFunctionDecl *AFD) {
67256726
recordTokenHash("{");
67266727
recordTokenHash("}");
67276728

6728-
llvm::SaveAndRestore<Optional<llvm::MD5>> T(CurrentTokenHash, None);
6729+
llvm::SaveAndRestore<Optional<StableHasher>> T(CurrentTokenHash, None);
67296730

67306731
// If we can delay parsing this body, or this is the first pass of code
67316732
// completion, skip until the end. If we encounter a code completion token

lib/Parse/ParseIfConfig.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -616,7 +616,7 @@ ParserResult<IfConfigDecl> Parser::parseIfConfig(
616616
SourceMgr.getCodeCompletionBufferID() == L->getBufferID() &&
617617
SourceMgr.isBeforeInBuffer(Tok.getLoc(),
618618
SourceMgr.getCodeCompletionLoc())) {
619-
llvm::SaveAndRestore<Optional<llvm::MD5>> H(CurrentTokenHash, None);
619+
llvm::SaveAndRestore<Optional<StableHasher>> H(CurrentTokenHash, None);
620620
BacktrackingScope backtrack(*this);
621621
do {
622622
auto startLoc = Tok.getLoc();
@@ -706,7 +706,7 @@ ParserResult<IfConfigDecl> Parser::parseIfConfig(
706706
llvm::SaveAndRestore<bool> S(InInactiveClauseEnvironment,
707707
InInactiveClauseEnvironment || !isActive);
708708
// Disable updating the interface hash inside inactive blocks.
709-
Optional<llvm::SaveAndRestore<Optional<llvm::MD5>>> T;
709+
Optional<llvm::SaveAndRestore<Optional<StableHasher>>> T;
710710
if (!isActive)
711711
T.emplace(CurrentTokenHash, None);
712712

lib/Parse/ParseRequests.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -206,15 +206,15 @@ ParseSourceFileRequest::getCachedResult() const {
206206
syntaxRoot.emplace(*rootPtr);
207207

208208
return SourceFileParsingResult{*decls, SF->AllCollectedTokens,
209-
SF->InterfaceHash, syntaxRoot};
209+
SF->InterfaceHasher, syntaxRoot};
210210
}
211211

212212
void ParseSourceFileRequest::cacheResult(SourceFileParsingResult result) const {
213213
auto *SF = std::get<0>(getStorage());
214214
assert(!SF->Decls);
215215
SF->Decls = result.TopLevelDecls;
216216
SF->AllCollectedTokens = result.CollectedTokens;
217-
SF->InterfaceHash = result.InterfaceHash;
217+
SF->InterfaceHasher = result.InterfaceHasher;
218218

219219
if (auto &root = result.SyntaxRoot)
220220
SF->SyntaxRoot = std::make_unique<SourceFileSyntax>(std::move(*root));

lib/Parse/Parser.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
#include "swift/SyntaxParse/SyntaxTreeCreator.h"
3535
#include "llvm/Support/Compiler.h"
3636
#include "llvm/Support/MemoryBuffer.h"
37-
#include "llvm/Support/MD5.h"
3837
#include "llvm/Support/raw_ostream.h"
3938
#include "llvm/Support/SaveAndRestore.h"
4039
#include "llvm/ADT/PointerUnion.h"
@@ -148,7 +147,7 @@ void Parser::performCodeCompletionSecondPassImpl(
148147
SyntaxContext->disable();
149148

150149
// Disable updating the interface hash
151-
llvm::SaveAndRestore<Optional<llvm::MD5>> CurrentTokenHashSaver(
150+
llvm::SaveAndRestore<Optional<StableHasher>> CurrentTokenHashSaver(
152151
CurrentTokenHash, None);
153152

154153
auto BufferID = L->getBufferID();
@@ -540,7 +539,7 @@ Parser::Parser(std::unique_ptr<Lexer> Lex, SourceFile &SF,
540539

541540
// If the interface hash is enabled, set up the initial hash.
542541
if (SF.hasInterfaceHash())
543-
CurrentTokenHash.emplace();
542+
CurrentTokenHash.emplace(StableHasher::defaultHasher());
544543

545544
// Set the token to a sentinel so that we know the lexer isn't primed yet.
546545
// This cannot be tok::unknown, since that is a token the lexer could produce.
@@ -590,10 +589,9 @@ SourceLoc Parser::consumeTokenWithoutFeedingReceiver() {
590589
void Parser::recordTokenHash(StringRef token) {
591590
assert(!token.empty());
592591
if (CurrentTokenHash) {
593-
CurrentTokenHash->update(token);
592+
CurrentTokenHash->combine(token);
594593
// Add null byte to separate tokens.
595-
uint8_t a[1] = {0};
596-
CurrentTokenHash->update(a);
594+
CurrentTokenHash->combine(uint8_t{0});
597595
}
598596
}
599597

test/Serialization/sourceinfo.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,5 @@ import MyModule
66
// RUN: %target-swiftc_driver -emit-module -module-name MyModule -o %t/Modules/MyModule.swiftmodule %S/Inputs/SourceInfo/File1.swift %S/Inputs/SourceInfo/File2.swift
77
// RUN: %target-swift-ide-test -print-module-metadata -module-to-print MyModule -enable-swiftsourceinfo -I %t/Modules -source-filename %s | %FileCheck %s
88

9-
// CHECK: filepath=SOURCE_DIR{{[/\\]}}test{{[/\\]}}Serialization{{[/\\]}}Inputs{{[/\\]}}SourceInfo{{[/\\]}}File1.swift; hash=b44bab617797a7239a9fa948f11eb90b; mtime={{[0-9]{4}-[0-9]{2}-[0-9]{2} .*}}; size=35
10-
// CHECK: filepath=SOURCE_DIR{{[/\\]}}test{{[/\\]}}Serialization{{[/\\]}}Inputs{{[/\\]}}SourceInfo{{[/\\]}}File2.swift; hash=c989d6b98d505a1f52749d43ea0569a1; mtime={{[0-9]{4}-[0-9]{2}-[0-9]{2} .*}}; size=57
9+
// CHECK: filepath=SOURCE_DIR{{[/\\]}}test{{[/\\]}}Serialization{{[/\\]}}Inputs{{[/\\]}}SourceInfo{{[/\\]}}File1.swift; hash=9da710e9b2de1fff2915639236b8929c; mtime={{[0-9]{4}-[0-9]{2}-[0-9]{2} .*}}; size=35
10+
// CHECK: filepath=SOURCE_DIR{{[/\\]}}test{{[/\\]}}Serialization{{[/\\]}}Inputs{{[/\\]}}SourceInfo{{[/\\]}}File2.swift; hash=4ce628834bb98fd822ac840ea341de26; mtime={{[0-9]{4}-[0-9]{2}-[0-9]{2} .*}}; size=57

0 commit comments

Comments (0)