Skip to content

Use a better hash seed when doing string hashing in the compiler #26685

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 29, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion include/swift/Serialization/ModuleFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,13 @@ const uint16_t SWIFTMODULE_VERSION_MAJOR = 0;
/// describe what change you made. The content of this comment isn't important;
/// it just ensures a conflict if two people change the module format.
/// Don't worry about adhering to the 80-column limit for this line.
const uint16_t SWIFTMODULE_VERSION_MINOR = 516; // encode GenericSignature and GenericEnvironment together
const uint16_t SWIFTMODULE_VERSION_MINOR = 517; // better string hash seed

/// A standard hash seed used for all string hashes in a serialized module.
///
/// This is the same as the default used by llvm::djbHash, just provided
/// explicitly here to note that it's part of the format.
const uint32_t SWIFTMODULE_HASH_SEED = 5381;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of curiosity: Is the fact that the format depends on the hash seed a bug?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it's an OnDiskHashTable, so the hash used is inherently part of the format.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense!


using DeclIDField = BCFixed<31>;

Expand Down
6 changes: 2 additions & 4 deletions lib/ClangImporter/SwiftLookupTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1090,8 +1090,7 @@ namespace {
}

hash_value_type ComputeHash(key_type_ref key) {
// FIXME: DJB seed=0, audit whether the default seed could be used.
return static_cast<unsigned>(key.first) + llvm::djbHash(key.second, 0);
return static_cast<unsigned>(key.first) + llvm::djbHash(key.second);
}

std::pair<unsigned, unsigned> EmitKeyDataLength(raw_ostream &out,
Expand Down Expand Up @@ -1340,8 +1339,7 @@ namespace {
}

hash_value_type ComputeHash(internal_key_type key) {
// FIXME: DJB seed=0, audit whether the default seed could be used.
return static_cast<unsigned>(key.first) + llvm::djbHash(key.second, 0);
return static_cast<unsigned>(key.first) + llvm::djbHash(key.second);
}

static bool EqualKey(internal_key_type lhs, internal_key_type rhs) {
Expand Down
4 changes: 1 addition & 3 deletions lib/Sema/TypeCheckDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ struct RawValueKey {
// FIXME: doesn't accommodate >64-bit or signed raw integer or float values.
union {
StringRef stringValue;
uint32_t charValue;
IntValueTy intValue;
FloatValueTy floatValue;
};
Expand Down Expand Up @@ -181,8 +180,7 @@ class DenseMapInfo<RawValueKey> {
return DenseMapInfo<uint64_t>::getHashValue(k.intValue.v0) &
DenseMapInfo<uint64_t>::getHashValue(k.intValue.v1);
case RawValueKey::Kind::String:
// FIXME: DJB seed=0, audit whether the default seed could be used.
return llvm::djbHash(k.stringValue, 0);
return DenseMapInfo<StringRef>::getHashValue(k.stringValue);
case RawValueKey::Kind::Empty:
case RawValueKey::Kind::Tombstone:
return 0;
Expand Down
3 changes: 1 addition & 2 deletions lib/Serialization/DeserializeSIL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,7 @@ class SILDeserializer::FuncTableInfo {
external_key_type GetExternalKey(internal_key_type ID) { return ID; }

hash_value_type ComputeHash(internal_key_type key) {
// FIXME: DJB seed=0, audit whether the default seed could be used.
return llvm::djbHash(key, 0);
return llvm::djbHash(key, SWIFTMODULE_HASH_SEED);
}

static bool EqualKey(internal_key_type lhs, internal_key_type rhs) {
Expand Down
9 changes: 9 additions & 0 deletions lib/Serialization/DocFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,15 @@ const uint16_t SWIFTDOC_VERSION_MAJOR = 1;
/// adhering to the 80-column limit for this line.
const uint16_t SWIFTDOC_VERSION_MINOR = 1; // Last change: skipping 0 for testing purposes

/// The hash seed used for the string hashes in a Swift 5.1 swiftdoc file.
///
/// 0 is not a good seed for llvm::djbHash, but swiftdoc files use a stable
/// format, so we can't change the hash seed without a version bump. Any new
/// hashed strings should use a new stable hash seed constant. (No such constant
/// has been defined at the time this doc comment was last updated because there
/// are no other strings to hash.)
const uint32_t SWIFTDOC_HASH_SEED_5_1 = 0;

/// The record types within the comment block.
///
/// Be very careful when changing this block; it must remain
Expand Down
21 changes: 7 additions & 14 deletions lib/Serialization/ModuleFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -391,8 +391,7 @@ class ModuleFile::DeclTableInfo {

hash_value_type ComputeHash(internal_key_type key) {
if (key.first == DeclBaseName::Kind::Normal) {
// FIXME: DJB seed=0, audit whether the default seed could be used.
return llvm::djbHash(key.second, 0);
return llvm::djbHash(key.second, SWIFTMODULE_HASH_SEED);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps factor this out into a ModuleFile::hashFunction(). I'm not sure whether this is much better, I'll leave it up to you.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I considered that, but figured this was slightly more likely to keep people from doing the wrong thing in the future. I'm not 100% sure of that though.

} else {
return (hash_value_type)key.first;
}
Expand Down Expand Up @@ -454,8 +453,7 @@ class ModuleFile::ExtensionTableInfo {
}

hash_value_type ComputeHash(internal_key_type key) {
// FIXME: DJB seed=0, audit whether the default seed could be used.
return llvm::djbHash(key, 0);
return llvm::djbHash(key, SWIFTMODULE_HASH_SEED);
}

static bool EqualKey(internal_key_type lhs, internal_key_type rhs) {
Expand Down Expand Up @@ -515,8 +513,7 @@ class ModuleFile::LocalDeclTableInfo {
}

hash_value_type ComputeHash(internal_key_type key) {
// FIXME: DJB seed=0, audit whether the default seed could be used.
return llvm::djbHash(key, 0);
return llvm::djbHash(key, SWIFTMODULE_HASH_SEED);
}

static bool EqualKey(internal_key_type lhs, internal_key_type rhs) {
Expand Down Expand Up @@ -551,8 +548,7 @@ class ModuleFile::NestedTypeDeclsTableInfo {
}

hash_value_type ComputeHash(internal_key_type key) {
// FIXME: DJB seed=0, audit whether the default seed could be used.
return llvm::djbHash(key, 0);
return llvm::djbHash(key, SWIFTMODULE_HASH_SEED);
}

static bool EqualKey(internal_key_type lhs, internal_key_type rhs) {
Expand Down Expand Up @@ -607,8 +603,7 @@ class ModuleFile::DeclMemberNamesTableInfo {

hash_value_type ComputeHash(internal_key_type key) {
if (key.first == DeclBaseName::Kind::Normal) {
// FIXME: DJB seed=0, audit whether the default seed could be used.
return llvm::djbHash(key.second, 0);
return llvm::djbHash(key.second, SWIFTMODULE_HASH_SEED);
} else {
return (hash_value_type)key.first;
}
Expand Down Expand Up @@ -782,8 +777,7 @@ class ModuleFile::ObjCMethodTableInfo {
}

hash_value_type ComputeHash(internal_key_type key) {
// FIXME: DJB seed=0, audit whether the default seed could be used.
return llvm::djbHash(key, 0);
return llvm::djbHash(key, SWIFTMODULE_HASH_SEED);
}

static bool EqualKey(internal_key_type lhs, internal_key_type rhs) {
Expand Down Expand Up @@ -972,8 +966,7 @@ class ModuleFile::DeclCommentTableInfo {

hash_value_type ComputeHash(internal_key_type key) {
assert(!key.empty());
// FIXME: DJB seed=0, audit whether the default seed could be used.
return llvm::djbHash(key, 0);
return llvm::djbHash(key, SWIFTDOC_HASH_SEED_5_1);
}

static bool EqualKey(internal_key_type lhs, internal_key_type rhs) {
Expand Down
19 changes: 7 additions & 12 deletions lib/Serialization/Serialization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ namespace {
switch (key.getKind()) {
case DeclBaseName::Kind::Normal:
assert(!key.empty());
// FIXME: DJB seed=0, audit whether the default seed could be used.
return llvm::djbHash(key.getIdentifier().str(), 0);
return llvm::djbHash(key.getIdentifier().str(),
SWIFTMODULE_HASH_SEED);
case DeclBaseName::Kind::Subscript:
return static_cast<uint8_t>(DeclNameKind::Subscript);
case DeclBaseName::Kind::Constructor:
Expand Down Expand Up @@ -179,8 +179,7 @@ namespace {

hash_value_type ComputeHash(key_type_ref key) {
assert(!key.empty());
// FIXME: DJB seed=0, audit whether the default seed could be used.
return llvm::djbHash(key.str(), 0);
return llvm::djbHash(key.str(), SWIFTMODULE_HASH_SEED);
}

int32_t getNameDataForBase(const NominalTypeDecl *nominal,
Expand Down Expand Up @@ -244,8 +243,7 @@ namespace {

hash_value_type ComputeHash(key_type_ref key) {
assert(!key.empty());
// FIXME: DJB seed=0, audit whether the default seed could be used.
return llvm::djbHash(key, 0);
return llvm::djbHash(key, SWIFTMODULE_HASH_SEED);
}

std::pair<unsigned, unsigned> EmitKeyDataLength(raw_ostream &out,
Expand Down Expand Up @@ -286,8 +284,7 @@ namespace {

hash_value_type ComputeHash(key_type_ref key) {
assert(!key.empty());
// FIXME: DJB seed=0, audit whether the default seed could be used.
return llvm::djbHash(key.str(), 0);
return llvm::djbHash(key.str(), SWIFTMODULE_HASH_SEED);
}

std::pair<unsigned, unsigned> EmitKeyDataLength(raw_ostream &out,
Expand Down Expand Up @@ -332,8 +329,7 @@ namespace {
switch (key.getKind()) {
case DeclBaseName::Kind::Normal:
assert(!key.empty());
// FIXME: DJB seed=0, audit whether the default seed could be used.
return llvm::djbHash(key.getIdentifier().str(), 0);
return llvm::djbHash(key.getIdentifier().str(), SWIFTMODULE_HASH_SEED);
case DeclBaseName::Kind::Subscript:
return static_cast<uint8_t>(DeclNameKind::Subscript);
case DeclBaseName::Kind::Constructor:
Expand Down Expand Up @@ -4402,8 +4398,7 @@ namespace {

hash_value_type ComputeHash(key_type_ref key) {
llvm::SmallString<32> scratch;
// FIXME: DJB seed=0, audit whether the default seed could be used.
return llvm::djbHash(key.getString(scratch), 0);
return llvm::djbHash(key.getString(scratch), SWIFTMODULE_HASH_SEED);
}

std::pair<unsigned, unsigned> EmitKeyDataLength(raw_ostream &out,
Expand Down
3 changes: 1 addition & 2 deletions lib/Serialization/SerializeDoc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,8 +197,7 @@ class DeclCommentTableInfo {

hash_value_type ComputeHash(key_type_ref key) {
assert(!key.empty());
// FIXME: DJB seed=0, audit whether the default seed could be used.
return llvm::djbHash(key, 0);
return llvm::djbHash(key, SWIFTDOC_HASH_SEED_5_1);
}

std::pair<unsigned, unsigned>
Expand Down
3 changes: 1 addition & 2 deletions lib/Serialization/SerializeSIL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,7 @@ namespace {

hash_value_type ComputeHash(key_type_ref key) {
assert(!key.empty());
// FIXME: DJB seed=0, audit whether the default seed could be used.
return llvm::djbHash(key, 0);
return llvm::djbHash(key, SWIFTMODULE_HASH_SEED);
}

std::pair<unsigned, unsigned> EmitKeyDataLength(raw_ostream &out,
Expand Down
24 changes: 13 additions & 11 deletions test/Frontend/sil-merge-partial-modules.swift
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

// RUN: %target-swift-frontend -emit-module %t/partial.a.swiftmodule %t/partial.b.swiftmodule -module-name test -enable-library-evolution -sil-merge-partial-modules -disable-diagnostic-passes -disable-sil-perf-optzns -o %t/test.swiftmodule

// RUN: %target-sil-opt %t/test.swiftmodule -disable-sil-linking > %t/dump.sil
// RUN: %target-sil-opt %t/test.swiftmodule -disable-sil-linking -emit-sorted-sil > %t/dump.sil
// RUN: %FileCheck %s < %t/dump.sil
// RUN: %FileCheck %s --check-prefix=NEGATIVE < %t/dump.sil

Expand Down Expand Up @@ -40,28 +40,30 @@ public class CircleManager : ShapeManager {
public override func manage() {}
}

// FIXME: Why is the definition order totally random?
// CHECK-LABEL: sil [canonical] @$s4test14publicFunctionyyF : $@convention(thin) () -> ()

// CHECK-LABEL: sil [serialized] [canonical] @$s4test17inlinableFunctionyyF : $@convention(thin) () -> () {
// CHECK: function_ref @$s4test17inlinableFunctionyyFyycfU_
// CHECK: }

// CHECK-LABEL: sil shared [serialized] [canonical] @$s4test17inlinableFunctionyyFyycfU_ : $@convention(thin) () -> () {
// CHECK: function_ref @$s4test17versionedFunctionyyF
// CHECK: }

// CHECK-LABEL: sil [canonical] @$s4test17versionedFunctionyyF : $@convention(thin) () -> ()

// CHECK-LABEL: sil [canonical] @$s4test9RectangleV4areaSfvg : $@convention(method) (Rectangle) -> Float

// CHECK-LABEL: sil shared [transparent] [serialized] [thunk] [canonical] @$s4test9RectangleVAA5ShapeA2aDP4drawyyFTW : $@convention(witness_method: Shape) (@in_guaranteed Rectangle) -> () {
// CHECK-LABEL: sil [serialized] [canonical] @$s4test9RectangleV4drawyyF : $@convention(method) (Rectangle) -> () {
// CHECK: function_ref @$s4test14publicFunctionyyF
// CHECK: }

// CHECK-LABEL: sil [canonical] @$s4test14publicFunctionyyF : $@convention(thin) () -> ()

// CHECK-LABEL: sil shared [transparent] [serialized] [thunk] [canonical] @$s4test9RectangleVAA5ShapeA2aDP4areaSfvgTW : $@convention(witness_method: Shape) (@in_guaranteed Rectangle) -> Float {
// CHECK: function_ref @$s4test9RectangleV4areaSfvg
// CHECK: }

// CHECK-LABEL: sil shared [serialized] [canonical] @$s4test17inlinableFunctionyyFyycfU_ : $@convention(thin) () -> () {
// CHECK: function_ref @$s4test17versionedFunctionyyF
// CHECK: }

// CHECK-LABEL: sil [serialized] [canonical] @$s4test17inlinableFunctionyyF : $@convention(thin) () -> () {
// CHECK: function_ref @$s4test17inlinableFunctionyyFyycfU_
// CHECK-LABEL: sil shared [transparent] [serialized] [thunk] [canonical] @$s4test9RectangleVAA5ShapeA2aDP4drawyyFTW : $@convention(witness_method: Shape) (@in_guaranteed Rectangle) -> () {
// CHECK: function_ref @$s4test9RectangleV4drawyyF
// CHECK: }

// CHECK-LABEL: sil_witness_table [serialized] Rectangle: Shape module test {
Expand Down
6 changes: 3 additions & 3 deletions test/SIL/Serialization/Recovery/function.sil
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// RUN: %empty-directory(%t)
// RUN: %target-swift-frontend -parse-sil %s -emit-sib -o %t/Library.sib -module-name Library -I %S/Inputs/good-modules -parse-stdlib
// RUN: %target-sil-opt %t/Library.sib -I %S/Inputs/good-modules | %FileCheck %s
// RUN: %target-sil-opt %t/Library.sib -I %S/Inputs/bad-modules | %FileCheck -check-prefix=CHECK-RECOVERY %s
// RUN: %target-sil-opt %t/Library.sib -I %S/Inputs/bad-modules | %FileCheck -check-prefix=CHECK-RECOVERY-NEGATIVE %s
// RUN: %target-sil-opt %t/Library.sib -I %S/Inputs/good-modules -emit-sorted-sil | %FileCheck %s
// RUN: %target-sil-opt %t/Library.sib -I %S/Inputs/bad-modules -emit-sorted-sil | %FileCheck -check-prefix=CHECK-RECOVERY %s
// RUN: %target-sil-opt %t/Library.sib -I %S/Inputs/bad-modules -emit-sorted-sil | %FileCheck -check-prefix=CHECK-RECOVERY-NEGATIVE %s

// CHECK-LABEL: sil_stage raw
// CHECK-RECOVERY-LABEL: sil_stage raw
Expand Down
22 changes: 11 additions & 11 deletions test/SIL/Serialization/assignattr.sil
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,12 @@
// RUN: %empty-directory(%t)
// RUN: %target-sil-opt %s -emit-sib -o %t/tmp.sib -module-name assignattr
// RUN: %target-sil-opt %t/tmp.sib -o %t/tmp.2.sib -module-name assignattr
// RUN: %target-sil-opt %t/tmp.2.sib -module-name assignattr | %FileCheck %s
// RUN: %target-sil-opt %t/tmp.2.sib -module-name assignattr -emit-sorted-sil | %FileCheck %s

sil_stage raw

import Builtin

// CHECK-LABEL: sil [serialized] [ossa] @trivial_assign : $@convention(thin) (@in Builtin.Int32, Builtin.Int32) -> () {
// CHECK: bb0([[ARG1:%[0-9]+]] : $*Builtin.Int32, [[ARG2:%[0-9]+]] : $Builtin.Int32):
// CHECK: assign [[ARG2]] to [init] [[ARG1]] : $*Builtin.Int32
sil [serialized] [ossa] @trivial_assign : $@convention(thin) (@in Builtin.Int32, Builtin.Int32) -> () {
bb0(%0 : $*Builtin.Int32, %1 : $Builtin.Int32):
assign %1 to [init] %0 : $*Builtin.Int32
%2 = tuple()
return %2 : $()
}

// CHECK-LABEL: sil [serialized] [ossa] @non_trivial_assign : $@convention(thin) (@in Builtin.NativeObject, @owned Builtin.NativeObject) -> () {
// CHECK: bb0([[ARG1:%[0-9]+]] : $*Builtin.NativeObject, [[ARG2:%[0-9]+]] : @owned $Builtin.NativeObject):
// CHECK: [[ARG2_COPY:%.*]] = copy_value [[ARG2]]
Expand All @@ -31,3 +21,13 @@ bb0(%0 : $*Builtin.NativeObject, %1 : @owned $Builtin.NativeObject):
%9999 = tuple()
return %9999 : $()
}

// CHECK-LABEL: sil [serialized] [ossa] @trivial_assign : $@convention(thin) (@in Builtin.Int32, Builtin.Int32) -> () {
// CHECK: bb0([[ARG1:%[0-9]+]] : $*Builtin.Int32, [[ARG2:%[0-9]+]] : $Builtin.Int32):
// CHECK: assign [[ARG2]] to [init] [[ARG1]] : $*Builtin.Int32
sil [serialized] [ossa] @trivial_assign : $@convention(thin) (@in Builtin.Int32, Builtin.Int32) -> () {
bb0(%0 : $*Builtin.Int32, %1 : $Builtin.Int32):
assign %1 to [init] %0 : $*Builtin.Int32
%2 = tuple()
return %2 : $()
}
Loading