[stdlib] Set, Dictionary: Prepare for per-instance hash seeds #19589

Merged 3 commits on Sep 28, 2018
Changes from all commits
45 changes: 21 additions & 24 deletions stdlib/public/core/DictionaryStorage.swift
@@ -359,26 +359,31 @@ final internal class _DictionaryStorage<Key: Hashable, Value>
extension _DictionaryStorage {
@usableFromInline
@_effects(releasenone)
internal static func reallocate(
internal static func copy(
original: _RawDictionaryStorage
) -> _DictionaryStorage {
return allocate(
scale: original._scale,
age: original._age,
seed: original._seed)
}

@usableFromInline
@_effects(releasenone)
static internal func resize(
original: _RawDictionaryStorage,
capacity: Int
) -> (storage: _DictionaryStorage, rehash: Bool) {
_sanityCheck(capacity >= original._count)
capacity: Int,
move: Bool
) -> _DictionaryStorage {
let scale = _HashTable.scale(forCapacity: capacity)
let rehash = (scale != original._scale)
let newStorage = _DictionaryStorage<Key, Value>.allocate(
scale: scale,
// Invalidate indices if we're rehashing.
age: rehash ? nil : original._age
)
return (newStorage, rehash)
return allocate(scale: scale, age: nil, seed: nil)
}

@usableFromInline
@_effects(releasenone)
static internal func allocate(capacity: Int) -> _DictionaryStorage {
let scale = _HashTable.scale(forCapacity: capacity)
return allocate(scale: scale, age: nil)
return allocate(scale: scale, age: nil, seed: nil)
}

#if _runtime(_ObjC)
@@ -390,13 +395,14 @@ extension _DictionaryStorage {
) -> _DictionaryStorage {
let scale = _HashTable.scale(forCapacity: capacity)
let age = _HashTable.age(for: cocoa.object)
return allocate(scale: scale, age: age)
return allocate(scale: scale, age: age, seed: nil)
}
#endif

static internal func allocate(
scale: Int8,
age: Int32?
age: Int32?,
seed: Int?
) -> _DictionaryStorage {
// The entry count must be representable by an Int value; hence the scale's
// peculiar upper bound.
@@ -432,19 +438,10 @@ extension _DictionaryStorage {
truncatingIfNeeded: ObjectIdentifier(storage).hashValue)
}

storage._seed = seed ?? _HashTable.hashSeed(for: storage, scale: scale)
storage._rawKeys = UnsafeMutableRawPointer(keysAddr)
storage._rawValues = UnsafeMutableRawPointer(valuesAddr)

// We use a slightly different hash seed whenever we change the size of the
// hash table, so that we avoid certain copy operations becoming quadratic,
// without breaking value semantics. (For background details, see
// https://bugs.swift.org/browse/SR-3268)

// FIXME: Use true per-instance seeding instead. Per-capacity seeding still
// leaves hash values the same in same-sized tables, which may affect
// operations on two tables at once. (E.g., union.)
storage._seed = Int(scale)

// Initialize hash table metadata.
storage._hashTable.clear()
return storage
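Taken together, the DictionaryStorage changes split the old reallocate entry point into three: copy (same scale, age, and seed), resize (fresh seed, invalidated indices), and allocate. A minimal sketch of that seeding policy, using hypothetical stand-in types rather than the real storage classes:

```swift
// Hypothetical stand-in header; not the actual stdlib storage layout.
struct TableHeader {
  var scale: Int8   // the table has 1 << scale buckets
  var age: Int32?   // used to validate outstanding indices
  var seed: Int     // per-table hash seed
}

// Copy-on-write copy: identical scale, age, and seed, so bucket positions
// and outstanding indices stay meaningful in the new allocation.
func copy(original: TableHeader) -> TableHeader {
  return TableHeader(scale: original.scale, age: original.age, seed: original.seed)
}

// Resize (or rebuild): the layout changes, so indices are invalidated and
// the new table gets its own seed, decoupling it from the original.
func resize(original: TableHeader, toScale scale: Int8, freshSeed: Int) -> TableHeader {
  return TableHeader(scale: scale, age: nil, seed: freshSeed)
}
```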
18 changes: 6 additions & 12 deletions stdlib/public/core/DictionaryVariant.swift
@@ -328,28 +328,22 @@ extension Dictionary._Variant {
}
}

/// Ensure uniquely held native storage, while preserving the given index.
/// (If the variant had bridged storage, then the returned index will be the
/// corresponding native representation. Otherwise it's kept the same.)
@inlinable
@inline(__always)
internal mutating func ensureUniqueNative() -> _NativeDictionary<Key, Value> {
switch self {
case .native:
let isUnique = isUniquelyReferenced()
if !isUnique {
let rehashed = asNative.copy(capacity: asNative.capacity)
_sanityCheck(!rehashed)
}
return asNative
#if _runtime(_ObjC)
case .cocoa(let cocoa):
if case .cocoa(let cocoa) = self {
cocoaPath()
let native = _NativeDictionary<Key, Value>(cocoa)
self = .native(native)
return native
}
#endif
let isUnique = isUniquelyReferenced()
if !isUnique {
asNative.copy()
}
return asNative
}

@inlinable
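The restructured ensureUniqueNative handles the bridged Cocoa case first and then performs a single uniqueness check, calling the new rehash-free copy(). The underlying copy-on-write pattern, reduced to a generic sketch with a made-up Buffer type:

```swift
// Generic copy-on-write sketch (illustrative Buffer type, not stdlib code).
final class Buffer {
  var elements: [Int]
  init(_ elements: [Int]) { self.elements = elements }
}

struct COWContainer {
  private var buffer = Buffer([])

  private mutating func ensureUnique() {
    if !isKnownUniquelyReferenced(&buffer) {
      // Shared storage: take a private copy before mutating. For the hash
      // tables above, this copy keeps the same seed, so no rehashing occurs.
      buffer = Buffer(buffer.elements)
    }
  }

  mutating func append(_ x: Int) {
    ensureUnique()
    buffer.elements.append(x)
  }
}

var a = COWContainer()
a.append(1)
var b = a        // shares storage until the next mutation
b.append(2)      // triggers the copy; `a` is unaffected
```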
31 changes: 31 additions & 0 deletions stdlib/public/core/HashTable.swift
@@ -89,6 +89,37 @@ extension _HashTable {
let hash = ObjectIdentifier(cocoa).hashValue
return Int32(truncatingIfNeeded: hash)
}

internal static func hashSeed(
for object: AnyObject,
scale: Int8
) -> Int {
#if false // FIXME: Enable per-instance seeding
// We generate a new hash seed whenever a new hash table is allocated and
// whenever an existing table is resized, so that we avoid certain copy
// operations becoming quadratic. (For background details, see
// https://bugs.swift.org/browse/SR-3268)
//
// Note that we do reuse the existing seed when making copy-on-write copies
// so that we avoid breaking value semantics.
if Hasher._isDeterministic {
// When we're using deterministic hashing, the scale value as the seed is
// still allowed, and it covers most cases. (Unfortunately some operations
// that merge two similar-sized hash tables will still be quadratic.)
return Int(scale)
}
// Use the object address as the hash seed. This is cheaper than
// SystemRandomNumberGenerator, while it has the same practical effect.
// Addresses aren't entirely random, but that's not the goal here -- the
// 128-bit execution seed takes care of randomization. We only need to
// guarantee that no two tables with the same seed can coexist at the same
// time (apart from copy-on-write derivatives of the same table).
return unsafeBitCast(object, to: Int.self)
#else
// Use per-capacity seeding for now.
return Int(scale)
#endif
}
}

extension _HashTable {
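The new hashSeed(for:scale:) helper is where per-instance seeding will eventually live: outside deterministic-hashing mode, the storage object's address doubles as a seed that is unique among coexisting tables. A runnable approximation of the same idea (using ObjectIdentifier instead of unsafeBitCast, so it is safe to try outside the stdlib):

```swift
// Illustrative approximation of per-instance seeding; not the stdlib code.
func perInstanceSeed(for object: AnyObject, scale: Int8, deterministic: Bool) -> Int {
  if deterministic {
    // Reproducible runs (e.g. SWIFT_DETERMINISTIC_HASHING=1) want stable
    // seeds; the table's scale is a stable, if weaker, choice.
    return Int(scale)
  }
  // An object's identity is unique among live objects, which is all the seed
  // must guarantee: no two coexisting tables share a seed, except deliberate
  // copy-on-write copies of the same table.
  return ObjectIdentifier(object).hashValue
}

final class DummyStorage {}
let storage = DummyStorage()
let seed = perInstanceSeed(for: storage, scale: 5, deterministic: false)
```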
72 changes: 46 additions & 26 deletions stdlib/public/core/NativeDictionary.swift
@@ -182,8 +182,11 @@ extension _NativeDictionary { // ensureUnique
@inlinable
internal mutating func resize(capacity: Int) {
let capacity = Swift.max(capacity, self.capacity)
let result = _NativeDictionary(
_DictionaryStorage<Key, Value>.allocate(capacity: capacity))
let newStorage = _DictionaryStorage<Key, Value>.resize(
original: _storage,
capacity: capacity,
move: true)
let result = _NativeDictionary(newStorage)
if count > 0 {
for bucket in hashTable {
let key = (_keys + bucket.offset).move()
@@ -199,31 +202,40 @@ }
}

@inlinable
internal mutating func copy(capacity: Int) -> Bool {
internal mutating func copyAndResize(capacity: Int) {
let capacity = Swift.max(capacity, self.capacity)
let (newStorage, rehash) = _DictionaryStorage<Key, Value>.reallocate(
let newStorage = _DictionaryStorage<Key, Value>.resize(
original: _storage,
capacity: capacity)
capacity: capacity,
move: false)
let result = _NativeDictionary(newStorage)
if count > 0 {
if rehash {
for bucket in hashTable {
result._unsafeInsertNew(
key: self.uncheckedKey(at: bucket),
value: self.uncheckedValue(at: bucket))
}
} else {
result.hashTable.copyContents(of: hashTable)
result._storage._count = self.count
for bucket in hashTable {
let key = uncheckedKey(at: bucket)
let value = uncheckedValue(at: bucket)
result.uncheckedInitialize(at: bucket, toKey: key, value: value)
}
for bucket in hashTable {
result._unsafeInsertNew(
key: self.uncheckedKey(at: bucket),
value: self.uncheckedValue(at: bucket))
}
}
_storage = result._storage
}

@inlinable
internal mutating func copy() {
let newStorage = _DictionaryStorage<Key, Value>.copy(original: _storage)
_sanityCheck(newStorage._scale == _storage._scale)
_sanityCheck(newStorage._age == _storage._age)
_sanityCheck(newStorage._seed == _storage._seed)
let result = _NativeDictionary(newStorage)
if count > 0 {
result.hashTable.copyContents(of: hashTable)
result._storage._count = self.count
for bucket in hashTable {
let key = uncheckedKey(at: bucket)
let value = uncheckedValue(at: bucket)
result.uncheckedInitialize(at: bucket, toKey: key, value: value)
}
}
_storage = result._storage
return rehash
}

/// Ensure storage of self is uniquely held and can hold at least `capacity`
@@ -234,10 +246,15 @@ extension _NativeDictionary { // ensureUnique
if _fastPath(capacity <= self.capacity && isUnique) {
return false
}
guard isUnique else {
return copy(capacity: capacity)
if isUnique {
resize(capacity: capacity)
return true
}
if capacity <= self.capacity {
copy()
return false
}
resize(capacity: capacity)
copyAndResize(capacity: capacity)
return true
}

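The rewritten ensureUnique now distinguishes four cases instead of two, and only the paths that change the table's size invalidate indices. The decision table, written out as a small illustrative function (the Bool result mirrors the "did we rehash" value the caller gets):

```swift
// Illustrative decision table; not the stdlib implementation itself.
enum StorageAction { case reuse, resize, copy, copyAndResize }

func ensureUniqueAction(isUnique: Bool, fits: Bool) -> (action: StorageAction, rehashed: Bool) {
  switch (isUnique, fits) {
  case (true, true):   return (.reuse, false)          // fast path: nothing to do
  case (true, false):  return (.resize, true)          // grow in place, moving elements
  case (false, true):  return (.copy, false)           // CoW copy; same scale and seed
  case (false, false): return (.copyAndResize, true)   // copy into a larger table
  }
}

assert(ensureUniqueAction(isUnique: false, fits: true).rehashed == false)
```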
@@ -552,7 +569,8 @@ extension _NativeDictionary { // Deletion
let scale = self._storage._scale
_storage = _DictionaryStorage<Key, Value>.allocate(
scale: scale,
age: nil)
age: nil,
seed: nil)
return
}
for bucket in hashTable {
@@ -570,8 +588,10 @@ extension _NativeDictionary { // High-level operations
internal func mapValues<T>(
_ transform: (Value) throws -> T
) rethrows -> _NativeDictionary<Key, T> {
let result = _NativeDictionary<Key, T>(capacity: capacity)
// Because the keys in the current and new buffer are the same, we can
let resultStorage = _DictionaryStorage<Key, T>.copy(original: _storage)
_sanityCheck(resultStorage._seed == _storage._seed)
let result = _NativeDictionary<Key, T>(resultStorage)
// Because the current and new buffer have the same scale and seed, we can
// initialize to the same locations in the new buffer, skipping hash value
// recalculations.
for bucket in hashTable {
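mapValues can now copy bucket-for-bucket because the result storage is created with _DictionaryStorage<Key, T>.copy, giving it the same scale and seed as the source. The reason that suffices: a key's bucket depends only on its hash, the seed, and the table size. A toy model, where Hasher.combine on the seed stands in for the stdlib's internal seeding:

```swift
// Toy model of bucket selection; the real stdlib seeds Hasher internally.
func bucket<Key: Hashable>(of key: Key, seed: Int, scale: Int8) -> Int {
  var hasher = Hasher()
  hasher.combine(seed)   // stand-in for per-instance seeding
  hasher.combine(key)
  return hasher.finalize() & ((1 << Int(scale)) - 1)
}

let seed = 42
let scale: Int8 = 6
// Same key, same seed, same scale: identical bucket, so a same-shaped copy
// can be filled without recomputing or re-probing anything.
assert(bucket(of: "swift", seed: seed, scale: scale)
    == bucket(of: "swift", seed: seed, scale: scale))
```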
61 changes: 40 additions & 21 deletions stdlib/public/core/NativeSet.swift
@@ -154,7 +154,10 @@ extension _NativeSet { // ensureUnique
@inlinable
internal mutating func resize(capacity: Int) {
let capacity = Swift.max(capacity, self.capacity)
let result = _NativeSet(_SetStorage<Element>.allocate(capacity: capacity))
let result = _NativeSet(_SetStorage<Element>.resize(
original: _storage,
capacity: capacity,
move: true))
if count > 0 {
for bucket in hashTable {
let element = (self._elements + bucket.offset).move()
@@ -169,28 +172,36 @@ }
}

@inlinable
internal mutating func copy(capacity: Int) -> Bool {
internal mutating func copyAndResize(capacity: Int) {
let capacity = Swift.max(capacity, self.capacity)
let (newStorage, rehash) = _SetStorage<Element>.reallocate(
original: _storage,
capacity: capacity)
let result = _NativeSet(_SetStorage<Element>.resize(
original: _storage,
capacity: capacity,
move: false))
if count > 0 {
for bucket in hashTable {
result._unsafeInsertNew(self.uncheckedElement(at: bucket))
}
}
_storage = result._storage
}

@inlinable
internal mutating func copy() {
let newStorage = _SetStorage<Element>.copy(original: _storage)
_sanityCheck(newStorage._scale == _storage._scale)
_sanityCheck(newStorage._age == _storage._age)
_sanityCheck(newStorage._seed == _storage._seed)
let result = _NativeSet(newStorage)
if count > 0 {
if rehash {
for bucket in hashTable {
result._unsafeInsertNew(self.uncheckedElement(at: bucket))
}
} else {
result.hashTable.copyContents(of: hashTable)
result._storage._count = self.count
for bucket in hashTable {
let element = uncheckedElement(at: bucket)
result.uncheckedInitialize(at: bucket, to: element)
}
result.hashTable.copyContents(of: hashTable)
result._storage._count = self.count
for bucket in hashTable {
let element = uncheckedElement(at: bucket)
result.uncheckedInitialize(at: bucket, to: element)
}
}
_storage = result._storage
return rehash
}

/// Ensure storage of self is uniquely held and can hold at least `capacity`
@@ -201,10 +212,15 @@ extension _NativeSet { // ensureUnique
if _fastPath(capacity <= self.capacity && isUnique) {
return false
}
guard isUnique else {
return copy(capacity: capacity)
if isUnique {
resize(capacity: capacity)
return true
}
if capacity <= self.capacity {
copy()
return false
}
resize(capacity: capacity)
copyAndResize(capacity: capacity)
return true
}

@@ -451,7 +467,10 @@ extension _NativeSet { // Deletion
internal mutating func removeAll(isUnique: Bool) {
guard isUnique else {
let scale = self._storage._scale
_storage = _SetStorage<Element>.allocate(scale: scale, age: nil)
_storage = _SetStorage<Element>.allocate(
scale: scale,
age: nil,
seed: nil)
return
}
for bucket in hashTable {
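In both copy() implementations, copyContents(of:) followed by per-bucket initialization is what replaces a full rehash: because source and destination share the same scale and seed, the occupied-bucket map can be reproduced verbatim. A hedged sketch of what that bitmap copy amounts to (the word-array layout here is an assumption, not the real _HashTable representation):

```swift
// Sketch only: assumes the occupied-bucket map is an array of UInt words,
// which is an assumption about the layout, not a statement of the real one.
func copyBucketMap(
  from source: UnsafePointer<UInt>,
  to destination: UnsafeMutablePointer<UInt>,
  wordCount: Int
) {
  // Word-by-word copy is sound only when both tables share the same scale
  // (same number of buckets) and the same seed (same meaning per bucket).
  for i in 0..<wordCount {
    destination[i] = source[i]
  }
}
```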