Skip to content

Commit ee4ad7a

Browse files
authored
Merge pull request #19589 from lorentey/prepare-for-per-instance-seeds
[stdlib] Set, Dictionary: Prepare for per-instance hash seeds
2 parents 6f51edc + b92c301 commit ee4ad7a

File tree

6 files changed

+163
-107
lines changed

6 files changed

+163
-107
lines changed

stdlib/public/core/DictionaryStorage.swift

Lines changed: 21 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -359,26 +359,31 @@ final internal class _DictionaryStorage<Key: Hashable, Value>
359359
extension _DictionaryStorage {
360360
@usableFromInline
361361
@_effects(releasenone)
362-
internal static func reallocate(
362+
/// Allocates a new storage instance with the same scale, age and seed as
/// `original`.
///
/// Only these three parameters are forwarded to `allocate`; this function
/// itself does not copy any keys, values or hash table contents.
///
/// - Parameter original: The storage whose `_scale`, `_age` and `_seed` are
///   reused for the new allocation.
/// - Returns: The newly allocated storage.
internal static func copy(
363+
original: _RawDictionaryStorage
364+
) -> _DictionaryStorage {
365+
return allocate(
366+
scale: original._scale,
367+
age: original._age,
368+
seed: original._seed)
369+
}
370+
371+
@usableFromInline
372+
@_effects(releasenone)
373+
static internal func resize(
363374
original: _RawDictionaryStorage,
364-
capacity: Int
365-
) -> (storage: _DictionaryStorage, rehash: Bool) {
366-
_sanityCheck(capacity >= original._count)
375+
capacity: Int,
376+
move: Bool
377+
) -> _DictionaryStorage {
367378
let scale = _HashTable.scale(forCapacity: capacity)
368-
let rehash = (scale != original._scale)
369-
let newStorage = _DictionaryStorage<Key, Value>.allocate(
370-
scale: scale,
371-
// Invalidate indices if we're rehashing.
372-
age: rehash ? nil : original._age
373-
)
374-
return (newStorage, rehash)
379+
return allocate(scale: scale, age: nil, seed: nil)
375380
}
376381

377382
@usableFromInline
378383
@_effects(releasenone)
379384
static internal func allocate(capacity: Int) -> _DictionaryStorage {
380385
let scale = _HashTable.scale(forCapacity: capacity)
381-
return allocate(scale: scale, age: nil)
386+
return allocate(scale: scale, age: nil, seed: nil)
382387
}
383388

384389
#if _runtime(_ObjC)
@@ -390,13 +395,14 @@ extension _DictionaryStorage {
390395
) -> _DictionaryStorage {
391396
let scale = _HashTable.scale(forCapacity: capacity)
392397
let age = _HashTable.age(for: cocoa.object)
393-
return allocate(scale: scale, age: age)
398+
return allocate(scale: scale, age: age, seed: nil)
394399
}
395400
#endif
396401

397402
static internal func allocate(
398403
scale: Int8,
399-
age: Int32?
404+
age: Int32?,
405+
seed: Int?
400406
) -> _DictionaryStorage {
401407
// The entry count must be representable by an Int value; hence the scale's
402408
// peculiar upper bound.
@@ -432,19 +438,10 @@ extension _DictionaryStorage {
432438
truncatingIfNeeded: ObjectIdentifier(storage).hashValue)
433439
}
434440

441+
storage._seed = seed ?? _HashTable.hashSeed(for: storage, scale: scale)
435442
storage._rawKeys = UnsafeMutableRawPointer(keysAddr)
436443
storage._rawValues = UnsafeMutableRawPointer(valuesAddr)
437444

438-
// We use a slightly different hash seed whenever we change the size of the
439-
// hash table, so that we avoid certain copy operations becoming quadratic,
440-
// without breaking value semantics. (For background details, see
441-
// https://bugs.swift.org/browse/SR-3268)
442-
443-
// FIXME: Use true per-instance seeding instead. Per-capacity seeding still
444-
// leaves hash values the same in same-sized tables, which may affect
445-
// operations on two tables at once. (E.g., union.)
446-
storage._seed = Int(scale)
447-
448445
// Initialize hash table metadata.
449446
storage._hashTable.clear()
450447
return storage

stdlib/public/core/DictionaryVariant.swift

Lines changed: 6 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -327,28 +327,22 @@ extension Dictionary._Variant {
327327
}
328328
}
329329

330-
/// Ensure uniquely held native storage, while preserving the given index.
331-
/// (If the variant had bridged storage, then the returned index will be the
332-
/// corresponding native representation. Otherwise it's kept the same.)
333330
@inlinable
334331
@inline(__always)
335332
internal mutating func ensureUniqueNative() -> _NativeDictionary<Key, Value> {
336-
switch self {
337-
case .native:
338-
let isUnique = isUniquelyReferenced()
339-
if !isUnique {
340-
let rehashed = asNative.copy(capacity: asNative.capacity)
341-
_sanityCheck(!rehashed)
342-
}
343-
return asNative
344333
#if _runtime(_ObjC)
345-
case .cocoa(let cocoa):
334+
if case .cocoa(let cocoa) = self {
346335
cocoaPath()
347336
let native = _NativeDictionary<Key, Value>(cocoa)
348337
self = .native(native)
349338
return native
339+
}
350340
#endif
341+
let isUnique = isUniquelyReferenced()
342+
if !isUnique {
343+
asNative.copy()
351344
}
345+
return asNative
352346
}
353347

354348
@inlinable

stdlib/public/core/HashTable.swift

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,37 @@ extension _HashTable {
8989
let hash = ObjectIdentifier(cocoa).hashValue
9090
return Int32(truncatingIfNeeded: hash)
9191
}
92+
93+
/// Returns the hash seed to use for a hash table of the given `scale` that
/// belongs to `object`.
///
/// The per-instance seeding branch below is compiled out (`#if false`), so
/// as written this always returns `Int(scale)` and `object` is not read.
///
/// - Parameters:
///   - object: The object the seed is generated for. Only the disabled
///     per-instance branch uses it (it would return the object's address).
///   - scale: The scale of the hash table; currently the only input that
///     affects the result.
/// - Returns: The seed value.
internal static func hashSeed(
94+
for object: AnyObject,
95+
scale: Int8
96+
) -> Int {
97+
#if false // FIXME: Enable per-instance seeding
98+
// We generate a new hash seed whenever a new hash table is allocated and
99+
// whenever an existing table is resized, so that we avoid certain copy
100+
// operations becoming quadratic. (For background details, see
101+
// https://bugs.swift.org/browse/SR-3268)
102+
//
103+
// Note that we do reuse the existing seed when making copy-on-write copies
104+
// so that we avoid breaking value semantics.
105+
if Hasher._isDeterministic {
106+
// With deterministic hashing, using the scale value as the seed is
107+
// still allowed, and it covers most cases. (Unfortunately some operations
108+
// that merge two similar-sized hash tables will still be quadratic.)
109+
return Int(scale)
110+
}
111+
// Use the object address as the hash seed. This is cheaper than
112+
// SystemRandomNumberGenerator, while it has the same practical effect.
113+
// Addresses aren't entirely random, but that's not the goal here -- the
114+
// 128-bit execution seed takes care of randomization. We only need to
115+
// guarantee that no two tables with the same seed can coexist at the same
116+
// time (apart from copy-on-write derivatives of the same table).
117+
return unsafeBitCast(object, to: Int.self)
118+
#else
119+
// Use per-capacity seeding for now.
120+
return Int(scale)
121+
#endif
122+
}
92123
}
93124

94125
extension _HashTable {

stdlib/public/core/NativeDictionary.swift

Lines changed: 46 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -182,8 +182,11 @@ extension _NativeDictionary { // ensureUnique
182182
@inlinable
183183
internal mutating func resize(capacity: Int) {
184184
let capacity = Swift.max(capacity, self.capacity)
185-
let result = _NativeDictionary(
186-
_DictionaryStorage<Key, Value>.allocate(capacity: capacity))
185+
let newStorage = _DictionaryStorage<Key, Value>.resize(
186+
original: _storage,
187+
capacity: capacity,
188+
move: true)
189+
let result = _NativeDictionary(newStorage)
187190
if count > 0 {
188191
for bucket in hashTable {
189192
let key = (_keys + bucket.offset).move()
@@ -199,31 +202,40 @@ extension _NativeDictionary { // ensureUnique
199202
}
200203

201204
@inlinable
202-
internal mutating func copy(capacity: Int) -> Bool {
205+
internal mutating func copyAndResize(capacity: Int) {
203206
let capacity = Swift.max(capacity, self.capacity)
204-
let (newStorage, rehash) = _DictionaryStorage<Key, Value>.reallocate(
207+
let newStorage = _DictionaryStorage<Key, Value>.resize(
205208
original: _storage,
206-
capacity: capacity)
209+
capacity: capacity,
210+
move: false)
207211
let result = _NativeDictionary(newStorage)
208212
if count > 0 {
209-
if rehash {
210-
for bucket in hashTable {
211-
result._unsafeInsertNew(
212-
key: self.uncheckedKey(at: bucket),
213-
value: self.uncheckedValue(at: bucket))
214-
}
215-
} else {
216-
result.hashTable.copyContents(of: hashTable)
217-
result._storage._count = self.count
218-
for bucket in hashTable {
219-
let key = uncheckedKey(at: bucket)
220-
let value = uncheckedValue(at: bucket)
221-
result.uncheckedInitialize(at: bucket, toKey: key, value: value)
222-
}
213+
for bucket in hashTable {
214+
result._unsafeInsertNew(
215+
key: self.uncheckedKey(at: bucket),
216+
value: self.uncheckedValue(at: bucket))
217+
}
218+
}
219+
_storage = result._storage
220+
}
221+
222+
@inlinable
223+
internal mutating func copy() {
224+
let newStorage = _DictionaryStorage<Key, Value>.copy(original: _storage)
225+
_sanityCheck(newStorage._scale == _storage._scale)
226+
_sanityCheck(newStorage._age == _storage._age)
227+
_sanityCheck(newStorage._seed == _storage._seed)
228+
let result = _NativeDictionary(newStorage)
229+
if count > 0 {
230+
result.hashTable.copyContents(of: hashTable)
231+
result._storage._count = self.count
232+
for bucket in hashTable {
233+
let key = uncheckedKey(at: bucket)
234+
let value = uncheckedValue(at: bucket)
235+
result.uncheckedInitialize(at: bucket, toKey: key, value: value)
223236
}
224237
}
225238
_storage = result._storage
226-
return rehash
227239
}
228240

229241
/// Ensure storage of self is uniquely held and can hold at least `capacity`
@@ -234,10 +246,15 @@ extension _NativeDictionary { // ensureUnique
234246
if _fastPath(capacity <= self.capacity && isUnique) {
235247
return false
236248
}
237-
guard isUnique else {
238-
return copy(capacity: capacity)
249+
if isUnique {
250+
resize(capacity: capacity)
251+
return true
252+
}
253+
if capacity <= self.capacity {
254+
copy()
255+
return false
239256
}
240-
resize(capacity: capacity)
257+
copyAndResize(capacity: capacity)
241258
return true
242259
}
243260

@@ -551,7 +568,8 @@ extension _NativeDictionary { // Deletion
551568
let scale = self._storage._scale
552569
_storage = _DictionaryStorage<Key, Value>.allocate(
553570
scale: scale,
554-
age: nil)
571+
age: nil,
572+
seed: nil)
555573
return
556574
}
557575
for bucket in hashTable {
@@ -569,8 +587,10 @@ extension _NativeDictionary { // High-level operations
569587
internal func mapValues<T>(
570588
_ transform: (Value) throws -> T
571589
) rethrows -> _NativeDictionary<Key, T> {
572-
let result = _NativeDictionary<Key, T>(capacity: capacity)
573-
// Because the keys in the current and new buffer are the same, we can
590+
let resultStorage = _DictionaryStorage<Key, T>.copy(original: _storage)
591+
_sanityCheck(resultStorage._seed == _storage._seed)
592+
let result = _NativeDictionary<Key, T>(resultStorage)
593+
// Because the current and new buffer have the same scale and seed, we can
574594
// initialize to the same locations in the new buffer, skipping hash value
575595
// recalculations.
576596
for bucket in hashTable {

stdlib/public/core/NativeSet.swift

Lines changed: 40 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -154,7 +154,10 @@ extension _NativeSet { // ensureUnique
154154
@inlinable
155155
internal mutating func resize(capacity: Int) {
156156
let capacity = Swift.max(capacity, self.capacity)
157-
let result = _NativeSet(_SetStorage<Element>.allocate(capacity: capacity))
157+
let result = _NativeSet(_SetStorage<Element>.resize(
158+
original: _storage,
159+
capacity: capacity,
160+
move: true))
158161
if count > 0 {
159162
for bucket in hashTable {
160163
let element = (self._elements + bucket.offset).move()
@@ -169,28 +172,36 @@ extension _NativeSet { // ensureUnique
169172
}
170173

171174
@inlinable
172-
internal mutating func copy(capacity: Int) -> Bool {
175+
internal mutating func copyAndResize(capacity: Int) {
173176
let capacity = Swift.max(capacity, self.capacity)
174-
let (newStorage, rehash) = _SetStorage<Element>.reallocate(
175-
original: _storage,
176-
capacity: capacity)
177+
let result = _NativeSet(_SetStorage<Element>.resize(
178+
original: _storage,
179+
capacity: capacity,
180+
move: false))
181+
if count > 0 {
182+
for bucket in hashTable {
183+
result._unsafeInsertNew(self.uncheckedElement(at: bucket))
184+
}
185+
}
186+
_storage = result._storage
187+
}
188+
189+
@inlinable
190+
internal mutating func copy() {
191+
let newStorage = _SetStorage<Element>.copy(original: _storage)
192+
_sanityCheck(newStorage._scale == _storage._scale)
193+
_sanityCheck(newStorage._age == _storage._age)
194+
_sanityCheck(newStorage._seed == _storage._seed)
177195
let result = _NativeSet(newStorage)
178196
if count > 0 {
179-
if rehash {
180-
for bucket in hashTable {
181-
result._unsafeInsertNew(self.uncheckedElement(at: bucket))
182-
}
183-
} else {
184-
result.hashTable.copyContents(of: hashTable)
185-
result._storage._count = self.count
186-
for bucket in hashTable {
187-
let element = uncheckedElement(at: bucket)
188-
result.uncheckedInitialize(at: bucket, to: element)
189-
}
197+
result.hashTable.copyContents(of: hashTable)
198+
result._storage._count = self.count
199+
for bucket in hashTable {
200+
let element = uncheckedElement(at: bucket)
201+
result.uncheckedInitialize(at: bucket, to: element)
190202
}
191203
}
192204
_storage = result._storage
193-
return rehash
194205
}
195206

196207
/// Ensure storage of self is uniquely held and can hold at least `capacity`
@@ -201,10 +212,15 @@ extension _NativeSet { // ensureUnique
201212
if _fastPath(capacity <= self.capacity && isUnique) {
202213
return false
203214
}
204-
guard isUnique else {
205-
return copy(capacity: capacity)
215+
if isUnique {
216+
resize(capacity: capacity)
217+
return true
218+
}
219+
if capacity <= self.capacity {
220+
copy()
221+
return false
206222
}
207-
resize(capacity: capacity)
223+
copyAndResize(capacity: capacity)
208224
return true
209225
}
210226

@@ -450,7 +466,10 @@ extension _NativeSet { // Deletion
450466
internal mutating func removeAll(isUnique: Bool) {
451467
guard isUnique else {
452468
let scale = self._storage._scale
453-
_storage = _SetStorage<Element>.allocate(scale: scale, age: nil)
469+
_storage = _SetStorage<Element>.allocate(
470+
scale: scale,
471+
age: nil,
472+
seed: nil)
454473
return
455474
}
456475
for bucket in hashTable {

0 commit comments

Comments
 (0)