Skip to content

Commit 91c5a31

Browse files
committed
[stdlib] Set, Dictionary: Prepare for per-instance hash seeds
Implement the per-instance seeding infrastructure without actually enabling per-instance seeding.
1 parent e5d711c commit 91c5a31

File tree

5 files changed

+59
-38
lines changed

5 files changed

+59
-38
lines changed

stdlib/public/core/DictionaryStorage.swift

Lines changed: 11 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -366,19 +366,21 @@ extension _DictionaryStorage {
366366
_sanityCheck(capacity >= original._count)
367367
let scale = _HashTable.scale(forCapacity: capacity)
368368
let rehash = (scale != original._scale)
369-
let newStorage = _DictionaryStorage<Key, Value>.allocate(
370-
scale: scale,
371-
// Invalidate indices if we're rehashing.
372-
age: rehash ? nil : original._age
373-
)
369+
if rehash {
370+
return (.allocate(scale: scale, age: nil, seed: nil), rehash)
371+
}
372+
return (
373+
.allocate(scale: scale, age: original._age, seed: original._seed),
374+
rehash)
375+
}
374376
return (newStorage, rehash)
375377
}
376378

377379
/// Allocates fresh dictionary storage sized for at least `capacity` entries.
///
/// The capacity is first converted to a hash table scale with
/// `_HashTable.scale(forCapacity:)`; the actual allocation is delegated to
/// `allocate(scale:age:seed:)`.
///
/// - Parameter capacity: The number of entries the new storage must hold.
/// - Returns: The newly allocated storage instance.
@usableFromInline
@_effects(releasenone)
static internal func allocate(capacity: Int) -> _DictionaryStorage {
  let scale = _HashTable.scale(forCapacity: capacity)
  // No age or seed is inherited here. A nil seed makes the scale-based
  // allocator pick one via `_HashTable.hashSeed(for:scale:)`.
  // NOTE(review): a nil age presumably makes the allocator derive a fresh
  // age for the new storage -- confirm against `allocate(scale:age:seed:)`.
  return allocate(scale: scale, age: nil, seed: nil)
}
383385

384386
#if _runtime(_ObjC)
@@ -396,7 +398,8 @@ extension _DictionaryStorage {
396398

397399
static internal func allocate(
398400
scale: Int8,
399-
age: Int32?
401+
age: Int32?,
402+
seed: Int?
400403
) -> _DictionaryStorage {
401404
// The entry count must be representable by an Int value; hence the scale's
402405
// peculiar upper bound.
@@ -432,19 +435,10 @@ extension _DictionaryStorage {
432435
truncatingIfNeeded: ObjectIdentifier(storage).hashValue)
433436
}
434437

438+
storage._seed = seed ?? _HashTable.hashSeed(for: storage, scale: scale)
435439
storage._rawKeys = UnsafeMutableRawPointer(keysAddr)
436440
storage._rawValues = UnsafeMutableRawPointer(valuesAddr)
437441

438-
// We use a slightly different hash seed whenever we change the size of the
439-
// hash table, so that we avoid certain copy operations becoming quadratic,
440-
// without breaking value semantics. (For background details, see
441-
// https://bugs.swift.org/browse/SR-3268)
442-
443-
// FIXME: Use true per-instance seeding instead. Per-capacity seeding still
444-
// leaves hash values the same in same-sized tables, which may affect
445-
// operations on two tables at once. (E.g., union.)
446-
storage._seed = Int(scale)
447-
448442
// Initialize hash table metadata.
449443
storage._hashTable.clear()
450444
return storage

stdlib/public/core/HashTable.swift

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,37 @@ extension _HashTable {
8989
let hash = ObjectIdentifier(cocoa).hashValue
9090
return Int32(truncatingIfNeeded: hash)
9191
}
92+
93+
/// Returns the hash seed to store in a newly allocated hash table.
///
/// Per-instance seeding is implemented below but compiled out with
/// `#if false`, so this function currently always returns `scale`
/// (per-capacity seeding).
///
/// - Parameters:
///   - object: The storage object that is being seeded. It is only read by
///     the disabled per-instance branch, which uses its address as the seed.
///   - scale: The scale of the hash table being allocated.
/// - Returns: The seed value; with per-instance seeding disabled this is
///   `scale`.
internal static func hashSeed(
  for object: AnyObject,
  scale: Int
) -> Int {
#if false // FIXME: Enable per-instance seeding
  // We generate a new hash seed whenever a new hash table is allocated and
  // whenever an existing table is resized, so that we avoid certain copy
  // operations becoming quadratic. (For background details, see
  // https://bugs.swift.org/browse/SR-3268)
  //
  // Note that we do reuse the existing seed when making copy-on-write copies
  // so that we avoid breaking value semantics.
  if Hasher._isDeterministic {
    // When we're using deterministic hashing, using the scale value as the
    // seed is still allowed, and it covers most cases. (Unfortunately, some
    // operations that merge two similar-sized hash tables will still be
    // quadratic.)
    return scale
  }
  // Use the object address as the hash seed. This is cheaper than
  // SystemRandomNumberGenerator, while it has the same practical effect.
  // Addresses aren't entirely random, but that's not the goal here -- the
  // 128-bit execution seed takes care of randomization. We only need to
  // guarantee that no two tables with the same seed can coexist at the same
  // time (apart from copy-on-write derivatives of the same table).
  return unsafeBitCast(object, to: Int.self)
#else
  // Use per-capacity seeding for now.
  return scale
#endif
}

/// Returns the hash seed for a table whose scale is held as an `Int8`.
///
/// The storage allocators keep the scale as an `Int8` and pass it here
/// directly. Swift does not widen integers implicitly, so this overload
/// converts the scale to `Int` and forwards to the `Int`-based variant.
///
/// - Parameters:
///   - object: The storage object that is being seeded.
///   - scale: The scale of the hash table being allocated.
/// - Returns: The same seed the `Int`-based variant returns for this scale.
internal static func hashSeed(
  for object: AnyObject,
  scale: Int8
) -> Int {
  return hashSeed(for: object, scale: Int(scale))
}
92123
}
93124

94125
extension _HashTable {

stdlib/public/core/NativeDictionary.swift

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -552,7 +552,8 @@ extension _NativeDictionary { // Deletion
552552
let scale = self._storage._scale
553553
_storage = _DictionaryStorage<Key, Value>.allocate(
554554
scale: scale,
555-
age: nil)
555+
age: nil,
556+
seed: nil)
556557
return
557558
}
558559
for bucket in hashTable {

stdlib/public/core/NativeSet.swift

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -451,7 +451,10 @@ extension _NativeSet { // Deletion
451451
internal mutating func removeAll(isUnique: Bool) {
452452
guard isUnique else {
453453
let scale = self._storage._scale
454-
_storage = _SetStorage<Element>.allocate(scale: scale, age: nil)
454+
_storage = _SetStorage<Element>.allocate(
455+
scale: scale,
456+
age: nil,
457+
seed: nil)
455458
return
456459
}
457460
for bucket in hashTable {

stdlib/public/core/SetStorage.swift

Lines changed: 11 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -292,19 +292,19 @@ extension _SetStorage {
292292
_sanityCheck(capacity >= original._count)
293293
let scale = _HashTable.scale(forCapacity: capacity)
294294
let rehash = (scale != original._scale)
295-
let newStorage = _SetStorage<Element>.allocate(
296-
scale: scale,
297-
// Invalidate indices if we're rehashing.
298-
age: rehash ? nil : original._age
299-
)
300-
return (newStorage, rehash)
295+
if rehash {
296+
return .allocate(scale: scale, age: nil, seed: nil)
297+
}
298+
return (
299+
.allocate(scale: scale, age: original._age, seed: original._seed),
300+
rehash)
301301
}
302302

303303
/// Allocates fresh set storage sized for at least `capacity` elements.
///
/// The capacity is first converted to a hash table scale with
/// `_HashTable.scale(forCapacity:)`; the actual allocation is delegated to
/// `allocate(scale:age:seed:)`.
///
/// - Parameter capacity: The number of elements the new storage must hold.
/// - Returns: The newly allocated storage instance.
@usableFromInline
@_effects(releasenone)
static internal func allocate(capacity: Int) -> _SetStorage {
  let scale = _HashTable.scale(forCapacity: capacity)
  // No age or seed is inherited here. A nil seed makes the scale-based
  // allocator pick one via `_HashTable.hashSeed(for:scale:)`.
  // NOTE(review): a nil age presumably makes the allocator derive a fresh
  // age for the new storage -- confirm against `allocate(scale:age:seed:)`.
  return allocate(scale: scale, age: nil, seed: nil)
}
309309

310310
#if _runtime(_ObjC)
@@ -316,13 +316,14 @@ extension _SetStorage {
316316
) -> _SetStorage {
317317
let scale = _HashTable.scale(forCapacity: capacity)
318318
let age = _HashTable.age(for: cocoa.object)
319-
return allocate(scale: scale, age: age)
319+
return allocate(scale: scale, age: age, seed: nil)
320320
}
321321
#endif
322322

323323
static internal func allocate(
324324
scale: Int8,
325-
age: Int32?
325+
age: Int32?,
326+
seed: Int?
326327
) -> _SetStorage {
327328
// The entry count must be representable by an Int value; hence the scale's
328329
// peculiar upper bound.
@@ -354,18 +355,9 @@ extension _SetStorage {
354355
truncatingIfNeeded: ObjectIdentifier(storage).hashValue)
355356
}
356357

358+
storage._seed = seed ?? _HashTable.hashSeed(for: storage, scale: scale)
357359
storage._rawElements = UnsafeMutableRawPointer(elementsAddr)
358360

359-
// We use a slightly different hash seed whenever we change the size of the
360-
// hash table, so that we avoid certain copy operations becoming quadratic,
361-
// without breaking value semantics. (For background details, see
362-
// https://bugs.swift.org/browse/SR-3268)
363-
364-
// FIXME: Use true per-instance seeding instead. Per-capacity seeding still
365-
// leaves hash values the same in same-sized tables, which may affect
366-
// operations on two tables at once. (E.g., union.)
367-
storage._seed = Int(scale)
368-
369361
// Initialize hash table metadata.
370362
storage._hashTable.clear()
371363
return storage

0 commit comments

Comments
 (0)