Skip to content

Commit b1a3ece

Browse files
committed
[temp] Per-instance hash seeds
1 parent 1c69181 commit b1a3ece

File tree

5 files changed

+61
-39
lines changed

5 files changed

+61
-39
lines changed

stdlib/public/core/DictionaryStorage.swift

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -366,24 +366,30 @@ extension _DictionaryStorage {
366366
_sanityCheck(capacity >= original._count)
367367
let scale = _HashTable.scale(forCapacity: capacity)
368368
let rehash = (scale != original._scale)
369-
let newStorage = _DictionaryStorage<Key, Value>.allocate(
370-
scale: scale,
371-
// Invalidate indices if we're rehashing.
372-
age: rehash ? nil : original._age
373-
)
369+
let newStorage: _DictionaryStorage<Key, Value>
370+
if rehash {
371+
// Invalidate indices and generate a new seed.
372+
newStorage = .allocate(scale: scale, age: nil, seed: nil)
373+
} else {
374+
newStorage = .allocate(
375+
scale: scale,
376+
age: original._age,
377+
seed: original._seed)
378+
}
374379
return (newStorage, rehash)
375380
}
376381

377382
@usableFromInline
378383
@_effects(releasenone)
379384
static internal func allocate(capacity: Int) -> _DictionaryStorage {
380385
let scale = _HashTable.scale(forCapacity: capacity)
381-
return allocate(scale: scale, age: nil)
386+
return allocate(scale: scale, age: nil, seed: nil)
382387
}
383388

384389
static internal func allocate(
385390
scale: Int8,
386-
age: Int32?
391+
age: Int32?,
392+
seed: Int?
387393
) -> _DictionaryStorage {
388394
// The entry count must be representable by an Int value; hence the scale's
389395
// peculiar upper bound.
@@ -419,19 +425,10 @@ extension _DictionaryStorage {
419425
truncatingIfNeeded: ObjectIdentifier(storage).hashValue)
420426
}
421427

428+
storage._seed = seed ?? _HashTable.hashSeed(for: storage, scale: scale)
422429
storage._rawKeys = UnsafeMutableRawPointer(keysAddr)
423430
storage._rawValues = UnsafeMutableRawPointer(valuesAddr)
424431

425-
// We use a slightly different hash seed whenever we change the size of the
426-
// hash table, so that we avoid certain copy operations becoming quadratic,
427-
// without breaking value semantics. (For background details, see
428-
// https://bugs.swift.org/browse/SR-3268)
429-
430-
// FIXME: Use true per-instance seeding instead. Per-capacity seeding still
431-
// leaves hash values the same in same-sized tables, which may affect
432-
// operations on two tables at once. (E.g., union.)
433-
storage._seed = Int(scale)
434-
435432
// Initialize hash table metadata.
436433
storage._hashTable.clear()
437434
return storage

stdlib/public/core/HashTable.swift

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,32 @@ extension _HashTable {
8383
_sanityCheck(self.capacity(forScale: scale) >= capacity)
8484
return scale
8585
}
86+
87+
internal static func hashSeed(
88+
for object: AnyObject,
89+
scale: Int8
90+
) -> Int {
91+
// We generate a new hash seed whenever a new hash table is allocated and
92+
// whenever an existing table is resized, so that we avoid certain copy
93+
// operations becoming quadratic. (For background details, see
94+
// https://bugs.swift.org/browse/SR-3268)
95+
//
96+
// Note that we do reuse the existing seed when making copy-on-write copies
97+
// so that we avoid breaking value semantics.
98+
if Hasher._isDeterministic {
99+
// When we're using deterministic hashing, using the scale value as the seed is
100+
// still allowed, and it covers most cases. (Unfortunately some operations
101+
// that merge two similar-sized hash tables will still be quadratic.)
102+
return Int(scale)
103+
}
104+
// Use the object address as the hash seed. This is cheaper than
105+
// SystemRandomNumberGenerator, while it has the same practical effect.
106+
// Addresses aren't entirely random, but that's not the goal here -- the
107+
// 128-bit execution seed takes care of randomization. We only need to
108+
// guarantee that no two tables with the same seed can coexist at the same
109+
// time (apart from copy-on-write derivatives of the same table).
110+
return unsafeBitCast(object, to: Int.self)
111+
}
86112
}
87113

88114
extension _HashTable {

stdlib/public/core/NativeDictionary.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -535,10 +535,10 @@ extension _NativeDictionary { // Deletion
535535
@usableFromInline
536536
internal mutating func removeAll(isUnique: Bool) {
537537
guard isUnique else {
538-
let scale = self._storage._scale
539538
_storage = _DictionaryStorage<Key, Value>.allocate(
540-
scale: scale,
541-
age: nil)
539+
scale: _storage._scale,
540+
age: nil,
541+
seed: nil)
542542
return
543543
}
544544
for bucket in hashTable {

stdlib/public/core/NativeSet.swift

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -431,8 +431,10 @@ extension _NativeSet { // Deletion
431431
@usableFromInline
432432
internal mutating func removeAll(isUnique: Bool) {
433433
guard isUnique else {
434-
let scale = self._storage._scale
435-
_storage = _SetStorage<Element>.allocate(scale: scale, age: nil)
434+
_storage = _SetStorage<Element>.allocate(
435+
scale: _storage._scale,
436+
age: nil,
437+
seed: nil)
436438
return
437439
}
438440
for bucket in hashTable {

stdlib/public/core/SetStorage.swift

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -292,24 +292,30 @@ extension _SetStorage {
292292
_sanityCheck(capacity >= original._count)
293293
let scale = _HashTable.scale(forCapacity: capacity)
294294
let rehash = (scale != original._scale)
295-
let newStorage = _SetStorage<Element>.allocate(
296-
scale: scale,
297-
// Invalidate indices if we're rehashing.
298-
age: rehash ? nil : original._age
299-
)
295+
let newStorage: _SetStorage<Element>
296+
if rehash {
297+
// Invalidate indices and generate a new seed.
298+
newStorage = .allocate(scale: scale, age: nil, seed: nil)
299+
} else {
300+
newStorage = .allocate(
301+
scale: scale,
302+
age: original._age,
303+
seed: original._seed)
304+
}
300305
return (newStorage, rehash)
301306
}
302307

303308
@usableFromInline
304309
@_effects(releasenone)
305310
static internal func allocate(capacity: Int) -> _SetStorage {
306311
let scale = _HashTable.scale(forCapacity: capacity)
307-
return allocate(scale: scale, age: nil)
312+
return allocate(scale: scale, age: nil, seed: nil)
308313
}
309314

310315
static internal func allocate(
311316
scale: Int8,
312-
age: Int32?
317+
age: Int32?,
318+
seed: Int?
313319
) -> _SetStorage {
314320
// The entry count must be representable by an Int value; hence the scale's
315321
// peculiar upper bound.
@@ -341,18 +347,9 @@ extension _SetStorage {
341347
truncatingIfNeeded: ObjectIdentifier(storage).hashValue)
342348
}
343349

350+
storage._seed = seed ?? _HashTable.hashSeed(for: storage, scale: scale)
344351
storage._rawElements = UnsafeMutableRawPointer(elementsAddr)
345352

346-
// We use a slightly different hash seed whenever we change the size of the
347-
// hash table, so that we avoid certain copy operations becoming quadratic,
348-
// without breaking value semantics. (For background details, see
349-
// https://bugs.swift.org/browse/SR-3268)
350-
351-
// FIXME: Use true per-instance seeding instead. Per-capacity seeding still
352-
// leaves hash values the same in same-sized tables, which may affect
353-
// operations on two tables at once. (E.g., union.)
354-
storage._seed = Int(scale)
355-
356353
// Initialize hash table metadata.
357354
storage._hashTable.clear()
358355
return storage

0 commit comments

Comments
 (0)