Skip to content

stdlib: some small improvements for Dictionary and Set #4887

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Sep 21, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions stdlib/public/core/HashedCollections.swift.gyb
Original file line number Diff line number Diff line change
Expand Up @@ -2523,7 +2523,7 @@ final internal class _Native${Self}StorageImpl<${TypeParameters}> {

@_versioned
internal var _capacity: Int {
return _body.capacity
return _assumeNonNegative(_body.capacity)
Copy link
Contributor

@Gankra Gankra Sep 20, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If only there were some way to communicate this in the type system... 🤔

Which remaining assertions is this eliminating after you've moved all these checks to debug-only?

Note from my own digging in the compiler: this is lowered to value ranges on loads (fast), and not llvm.assume (slow). Good!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually can't remember exactly, but there were some "< 0"-checks in llvm code.

}

@_versioned
Expand All @@ -2532,7 +2532,7 @@ final internal class _Native${Self}StorageImpl<${TypeParameters}> {
_body.count = newValue
}
get {
return _body.count
return _assumeNonNegative(_body.count)
}
}

Expand Down Expand Up @@ -2703,7 +2703,7 @@ struct _Native${Self}Storage<${TypeParametersDecl}> :
@_versioned
@inline(__always)
internal func key(at i: Int) -> Key {
_precondition(i >= 0 && i < capacity)
_sanityCheck(i >= 0 && i < capacity)
_sanityCheck(isInitializedEntry(at: i))

let res = (keys + i).pointee
Expand All @@ -2713,7 +2713,7 @@ struct _Native${Self}Storage<${TypeParametersDecl}> :

@_versioned
internal func isInitializedEntry(at i: Int) -> Bool {
_precondition(i >= 0 && i < capacity)
_sanityCheck(i >= 0 && i < capacity)
return initializedEntries[i]
}

Expand Down Expand Up @@ -2747,7 +2747,7 @@ struct _Native${Self}Storage<${TypeParametersDecl}> :
}

internal func setKey(_ key: Key, at i: Int) {
_precondition(i >= 0 && i < capacity)
_sanityCheck(i >= 0 && i < capacity)
_sanityCheck(isInitializedEntry(at: i))

(keys + i).pointee = key
Expand Down Expand Up @@ -2804,8 +2804,9 @@ struct _Native${Self}Storage<${TypeParametersDecl}> :
}

@_versioned
@inline(__always) // For performance reasons.
internal func _bucket(_ k: Key) -> Int {
return _squeezeHashValue(k.hashValue, 0..<capacity)
return _squeezeHashValue(k.hashValue, capacity)
}

@_versioned
Expand Down Expand Up @@ -2933,6 +2934,7 @@ struct _Native${Self}Storage<${TypeParametersDecl}> :
}

internal func assertingGet(_ i: Index) -> SequenceElement {
_precondition(i.offset >= 0 && i.offset < capacity)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this fixing an existing mem-safety bug, or just compensating for the removal of asserts elsewhere?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's compensating for the removed precondition in isInitializedEntry (called in the next line)

_precondition(
isInitializedEntry(at: i.offset),
"attempting to access ${Self} elements using an invalid Index")
Expand Down
50 changes: 15 additions & 35 deletions stdlib/public/core/Hashing.swift
Original file line number Diff line number Diff line change
Expand Up @@ -147,51 +147,31 @@ func _mixInt(_ value: Int) -> Int {
#endif
}

/// Given a hash value, returns an integer value within the given range that
/// corresponds to a hash value.
/// Given a hash value, returns an integer value in the range of
/// 0..<`upperBound` that corresponds to a hash value.
///
/// The `upperBound` must be positive and a power of 2.
///
/// This function is superior to computing the remainder of `hashValue` by
/// the range length. Some types have bad hash functions; sometimes simple
/// patterns in data sets create patterns in hash values and applying the
/// remainder operation just throws away even more information and invites
/// even more hash collisions. This effect is especially bad if the length
/// of the required range is a power of two -- applying the remainder
/// operation just throws away high bits of the hash (which would not be
/// a problem if the hash was known to be good). This function mixes the
/// bits in the hash value to compensate for such cases.
/// even more hash collisions. This effect is especially bad because the
/// range is a power of two, which means to throws away high bits of the hash
/// (which would not be a problem if the hash was known to be good). This
/// function mixes the bits in the hash value to compensate for such cases.
///
/// Of course, this function is a compressing function, and applying it to a
/// hash value does not change anything fundamentally: collisions are still
/// possible, and it does not prevent malicious users from constructing data
/// sets that will exhibit pathological collisions.
public // @testable
func _squeezeHashValue(_ hashValue: Int, _ resultRange: Range<Int>) -> Int {
// Length of a Range<Int> does not fit into an Int, but fits into an UInt.
// An efficient way to compute the length is to rely on two's complement
// arithmetic.
let resultCardinality =
UInt(bitPattern: resultRange.upperBound &- resultRange.lowerBound)

// Calculate the result as `UInt` to handle the case when
// `resultCardinality >= Int.max`.
let unsignedResult =
_squeezeHashValue(hashValue, UInt(0)..<resultCardinality)

// We perform the unchecked arithmetic on `UInt` (instead of doing
// straightforward computations on `Int`) in order to handle the following
// tricky case: `startIndex` is negative, and `resultCardinality >= Int.max`.
// We cannot convert the latter to `Int`.
return
Int(bitPattern:
UInt(bitPattern: resultRange.lowerBound) &+ unsignedResult)
}
func _squeezeHashValue(_ hashValue: Int, _ upperBound: Int) -> Int {
_sanityCheck(_isPowerOf2(upperBound))
let mixedHashValue = _mixInt(hashValue)

public // @testable
func _squeezeHashValue(_ hashValue: Int, _ resultRange: Range<UInt>) -> UInt {
let mixedHashValue = UInt(bitPattern: _mixInt(hashValue))
let resultCardinality: UInt = resultRange.upperBound - resultRange.lowerBound
if _isPowerOf2(resultCardinality) {
return mixedHashValue & (resultCardinality - 1)
}
return resultRange.lowerBound + (mixedHashValue % resultCardinality)
// As `upperBound` is a power of two we can do a bitwise-and to calculate
// mixedHashValue % upperBound.
return mixedHashValue & (upperBound &- 1)
}

13 changes: 13 additions & 0 deletions test/SILOptimizer/no_traps_in_dict_loopup.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// RUN: %target-swift-frontend -O -emit-ir -parse-as-library -primary-file %s | %FileCheck %s
// REQUIRES: swift_stdlib_no_asserts,optimized_stdlib


// A dictionary lookup should not contain any trap. Nothing can go wrong
// from the user's perspective.

// CHECK-NOT: llvm.trap

public func testit(_ s: [Int : Int]) -> Int? {
return s[27]
}

56 changes: 11 additions & 45 deletions validation-test/stdlib/Hashing.swift
Original file line number Diff line number Diff line change
Expand Up @@ -72,54 +72,20 @@ HashingTestSuite.test("_mixInt/GoldenValues") {

HashingTestSuite.test("_squeezeHashValue/Int") {
// Check that the function can return values that cover the whole range.
func checkRange(_ r: Range<Int>) {
var results = [Int : Void]()
for _ in 0..<(14 * (r.upperBound - r.lowerBound)) {
func checkRange(_ r: Int) {
var results = Set<Int>()
for _ in 0..<(14 * r) {
let v = _squeezeHashValue(randInt(), r)
expectTrue(r ~= v)
if results[v] == nil {
results[v] = Void()
}
expectTrue(v < r)
results.insert(v)
}
expectEqual(r.upperBound - r.lowerBound, results.count)
expectEqual(r, results.count)
}
checkRange(Int.min..<(Int.min+10))
checkRange(0..<4)
checkRange(0..<8)
checkRange(-5..<5)
checkRange((Int.max-10)..<(Int.max-1))

// Check that we can handle ranges that span more than `Int.max`.
#if arch(i386) || arch(arm)
expectEqual(-0x6e477d37, _squeezeHashValue(0, Int.min..<(Int.max - 1)))
expectEqual(0x38a3ea26, _squeezeHashValue(2, Int.min..<(Int.max - 1)))
#elseif arch(x86_64) || arch(arm64) || arch(powerpc64) || arch(powerpc64le) || arch(s390x)
expectEqual(0x32b24f688dc4164d, _squeezeHashValue(0, Int.min..<(Int.max - 1)))
expectEqual(-0x6d1cc14f97aa822, _squeezeHashValue(1, Int.min..<(Int.max - 1)))
#else
fatalError("unimplemented")
#endif
}

HashingTestSuite.test("_squeezeHashValue/UInt") {
// Check that the function can return values that cover the whole range.
func checkRange(_ r: Range<UInt>) {
var results = [UInt : Void]()
let cardinality = r.upperBound - r.lowerBound
for _ in 0..<(10*cardinality) {
let v = _squeezeHashValue(randInt(), r)
expectTrue(r ~= v)
if results[v] == nil {
results[v] = Void()
}
}
expectEqual(Int(cardinality), results.count)
}
checkRange(0..<4)
checkRange(0..<8)
checkRange(0..<10)
checkRange(10..<20)
checkRange((UInt.max-10)..<(UInt.max-1))
checkRange(1)
checkRange(2)
checkRange(4)
checkRange(8)
checkRange(16)
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might as well toss a checkRange(1) in here as the ultimate degeneracy test? (just verifies _squeezeHashValue always returns 0)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll add it


HashingTestSuite.test("String/hashValue/topBitsSet") {
Expand Down