Skip to content

[stdlib] Resolving some FIXME comments on Set type. #20631

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Dec 15, 2018
84 changes: 84 additions & 0 deletions benchmark/single-source/SetTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ let size = 400
let half = size / 2
let quarter = size / 4

// Construction of empty sets.
let setE: Set<Int> = []
let setOE: Set<Box<Int>> = []

// Construction kit for sets with 25% overlap
let setAB = Set(0 ..< size) // 0 ..< 400
let setCD = Set(size ..< 2 * size) // 400 ..< 800
Expand Down Expand Up @@ -53,6 +57,11 @@ let setQ: Set<Int> = {

public let SetTests = [
// Mnemonic: number after name is percentage of common elements in input sets.
BenchmarkInfo(
name: "Set.Empty.IsSubsetInt0",
runFunction: { n in run_SetIsSubsetInt(setE, setAB, true, 5000 * n) },
tags: [.validation, .api, .Set],
setUpFunction: { blackHole([setE, setAB]) }),
BenchmarkInfo(
name: "SetIsSubsetInt0",
runFunction: { n in run_SetIsSubsetInt(setAB, setCD, false, 5000 * n) },
Expand Down Expand Up @@ -84,6 +93,47 @@ public let SetTests = [
tags: [.validation, .api, .Set],
setUpFunction: { blackHole([setP, setQ]) }),

BenchmarkInfo(
name: "Set.Empty.IsDisjointInt0",
runFunction: { n in run_SetIsDisjointInt(setE, setAB, true, 50 * n) },
tags: [.validation, .api, .Set],
setUpFunction: { blackHole([setE, setAB]) }),
BenchmarkInfo(
name: "Set.Empty.IsDisjointBox0",
runFunction: { n in run_SetIsDisjointBox(setOE, setOAB, true, 50 * n) },
tags: [.validation, .api, .Set],
setUpFunction: { blackHole([setOE, setOAB]) }),
BenchmarkInfo(
name: "SetIsDisjointInt0",
runFunction: { n in run_SetIsDisjointInt(setAB, setCD, true, 50 * n) },
tags: [.validation, .api, .Set],
setUpFunction: { blackHole([setAB, setCD]) }),
BenchmarkInfo(
name: "SetIsDisjointBox0",
runFunction: { n in run_SetIsDisjointBox(setOAB, setOCD, true, 50 * n) },
tags: [.validation, .api, .Set],
setUpFunction: { blackHole([setOAB, setOCD]) }),
BenchmarkInfo(
name: "SetIsDisjointInt25",
runFunction: { n in run_SetIsDisjointInt(setB, setAB, false, 50 * n) },
tags: [.validation, .api, .Set],
setUpFunction: { blackHole([setB, setAB]) }),
BenchmarkInfo(
name: "SetIsDisjointBox25",
runFunction: { n in run_SetIsDisjointBox(setOB, setOAB, false, 50 * n) },
tags: [.validation, .api, .Set],
setUpFunction: { blackHole([setOB, setOAB]) }),
BenchmarkInfo(
name: "SetIsDisjointInt50",
runFunction: { n in run_SetIsDisjointInt(setY, setXY, false, 50 * n) },
tags: [.validation, .api, .Set],
setUpFunction: { blackHole([setY, setXY]) }),
BenchmarkInfo(
name: "SetIsDisjointInt100",
runFunction: { n in run_SetIsDisjointInt(setP, setQ, false, 50 * n) },
tags: [.validation, .api, .Set],
setUpFunction: { blackHole([setP, setQ]) }),

BenchmarkInfo(
name: "SetSymmetricDifferenceInt0",
runFunction: { n in run_SetSymmetricDifferenceInt(setAB, setCD, countABCD, 10 * n) },
Expand Down Expand Up @@ -177,6 +227,16 @@ public let SetTests = [
tags: [.validation, .api, .Set],
setUpFunction: { blackHole([setP, setQ]) }),

BenchmarkInfo(
name: "Set.Empty.SubtractingInt0",
runFunction: { n in run_SetSubtractingInt(setE, setAB, 0, 10 * n) },
tags: [.validation, .api, .Set],
setUpFunction: { blackHole([setE, setAB]) }),
BenchmarkInfo(
name: "Set.Empty.SubtractingBox0",
runFunction: { n in run_SetSubtractingBox(setOE, setOAB, 0, 10 * n) },
tags: [.validation, .api, .Set],
setUpFunction: { blackHole([setOE, setOAB]) }),
BenchmarkInfo(
name: "SetSubtractingInt0",
runFunction: { n in run_SetSubtractingInt(setAB, setCD, countAB, 10 * n) },
Expand Down Expand Up @@ -296,6 +356,18 @@ public func run_SetSubtractingInt(
}
}

@inline(never)
public func run_SetIsDisjointInt(
_ a: Set<Int>,
_ b: Set<Int>,
_ r: Bool,
_ n: Int) {
for _ in 0 ..< n {
let isDisjoint = a.isDisjoint(with: identity(b))
CheckResults(isDisjoint == r)
}
}

class Box<T : Hashable> : Hashable {
var value: T

Expand Down Expand Up @@ -371,3 +443,15 @@ func run_SetSubtractingBox(
CheckResults(and.count == r)
}
}

@inline(never)
func run_SetIsDisjointBox(
_ a: Set<Box<Int>>,
_ b: Set<Box<Int>>,
_ r: Bool,
_ n: Int) {
for _ in 0 ..< n {
let isDisjoint = a.isDisjoint(with: identity(b))
CheckResults(isDisjoint == r)
}
}
33 changes: 23 additions & 10 deletions stdlib/public/core/Set.swift
Original file line number Diff line number Diff line change
Expand Up @@ -712,7 +712,8 @@ extension Set: SetAlgebra {
@inlinable
public func isSubset<S: Sequence>(of possibleSuperset: S) -> Bool
where S.Element == Element {
// FIXME(performance): isEmpty fast path, here and elsewhere.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As noted in the comment, there are places "elsewhere" that could use an isEmpty fast path. Before deleting the fixme, it'd be important to identify where those other places are and either implement the fast path or add the fixme comment there.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For sure @xwu 👍
I've checked the methods on Set and added it in all places that I could find it would be possible :))

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’m not so sure you got everything. Wouldn’t isSuperset benefit from an isEmpty fast path?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've check this isSuperset, but didn't think it has benefit, is an interesting case because just because we have two cases there

let s: Set<String> = []

print(s.isSuperset(of: [])) //false
print(s.isSuperset(of: ["A", "B"])) //true

So to add an isEmpty fast path will require, check also possibleSubset for empty too, but since it is a arbitrary sequence it doens't have an isEmpty and as far as I know we could not do this check on underestimatedCount.

Also looking to the implementation(below) no matter what is the size of possibleSubset if self is empty it will return on the first iteration because contains will return false. So there's no performance problem because it will not iterate unnecessarily. Basically, that is the reason I think is not a benefit :))

public func isSuperset<S: Sequence>(of possibleSubset: __owned S) -> Bool
    where S.Element == Element {
    for member in possibleSubset {
      if !contains(member) {
        return false
      }
    }
    return true
  }

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Turns out there was more "elsewhere": isStrictSubset and isStrictSuperset with Sequence parameter could probably also benefit form optimizations for isEmpty cases... see #24156 (comment).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@palimondo Sorry I miss those 😅, I'll take a look and I can patch up a PR later today :))

Copy link
Contributor

@palimondo palimondo May 7, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No problem. Please don't go in there now, #21300 is about to drastically change how that's handled and I'm already looking into how to handle isEmpty cases on top of it… it only pays the cheap creation of _Bitset(capacity: self.bucketCount) and will bail out of the Sequence consuming loop ASAP.

I just couldn't resist noting how @xwu's pedantry usually turns out to be right, even though it feels pretty annoying to be on the receiving end of it (speaking from experience 😉).

guard !isEmpty else { return true }

let other = Set(possibleSuperset)
return isSubset(of: other)
}
Expand Down Expand Up @@ -763,10 +764,12 @@ extension Set: SetAlgebra {
@inlinable
public func isSuperset<S: Sequence>(of possibleSubset: __owned S) -> Bool
where S.Element == Element {
// FIXME(performance): Don't build a set; just ask if every element is in
// `self`.
let other = Set(possibleSubset)
return other.isSubset(of: self)
for member in possibleSubset {
if !contains(member) {
return false
}
}
return true
}

/// Returns a Boolean value that indicates whether the set is a strict
Expand Down Expand Up @@ -811,9 +814,7 @@ extension Set: SetAlgebra {
@inlinable
public func isDisjoint<S: Sequence>(with other: S) -> Bool
where S.Element == Element {
// FIXME(performance): Don't need to build a set.
let otherSet = Set(other)
return isDisjoint(with: otherSet)
return _isDisjoint(with: other)
}

/// Returns a new set with the elements of both this set and the given
Expand Down Expand Up @@ -920,6 +921,10 @@ extension Set: SetAlgebra {
@inlinable
internal mutating func _subtract<S: Sequence>(_ other: S)
where S.Element == Element {
// If self is empty we don't need to iterate over `other` because there's
// nothing to remove on self.
guard !isEmpty else { return }

for item in other {
remove(item)
}
Expand Down Expand Up @@ -1121,8 +1126,16 @@ extension Set {
/// otherwise, `false`.
@inlinable
public func isDisjoint(with other: Set<Element>) -> Bool {
for member in self {
if other.contains(member) {
return _isDisjoint(with: other)
}

@inlinable
internal func _isDisjoint<S: Sequence>(with other: S) -> Bool
where S.Element == Element {
guard !isEmpty else { return true }

for member in other {
if contains(member) {
return false
}
}
Expand Down