Skip to content

[stdlib] Fix handling of duplicate items in generic Set.intersection #59417

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 25 additions & 15 deletions stdlib/public/core/NativeSet.swift
Original file line number Diff line number Diff line change
Expand Up @@ -739,20 +739,29 @@ extension _NativeSet {
internal __consuming func intersection(
_ other: _NativeSet<Element>
) -> _NativeSet<Element> {
// Prefer to iterate over the smaller set. However, we must be careful to
// only include elements from `self`, not `other`.
guard self.count <= other.count else {
return genericIntersection(other)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Note: we could continue forwarding to genericIntersection in the new code, but extra branches in the new variant would've unnecessarily slowed down this code path.)

}
// Rather than directly creating a new set, mark common elements in a bitset
// first. This minimizes hashing, and ensures that we'll have an exact count
// for the result set, preventing rehashings during insertions.
return _UnsafeBitset.withTemporaryBitset(capacity: bucketCount) { bitset in
// Rather than directly creating a new set, mark common elements in a
// bitset first. This minimizes hashing, and ensures that we'll have an
// exact count for the result set, preventing rehashings during
// insertions.
_UnsafeBitset.withTemporaryBitset(capacity: bucketCount) { bitset in
var count = 0
for bucket in hashTable {
if other.find(uncheckedElement(at: bucket)).found {
bitset.uncheckedInsert(bucket.offset)
count += 1
// Prefer to iterate over the smaller set. However, we must be careful to
// only include elements from `self`, not `other`.
if self.count > other.count {
for element in other {
let (bucket, found) = find(element)
if found {
// `other` is a `Set`, so we can assume it doesn't have duplicates.
bitset.uncheckedInsert(bucket.offset)
count += 1
}
}
} else {
for bucket in hashTable {
if other.find(uncheckedElement(at: bucket)).found {
bitset.uncheckedInsert(bucket.offset)
count += 1
}
}
}
return extractSubset(using: bitset, count: count)
Expand All @@ -771,8 +780,9 @@ extension _NativeSet {
var count = 0
for element in other {
let (bucket, found) = find(element)
if found {
bitset.uncheckedInsert(bucket.offset)
// Note: we need to be careful not to increment `count` here if the
// element is a duplicate item.
if found, bitset.uncheckedInsert(bucket.offset) {
count += 1
}
}
Expand Down
19 changes: 19 additions & 0 deletions validation-test/stdlib/SetOperations.swift.gyb
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,25 @@ suite.test("intersection.${inputKind}.${argumentKind}") {
% end
% end

% for inputKind, inputGenerator in inputKinds.items():
% for argumentKind, argumentGenerator in argumentKinds.items():
% needsCocoa = inputKind == "cocoa" or argumentKind == "cocoa"
% if needsCocoa:
#if _runtime(_ObjC)
% end
// Regression test for duplicate handling in `intersection`: the argument
// repeats one element (1) and omits one element of `a` (6), so the number of
// duplicates equals the number of items missing from the argument. The result
// must contain exactly the six shared values, taken from `a` (identity 1).
//
// The input and argument kinds are part of the test name so that every
// generated combination registers under a distinct name, in line with the
// other "intersection" tests in this file.
suite.test("intersection.${inputKind}.${argumentKind}.same-number-of-duplicates-as-missing-items") {
  let a = ${inputGenerator}([3, 6, 0, 1, 5, 2, 4], identity: 1)
  let b = ${argumentGenerator}([0, 1, 1, 2, 3, 4, 5], identity: 2)
  let c = a.intersection(b)
  expectEqual(c.count, 6)
  expectEqual(c, makeNativeSet(0 ..< 6, identity: 1))
}
% if needsCocoa:
#endif
% end
% end
% end

% for inputKind, inputGenerator in inputKinds.items():
% for argumentKind, argumentGenerator in argumentKinds.items():
% needsCocoa = inputKind == "cocoa" or argumentKind == "cocoa"
Expand Down